Trac 2139 indirectly brought to light the case of a stream that uses a
authorMonty <xiphmont@xiph.org>
Thu, 26 Feb 2015 02:30:33 +0000 (02:30 +0000)
committerMonty <xiphmont@xiph.org>
Thu, 26 Feb 2015 02:30:33 +0000 (02:30 +0000)
single-entry codebook, but does not code a codeword of length 1 equal
to zero.  Such a stream could cause a stream to read garbage.

There is no apparent chance of garbage memory writes as this happen
entirely after decode setup, however there is playback DoS potential.

This commit special cases single-entry codebook setup so that decode
is well-defined for streams with single-entry codebooks, and adds some
comments to make it more clear how the case is handled.

svn path=/trunk/vorbis/; revision=19444

lib/codebook.c
lib/sharedbook.c

index 7ec6311..9076658 100644 (file)
@@ -312,6 +312,12 @@ STIN long decode_packed_entry_number(codebook *book, oggpack_buffer *b){
     hi=book->used_entries;
   }
 
+  /* Single entry codebooks use a firsttablen of 1 and a
+     dec_maxlength of 1.  If a single-entry codebook gets here (due to
+     failure to read one bit above), the next look attempt will also
+     fail and we'll correctly kick out instead of trying to walk the
+     underformed tree */
+
   lok = oggpack_look(b, read);
 
   while(lok<0 && read>1)
index 926b0fb..fa06e22 100644 (file)
@@ -123,16 +123,15 @@ ogg_uint32_t *_make_words(char *l,long n,long sparsecount){
       if(sparsecount==0)count++;
   }
 
-  /* sanity check the huffman tree; an underpopulated tree must be
-     rejected. The only exception is the one-node pseudo-nil tree,
-     which appears to be underpopulated because the tree doesn't
-     really exist; there's only one possible 'codeword' or zero bits,
-     but the above tree-gen code doesn't mark that. */
-  if(sparsecount != 1){
+  /* any underpopulated tree must be rejected. */
+  /* Single-entry codebooks are a retconned extension to the spec.
+     They have a single codeword '0' of length 1 that results in an
+     underpopulated tree.  Shield that case from the underformed tree check. */
+  if(!(count==1 && marker[2]==2)){
     for(i=1;i<33;i++)
       if(marker[i] & (0xffffffffUL>>(32-i))){
-       _ogg_free(r);
-       return(NULL);
+        _ogg_free(r);
+        return(NULL);
       }
   }
 
@@ -311,9 +310,10 @@ static int sort32a(const void *a,const void *b){
 int vorbis_book_init_decode(codebook *c,const static_codebook *s){
   int i,j,n=0,tabn;
   int *sortindex;
+
   memset(c,0,sizeof(*c));
 
-  /* count actually used entries */
+  /* count actually used entries and find max length */
   for(i=0;i<s->entries;i++)
     if(s->lengthlist[i]>0)
       n++;
@@ -323,7 +323,6 @@ int vorbis_book_init_decode(codebook *c,const static_codebook *s){
   c->dim=s->dim;
 
   if(n>0){
-
     /* two different remappings go on here.
 
     First, we collapse the likely sparse codebook down only to
@@ -359,7 +358,6 @@ int vorbis_book_init_decode(codebook *c,const static_codebook *s){
       c->codelist[sortindex[i]]=codes[i];
     _ogg_free(codes);
 
-
     c->valuelist=_book_unquantize(s,n,sortindex);
     c->dec_index=_ogg_malloc(n*sizeof(*c->dec_index));
 
@@ -368,51 +366,62 @@ int vorbis_book_init_decode(codebook *c,const static_codebook *s){
         c->dec_index[sortindex[n++]]=i;
 
     c->dec_codelengths=_ogg_malloc(n*sizeof(*c->dec_codelengths));
+    c->dec_maxlength=0;
     for(n=0,i=0;i<s->entries;i++)
-      if(s->lengthlist[i]>0)
+      if(s->lengthlist[i]>0){
         c->dec_codelengths[sortindex[n++]]=s->lengthlist[i];
+        if(s->lengthlist[i]>c->dec_maxlength)
+          c->dec_maxlength=s->lengthlist[i];
+      }
 
-    c->dec_firsttablen=ov_ilog(c->used_entries)-4; /* this is magic */
-    if(c->dec_firsttablen<5)c->dec_firsttablen=5;
-    if(c->dec_firsttablen>8)c->dec_firsttablen=8;
-
-    tabn=1<<c->dec_firsttablen;
-    c->dec_firsttable=_ogg_calloc(tabn,sizeof(*c->dec_firsttable));
-    c->dec_maxlength=0;
+    if(n==1 && c->dec_maxlength==1){
+      /* special case the 'single entry codebook' with a single bit
+       fastpath table (that always returns entry 0 )in order to use
+       unmodified decode paths. */
+      c->dec_firsttablen=1;
+      c->dec_firsttable=_ogg_calloc(2,sizeof(*c->dec_firsttable));
+      c->dec_firsttable[0]=c->dec_firsttable[1]=1;
 
-    for(i=0;i<n;i++){
-      if(c->dec_maxlength<c->dec_codelengths[i])
-        c->dec_maxlength=c->dec_codelengths[i];
-      if(c->dec_codelengths[i]<=c->dec_firsttablen){
-        ogg_uint32_t orig=bitreverse(c->codelist[i]);
-        for(j=0;j<(1<<(c->dec_firsttablen-c->dec_codelengths[i]));j++)
-          c->dec_firsttable[orig|(j<<c->dec_codelengths[i])]=i+1;
+    }else{
+      c->dec_firsttablen=ov_ilog(c->used_entries)-4; /* this is magic */
+      if(c->dec_firsttablen<5)c->dec_firsttablen=5;
+      if(c->dec_firsttablen>8)c->dec_firsttablen=8;
+
+      tabn=1<<c->dec_firsttablen;
+      c->dec_firsttable=_ogg_calloc(tabn,sizeof(*c->dec_firsttable));
+
+      for(i=0;i<n;i++){
+        if(c->dec_codelengths[i]<=c->dec_firsttablen){
+          ogg_uint32_t orig=bitreverse(c->codelist[i]);
+          for(j=0;j<(1<<(c->dec_firsttablen-c->dec_codelengths[i]));j++)
+            c->dec_firsttable[orig|(j<<c->dec_codelengths[i])]=i+1;
+        }
       }
-    }
 
-    /* now fill in 'unused' entries in the firsttable with hi/lo search
-       hints for the non-direct-hits */
-    {
-      ogg_uint32_t mask=0xfffffffeUL<<(31-c->dec_firsttablen);
-      long lo=0,hi=0;
-
-      for(i=0;i<tabn;i++){
-        ogg_uint32_t word=i<<(32-c->dec_firsttablen);
-        if(c->dec_firsttable[bitreverse(word)]==0){
-          while((lo+1)<n && c->codelist[lo+1]<=word)lo++;
-          while(    hi<n && word>=(c->codelist[hi]&mask))hi++;
-
-          /* we only actually have 15 bits per hint to play with here.
-             In order to overflow gracefully (nothing breaks, efficiency
-             just drops), encode as the difference from the extremes. */
-          {
-            unsigned long loval=lo;
-            unsigned long hival=n-hi;
-
-            if(loval>0x7fff)loval=0x7fff;
-            if(hival>0x7fff)hival=0x7fff;
-            c->dec_firsttable[bitreverse(word)]=
-              0x80000000UL | (loval<<15) | hival;
+      /* now fill in 'unused' entries in the firsttable with hi/lo search
+         hints for the non-direct-hits */
+      {
+        ogg_uint32_t mask=0xfffffffeUL<<(31-c->dec_firsttablen);
+        long lo=0,hi=0;
+
+        for(i=0;i<tabn;i++){
+          ogg_uint32_t word=i<<(32-c->dec_firsttablen);
+          if(c->dec_firsttable[bitreverse(word)]==0){
+            while((lo+1)<n && c->codelist[lo+1]<=word)lo++;
+            while(    hi<n && word>=(c->codelist[hi]&mask))hi++;
+
+            /* we only actually have 15 bits per hint to play with here.
+               In order to overflow gracefully (nothing breaks, efficiency
+               just drops), encode as the difference from the extremes. */
+            {
+              unsigned long loval=lo;
+              unsigned long hival=n-hi;
+
+              if(loval>0x7fff)loval=0x7fff;
+              if(hival>0x7fff)hival=0x7fff;
+              c->dec_firsttable[bitreverse(word)]=
+                0x80000000UL | (loval<<15) | hival;
+            }
           }
         }
       }