// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
+#nullable enable
using System.Diagnostics;
+using System.Threading;
namespace System.Net.Http.HPack
{
- internal class Huffman
+ internal static class Huffman
{
- // TODO: this can be constructed from _decodingTable
+ // HPack static huffman code; each entry is (code left-aligned in a 32-bit word, code length in bits). See: https://httpwg.org/specs/rfc7541.html#huffman.code
private static readonly (uint code, int bitLength)[] _encodingTable = new (uint code, int bitLength)[]
{
(0b11111111_11000000_00000000_00000000, 13),
(0b11111111_11111111_11111111_11111100, 30)
};
- private static readonly (int codeLength, int[] codes)[] _decodingTable = new[]
- {
- (5, new[] { 48, 49, 50, 97, 99, 101, 105, 111, 115, 116 }),
- (6, new[] { 32, 37, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 61, 65, 95, 98, 100, 102, 103, 104, 108, 109, 110, 112, 114, 117 }),
- (7, new[] { 58, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 89, 106, 107, 113, 118, 119, 120, 121, 122 }),
- (8, new[] { 38, 42, 44, 59, 88, 90 }),
- (10, new[] { 33, 34, 40, 41, 63 }),
- (11, new[] { 39, 43, 124 }),
- (12, new[] { 35, 62 }),
- (13, new[] { 0, 36, 64, 91, 93, 126 }),
- (14, new[] { 94, 125 }),
- (15, new[] { 60, 96, 123 }),
- (19, new[] { 92, 195, 208 }),
- (20, new[] { 128, 130, 131, 162, 184, 194, 224, 226 }),
- (21, new[] { 153, 161, 167, 172, 176, 177, 179, 209, 216, 217, 227, 229, 230 }),
- (22, new[] { 129, 132, 133, 134, 136, 146, 154, 156, 160, 163, 164, 169, 170, 173, 178, 181, 185, 186, 187, 189, 190, 196, 198, 228, 232, 233 }),
- (23, new[] { 1, 135, 137, 138, 139, 140, 141, 143, 147, 149, 150, 151, 152, 155, 157, 158, 165, 166, 168, 174, 175, 180, 182, 183, 188, 191, 197, 231, 239 }),
- (24, new[] { 9, 142, 144, 145, 148, 159, 171, 206, 215, 225, 236, 237 }),
- (25, new[] { 199, 207, 234, 235 }),
- (26, new[] { 192, 193, 200, 201, 202, 205, 210, 213, 218, 219, 238, 240, 242, 243, 255 }),
- (27, new[] { 203, 204, 211, 212, 214, 221, 222, 223, 241, 244, 245, 246, 247, 248, 250, 251, 252, 253, 254 }),
- (28, new[] { 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 220, 249 }),
- (30, new[] { 10, 13, 22, 256 })
- };
+ private static readonly ushort[] s_decodingTree = GenerateDecodingLookupTree(); // Flattened tree of 256-entry lookup tables; see GenerateDecodingLookupTree() for the layout.
public static (uint encoded, int bitLength) Encode(int data)
{
return _encodingTable[data];
}
+ private static ushort[] GenerateDecodingLookupTree()
+ {
+ // Decoding lookup tree is a tree of 8 bit lookup tables stored in
+ // one dimensional array of ushort to reduce allocations.
+ // The first 256 ushorts are the lookup table with index 0, the next 256 ushorts are the lookup table with index 1, etc.
+ // lookup_value = decodingTree[(lookup_table_index << 8) + lookup_index]
+
+ // lookup_index is the next 8 bits of the huffman code. If there are fewer than 8 bits left in the source,
+ // lookup_index MUST be aligned to the MSB of those 8 bits, with the remaining LSB bits set to anything (zeros are recommended).
+
+ // Lookup value is encoded in ushort as one of the following two forms:
+ // -----------------------------------------------------------------
+ // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+ // | 1 | next_lookup_table_index | not_used |
+ // +---+---------------------------+-------------------------------+
+ // or
+ // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+ // | 0 | number_of_used_bits | octet |
+ // +---+---------------------------+-------------------------------+
+
+ // Bit 15 unset indicates a leaf value of decoding tree.
+ // For example value 0x0241 means that we have reached end of huffman code
+ // with result byte 0x41 'A' and from lookup bits only the leftmost (most significant) 2 bits were used
+ // and rest of bits are part of next huffman code.
+
+ // Bit 15 set indicates that code is not yet decoded and next lookup table index shall be used
+ // for the next (up to 8) bits of huffman code.
+ // 0 in 'next lookup table index' is considered a decoding error - invalid huffman code.
+
+ // Because HPack uses static huffman code defined in RFC https://httpwg.org/specs/rfc7541.html#huffman.code
+ // the decoding lookup tree generated for this huffman code is guaranteed to consist of exactly 15 lookup tables.
+ var decodingTree = new ushort[15 * 256];
+
+ int allocatedLookupTableIndex = 0;
+ // Create a traversal path for every symbol 0..256; 256 is EOS, see: http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
+ for (int octet = 0; octet <= 256; octet++)
+ {
+ (uint code, int bitLength) = Encode(octet);
+
+ int lookupTableIndex = 0;
+ int bitsLeft = bitLength;
+ while (bitsLeft > 0)
+ {
+ // Read the next 8 bits from the huffman code (the code is left-aligned in the 32-bit word).
+ int indexInLookupTable = (int)(code >> (32 - 8));
+
+ if (bitsLeft <= 8)
+ {
+ // Reached last lookup table for this huffman code.
+
+ // Identical lookup value has to be stored for every combination of unused bits.
+ // For example: the last 4 bits of a 12 bit code could be looked up during decoding like this:
+ // ---------------------------------
+ // 7 6 5 4 3 2 1 0
+ // +---+---+---+---+---+---+---+---+
+ // |last_code_bits | next_code_bits|
+ // +-------------------------------+
+ // next_code_bits are 'random' bits of next huffman code, so in order for lookup
+ // to work, the lookup value has to be stored for every value of the 4 unused bits, in this case for suffix 0..15
+ int suffixCount = 1 << (8 - bitsLeft);
+ for (int suffix = 0; suffix < suffixCount; suffix++)
+ {
+ if (octet == 256)
+ {
+ // EOS (in our case 256) has special meaning in HPack static huffman code
+ // see: http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
+ // > A Huffman-encoded string literal containing the EOS symbol MUST be treated as a decoding error.
+ // To force decoding error we store 0 as 'next lookup table index' which MUST be treated as decoding error.
+
+ // Invalid huffman code - EOS
+ // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+ // | 1 | 0 0 0 0 0 0 0 | 1 1 1 1 1 1 1 1 |
+ // +---+---------------------------+-------------------------------+
+ decodingTree[(lookupTableIndex << 8) + (indexInLookupTable | suffix)] = 0x80ff;
+ }
+ else
+ {
+ // Leaf lookup value
+ // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+ // | 0 | number_of_used_bits | octet |
+ // +---+---------------------------+-------------------------------+
+ decodingTree[(lookupTableIndex << 8) + (indexInLookupTable | suffix)] = (ushort)((bitsLeft << 8) | octet);
+ }
+ }
+ }
+ else
+ {
+ // More than 8 bits left in huffman code means that we need to traverse to another lookup table for next 8 bits
+ ushort lookupValue = decodingTree[(lookupTableIndex << 8) + indexInLookupTable];
+
+ // Because next_lookup_table_index can not be 0, as 0 is index of root table, default value of array element
+ // means that we have not initialized it yet => lookup table MUST be allocated and its index assigned to that lookup value
+ // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+ // | 1 | next_lookup_table_index | not_used |
+ // +---+---------------------------+-------------------------------+
+ if (lookupValue == 0)
+ {
+ ++allocatedLookupTableIndex;
+ decodingTree[(lookupTableIndex << 8) + indexInLookupTable] = (ushort)((0x80 | allocatedLookupTableIndex) << 8);
+ lookupTableIndex = allocatedLookupTableIndex;
+ }
+ else
+ {
+ lookupTableIndex = (lookupValue & 0x7f00) >> 8;
+ }
+ }
+
+ bitsLeft -= 8;
+ code <<= 8;
+ }
+ }
+
+ return decodingTree;
+ }
+
/// <summary>
/// Decodes a Huffman encoded string from a byte array.
/// </summary>
/// <returns>The number of decoded symbols.</returns>
public static int Decode(ReadOnlySpan<byte> src, ref byte[] dstArray)
{
+ // The code below implements the decoding logic for HPack huffman encoded string literal values.
+ // https://httpwg.org/specs/rfc7541.html#string.literal.representation
+ //
+ // To decode a symbol, we traverse the decoding lookup table tree by 8 bits for each lookup
+ // until we find a leaf - which contains the decoded symbol (octet).
+ //
+ // See comments in GenerateDecodingLookupTree() describing the decoding table.
+
Span<byte> dst = dstArray;
Debug.Assert(dst != null && dst.Length > 0);
+ ushort[] decodingTree = s_decodingTree;
+
+ int lookupTableIndex = 0;
+ int lookupIndex;
+
+ uint acc = 0;
+ int bitsInAcc = 0;
+
int i = 0;
int j = 0;
- int lastDecodedBits = 0;
while (i < src.Length)
{
- // Note that if lastDecodeBits is 3 or more, then we will only get 5 bits (or less)
- // from src[i]. Thus we need to read 5 bytes here to ensure that we always have
- // at least 30 bits available for decoding.
- // TODO https://github.com/dotnet/runtime/issues/1506:
- // Rework this as part of Huffman perf improvements
- uint next = (uint)(src[i] << 24 + lastDecodedBits);
- next |= (i + 1 < src.Length ? (uint)(src[i + 1] << 16 + lastDecodedBits) : 0);
- next |= (i + 2 < src.Length ? (uint)(src[i + 2] << 8 + lastDecodedBits) : 0);
- next |= (i + 3 < src.Length ? (uint)(src[i + 3] << lastDecodedBits) : 0);
- next |= (i + 4 < src.Length ? (uint)(src[i + 4] >> (8 - lastDecodedBits)) : 0);
-
- uint ones = (uint)(int.MinValue >> (8 - lastDecodedBits - 1));
- if (i == src.Length - 1 && lastDecodedBits > 0 && (next & ones) == ones)
- {
- // The remaining 7 or less bits are all 1, which is padding.
- // We specifically check that lastDecodedBits > 0 because padding
- // longer than 7 bits should be treated as a decoding error.
- // http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
- break;
- }
-
- // The longest possible symbol size is 30 bits. If we're at the last 4 bytes
- // of the input, we need to make sure we pass the correct number of valid bits
- // left, otherwise the trailing 0s in next may form a valid symbol.
- int validBits = Math.Min(30, (8 - lastDecodedBits) + (src.Length - i - 1) * 8);
- int ch = DecodeValue(next, validBits, out int decodedBits);
+ // Load the next 8 bits of src into the accumulator.
+ acc <<= 8;
+ acc |= src[i++];
+ bitsInAcc += 8;
- if (ch == -1)
- {
- // No valid symbol could be decoded with the bits in next
- throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
- }
- else if (ch == 256)
+ // Decode bits in the accumulator while at least 8 bits are available for a lookup.
+ do
{
- // A Huffman-encoded string literal containing the EOS symbol MUST be treated as a decoding error.
- // http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
- throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
- }
+ lookupIndex = (byte)(acc >> (bitsInAcc - 8));
- if (j == dst.Length)
- {
- Array.Resize(ref dstArray, dst.Length * 2);
- dst = dstArray;
- }
-
- dst[j++] = (byte)ch;
+ int lookupValue = decodingTree[(lookupTableIndex << 8) + lookupIndex];
- // If we crossed a byte boundary, advance i so we start at the next byte that's not fully decoded.
- lastDecodedBits += decodedBits;
- i += lastDecodedBits / 8;
+ if (lookupValue < 0x80_00)
+ {
+ // Octet found.
+ // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+ // | 0 | number_of_used_bits | octet |
+ // +---+---------------------------+-------------------------------+
+ if (j == dst.Length)
+ {
+ Array.Resize(ref dstArray, dst.Length * 2);
+ dst = dstArray;
+ }
+ dst[j++] = (byte)lookupValue;
- // Modulo 8 since we only care about how many bits were decoded in the last byte that we processed.
- lastDecodedBits %= 8;
+ // Start lookup of the next symbol from the root table; only the used bits are consumed.
+ lookupTableIndex = 0;
+ bitsInAcc -= lookupValue >> 8;
+ }
+ else
+ {
+ // Traverse to next lookup table.
+ // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+ // | 1 | next_lookup_table_index | not_used |
+ // +---+---------------------------+-------------------------------+
+ lookupTableIndex = (lookupValue & 0x7f00) >> 8;
+ if (lookupTableIndex == 0)
+ {
+ // No valid symbol could be decoded or EOS was decoded
+ throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
+ }
+ bitsInAcc -= 8;
+ }
+ } while (bitsInAcc >= 8);
}
- return j;
- }
+ // Finish decoding last < 8 bits of src.
+ // Processing of the last byte has to handle several corner cases
+ // so it's extracted outside of the main loop for performance reasons.
+ while (bitsInAcc > 0)
+ {
+ Debug.Assert(bitsInAcc < 8);
- /// <summary>
- /// Decodes a single symbol from a 32-bit word.
- /// </summary>
- /// <param name="data">A 32-bit word containing a Huffman encoded symbol.</param>
- /// <param name="validBits">
- /// The number of bits in <paramref name="data"/> that may contain an encoded symbol.
- /// This is not the exact number of bits that encode the symbol. Instead, it prevents
- /// decoding the lower bits of <paramref name="data"/> if they don't contain any
- /// encoded data.
- /// </param>
- /// <param name="decodedBits">The number of bits decoded from <paramref name="data"/>.</param>
- /// <returns>The decoded symbol.</returns>
- internal static int DecodeValue(uint data, int validBits, out int decodedBits)
- {
- // The code below implements the decoding logic for a canonical Huffman code.
- //
- // To decode a symbol, we scan the decoding table, which is sorted by ascending symbol bit length.
- // For each bit length b, we determine the maximum b-bit encoded value, plus one (that is codeMax).
- // This is done with the following logic:
- //
- // if we're at the first entry in the table,
- // codeMax = the # of symbols encoded in b bits
- // else,
- // left-shift codeMax by the difference between b and the previous entry's bit length,
- // then increment codeMax by the # of symbols encoded in b bits
- //
- // Next, we look at the value v encoded in the highest b bits of data. If v is less than codeMax,
- // those bits correspond to a Huffman encoded symbol. We find the corresponding decoded
- // symbol in the list of values associated with bit length b in the decoding table by indexing it
- // with codeMax - v.
+ // Check for correct EOS, which is padding with ones till end of byte
+ // when we STARTED new huffman code in last 8 bits (lookupTableIndex was reset to 0 -> root lookup table).
+ if (lookupTableIndex == 0)
+ {
+ // Check if all remaining bits are ones.
+ uint ones = uint.MaxValue >> (32 - bitsInAcc);
+ if ((acc & ones) == ones)
+ {
+ // It is a valid EOS padding. See: http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
+ break;
+ }
+ }
- int codeMax = 0;
+ // Lookup index has to be 8 bits wide with the remaining source bits aligned to MSB (low bits are zero-filled by the shift).
+ lookupIndex = (byte)(acc << (8 - bitsInAcc));
- for (int i = 0; i < _decodingTable.Length && _decodingTable[i].codeLength <= validBits; i++)
- {
- (int codeLength, int[] codes) = _decodingTable[i];
+ int lookupValue = decodingTree[(lookupTableIndex << 8) + lookupIndex];
- if (i > 0)
+ if (lookupValue < 0x80_00)
{
- codeMax <<= codeLength - _decodingTable[i - 1].codeLength;
- }
+ // Octet found.
+ // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+ // | 0 | number_of_used_bits | octet |
+ // +---+---------------------------+-------------------------------+
+ bitsInAcc -= lookupValue >> 8;
- codeMax += codes.Length;
+ if (bitsInAcc < 0)
+ {
+ // Last looked up code had more bits than were left in accumulator, which indicates invalid or incomplete source
+ throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
+ }
- int mask = int.MinValue >> (codeLength - 1);
- long masked = (data & mask) >> (32 - codeLength);
+ if (j == dst.Length)
+ {
+ Array.Resize(ref dstArray, dst.Length * 2);
+ dst = dstArray;
+ }
+ dst[j++] = (byte)lookupValue;
- if (masked < codeMax)
+ // Set table index to root - start of new huffman code.
+ lookupTableIndex = 0;
+ }
+ else
{
- decodedBits = codeLength;
- return codes[codes.Length - (codeMax - masked)];
+ // Src was depleted in the middle of the lookup tree or EOS was decoded.
+ throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
}
}
- decodedBits = 0;
- return -1;
+ if (lookupTableIndex != 0)
+ {
+ // Finished in middle of traversing - no valid symbol could be decoded
+ // or too long EOS padding (more than 7 bits). See: http://httpwg.org/specs/rfc7541.html#rfc.section.5.2
+ throw new HuffmanDecodingException(SR.net_http_hpack_huffman_decode_failed);
+ }
+
+ return j;
}
}
}