package brotli

/* Copyright 2013 Google Inc. All Rights Reserved.
   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Functions to estimate the bit cost of Huffman trees. */

/* shannonEntropy computes the Shannon entropy, in bits, of the counts in
   population[:size] and stores their sum in *total. */
func shannonEntropy(population []uint32, size uint, total *uint) float64 {
	var sum uint = 0
	var retval float64 = 0
	var population_end []uint32 = population[size:]
	var p uint
	for -cap(population) < -cap(population_end) {
		p = uint(population[0])
		population = population[1:]
		sum += p
		retval -= float64(p) * fastLog2(p)
	}

	if sum != 0 {
		retval += float64(sum) * fastLog2(sum)
	}

	*total = sum
	return retval
}

/* bitsEntropy returns the Shannon entropy of population[:size], raised to at
   least one bit per counted symbol. */
func bitsEntropy(population []uint32, size uint) float64 {
	var sum uint
	var retval float64 = shannonEntropy(population, size, &sum)
	if retval < float64(sum) {
		/* At least one bit per literal is needed. */
		retval = float64(sum)
	}

	return retval
}

/* Fixed cost components, in bits, used for histograms with at most four
   distinct symbols. */
const kOneSymbolHistogramCost float64 = 12
const kTwoSymbolHistogramCost float64 = 20
const kThreeSymbolHistogramCost float64 = 28
const kFourSymbolHistogramCost float64 = 37

/* populationCostLiteral estimates how many bits are needed to encode the
   literals counted in histogram with a Huffman code, including an estimate of
   the cost of transmitting the code itself. */
func populationCostLiteral(histogram *histogramLiteral) float64 {
	var data_size uint = histogramDataSizeLiteral()
	var count int = 0
	var s [5]uint
	var bits float64 = 0.0
	var i uint
	if histogram.total_count_ == 0 {
		return kOneSymbolHistogramCost
	}

	for i = 0; i < data_size; i++ {
		if histogram.data_[i] > 0 {
			s[count] = i
			count++
			if count > 4 {
				break
			}
		}
	}

	if count == 1 {
		return kOneSymbolHistogramCost
	}

	if count == 2 {
		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
	}

	if count == 3 {
		var histo0 uint32 = histogram.data_[s[0]]
		var histo1 uint32 = histogram.data_[s[1]]
		var histo2 uint32 = histogram.data_[s[2]]
		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
	}

	if count == 4 {
		var histo [4]uint32
		var h23 uint32
		var histomax uint32
		for i = 0; i < 4; i++ {
			histo[i] = histogram.data_[s[i]]
		}

		/* Sort */
		for i = 0; i < 4; i++ {
			var j uint
			for j = i + 1; j < 4; j++ {
				if histo[j] > histo[i] {
					var tmp uint32 = histo[j]
					histo[j] = histo[i]
					histo[i] = tmp
				}
			}
		}

		h23 = histo[2] + histo[3]
		histomax = brotli_max_uint32_t(h23, histo[0])
		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
	}
	{
		var max_depth uint = 1
		var depth_histo = [codeLengthCodes]uint32{0}

		/* In this loop we compute the entropy of the histogram and simultaneously
		   build a simplified histogram of the code length codes where we use the
		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
		var log2total float64 = fastLog2(histogram.total_count_)
		for i = 0; i < data_size; {
			if histogram.data_[i] > 0 {
				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
				   = log2(total_count) - log2(count(symbol)) */
				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))

				/* Approximate the bit depth by round(-log2(P(symbol))). */
				var depth uint = uint(log2p + 0.5)
				bits += float64(histogram.data_[i]) * log2p
				if depth > 15 {
					depth = 15
				}

				if depth > max_depth {
					max_depth = depth
				}

				depth_histo[depth]++
				i++
			} else {
				/* Compute the run length of zeros and add the appropriate number of 0
				   and 17 code length codes to the code length code histogram. */
				var reps uint32 = 1
				var k uint
				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
					reps++
				}

				i += uint(reps)
				if i == data_size {
					/* Don't add any cost for the last zero run, since these are encoded
					   only implicitly. */
					break
				}

				if reps < 3 {
					depth_histo[0] += reps
				} else {
					reps -= 2
					for reps > 0 {
						depth_histo[repeatZeroCodeLength]++

						/* Add the 3 extra bits for the 17 code length code. */
						bits += 3

						reps >>= 3
					}
				}
			}
		}

		/* Add the estimated encoding cost of the code length code histogram. */
		bits += float64(18 + 2*max_depth)

		/* Add the entropy of the code length code histogram. */
		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
	}

	return bits
}
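
/*
   A worked example of the two-symbol special case in populationCostLiteral:
   with exactly two distinct literals, the estimate is the fixed
   kTwoSymbolHistogramCost plus one bit per counted symbol. The helper name and
   the sample counts below are illustrative assumptions; histogramLiteral, its
   data_ and total_count_ fields, and populationCostLiteral are the ones
   defined above.
*/
func exampleTwoLiteralCost() float64 {
	var h histogramLiteral
	h.data_['a'] = 60
	h.data_['b'] = 40
	h.total_count_ = 100

	/* Two distinct symbols, so the result is
	   kTwoSymbolHistogramCost + total_count_ = 20 + 100 = 120 bits. */
	return populationCostLiteral(&h)
}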

/* populationCostCommand is the same bit cost estimate as populationCostLiteral,
   specialized to command histograms. */
func populationCostCommand(histogram *histogramCommand) float64 {
	var data_size uint = histogramDataSizeCommand()
	var count int = 0
	var s [5]uint
	var bits float64 = 0.0
	var i uint
	if histogram.total_count_ == 0 {
		return kOneSymbolHistogramCost
	}

	for i = 0; i < data_size; i++ {
		if histogram.data_[i] > 0 {
			s[count] = i
			count++
			if count > 4 {
				break
			}
		}
	}

	if count == 1 {
		return kOneSymbolHistogramCost
	}

	if count == 2 {
		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
	}

	if count == 3 {
		var histo0 uint32 = histogram.data_[s[0]]
		var histo1 uint32 = histogram.data_[s[1]]
		var histo2 uint32 = histogram.data_[s[2]]
		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
	}

	if count == 4 {
		var histo [4]uint32
		var h23 uint32
		var histomax uint32
		for i = 0; i < 4; i++ {
			histo[i] = histogram.data_[s[i]]
		}

		/* Sort */
		for i = 0; i < 4; i++ {
			var j uint
			for j = i + 1; j < 4; j++ {
				if histo[j] > histo[i] {
					var tmp uint32 = histo[j]
					histo[j] = histo[i]
					histo[i] = tmp
				}
			}
		}

		h23 = histo[2] + histo[3]
		histomax = brotli_max_uint32_t(h23, histo[0])
		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
	}
	{
		var max_depth uint = 1
		var depth_histo = [codeLengthCodes]uint32{0}

		/* In this loop we compute the entropy of the histogram and simultaneously
		   build a simplified histogram of the code length codes where we use the
		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
		var log2total float64 = fastLog2(histogram.total_count_)
		for i = 0; i < data_size; {
			if histogram.data_[i] > 0 {
				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
				   = log2(total_count) - log2(count(symbol)) */
				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))

				/* Approximate the bit depth by round(-log2(P(symbol))). */
				var depth uint = uint(log2p + 0.5)
				bits += float64(histogram.data_[i]) * log2p
				if depth > 15 {
					depth = 15
				}

				if depth > max_depth {
					max_depth = depth
				}

				depth_histo[depth]++
				i++
			} else {
				/* Compute the run length of zeros and add the appropriate number of 0
				   and 17 code length codes to the code length code histogram. */
				var reps uint32 = 1
				var k uint
				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
					reps++
				}

				i += uint(reps)
				if i == data_size {
					/* Don't add any cost for the last zero run, since these are encoded
					   only implicitly. */
					break
				}

				if reps < 3 {
					depth_histo[0] += reps
				} else {
					reps -= 2
					for reps > 0 {
						depth_histo[repeatZeroCodeLength]++

						/* Add the 3 extra bits for the 17 code length code. */
						bits += 3

						reps >>= 3
					}
				}
			}
		}

		/* Add the estimated encoding cost of the code length code histogram. */
		bits += float64(18 + 2*max_depth)

		/* Add the entropy of the code length code histogram. */
		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
	}

	return bits
}
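
/*
   An illustrative sketch of the zero-run accounting used in the population
   cost functions above: a run of reps zero counts is charged either as
   individual zero code lengths (short runs) or as repeated uses of the zero
   repeat code 17, each carrying 3 extra bits. The helper only mirrors the
   reps -= 2 / reps >>= 3 loop above; its name is an illustrative assumption.
*/
func zeroRunRepeatCodes(reps uint32) (repeatCodes uint32, extraBits float64) {
	/* Runs shorter than 3 are charged above as individual zero code lengths
	   and use no repeat codes. */
	if reps < 3 {
		return 0, 0
	}

	reps -= 2
	for reps > 0 {
		repeatCodes++  /* one more code length code 17 */
		extraBits += 3 /* its 3 extra bits */
		reps >>= 3
	}

	return repeatCodes, extraBits
}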

/* populationCostDistance is the same bit cost estimate as populationCostLiteral,
   specialized to distance histograms. */
func populationCostDistance(histogram *histogramDistance) float64 {
	var data_size uint = histogramDataSizeDistance()
	var count int = 0
	var s [5]uint
	var bits float64 = 0.0
	var i uint
	if histogram.total_count_ == 0 {
		return kOneSymbolHistogramCost
	}

	for i = 0; i < data_size; i++ {
		if histogram.data_[i] > 0 {
			s[count] = i
			count++
			if count > 4 {
				break
			}
		}
	}

	if count == 1 {
		return kOneSymbolHistogramCost
	}

	if count == 2 {
		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
	}

	if count == 3 {
		var histo0 uint32 = histogram.data_[s[0]]
		var histo1 uint32 = histogram.data_[s[1]]
		var histo2 uint32 = histogram.data_[s[2]]
		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
	}

	if count == 4 {
		var histo [4]uint32
		var h23 uint32
		var histomax uint32
		for i = 0; i < 4; i++ {
			histo[i] = histogram.data_[s[i]]
		}

		/* Sort */
		for i = 0; i < 4; i++ {
			var j uint
			for j = i + 1; j < 4; j++ {
				if histo[j] > histo[i] {
					var tmp uint32 = histo[j]
					histo[j] = histo[i]
					histo[i] = tmp
				}
			}
		}

		h23 = histo[2] + histo[3]
		histomax = brotli_max_uint32_t(h23, histo[0])
		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
	}
	{
		var max_depth uint = 1
		var depth_histo = [codeLengthCodes]uint32{0}

		/* In this loop we compute the entropy of the histogram and simultaneously
		   build a simplified histogram of the code length codes where we use the
		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
		var log2total float64 = fastLog2(histogram.total_count_)
		for i = 0; i < data_size; {
			if histogram.data_[i] > 0 {
				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
				   = log2(total_count) - log2(count(symbol)) */
				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))

				/* Approximate the bit depth by round(-log2(P(symbol))). */
				var depth uint = uint(log2p + 0.5)
				bits += float64(histogram.data_[i]) * log2p
				if depth > 15 {
					depth = 15
				}

				if depth > max_depth {
					max_depth = depth
				}

				depth_histo[depth]++
				i++
			} else {
				/* Compute the run length of zeros and add the appropriate number of 0
				   and 17 code length codes to the code length code histogram. */
				var reps uint32 = 1
				var k uint
				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
					reps++
				}

				i += uint(reps)
				if i == data_size {
					/* Don't add any cost for the last zero run, since these are encoded
					   only implicitly. */
					break
				}

				if reps < 3 {
					depth_histo[0] += reps
				} else {
					reps -= 2
					for reps > 0 {
						depth_histo[repeatZeroCodeLength]++

						/* Add the 3 extra bits for the 17 code length code. */
						bits += 3

						reps >>= 3
					}
				}
			}
		}

		/* Add the estimated encoding cost of the code length code histogram. */
		bits += float64(18 + 2*max_depth)

		/* Add the entropy of the code length code histogram. */
		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
	}

	return bits
}
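
/*
   An illustrative sketch of the one-bit-per-symbol floor applied by
   bitsEntropy: a histogram holding a single distinct value has zero Shannon
   entropy, but the estimate is raised to one bit per counted symbol. The
   helper name and the sample counts are illustrative assumptions.
*/
func exampleBitsEntropyFloor() (balanced float64, single float64) {
	/* Two equally likely symbols: 8 counts * 1 bit each = 8 bits; no clamping. */
	balanced = bitsEntropy([]uint32{4, 4}, 2)

	/* A single symbol: the raw entropy is 0, raised to 8 (one bit per count). */
	single = bitsEntropy([]uint32{8}, 1)

	return balanced, single
}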