1
2
3
4
5 package flate
6
7 import (
8 "io"
9 "math"
10 "os"
11 "strconv"
12 )
13
14 const (
15
16 offsetCodeCount = 30
17
18
19 endBlockMarker = 256
20
21
22 lengthCodesStart = 257
23
24
25 codegenCodeCount = 19
26 badCode = 255
27 )
28
29
30 var lengthExtraBits = []int8{
31 0, 0, 0,
32 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
33 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
34 4, 5, 5, 5, 5, 0,
35 }
36
37
38 var lengthBase = []uint32{
39 0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
40 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
41 64, 80, 96, 112, 128, 160, 192, 224, 255,
42 }
43
44
45 var offsetExtraBits = []int8{
46 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
47 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
48 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
49
50 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
51 }
52
53 var offsetBase = []uint32{
54
55 0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
56 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
57 0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
58 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
59 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
60 0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
61
62
63 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
64 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
65 0x100000, 0x180000, 0x200000, 0x300000,
66 }
67
68
69 var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
70
71 type huffmanBitWriter struct {
72 w io.Writer
73
74
75 bits uint32
76 nbits uint32
77 bytes [64]byte
78 nbytes int
79 literalFreq []int32
80 offsetFreq []int32
81 codegen []uint8
82 codegenFreq []int32
83 literalEncoding *huffmanEncoder
84 offsetEncoding *huffmanEncoder
85 codegenEncoding *huffmanEncoder
86 err os.Error
87 }
88
89 type WrongValueError struct {
90 name string
91 from int32
92 to int32
93 value int32
94 }
95
96 func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
97 return &huffmanBitWriter{
98 w: w,
99 literalFreq: make([]int32, maxLit),
100 offsetFreq: make([]int32, offsetCodeCount),
101 codegen: make([]uint8, maxLit+offsetCodeCount+1),
102 codegenFreq: make([]int32, codegenCodeCount),
103 literalEncoding: newHuffmanEncoder(maxLit),
104 offsetEncoding: newHuffmanEncoder(offsetCodeCount),
105 codegenEncoding: newHuffmanEncoder(codegenCodeCount),
106 }
107 }
108
109 func (err WrongValueError) String() string {
110 return "huffmanBitWriter: " + err.name + " should belong to [" + strconv.Itoa64(int64(err.from)) + ";" +
111 strconv.Itoa64(int64(err.to)) + "] but actual value is " + strconv.Itoa64(int64(err.value))
112 }
113
114 func (w *huffmanBitWriter) flushBits() {
115 if w.err != nil {
116 w.nbits = 0
117 return
118 }
119 bits := w.bits
120 w.bits >>= 16
121 w.nbits -= 16
122 n := w.nbytes
123 w.bytes[n] = byte(bits)
124 w.bytes[n+1] = byte(bits >> 8)
125 if n += 2; n >= len(w.bytes) {
126 _, w.err = w.w.Write(w.bytes[0:])
127 n = 0
128 }
129 w.nbytes = n
130 }
131
132 func (w *huffmanBitWriter) flush() {
133 if w.err != nil {
134 w.nbits = 0
135 return
136 }
137 n := w.nbytes
138 if w.nbits > 8 {
139 w.bytes[n] = byte(w.bits)
140 w.bits >>= 8
141 w.nbits -= 8
142 n++
143 }
144 if w.nbits > 0 {
145 w.bytes[n] = byte(w.bits)
146 w.nbits = 0
147 n++
148 }
149 w.bits = 0
150 _, w.err = w.w.Write(w.bytes[0:n])
151 w.nbytes = 0
152 }
153
154 func (w *huffmanBitWriter) writeBits(b, nb int32) {
155 w.bits |= uint32(b) << w.nbits
156 if w.nbits += uint32(nb); w.nbits >= 16 {
157 w.flushBits()
158 }
159 }
160
161 func (w *huffmanBitWriter) writeBytes(bytes []byte) {
162 if w.err != nil {
163 return
164 }
165 n := w.nbytes
166 if w.nbits == 8 {
167 w.bytes[n] = byte(w.bits)
168 w.nbits = 0
169 n++
170 }
171 if w.nbits != 0 {
172 w.err = InternalError("writeBytes with unfinished bits")
173 return
174 }
175 if n != 0 {
176 _, w.err = w.w.Write(w.bytes[0:n])
177 if w.err != nil {
178 return
179 }
180 }
181 w.nbytes = 0
182 _, w.err = w.w.Write(bytes)
183 }
184
185
186
187
188
189
190
191
192
193
194
195
196 func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int) {
197 fillInt32s(w.codegenFreq, 0)
198
199
200
201
202 codegen := w.codegen
203
204 copyUint8s(codegen[0:numLiterals], w.literalEncoding.codeBits)
205 copyUint8s(codegen[numLiterals:numLiterals+numOffsets], w.offsetEncoding.codeBits)
206 codegen[numLiterals+numOffsets] = badCode
207
208 size := codegen[0]
209 count := 1
210 outIndex := 0
211 for inIndex := 1; size != badCode; inIndex++ {
212
213
214 nextSize := codegen[inIndex]
215 if nextSize == size {
216 count++
217 continue
218 }
219
220 if size != 0 {
221 codegen[outIndex] = size
222 outIndex++
223 w.codegenFreq[size]++
224 count--
225 for count >= 3 {
226 n := min(count, 6)
227 codegen[outIndex] = 16
228 outIndex++
229 codegen[outIndex] = uint8(n - 3)
230 outIndex++
231 w.codegenFreq[16]++
232 count -= n
233 }
234 } else {
235 for count >= 11 {
236 n := min(count, 138)
237 codegen[outIndex] = 18
238 outIndex++
239 codegen[outIndex] = uint8(n - 11)
240 outIndex++
241 w.codegenFreq[18]++
242 count -= n
243 }
244 if count >= 3 {
245
246 codegen[outIndex] = 17
247 outIndex++
248 codegen[outIndex] = uint8(count - 3)
249 outIndex++
250 w.codegenFreq[17]++
251 count = 0
252 }
253 }
254 count--
255 for ; count >= 0; count-- {
256 codegen[outIndex] = size
257 outIndex++
258 w.codegenFreq[size]++
259 }
260
261 size = nextSize
262 count = 1
263 }
264
265 codegen[outIndex] = badCode
266 }
267
268 func (w *huffmanBitWriter) writeCode(code *huffmanEncoder, literal uint32) {
269 if w.err != nil {
270 return
271 }
272 w.writeBits(int32(code.code[literal]), int32(code.codeBits[literal]))
273 }
274
275
276
277
278
279
280 func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
281 if w.err != nil {
282 return
283 }
284 var firstBits int32 = 4
285 if isEof {
286 firstBits = 5
287 }
288 w.writeBits(firstBits, 3)
289 w.writeBits(int32(numLiterals-257), 5)
290 w.writeBits(int32(numOffsets-1), 5)
291 w.writeBits(int32(numCodegens-4), 4)
292
293 for i := 0; i < numCodegens; i++ {
294 value := w.codegenEncoding.codeBits[codegenOrder[i]]
295 w.writeBits(int32(value), 3)
296 }
297
298 i := 0
299 for {
300 var codeWord int = int(w.codegen[i])
301 i++
302 if codeWord == badCode {
303 break
304 }
305
306 w.writeCode(w.codegenEncoding, uint32(codeWord))
307
308 switch codeWord {
309 case 16:
310 w.writeBits(int32(w.codegen[i]), 2)
311 i++
312 break
313 case 17:
314 w.writeBits(int32(w.codegen[i]), 3)
315 i++
316 break
317 case 18:
318 w.writeBits(int32(w.codegen[i]), 7)
319 i++
320 break
321 }
322 }
323 }
324
325 func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
326 if w.err != nil {
327 return
328 }
329 var flag int32
330 if isEof {
331 flag = 1
332 }
333 w.writeBits(flag, 3)
334 w.flush()
335 w.writeBits(int32(length), 16)
336 w.writeBits(int32(^uint16(length)), 16)
337 }
338
339 func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
340 if w.err != nil {
341 return
342 }
343
344 var value int32 = 2
345 if isEof {
346 value = 3
347 }
348 w.writeBits(value, 3)
349 }
350
351 func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
352 if w.err != nil {
353 return
354 }
355 fillInt32s(w.literalFreq, 0)
356 fillInt32s(w.offsetFreq, 0)
357
358 n := len(tokens)
359 tokens = tokens[0 : n+1]
360 tokens[n] = endBlockMarker
361
362 for _, t := range tokens {
363 switch t.typ() {
364 case literalType:
365 w.literalFreq[t.literal()]++
366 case matchType:
367 length := t.length()
368 offset := t.offset()
369 w.literalFreq[lengthCodesStart+lengthCode(length)]++
370 w.offsetFreq[offsetCode(offset)]++
371 }
372 }
373
374
375 numLiterals := len(w.literalFreq)
376 for w.literalFreq[numLiterals-1] == 0 {
377 numLiterals--
378 }
379
380 numOffsets := len(w.offsetFreq)
381 for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
382 numOffsets--
383 }
384 if numOffsets == 0 {
385
386
387 w.offsetFreq[0] = 1
388 numOffsets = 1
389 }
390
391 w.literalEncoding.generate(w.literalFreq, 15)
392 w.offsetEncoding.generate(w.offsetFreq, 15)
393
394 storedBytes := 0
395 if input != nil {
396 storedBytes = len(input)
397 }
398 var extraBits int64
399 var storedSize int64 = math.MaxInt64
400 if storedBytes <= maxStoreBlockSize && input != nil {
401 storedSize = int64((storedBytes + 5) * 8)
402
403
404
405
406 for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
407
408 extraBits += int64(w.literalFreq[lengthCode]) * int64(lengthExtraBits[lengthCode-lengthCodesStart])
409 }
410 for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
411
412 extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode])
413 }
414 }
415
416
417
418 var size = int64(3) +
419 fixedLiteralEncoding.bitLength(w.literalFreq) +
420 fixedOffsetEncoding.bitLength(w.offsetFreq) +
421 extraBits
422 var literalEncoding = fixedLiteralEncoding
423 var offsetEncoding = fixedOffsetEncoding
424
425
426 var numCodegens int
427
428
429
430 w.generateCodegen(numLiterals, numOffsets)
431 w.codegenEncoding.generate(w.codegenFreq, 7)
432 numCodegens = len(w.codegenFreq)
433 for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
434 numCodegens--
435 }
436 dynamicHeader := int64(3+5+5+4+(3*numCodegens)) +
437 w.codegenEncoding.bitLength(w.codegenFreq) +
438 int64(extraBits) +
439 int64(w.codegenFreq[16]*2) +
440 int64(w.codegenFreq[17]*3) +
441 int64(w.codegenFreq[18]*7)
442 dynamicSize := dynamicHeader +
443 w.literalEncoding.bitLength(w.literalFreq) +
444 w.offsetEncoding.bitLength(w.offsetFreq)
445
446 if dynamicSize < size {
447 size = dynamicSize
448 literalEncoding = w.literalEncoding
449 offsetEncoding = w.offsetEncoding
450 }
451
452
453 if storedSize < size {
454 w.writeStoredHeader(storedBytes, eof)
455 w.writeBytes(input[0:storedBytes])
456 return
457 }
458
459
460 if literalEncoding == fixedLiteralEncoding {
461 w.writeFixedHeader(eof)
462 } else {
463 w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
464 }
465 for _, t := range tokens {
466 switch t.typ() {
467 case literalType:
468 w.writeCode(literalEncoding, t.literal())
469 break
470 case matchType:
471
472 length := t.length()
473 lengthCode := lengthCode(length)
474 w.writeCode(literalEncoding, lengthCode+lengthCodesStart)
475 extraLengthBits := int32(lengthExtraBits[lengthCode])
476 if extraLengthBits > 0 {
477 extraLength := int32(length - lengthBase[lengthCode])
478 w.writeBits(extraLength, extraLengthBits)
479 }
480
481 offset := t.offset()
482 offsetCode := offsetCode(offset)
483 w.writeCode(offsetEncoding, offsetCode)
484 extraOffsetBits := int32(offsetExtraBits[offsetCode])
485 if extraOffsetBits > 0 {
486 extraOffset := int32(offset - offsetBase[offsetCode])
487 w.writeBits(extraOffset, extraOffsetBits)
488 }
489 break
490 default:
491 panic("unknown token type: " + string(t))
492 }
493 }
494 }