Line data Source code
1 :
2 : /*-------------------------------------------------------------*/
3 : /*--- Huffman coding low-level stuff ---*/
4 : /*--- huffman.c ---*/
5 : /*-------------------------------------------------------------*/
6 :
7 : /* ------------------------------------------------------------------
8 : This file is part of bzip2/libbzip2, a program and library for
9 : lossless, block-sorting data compression.
10 :
11 : bzip2/libbzip2 version 1.0.8 of 13 July 2019
12 : Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
13 :
14 : Please read the WARNING, DISCLAIMER and PATENTS sections in the
15 : README file.
16 :
17 : This program is released under the terms of the license contained
18 : in the file LICENSE.
19 : ------------------------------------------------------------------ */
20 :
21 :
22 : #include "bzlib_private.h"
23 :
24 : /*---------------------------------------------------*/
25 0 : #define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
26 : #define DEPTHOF(zz1) ((zz1) & 0x000000ff)
27 0 : #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
28 :
29 : #define ADDWEIGHTS(zw1,zw2) \
30 0 : (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
31 0 : (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
32 :
33 0 : #define UPHEAP(z) \
34 0 : { \
35 0 : Int32 zz, tmp; \
36 0 : zz = z; tmp = heap[zz]; \
37 0 : while (weight[tmp] < weight[heap[zz >> 1]]) { \
38 0 : heap[zz] = heap[zz >> 1]; \
39 0 : zz >>= 1; \
40 0 : } \
41 0 : heap[zz] = tmp; \
42 0 : }
43 :
44 0 : #define DOWNHEAP(z) \
45 0 : { \
46 0 : Int32 zz, yy, tmp; \
47 0 : zz = z; tmp = heap[zz]; \
48 0 : while (True) { \
49 0 : yy = zz << 1; \
50 0 : if (yy > nHeap) break; \
51 0 : if (yy < nHeap && \
52 0 : weight[heap[yy+1]] < weight[heap[yy]]) \
53 0 : yy++; \
54 0 : if (weight[tmp] < weight[heap[yy]]) break; \
55 0 : heap[zz] = heap[yy]; \
56 0 : zz = yy; \
57 0 : } \
58 0 : heap[zz] = tmp; \
59 0 : }
60 :
61 :
62 : /*---------------------------------------------------*/
63 : void BZ2_hbMakeCodeLengths ( UChar *len,
64 : Int32 *freq,
65 : Int32 alphaSize,
66 : Int32 maxLen )
67 0 : {
68 : /*--
69 : Nodes and heap entries run from 1. Entry 0
70 : for both the heap and nodes is a sentinel.
71 : --*/
72 0 : Int32 nNodes, nHeap, n1, n2, i, j, k;
73 0 : Bool tooLong;
74 :
75 0 : Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ];
76 0 : Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
77 0 : Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
78 :
79 0 : for (i = 0; i < alphaSize; i++)
80 0 : weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
81 :
82 0 : while (True) {
83 :
84 0 : nNodes = alphaSize;
85 0 : nHeap = 0;
86 :
87 0 : heap[0] = 0;
88 0 : weight[0] = 0;
89 0 : parent[0] = -2;
90 :
91 0 : for (i = 1; i <= alphaSize; i++) {
92 0 : parent[i] = -1;
93 0 : nHeap++;
94 0 : heap[nHeap] = i;
95 0 : UPHEAP(nHeap);
96 0 : }
97 :
98 0 : AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
99 :
100 0 : while (nHeap > 1) {
101 0 : n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
102 0 : n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
103 0 : nNodes++;
104 0 : parent[n1] = parent[n2] = nNodes;
105 0 : weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106 0 : parent[nNodes] = -1;
107 0 : nHeap++;
108 0 : heap[nHeap] = nNodes;
109 0 : UPHEAP(nHeap);
110 0 : }
111 :
112 0 : AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
113 :
114 0 : tooLong = False;
115 0 : for (i = 1; i <= alphaSize; i++) {
116 0 : j = 0;
117 0 : k = i;
118 0 : while (parent[k] >= 0) { k = parent[k]; j++; }
119 0 : len[i-1] = j;
120 0 : if (j > maxLen) tooLong = True;
121 0 : }
122 :
123 0 : if (! tooLong) break;
124 :
125 : /* 17 Oct 04: keep-going condition for the following loop used
126 : to be 'i < alphaSize', which missed the last element,
127 : theoretically leading to the possibility of the compressor
128 : looping. However, this count-scaling step is only needed if
129 : one of the generated Huffman code words is longer than
130 : maxLen, which up to and including version 1.0.2 was 20 bits,
131 : which is extremely unlikely. In version 1.0.3 maxLen was
132 : changed to 17 bits, which has minimal effect on compression
133 : ratio, but does mean this scaling step is used from time to
134 : time, enough to verify that it works.
135 :
136 : This means that bzip2-1.0.3 and later will only produce
137 : Huffman codes with a maximum length of 17 bits. However, in
138 : order to preserve backwards compatibility with bitstreams
139 : produced by versions pre-1.0.3, the decompressor must still
140 : handle lengths of up to 20. */
141 :
142 0 : for (i = 1; i <= alphaSize; i++) {
143 0 : j = weight[i] >> 8;
144 0 : j = 1 + (j / 2);
145 0 : weight[i] = j << 8;
146 0 : }
147 0 : }
148 0 : }
149 :
150 :
151 : /*---------------------------------------------------*/
152 : void BZ2_hbAssignCodes ( Int32 *code,
153 : UChar *length,
154 : Int32 minLen,
155 : Int32 maxLen,
156 : Int32 alphaSize )
157 0 : {
158 0 : Int32 n, vec, i;
159 :
160 0 : vec = 0;
161 0 : for (n = minLen; n <= maxLen; n++) {
162 0 : for (i = 0; i < alphaSize; i++)
163 0 : if (length[i] == n) { code[i] = vec; vec++; };
164 0 : vec <<= 1;
165 0 : }
166 0 : }
167 :
168 :
169 : /*---------------------------------------------------*/
170 : void BZ2_hbCreateDecodeTables ( Int32 *limit,
171 : Int32 *base,
172 : Int32 *perm,
173 : UChar *length,
174 : Int32 minLen,
175 : Int32 maxLen,
176 : Int32 alphaSize )
177 0 : {
178 0 : Int32 pp, i, j, vec;
179 :
180 0 : pp = 0;
181 0 : for (i = minLen; i <= maxLen; i++)
182 0 : for (j = 0; j < alphaSize; j++)
183 0 : if (length[j] == i) { perm[pp] = j; pp++; };
184 :
185 0 : for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186 0 : for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
187 :
188 0 : for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
189 :
190 0 : for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191 0 : vec = 0;
192 :
193 0 : for (i = minLen; i <= maxLen; i++) {
194 0 : vec += (base[i+1] - base[i]);
195 0 : limit[i] = vec-1;
196 0 : vec <<= 1;
197 0 : }
198 0 : for (i = minLen + 1; i <= maxLen; i++)
199 0 : base[i] = ((limit[i-1] + 1) << 1) - base[i];
200 0 : }
201 :
202 :
203 : /*-------------------------------------------------------------*/
204 : /*--- end huffman.c ---*/
205 : /*-------------------------------------------------------------*/
|