Line data Source code
1 :
2 : /*-------------------------------------------------------------*/
3 : /*--- Block sorting machinery ---*/
4 : /*--- blocksort.c ---*/
5 : /*-------------------------------------------------------------*/
6 :
7 : /* ------------------------------------------------------------------
8 : This file is part of bzip2/libbzip2, a program and library for
9 : lossless, block-sorting data compression.
10 :
11 : bzip2/libbzip2 version 1.0.8 of 13 July 2019
12 : Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
13 :
14 : Please read the WARNING, DISCLAIMER and PATENTS sections in the
15 : README file.
16 :
17 : This program is released under the terms of the license contained
18 : in the file LICENSE.
19 : ------------------------------------------------------------------ */
20 :
21 :
22 : #include "bzlib_private.h"
23 :
24 : /*---------------------------------------------*/
25 : /*--- Fallback O(N log(N)^2) sorting ---*/
26 : /*--- algorithm, for repetitive blocks ---*/
27 : /*---------------------------------------------*/
28 :
29 : /*---------------------------------------------*/
30 : static
31 : __inline__
32 : void fallbackSimpleSort ( UInt32* fmap,
33 : UInt32* eclass,
34 : Int32 lo,
35 : Int32 hi )
36 0 : {
37 0 : Int32 i, j, tmp;
38 0 : UInt32 ec_tmp;
39 :
40 0 : if (lo == hi) return;
41 :
42 0 : if (hi - lo > 3) {
43 0 : for ( i = hi-4; i >= lo; i-- ) {
44 0 : tmp = fmap[i];
45 0 : ec_tmp = eclass[tmp];
46 0 : for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
47 0 : fmap[j-4] = fmap[j];
48 0 : fmap[j-4] = tmp;
49 0 : }
50 0 : }
51 :
52 0 : for ( i = hi-1; i >= lo; i-- ) {
53 0 : tmp = fmap[i];
54 0 : ec_tmp = eclass[tmp];
55 0 : for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
56 0 : fmap[j-1] = fmap[j];
57 0 : fmap[j-1] = tmp;
58 0 : }
59 0 : }
60 :
61 :
62 : /*---------------------------------------------*/
63 : #define fswap(zz1, zz2) \
64 0 : { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
65 :
66 0 : #define fvswap(zzp1, zzp2, zzn) \
67 0 : { \
68 0 : Int32 yyp1 = (zzp1); \
69 0 : Int32 yyp2 = (zzp2); \
70 0 : Int32 yyn = (zzn); \
71 0 : while (yyn > 0) { \
72 0 : fswap(fmap[yyp1], fmap[yyp2]); \
73 0 : yyp1++; yyp2++; yyn--; \
74 0 : } \
75 0 : }
76 :
77 :
78 0 : #define fmin(a,b) ((a) < (b)) ? (a) : (b)
79 :
80 0 : #define fpush(lz,hz) { stackLo[sp] = lz; \
81 0 : stackHi[sp] = hz; \
82 0 : sp++; }
83 :
84 0 : #define fpop(lz,hz) { sp--; \
85 0 : lz = stackLo[sp]; \
86 0 : hz = stackHi[sp]; }
87 :
88 0 : #define FALLBACK_QSORT_SMALL_THRESH 10
89 : #define FALLBACK_QSORT_STACK_SIZE 100
90 :
91 :
92 : static
93 : void fallbackQSort3 ( UInt32* fmap,
94 : UInt32* eclass,
95 : Int32 loSt,
96 : Int32 hiSt )
97 0 : {
98 0 : Int32 unLo, unHi, ltLo, gtHi, n, m;
99 0 : Int32 sp, lo, hi;
100 0 : UInt32 med, r, r3;
101 0 : Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
102 0 : Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
103 :
104 0 : r = 0;
105 :
106 0 : sp = 0;
107 0 : fpush ( loSt, hiSt );
108 :
109 0 : while (sp > 0) {
110 :
111 0 : AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
112 :
113 0 : fpop ( lo, hi );
114 0 : if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
115 0 : fallbackSimpleSort ( fmap, eclass, lo, hi );
116 0 : continue;
117 0 : }
118 :
119 : /* Random partitioning. Median of 3 sometimes fails to
120 : avoid bad cases. Median of 9 seems to help but
121 : looks rather expensive. This too seems to work but
122 : is cheaper. Guidance for the magic constants
123 : 7621 and 32768 is taken from Sedgewick's algorithms
124 : book, chapter 35.
125 : */
126 0 : r = ((r * 7621) + 1) % 32768;
127 0 : r3 = r % 3;
128 0 : if (r3 == 0) med = eclass[fmap[lo]]; else
129 0 : if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
130 0 : med = eclass[fmap[hi]];
131 :
132 0 : unLo = ltLo = lo;
133 0 : unHi = gtHi = hi;
134 :
135 0 : while (1) {
136 0 : while (1) {
137 0 : if (unLo > unHi) break;
138 0 : n = (Int32)eclass[fmap[unLo]] - (Int32)med;
139 0 : if (n == 0) {
140 0 : fswap(fmap[unLo], fmap[ltLo]);
141 0 : ltLo++; unLo++;
142 0 : continue;
143 0 : };
144 0 : if (n > 0) break;
145 0 : unLo++;
146 0 : }
147 0 : while (1) {
148 0 : if (unLo > unHi) break;
149 0 : n = (Int32)eclass[fmap[unHi]] - (Int32)med;
150 0 : if (n == 0) {
151 0 : fswap(fmap[unHi], fmap[gtHi]);
152 0 : gtHi--; unHi--;
153 0 : continue;
154 0 : };
155 0 : if (n < 0) break;
156 0 : unHi--;
157 0 : }
158 0 : if (unLo > unHi) break;
159 0 : fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
160 0 : }
161 :
162 0 : AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
163 :
164 0 : if (gtHi < ltLo) continue;
165 :
166 0 : n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
167 0 : m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
168 :
169 0 : n = lo + unLo - ltLo - 1;
170 0 : m = hi - (gtHi - unHi) + 1;
171 :
172 0 : if (n - lo > hi - m) {
173 0 : fpush ( lo, n );
174 0 : fpush ( m, hi );
175 0 : } else {
176 0 : fpush ( m, hi );
177 0 : fpush ( lo, n );
178 0 : }
179 0 : }
180 0 : }
181 :
182 : #undef fmin
183 : #undef fpush
184 : #undef fpop
185 : #undef fswap
186 : #undef fvswap
187 : #undef FALLBACK_QSORT_SMALL_THRESH
188 : #undef FALLBACK_QSORT_STACK_SIZE
189 :
190 :
191 : /*---------------------------------------------*/
192 : /* Pre:
193 : nblock > 0
194 : eclass exists for [0 .. nblock-1]
195 : ((UChar*)eclass) [0 .. nblock-1] holds block
196 : ptr exists for [0 .. nblock-1]
197 :
198 : Post:
199 : ((UChar*)eclass) [0 .. nblock-1] holds block
200 : All other areas of eclass destroyed
201 : fmap [0 .. nblock-1] holds sorted order
202 : bhtab [ 0 .. 2+(nblock/32) ] destroyed
203 : */
204 :
205 0 : #define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
206 0 : #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
207 0 : #define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
208 0 : #define WORD_BH(zz) bhtab[(zz) >> 5]
209 0 : #define UNALIGNED_BH(zz) ((zz) & 0x01f)
210 :
211 : static
212 : void fallbackSort ( UInt32* fmap,
213 : UInt32* eclass,
214 : UInt32* bhtab,
215 : Int32 nblock,
216 : Int32 verb )
217 0 : {
218 0 : Int32 ftab[257];
219 0 : Int32 ftabCopy[256];
220 0 : Int32 H, i, j, k, l, r, cc, cc1;
221 0 : Int32 nNotDone;
222 0 : Int32 nBhtab;
223 0 : UChar* eclass8 = (UChar*)eclass;
224 :
225 : /*--
226 : Initial 1-char radix sort to generate
227 : initial fmap and initial BH bits.
228 : --*/
229 0 : if (verb >= 4)
230 0 : VPrintf0 ( " bucket sorting ...\n" );
231 0 : for (i = 0; i < 257; i++) ftab[i] = 0;
232 0 : for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
233 0 : for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
234 0 : for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
235 :
236 0 : for (i = 0; i < nblock; i++) {
237 0 : j = eclass8[i];
238 0 : k = ftab[j] - 1;
239 0 : ftab[j] = k;
240 0 : fmap[k] = i;
241 0 : }
242 :
243 0 : nBhtab = 2 + (nblock / 32);
244 0 : for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
245 0 : for (i = 0; i < 256; i++) SET_BH(ftab[i]);
246 :
247 : /*--
248 : Inductively refine the buckets. Kind-of an
249 : "exponential radix sort" (!), inspired by the
250 : Manber-Myers suffix array construction algorithm.
251 : --*/
252 :
253 : /*-- set sentinel bits for block-end detection --*/
254 0 : for (i = 0; i < 32; i++) {
255 0 : SET_BH(nblock + 2*i);
256 0 : CLEAR_BH(nblock + 2*i + 1);
257 0 : }
258 :
259 : /*-- the log(N) loop --*/
260 0 : H = 1;
261 0 : while (1) {
262 :
263 0 : if (verb >= 4)
264 0 : VPrintf1 ( " depth %6d has ", H );
265 :
266 0 : j = 0;
267 0 : for (i = 0; i < nblock; i++) {
268 0 : if (ISSET_BH(i)) j = i;
269 0 : k = fmap[i] - H; if (k < 0) k += nblock;
270 0 : eclass[k] = j;
271 0 : }
272 :
273 0 : nNotDone = 0;
274 0 : r = -1;
275 0 : while (1) {
276 :
277 : /*-- find the next non-singleton bucket --*/
278 0 : k = r + 1;
279 0 : while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
280 0 : if (ISSET_BH(k)) {
281 0 : while (WORD_BH(k) == 0xffffffff) k += 32;
282 0 : while (ISSET_BH(k)) k++;
283 0 : }
284 0 : l = k - 1;
285 0 : if (l >= nblock) break;
286 0 : while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
287 0 : if (!ISSET_BH(k)) {
288 0 : while (WORD_BH(k) == 0x00000000) k += 32;
289 0 : while (!ISSET_BH(k)) k++;
290 0 : }
291 0 : r = k - 1;
292 0 : if (r >= nblock) break;
293 :
294 : /*-- now [l, r] bracket current bucket --*/
295 0 : if (r > l) {
296 0 : nNotDone += (r - l + 1);
297 0 : fallbackQSort3 ( fmap, eclass, l, r );
298 :
299 : /*-- scan bucket and generate header bits-- */
300 0 : cc = -1;
301 0 : for (i = l; i <= r; i++) {
302 0 : cc1 = eclass[fmap[i]];
303 0 : if (cc != cc1) { SET_BH(i); cc = cc1; };
304 0 : }
305 0 : }
306 0 : }
307 :
308 0 : if (verb >= 4)
309 0 : VPrintf1 ( "%6d unresolved strings\n", nNotDone );
310 :
311 0 : H *= 2;
312 0 : if (H > nblock || nNotDone == 0) break;
313 0 : }
314 :
315 : /*--
316 : Reconstruct the original block in
317 : eclass8 [0 .. nblock-1], since the
318 : previous phase destroyed it.
319 : --*/
320 0 : if (verb >= 4)
321 0 : VPrintf0 ( " reconstructing block ...\n" );
322 0 : j = 0;
323 0 : for (i = 0; i < nblock; i++) {
324 0 : while (ftabCopy[j] == 0) j++;
325 0 : ftabCopy[j]--;
326 0 : eclass8[fmap[i]] = (UChar)j;
327 0 : }
328 0 : AssertH ( j < 256, 1005 );
329 0 : }
330 :
331 : #undef SET_BH
332 : #undef CLEAR_BH
333 : #undef ISSET_BH
334 : #undef WORD_BH
335 : #undef UNALIGNED_BH
336 :
337 :
338 : /*---------------------------------------------*/
339 : /*--- The main, O(N^2 log(N)) sorting ---*/
340 : /*--- algorithm. Faster for "normal" ---*/
341 : /*--- non-repetitive blocks. ---*/
342 : /*---------------------------------------------*/
343 :
344 : /*---------------------------------------------*/
345 : static
346 : __inline__
347 : Bool mainGtU ( UInt32 i1,
348 : UInt32 i2,
349 : UChar* block,
350 : UInt16* quadrant,
351 : UInt32 nblock,
352 : Int32* budget )
353 0 : {
354 0 : Int32 k;
355 0 : UChar c1, c2;
356 0 : UInt16 s1, s2;
357 :
358 0 : AssertD ( i1 != i2, "mainGtU" );
359 : /* 1 */
360 0 : c1 = block[i1]; c2 = block[i2];
361 0 : if (c1 != c2) return (c1 > c2);
362 0 : i1++; i2++;
363 : /* 2 */
364 0 : c1 = block[i1]; c2 = block[i2];
365 0 : if (c1 != c2) return (c1 > c2);
366 0 : i1++; i2++;
367 : /* 3 */
368 0 : c1 = block[i1]; c2 = block[i2];
369 0 : if (c1 != c2) return (c1 > c2);
370 0 : i1++; i2++;
371 : /* 4 */
372 0 : c1 = block[i1]; c2 = block[i2];
373 0 : if (c1 != c2) return (c1 > c2);
374 0 : i1++; i2++;
375 : /* 5 */
376 0 : c1 = block[i1]; c2 = block[i2];
377 0 : if (c1 != c2) return (c1 > c2);
378 0 : i1++; i2++;
379 : /* 6 */
380 0 : c1 = block[i1]; c2 = block[i2];
381 0 : if (c1 != c2) return (c1 > c2);
382 0 : i1++; i2++;
383 : /* 7 */
384 0 : c1 = block[i1]; c2 = block[i2];
385 0 : if (c1 != c2) return (c1 > c2);
386 0 : i1++; i2++;
387 : /* 8 */
388 0 : c1 = block[i1]; c2 = block[i2];
389 0 : if (c1 != c2) return (c1 > c2);
390 0 : i1++; i2++;
391 : /* 9 */
392 0 : c1 = block[i1]; c2 = block[i2];
393 0 : if (c1 != c2) return (c1 > c2);
394 0 : i1++; i2++;
395 : /* 10 */
396 0 : c1 = block[i1]; c2 = block[i2];
397 0 : if (c1 != c2) return (c1 > c2);
398 0 : i1++; i2++;
399 : /* 11 */
400 0 : c1 = block[i1]; c2 = block[i2];
401 0 : if (c1 != c2) return (c1 > c2);
402 0 : i1++; i2++;
403 : /* 12 */
404 0 : c1 = block[i1]; c2 = block[i2];
405 0 : if (c1 != c2) return (c1 > c2);
406 0 : i1++; i2++;
407 :
408 0 : k = nblock + 8;
409 :
410 0 : do {
411 : /* 1 */
412 0 : c1 = block[i1]; c2 = block[i2];
413 0 : if (c1 != c2) return (c1 > c2);
414 0 : s1 = quadrant[i1]; s2 = quadrant[i2];
415 0 : if (s1 != s2) return (s1 > s2);
416 0 : i1++; i2++;
417 : /* 2 */
418 0 : c1 = block[i1]; c2 = block[i2];
419 0 : if (c1 != c2) return (c1 > c2);
420 0 : s1 = quadrant[i1]; s2 = quadrant[i2];
421 0 : if (s1 != s2) return (s1 > s2);
422 0 : i1++; i2++;
423 : /* 3 */
424 0 : c1 = block[i1]; c2 = block[i2];
425 0 : if (c1 != c2) return (c1 > c2);
426 0 : s1 = quadrant[i1]; s2 = quadrant[i2];
427 0 : if (s1 != s2) return (s1 > s2);
428 0 : i1++; i2++;
429 : /* 4 */
430 0 : c1 = block[i1]; c2 = block[i2];
431 0 : if (c1 != c2) return (c1 > c2);
432 0 : s1 = quadrant[i1]; s2 = quadrant[i2];
433 0 : if (s1 != s2) return (s1 > s2);
434 0 : i1++; i2++;
435 : /* 5 */
436 0 : c1 = block[i1]; c2 = block[i2];
437 0 : if (c1 != c2) return (c1 > c2);
438 0 : s1 = quadrant[i1]; s2 = quadrant[i2];
439 0 : if (s1 != s2) return (s1 > s2);
440 0 : i1++; i2++;
441 : /* 6 */
442 0 : c1 = block[i1]; c2 = block[i2];
443 0 : if (c1 != c2) return (c1 > c2);
444 0 : s1 = quadrant[i1]; s2 = quadrant[i2];
445 0 : if (s1 != s2) return (s1 > s2);
446 0 : i1++; i2++;
447 : /* 7 */
448 0 : c1 = block[i1]; c2 = block[i2];
449 0 : if (c1 != c2) return (c1 > c2);
450 0 : s1 = quadrant[i1]; s2 = quadrant[i2];
451 0 : if (s1 != s2) return (s1 > s2);
452 0 : i1++; i2++;
453 : /* 8 */
454 0 : c1 = block[i1]; c2 = block[i2];
455 0 : if (c1 != c2) return (c1 > c2);
456 0 : s1 = quadrant[i1]; s2 = quadrant[i2];
457 0 : if (s1 != s2) return (s1 > s2);
458 0 : i1++; i2++;
459 :
460 0 : if (i1 >= nblock) i1 -= nblock;
461 0 : if (i2 >= nblock) i2 -= nblock;
462 :
463 0 : k -= 8;
464 0 : (*budget)--;
465 0 : }
466 0 : while (k >= 0);
467 :
468 0 : return False;
469 0 : }
470 :
471 :
472 : /*---------------------------------------------*/
473 : /*--
474 : Knuth's increments seem to work better
475 : than Incerpi-Sedgewick here. Possibly
476 : because the number of elems to sort is
477 : usually small, typically <= 20.
478 : --*/
479 : static
480 : Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
481 : 9841, 29524, 88573, 265720,
482 : 797161, 2391484 };
483 :
484 : static
485 : void mainSimpleSort ( UInt32* ptr,
486 : UChar* block,
487 : UInt16* quadrant,
488 : Int32 nblock,
489 : Int32 lo,
490 : Int32 hi,
491 : Int32 d,
492 : Int32* budget )
493 0 : {
494 0 : Int32 i, j, h, bigN, hp;
495 0 : UInt32 v;
496 :
497 0 : bigN = hi - lo + 1;
498 0 : if (bigN < 2) return;
499 :
500 0 : hp = 0;
501 0 : while (incs[hp] < bigN) hp++;
502 0 : hp--;
503 :
504 0 : for (; hp >= 0; hp--) {
505 0 : h = incs[hp];
506 :
507 0 : i = lo + h;
508 0 : while (True) {
509 :
510 : /*-- copy 1 --*/
511 0 : if (i > hi) break;
512 0 : v = ptr[i];
513 0 : j = i;
514 0 : while ( mainGtU (
515 0 : ptr[j-h]+d, v+d, block, quadrant, nblock, budget
516 0 : ) ) {
517 0 : ptr[j] = ptr[j-h];
518 0 : j = j - h;
519 0 : if (j <= (lo + h - 1)) break;
520 0 : }
521 0 : ptr[j] = v;
522 0 : i++;
523 :
524 : /*-- copy 2 --*/
525 0 : if (i > hi) break;
526 0 : v = ptr[i];
527 0 : j = i;
528 0 : while ( mainGtU (
529 0 : ptr[j-h]+d, v+d, block, quadrant, nblock, budget
530 0 : ) ) {
531 0 : ptr[j] = ptr[j-h];
532 0 : j = j - h;
533 0 : if (j <= (lo + h - 1)) break;
534 0 : }
535 0 : ptr[j] = v;
536 0 : i++;
537 :
538 : /*-- copy 3 --*/
539 0 : if (i > hi) break;
540 0 : v = ptr[i];
541 0 : j = i;
542 0 : while ( mainGtU (
543 0 : ptr[j-h]+d, v+d, block, quadrant, nblock, budget
544 0 : ) ) {
545 0 : ptr[j] = ptr[j-h];
546 0 : j = j - h;
547 0 : if (j <= (lo + h - 1)) break;
548 0 : }
549 0 : ptr[j] = v;
550 0 : i++;
551 :
552 0 : if (*budget < 0) return;
553 0 : }
554 0 : }
555 0 : }
556 :
557 :
558 : /*---------------------------------------------*/
559 : /*--
560 : The following is an implementation of
561 : an elegant 3-way quicksort for strings,
562 : described in a paper "Fast Algorithms for
563 : Sorting and Searching Strings", by Robert
564 : Sedgewick and Jon L. Bentley.
565 : --*/
566 :
567 : #define mswap(zz1, zz2) \
568 0 : { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
569 :
570 0 : #define mvswap(zzp1, zzp2, zzn) \
571 0 : { \
572 0 : Int32 yyp1 = (zzp1); \
573 0 : Int32 yyp2 = (zzp2); \
574 0 : Int32 yyn = (zzn); \
575 0 : while (yyn > 0) { \
576 0 : mswap(ptr[yyp1], ptr[yyp2]); \
577 0 : yyp1++; yyp2++; yyn--; \
578 0 : } \
579 0 : }
580 :
581 : static
582 : __inline__
583 : UChar mmed3 ( UChar a, UChar b, UChar c )
584 0 : {
585 0 : UChar t;
586 0 : if (a > b) { t = a; a = b; b = t; };
587 0 : if (b > c) {
588 0 : b = c;
589 0 : if (a > b) b = a;
590 0 : }
591 0 : return b;
592 0 : }
593 :
594 0 : #define mmin(a,b) ((a) < (b)) ? (a) : (b)
595 :
596 0 : #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
597 0 : stackHi[sp] = hz; \
598 0 : stackD [sp] = dz; \
599 0 : sp++; }
600 :
601 0 : #define mpop(lz,hz,dz) { sp--; \
602 0 : lz = stackLo[sp]; \
603 0 : hz = stackHi[sp]; \
604 0 : dz = stackD [sp]; }
605 :
606 :
607 0 : #define mnextsize(az) (nextHi[az]-nextLo[az])
608 :
609 : #define mnextswap(az,bz) \
610 0 : { Int32 tz; \
611 0 : tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
612 0 : tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
613 0 : tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
614 :
615 :
616 0 : #define MAIN_QSORT_SMALL_THRESH 20
617 0 : #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
618 : #define MAIN_QSORT_STACK_SIZE 100
619 :
620 : static
621 : void mainQSort3 ( UInt32* ptr,
622 : UChar* block,
623 : UInt16* quadrant,
624 : Int32 nblock,
625 : Int32 loSt,
626 : Int32 hiSt,
627 : Int32 dSt,
628 : Int32* budget )
629 0 : {
630 0 : Int32 unLo, unHi, ltLo, gtHi, n, m, med;
631 0 : Int32 sp, lo, hi, d;
632 :
633 0 : Int32 stackLo[MAIN_QSORT_STACK_SIZE];
634 0 : Int32 stackHi[MAIN_QSORT_STACK_SIZE];
635 0 : Int32 stackD [MAIN_QSORT_STACK_SIZE];
636 :
637 0 : Int32 nextLo[3];
638 0 : Int32 nextHi[3];
639 0 : Int32 nextD [3];
640 :
641 0 : sp = 0;
642 0 : mpush ( loSt, hiSt, dSt );
643 :
644 0 : while (sp > 0) {
645 :
646 0 : AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
647 :
648 0 : mpop ( lo, hi, d );
649 0 : if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
650 0 : d > MAIN_QSORT_DEPTH_THRESH) {
651 0 : mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
652 0 : if (*budget < 0) return;
653 0 : continue;
654 0 : }
655 :
656 0 : med = (Int32)
657 0 : mmed3 ( block[ptr[ lo ]+d],
658 0 : block[ptr[ hi ]+d],
659 0 : block[ptr[ (lo+hi)>>1 ]+d] );
660 :
661 0 : unLo = ltLo = lo;
662 0 : unHi = gtHi = hi;
663 :
664 0 : while (True) {
665 0 : while (True) {
666 0 : if (unLo > unHi) break;
667 0 : n = ((Int32)block[ptr[unLo]+d]) - med;
668 0 : if (n == 0) {
669 0 : mswap(ptr[unLo], ptr[ltLo]);
670 0 : ltLo++; unLo++; continue;
671 0 : };
672 0 : if (n > 0) break;
673 0 : unLo++;
674 0 : }
675 0 : while (True) {
676 0 : if (unLo > unHi) break;
677 0 : n = ((Int32)block[ptr[unHi]+d]) - med;
678 0 : if (n == 0) {
679 0 : mswap(ptr[unHi], ptr[gtHi]);
680 0 : gtHi--; unHi--; continue;
681 0 : };
682 0 : if (n < 0) break;
683 0 : unHi--;
684 0 : }
685 0 : if (unLo > unHi) break;
686 0 : mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
687 0 : }
688 :
689 0 : AssertD ( unHi == unLo-1, "mainQSort3(2)" );
690 :
691 0 : if (gtHi < ltLo) {
692 0 : mpush(lo, hi, d+1 );
693 0 : continue;
694 0 : }
695 :
696 0 : n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
697 0 : m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
698 :
699 0 : n = lo + unLo - ltLo - 1;
700 0 : m = hi - (gtHi - unHi) + 1;
701 :
702 0 : nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
703 0 : nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
704 0 : nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
705 :
706 0 : if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707 0 : if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
708 0 : if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709 :
710 0 : AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
711 0 : AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
712 :
713 0 : mpush (nextLo[0], nextHi[0], nextD[0]);
714 0 : mpush (nextLo[1], nextHi[1], nextD[1]);
715 0 : mpush (nextLo[2], nextHi[2], nextD[2]);
716 0 : }
717 0 : }
718 :
719 : #undef mswap
720 : #undef mvswap
721 : #undef mpush
722 : #undef mpop
723 : #undef mmin
724 : #undef mnextsize
725 : #undef mnextswap
726 : #undef MAIN_QSORT_SMALL_THRESH
727 : #undef MAIN_QSORT_DEPTH_THRESH
728 : #undef MAIN_QSORT_STACK_SIZE
729 :
730 :
731 : /*---------------------------------------------*/
732 : /* Pre:
733 : nblock > N_OVERSHOOT
734 : block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
735 : ((UChar*)block32) [0 .. nblock-1] holds block
736 : ptr exists for [0 .. nblock-1]
737 :
738 : Post:
739 : ((UChar*)block32) [0 .. nblock-1] holds block
740 : All other areas of block32 destroyed
741 : ftab [0 .. 65536 ] destroyed
742 : ptr [0 .. nblock-1] holds sorted order
743 : if (*budget < 0), sorting was abandoned
744 : */
745 :
746 0 : #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
747 0 : #define SETMASK (1 << 21)
748 0 : #define CLEARMASK (~(SETMASK))
749 :
750 : static
751 : void mainSort ( UInt32* ptr,
752 : UChar* block,
753 : UInt16* quadrant,
754 : UInt32* ftab,
755 : Int32 nblock,
756 : Int32 verb,
757 : Int32* budget )
758 0 : {
759 0 : Int32 i, j, k, ss, sb;
760 0 : Int32 runningOrder[256];
761 0 : Bool bigDone[256];
762 0 : Int32 copyStart[256];
763 0 : Int32 copyEnd [256];
764 0 : UChar c1;
765 0 : Int32 numQSorted;
766 0 : UInt16 s;
767 0 : if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
768 :
769 : /*-- set up the 2-byte frequency table --*/
770 0 : for (i = 65536; i >= 0; i--) ftab[i] = 0;
771 :
772 0 : j = block[0] << 8;
773 0 : i = nblock-1;
774 0 : for (; i >= 3; i -= 4) {
775 0 : quadrant[i] = 0;
776 0 : j = (j >> 8) | ( ((UInt16)block[i]) << 8);
777 0 : ftab[j]++;
778 0 : quadrant[i-1] = 0;
779 0 : j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
780 0 : ftab[j]++;
781 0 : quadrant[i-2] = 0;
782 0 : j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
783 0 : ftab[j]++;
784 0 : quadrant[i-3] = 0;
785 0 : j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
786 0 : ftab[j]++;
787 0 : }
788 0 : for (; i >= 0; i--) {
789 0 : quadrant[i] = 0;
790 0 : j = (j >> 8) | ( ((UInt16)block[i]) << 8);
791 0 : ftab[j]++;
792 0 : }
793 :
794 : /*-- (emphasises close relationship of block & quadrant) --*/
795 0 : for (i = 0; i < BZ_N_OVERSHOOT; i++) {
796 0 : block [nblock+i] = block[i];
797 0 : quadrant[nblock+i] = 0;
798 0 : }
799 :
800 0 : if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
801 :
802 : /*-- Complete the initial radix sort --*/
803 0 : for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
804 :
805 0 : s = block[0] << 8;
806 0 : i = nblock-1;
807 0 : for (; i >= 3; i -= 4) {
808 0 : s = (s >> 8) | (block[i] << 8);
809 0 : j = ftab[s] -1;
810 0 : ftab[s] = j;
811 0 : ptr[j] = i;
812 0 : s = (s >> 8) | (block[i-1] << 8);
813 0 : j = ftab[s] -1;
814 0 : ftab[s] = j;
815 0 : ptr[j] = i-1;
816 0 : s = (s >> 8) | (block[i-2] << 8);
817 0 : j = ftab[s] -1;
818 0 : ftab[s] = j;
819 0 : ptr[j] = i-2;
820 0 : s = (s >> 8) | (block[i-3] << 8);
821 0 : j = ftab[s] -1;
822 0 : ftab[s] = j;
823 0 : ptr[j] = i-3;
824 0 : }
825 0 : for (; i >= 0; i--) {
826 0 : s = (s >> 8) | (block[i] << 8);
827 0 : j = ftab[s] -1;
828 0 : ftab[s] = j;
829 0 : ptr[j] = i;
830 0 : }
831 :
832 : /*--
833 : Now ftab contains the first loc of every small bucket.
834 : Calculate the running order, from smallest to largest
835 : big bucket.
836 : --*/
837 0 : for (i = 0; i <= 255; i++) {
838 0 : bigDone [i] = False;
839 0 : runningOrder[i] = i;
840 0 : }
841 :
842 0 : {
843 0 : Int32 vv;
844 0 : Int32 h = 1;
845 0 : do h = 3 * h + 1; while (h <= 256);
846 0 : do {
847 0 : h = h / 3;
848 0 : for (i = h; i <= 255; i++) {
849 0 : vv = runningOrder[i];
850 0 : j = i;
851 0 : while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
852 0 : runningOrder[j] = runningOrder[j-h];
853 0 : j = j - h;
854 0 : if (j <= (h - 1)) goto zero;
855 0 : }
856 0 : zero:
857 0 : runningOrder[j] = vv;
858 0 : }
859 0 : } while (h != 1);
860 0 : }
861 :
862 : /*--
863 : The main sorting loop.
864 : --*/
865 :
866 0 : numQSorted = 0;
867 :
868 0 : for (i = 0; i <= 255; i++) {
869 :
870 : /*--
871 : Process big buckets, starting with the least full.
872 : Basically this is a 3-step process in which we call
873 : mainQSort3 to sort the small buckets [ss, j], but
874 : also make a big effort to avoid the calls if we can.
875 : --*/
876 0 : ss = runningOrder[i];
877 :
878 : /*--
879 : Step 1:
880 : Complete the big bucket [ss] by quicksorting
881 : any unsorted small buckets [ss, j], for j != ss.
882 : Hopefully previous pointer-scanning phases have already
883 : completed many of the small buckets [ss, j], so
884 : we don't have to sort them at all.
885 : --*/
886 0 : for (j = 0; j <= 255; j++) {
887 0 : if (j != ss) {
888 0 : sb = (ss << 8) + j;
889 0 : if ( ! (ftab[sb] & SETMASK) ) {
890 0 : Int32 lo = ftab[sb] & CLEARMASK;
891 0 : Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
892 0 : if (hi > lo) {
893 0 : if (verb >= 4)
894 0 : VPrintf4 ( " qsort [0x%x, 0x%x] "
895 0 : "done %d this %d\n",
896 0 : ss, j, numQSorted, hi - lo + 1 );
897 0 : mainQSort3 (
898 0 : ptr, block, quadrant, nblock,
899 0 : lo, hi, BZ_N_RADIX, budget
900 0 : );
901 0 : numQSorted += (hi - lo + 1);
902 0 : if (*budget < 0) return;
903 0 : }
904 0 : }
905 0 : ftab[sb] |= SETMASK;
906 0 : }
907 0 : }
908 :
909 0 : AssertH ( !bigDone[ss], 1006 );
910 :
911 : /*--
912 : Step 2:
913 : Now scan this big bucket [ss] so as to synthesise the
914 : sorted order for small buckets [t, ss] for all t,
915 : including, magically, the bucket [ss,ss] too.
916 : This will avoid doing Real Work in subsequent Step 1's.
917 : --*/
918 0 : {
919 0 : for (j = 0; j <= 255; j++) {
920 0 : copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
921 0 : copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
922 0 : }
923 0 : for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
924 0 : k = ptr[j]-1; if (k < 0) k += nblock;
925 0 : c1 = block[k];
926 0 : if (!bigDone[c1])
927 0 : ptr[ copyStart[c1]++ ] = k;
928 0 : }
929 0 : for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
930 0 : k = ptr[j]-1; if (k < 0) k += nblock;
931 0 : c1 = block[k];
932 0 : if (!bigDone[c1])
933 0 : ptr[ copyEnd[c1]-- ] = k;
934 0 : }
935 0 : }
936 :
937 0 : AssertH ( (copyStart[ss]-1 == copyEnd[ss])
938 0 : ||
939 : /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
940 : Necessity for this case is demonstrated by compressing
941 : a sequence of approximately 48.5 million of character
942 : 251; 1.0.0/1.0.1 will then die here. */
943 0 : (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
944 0 : 1007 )
945 :
946 0 : for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
947 :
948 : /*--
949 : Step 3:
950 : The [ss] big bucket is now done. Record this fact,
951 : and update the quadrant descriptors. Remember to
952 : update quadrants in the overshoot area too, if
953 : necessary. The "if (i < 255)" test merely skips
954 : this updating for the last bucket processed, since
955 : updating for the last bucket is pointless.
956 :
957 : The quadrant array provides a way to incrementally
958 : cache sort orderings, as they appear, so as to
959 : make subsequent comparisons in fullGtU() complete
960 : faster. For repetitive blocks this makes a big
961 : difference (but not big enough to be able to avoid
962 : the fallback sorting mechanism, exponential radix sort).
963 :
964 : The precise meaning is: at all times:
965 :
966 : for 0 <= i < nblock and 0 <= j <= nblock
967 :
968 : if block[i] != block[j],
969 :
970 : then the relative values of quadrant[i] and
971 : quadrant[j] are meaningless.
972 :
973 : else {
974 : if quadrant[i] < quadrant[j]
975 : then the string starting at i lexicographically
976 : precedes the string starting at j
977 :
978 : else if quadrant[i] > quadrant[j]
979 : then the string starting at j lexicographically
980 : precedes the string starting at i
981 :
982 : else
983 : the relative ordering of the strings starting
984 : at i and j has not yet been determined.
985 : }
986 : --*/
987 0 : bigDone[ss] = True;
988 :
989 0 : if (i < 255) {
990 0 : Int32 bbStart = ftab[ss << 8] & CLEARMASK;
991 0 : Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
992 0 : Int32 shifts = 0;
993 :
994 0 : while ((bbSize >> shifts) > 65534) shifts++;
995 :
996 0 : for (j = bbSize-1; j >= 0; j--) {
997 0 : Int32 a2update = ptr[bbStart + j];
998 0 : UInt16 qVal = (UInt16)(j >> shifts);
999 0 : quadrant[a2update] = qVal;
1000 0 : if (a2update < BZ_N_OVERSHOOT)
1001 0 : quadrant[a2update + nblock] = qVal;
1002 0 : }
1003 0 : AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1004 0 : }
1005 :
1006 0 : }
1007 :
1008 0 : if (verb >= 4)
1009 0 : VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1010 0 : nblock, numQSorted, nblock - numQSorted );
1011 0 : }
1012 :
1013 : #undef BIGFREQ
1014 : #undef SETMASK
1015 : #undef CLEARMASK
1016 :
1017 :
1018 : /*---------------------------------------------*/
1019 : /* Pre:
1020 : nblock > 0
1021 : arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1022 : ((UChar*)arr2) [0 .. nblock-1] holds block
1023 : arr1 exists for [0 .. nblock-1]
1024 :
1025 : Post:
1026 : ((UChar*)arr2) [0 .. nblock-1] holds block
1027 : All other areas of block destroyed
1028 : ftab [ 0 .. 65536 ] destroyed
1029 : arr1 [0 .. nblock-1] holds sorted order
1030 : */
1031 : void BZ2_blockSort ( EState* s )
1032 0 : {
1033 0 : UInt32* ptr = s->ptr;
1034 0 : UChar* block = s->block;
1035 0 : UInt32* ftab = s->ftab;
1036 0 : Int32 nblock = s->nblock;
1037 0 : Int32 verb = s->verbosity;
1038 0 : Int32 wfact = s->workFactor;
1039 0 : UInt16* quadrant;
1040 0 : Int32 budget;
1041 0 : Int32 budgetInit;
1042 0 : Int32 i;
1043 :
1044 0 : if (nblock < 10000) {
1045 0 : fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1046 0 : } else {
1047 : /* Calculate the location for quadrant, remembering to get
1048 : the alignment right. Assumes that &(block[0]) is at least
1049 : 2-byte aligned -- this should be ok since block is really
1050 : the first section of arr2.
1051 : */
1052 0 : i = nblock+BZ_N_OVERSHOOT;
1053 0 : if (i & 1) i++;
1054 0 : quadrant = (UInt16*)(&(block[i]));
1055 :
1056 : /* (wfact-1) / 3 puts the default-factor-30
1057 : transition point at very roughly the same place as
1058 : with v0.1 and v0.9.0.
1059 : Not that it particularly matters any more, since the
1060 : resulting compressed stream is now the same regardless
1061 : of whether or not we use the main sort or fallback sort.
1062 : */
1063 0 : if (wfact < 1 ) wfact = 1;
1064 0 : if (wfact > 100) wfact = 100;
1065 0 : budgetInit = nblock * ((wfact-1) / 3);
1066 0 : budget = budgetInit;
1067 :
1068 0 : mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1069 0 : if (verb >= 3)
1070 0 : VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1071 0 : budgetInit - budget,
1072 0 : nblock,
1073 0 : (float)(budgetInit - budget) /
1074 0 : (float)(nblock==0 ? 1 : nblock) );
1075 0 : if (budget < 0) {
1076 0 : if (verb >= 2)
1077 0 : VPrintf0 ( " too repetitive; using fallback"
1078 0 : " sorting algorithm\n" );
1079 0 : fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1080 0 : }
1081 0 : }
1082 :
1083 0 : s->origPtr = -1;
1084 0 : for (i = 0; i < s->nblock; i++)
1085 0 : if (ptr[i] == 0)
1086 0 : { s->origPtr = i; break; };
1087 :
1088 0 : AssertH( s->origPtr != -1, 1003 );
1089 0 : }
1090 :
1091 :
1092 : /*-------------------------------------------------------------*/
1093 : /*--- end blocksort.c ---*/
1094 : /*-------------------------------------------------------------*/
|