1 # HG changeset patch |
|
2 # User Michael Gerdts <[email protected]> |
|
3 # Date 1412600364 25200 |
|
4 # Mon Oct 06 05:59:24 2014 -0700 |
|
5 # Node ID ba834c48cee26e4c43976af8477dd34863b40f18 |
|
6 # Parent 1debb63439545fd0b30153eb68e884623d06c531 |
|
7 parallel uncompress - developed by Oracle |
1 parallel uncompress - developed by Oracle |
8 Offered to upstream at https://github.com/mgerdts/pigz |
2 Offered to upstream at https://github.com/mgerdts/pigz |
9 - Branch mt-uncompress-2.2 forked from https://github.com/madler/pigz v. 2.2.6 |
3 - Branch mt-uncompress-2.2 forked from https://github.com/madler/pigz v. 2.2.6 |
10 - Branch mt-uncompress forked from https://github.com/madler/pigz branch master |
4 - Branch mt-uncompress forked from https://github.com/madler/pigz branch master |
11 |
5 |
12 diff -r 1debb6343954 -r ba834c48cee2 Makefile |
6 The following generated with: |
|
7 |
|
8 git diff -W 8041c56eca89c427aa0a67f40e92675f3584b4bd \ |
|
9 be138d877c14c5a3f58c67939bf822d83e342947 |
|
10 |
|
11 diff --git a/Makefile b/Makefile |
|
12 index 822902c..0c904a6 100644 |
13 --- a/Makefile |
13 --- a/Makefile |
14 +++ b/Makefile |
14 +++ b/Makefile |
15 @@ -44,6 +44,15 @@ |
15 @@ -44,6 +44,15 @@ test: pigz |
16 compress -f < pigz.c | ./unpigz | cmp - pigz.c ;\ |
16 compress -f < pigz.c | ./unpigz | cmp - pigz.c ;\ |
17 fi |
17 fi |
18 @rm -f pigz.c.gz pigz.c.zz pigz.c.zip |
18 @rm -f pigz.c.gz pigz.c.zz pigz.c.zip |
19 + @rm -rf d/1 d/2 |
19 + @rm -rf d/1 d/2 |
20 + (mkdir -p d/1; cd d/1; tar xzf ../../../../pigz-2.2.5.tar.gz; \ |
20 + (mkdir -p d/1; cd d/1; tar xzf ../../../../pigz-2.2.5.tar.gz; \ |
82 + and combines the individual block checksums into a per-file checksum. The |
84 + and combines the individual block checksums into a per-file checksum. The |
83 + per-file checksum is compared to the checksum in the stream's trailer. |
85 + per-file checksum is compared to the checksum in the stream's trailer. |
84 |
86 |
85 pigz requires zlib 1.2.1 or later to allow setting the dictionary when doing |
87 pigz requires zlib 1.2.1 or later to allow setting the dictionary when doing |
86 raw deflate. Since zlib 1.2.3 corrects security vulnerabilities in zlib |
88 raw deflate. Since zlib 1.2.3 corrects security vulnerabilities in zlib |
87 @@ -259,13 +273,14 @@ |
89 @@ -260,13 +274,14 @@ |
88 can't get way ahead of the write thread and build up a large backlog of |
90 can't get way ahead of the write thread and build up a large backlog of |
89 unwritten compressed data. The write thread will write the compressed data, |
91 unwritten compressed data. The write thread will write the compressed data, |
90 drop the output buffer, and then wait for the check value to be unlocked |
92 drop the output buffer, and then wait for the check value to be unlocked |
91 - by the compress thread. Then the write thread combines the check value for |
93 - by the compress thread. Then the write thread combines the check value for |
92 - this chunk with the total check value for eventual use in the trailer. If |
94 - this chunk with the total check value for eventual use in the trailer. If |
104 + write thread writes the appropriate header and trailer around the compressed |
106 + write thread writes the appropriate header and trailer around the compressed |
105 + data. |
107 + data. |
106 |
108 |
107 The input and output buffers are reused through their collection in pools. |
109 The input and output buffers are reused through their collection in pools. |
108 Each buffer has a use count, which when decremented to zero returns the |
110 Each buffer has a use count, which when decremented to zero returns the |
109 @@ -313,6 +328,9 @@ |
111 @@ -314,6 +329,9 @@ |
110 #if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3 |
112 #if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3 |
111 # include <inttypes.h> /* intmax_t */ |
113 # include <inttypes.h> /* intmax_t */ |
112 #endif |
114 #endif |
113 +#include <stddef.h> /* offsetof() */ |
115 +#include <stddef.h> /* offsetof() */ |
114 +#include <sys/mman.h> /* mmap() */ |
116 +#include <sys/mman.h> /* mmap() */ |
115 +#include <netinet/in.h> /* htonl() */ |
117 +#include <netinet/in.h> /* htonl() */ |
116 |
118 |
117 #ifdef __hpux |
119 #ifdef __hpux |
118 # include <sys/param.h> |
120 # include <sys/param.h> |
119 @@ -420,8 +438,10 @@ |
121 @@ -421,8 +439,10 @@ |
120 local char *prog; /* name by which pigz was invoked */ |
122 local char *prog; /* name by which pigz was invoked */ |
121 local int ind; /* input file descriptor */ |
123 local int ind; /* input file descriptor */ |
122 local int outd; /* output file descriptor */ |
124 local int outd; /* output file descriptor */ |
123 +local int idxd; /* index file descriptor */ |
125 +local int idxd; /* index file descriptor */ |
124 local char in[PATH_MAX+1]; /* input file name (accommodate recursion) */ |
126 local char in[PATH_MAX+1]; /* input file name (accommodate recursion) */ |
125 local char *out = NULL; /* output file name (allocated if not NULL) */ |
127 local char *out = NULL; /* output file name (allocated if not NULL) */ |
126 +local char *index = NULL; /* index file name template (may have %f, %z) */ |
128 +local char *index = NULL; /* index file name template (may have %f, %z) */ |
127 local int verbosity; /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */ |
129 local int verbosity; /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */ |
128 local int headis; /* 1 to store name, 2 to store date, 3 both */ |
130 local int headis; /* 1 to store name, 2 to store date, 3 both */ |
129 local int pipeout; /* write output to stdout even if file */ |
131 local int pipeout; /* write output to stdout even if file */ |
130 @@ -467,9 +487,12 @@ |
132 @@ -468,9 +488,12 @@ local int complain(char *fmt, ...) |
131 return 0; |
133 return 0; |
132 } |
134 } |
133 |
135 |
134 +local void idx_abort(void); |
136 +local void idx_abort(void); |
135 + |
137 + |
163 + |
165 + |
164 + |
166 + |
165 /* write a gzip, zlib, or zip header using the information in the globals */ |
167 /* write a gzip, zlib, or zip header using the information in the globals */ |
166 local unsigned long put_header(void) |
168 local unsigned long put_header(void) |
167 { |
169 { |
168 @@ -982,7 +1017,7 @@ |
170 @@ -983,7 +1018,7 @@ local void new_pool(struct pool *pool, size_t size, int limit) |
169 |
171 |
170 /* get a space from a pool -- the use count is initially set to one, so there |
172 /* get a space from a pool -- the use count is initially set to one, so there |
171 is no need to call use_space() for the first use */ |
173 is no need to call use_space() for the first use */ |
172 -local struct space *get_space(struct pool *pool) |
174 -local struct space *get_space(struct pool *pool) |
173 +local struct space *get_space_size(struct pool *pool, size_t size) |
175 +local struct space *get_space_size(struct pool *pool, size_t size) |
174 { |
176 { |
175 struct space *space; |
177 struct space *space; |
176 |
178 |
177 @@ -995,6 +1030,15 @@ |
179 @@ -996,6 +1031,15 @@ local struct space *get_space(struct pool *pool) |
178 if (pool->head != NULL) { |
180 if (pool->head != NULL) { |
179 space = pool->head; |
181 space = pool->head; |
180 possess(space->use); |
182 possess(space->use); |
181 + /* If there's not enough space, free and malloc rather than realloc to |
183 + /* If there's not enough space, free and malloc rather than realloc to |
182 + avoid the potential of an unnecessary memory copy. */ |
184 + avoid the potential of an unnecessary memory copy. */ |
188 + space->size = size; |
190 + space->size = size; |
189 + } |
191 + } |
190 pool->head = space->next; |
192 pool->head = space->next; |
191 twist(pool->have, BY, -1); /* one less in pool */ |
193 twist(pool->have, BY, -1); /* one less in pool */ |
192 twist(space->use, TO, 1); /* initially one user */ |
194 twist(space->use, TO, 1); /* initially one user */ |
193 @@ -1012,15 +1056,20 @@ |
195 @@ -1013,15 +1057,20 @@ local struct space *get_space(struct pool *pool) |
194 if (space == NULL) |
196 if (space == NULL) |
195 bail("not enough memory", ""); |
197 bail("not enough memory", ""); |
196 space->use = new_lock(1); /* initially one user */ |
198 space->use = new_lock(1); /* initially one user */ |
197 - space->buf = malloc(pool->size); |
199 - space->buf = malloc(pool->size); |
198 + space->buf = malloc(size); |
200 + space->buf = malloc(size); |
248 + this is the last chunk, which after writing tells compress_write_thread to |
250 + this is the last chunk, which after writing tells compress_write_thread to |
249 + return */ |
251 + return */ |
250 struct job { |
252 struct job { |
251 long seq; /* sequence number */ |
253 long seq; /* sequence number */ |
252 int more; /* true if this is not the last chunk */ |
254 int more; /* true if this is not the last chunk */ |
253 @@ -1166,6 +1233,7 @@ |
255 @@ -1167,6 +1234,7 @@ local void setup_jobs(void) |
254 new_pool(&out_pool, OUTPOOL(size), -1); |
256 new_pool(&out_pool, OUTPOOL(size), -1); |
255 new_pool(&dict_pool, DICT, -1); |
257 new_pool(&dict_pool, DICT, -1); |
256 new_pool(&lens_pool, size >> (RSYNCBITS - 1), -1); |
258 new_pool(&lens_pool, size >> (RSYNCBITS - 1), -1); |
257 + new_pool(&idx_pool, 1, -1); |
259 + new_pool(&idx_pool, 1, -1); |
258 } |
260 } |
259 |
261 |
260 /* command the compress threads to all return, then join them all (call from |
262 /* command the compress threads to all return, then join them all (call from |
261 @@ -1202,6 +1270,8 @@ |
263 @@ -1203,6 +1271,8 @@ local void finish_jobs(void) |
262 Trace(("-- freed %d output buffers", caught)); |
264 Trace(("-- freed %d output buffers", caught)); |
263 caught = free_pool(&in_pool); |
265 caught = free_pool(&in_pool); |
264 Trace(("-- freed %d input buffers", caught)); |
266 Trace(("-- freed %d input buffers", caught)); |
265 + caught = free_pool(&idx_pool); |
267 + caught = free_pool(&idx_pool); |
266 + Trace(("-- freed %d index buffers", caught)); |
268 + Trace(("-- freed %d index buffers", caught)); |
267 free_lock(write_first); |
269 free_lock(write_first); |
268 free_lock(compress_have); |
270 free_lock(compress_have); |
269 compress_have = NULL; |
271 compress_have = NULL; |
270 @@ -1395,18 +1465,483 @@ |
272 @@ -1396,18 +1466,483 @@ local void compress_thread(void *dummy) |
271 (void)deflateEnd(&strm); |
273 (void)deflateEnd(&strm); |
272 } |
274 } |
273 |
275 |
274 +/* Block Index |
276 +/* Block Index |
275 + |
277 + |
783 + idx_add(len, olen, job->check); |
785 + idx_add(len, olen, job->check); |
784 + |
786 + |
785 /* free the job */ |
787 /* free the job */ |
786 free_lock(job->calc); |
788 free_lock(job->calc); |
787 free(job); |
789 free(job); |
788 @@ -1517,7 +2056,7 @@ |
790 @@ -1518,7 +2057,7 @@ local void parallel_compress(void) |
789 setup_jobs(); |
791 setup_jobs(); |
790 |
792 |
791 /* start write thread */ |
793 /* start write thread */ |
792 - writeth = launch(write_thread, NULL); |
794 - writeth = launch(write_thread, NULL); |
793 + writeth = launch(compress_write_thread, NULL); |
795 + writeth = launch(compress_write_thread, NULL); |
794 |
796 |
795 /* read from input and start compress threads (write thread will pick up |
797 /* read from input and start compress threads (write thread will pick up |
796 the output of the compress threads) */ |
798 the output of the compress threads) */ |
797 @@ -1913,7 +2452,7 @@ |
799 @@ -1914,7 +2453,7 @@ local size_t load(void) |
798 #ifndef NOTHREAD |
800 #ifndef NOTHREAD |
799 /* if first time in or procs == 1, read a buffer to have something to |
801 /* if first time in or procs == 1, read a buffer to have something to |
800 return, otherwise wait for the previous read job to complete */ |
802 return, otherwise wait for the previous read job to complete */ |
801 - if (procs > 1) { |
803 - if (procs > 1) { |
802 + if (procs > 1 && index == NULL && !ind_has_index()) { |
804 + if (procs > 1 && index == NULL && !ind_has_index()) { |
803 /* if first time, fire up the read thread, ask for a read */ |
805 /* if first time, fire up the read thread, ask for a read */ |
804 if (in_which == -1) { |
806 if (in_which == -1) { |
805 in_which = 1; |
807 in_which = 1; |
806 @@ -1995,12 +2534,6 @@ |
808 @@ -1996,12 +2535,6 @@ local void in_init(void) |
807 in_next += togo; \ |
809 in_next += togo; \ |
808 } while (0) |
810 } while (0) |
809 |
811 |
810 -/* pull LSB order or MSB order integers from an unsigned char buffer */ |
812 -/* pull LSB order or MSB order integers from an unsigned char buffer */ |
811 -#define PULL2L(p) ((p)[0] + ((unsigned)((p)[1]) << 8)) |
813 -#define PULL2L(p) ((p)[0] + ((unsigned)((p)[1]) << 8)) |
888 +} |
890 +} |
889 + |
891 + |
890 /* inflate for decompression or testing -- decompress from ind to outd unless |
892 /* inflate for decompression or testing -- decompress from ind to outd unless |
891 decode != 1, in which case just test ind, and then also list if list != 0; |
893 decode != 1, in which case just test ind, and then also list if list != 0; |
892 look for and decode multiple, concatenated gzip and/or zlib streams; |
894 look for and decode multiple, concatenated gzip and/or zlib streams; |
893 @@ -2620,10 +3220,8 @@ |
895 @@ -2621,10 +3221,8 @@ local int outb(void *desc, unsigned char *buf, unsigned len) |
894 local void infchk(void) |
896 local void infchk(void) |
895 { |
897 { |
896 int ret, cont; |
898 int ret, cont; |
897 - unsigned long check, len; |
899 - unsigned long check, len; |
898 + unsigned long check; |
900 + unsigned long check; |
1230 + if (allow_overwrite(out)) |
1232 + if (allow_overwrite(out)) |
1231 + outd = open(out, O_CREAT | O_TRUNC | O_WRONLY, 0600); |
1233 + outd = open(out, O_CREAT | O_TRUNC | O_WRONLY, 0600); |
1232 } |
1234 } |
1233 |
1235 |
1234 /* if exists and no overwrite, report and go on to next */ |
1236 /* if exists and no overwrite, report and go on to next */ |
1235 @@ -3254,17 +4008,22 @@ |
1237 @@ -3255,17 +4009,21 @@ local void process(char *path) |
1236 /* process ind to outd */ |
1238 /* process ind to outd */ |
1237 if (verbosity > 1) |
1239 if (verbosity > 1) |
1238 fprintf(stderr, "%s to %s ", in, out); |
1240 fprintf(stderr, "%s to %s ", in, out); |
1239 + |
1241 + |
1240 if (decode) { |
1242 if (decode) { |
1241 - if (method == 8) |
1243 if (method == 8) |
1242 - infchk(); |
1244 - infchk(); |
1243 + if (method == 8) { |
|
1244 + best_infchk(); |
1245 + best_infchk(); |
1245 + } |
|
1246 else if (method == 256) |
1246 else if (method == 256) |
1247 unlzw(); |
1247 unlzw(); |
1248 else |
1248 else |
1249 cat(); |
1249 cat(); |
1250 } |
1250 } |
1251 #ifndef NOTHREAD |
1251 #ifndef NOTHREAD |
1252 - else if (procs > 1) |
1252 - else if (procs > 1) |
1253 + else if (procs > 1) { |
1253 + else if (index != NULL) { |
1254 + if (index != NULL && idx_open(index) != 0) |
1254 + if (idx_open(index) != 0) |
1255 + bail("invalid index file", ""); |
1255 + bail("invalid index file", ""); |
1256 parallel_compress(); |
1256 parallel_compress(); |
1257 + } |
1257 + } |
1258 #endif |
1258 #endif |
1259 else |
1259 else |
1260 single_compress(0); |
1260 single_compress(0); |
1261 @@ -3273,6 +4032,10 @@ |
1261 @@ -3274,6 +4032,10 @@ local void process(char *path) |
1262 fflush(stderr); |
1262 fflush(stderr); |
1263 } |
1263 } |
1264 |
1264 |
1265 + /* close index file - this may append the index to outd */ |
1265 + /* close index file - this may append the index to outd */ |
1266 + if (idx.valid) |
1266 + if (idx.valid) |
1267 + idx_close(); |
1267 + idx_close(); |
1268 + |
1268 + |
1269 /* finish up, copy attributes, set times, delete original */ |
1269 /* finish up, copy attributes, set times, delete original */ |
1270 if (ind != 0) |
1270 if (ind != 0) |
1271 close(ind); |
1271 close(ind); |
1272 @@ -3331,6 +4094,9 @@ |
1272 @@ -3332,6 +4094,9 @@ local char *helptext[] = { |
1273 " -v, --verbose Provide more verbose output", |
1273 " -v, --verbose Provide more verbose output", |
1274 #endif |
1274 #endif |
1275 " -V --version Show the version of pigz", |
1275 " -V --version Show the version of pigz", |
1276 +" -X --index file Create or use parallel uncompression index file.", |
1276 +" -X --index file Create or use parallel uncompression index file.", |
1277 +" %f and %z are replaced by uncompressed and compressed", |
1277 +" %f and %z are replaced by uncompressed and compressed", |
1278 +" file names", |
1278 +" file names", |
1279 " -z, --zlib Compress to zlib (.zz) instead of gzip format", |
1279 " -z, --zlib Compress to zlib (.zz) instead of gzip format", |
1280 " -- All arguments after \"--\" are treated as files" |
1280 " -- All arguments after \"--\" are treated as files" |
1281 }; |
1281 }; |
1282 @@ -3400,11 +4166,11 @@ |
1282 @@ -3401,11 +4166,11 @@ local void defaults(void) |
1283 local char *longopts[][2] = { |
1283 local char *longopts[][2] = { |
1284 {"LZW", "Z"}, {"ascii", "a"}, {"best", "9"}, {"bits", "Z"}, |
1284 {"LZW", "Z"}, {"ascii", "a"}, {"best", "9"}, {"bits", "Z"}, |
1285 {"blocksize", "b"}, {"decompress", "d"}, {"fast", "1"}, {"force", "f"}, |
1285 {"blocksize", "b"}, {"decompress", "d"}, {"fast", "1"}, {"force", "f"}, |
1286 - {"help", "h"}, {"independent", "i"}, {"keep", "k"}, {"license", "L"}, |
1286 - {"help", "h"}, {"independent", "i"}, {"keep", "k"}, {"license", "L"}, |
1287 - {"list", "l"}, {"name", "N"}, {"no-name", "n"}, {"no-time", "T"}, |
1287 - {"list", "l"}, {"name", "N"}, {"no-name", "n"}, {"no-time", "T"}, |
1294 + {"rsyncable", "R"}, {"silent", "q"}, {"stdout", "c"}, {"suffix", "S"}, |
1294 + {"rsyncable", "R"}, {"silent", "q"}, {"stdout", "c"}, {"suffix", "S"}, |
1295 + {"test", "t"}, {"to-stdout", "c"}, {"uncompress", "d"}, {"verbose", "v"}, |
1295 + {"test", "t"}, {"to-stdout", "c"}, {"uncompress", "d"}, {"verbose", "v"}, |
1296 {"version", "V"}, {"zip", "K"}, {"zlib", "z"}}; |
1296 {"version", "V"}, {"zip", "K"}, {"zlib", "z"}}; |
1297 #define NLOPTS (sizeof(longopts) / (sizeof(char *) << 1)) |
1297 #define NLOPTS (sizeof(longopts) / (sizeof(char *) << 1)) |
1298 |
1298 |
1299 @@ -3444,7 +4210,7 @@ |
1299 @@ -3445,7 +4210,7 @@ local int option(char *arg) |
1300 |
1300 |
1301 /* if no argument or dash option, check status of get */ |
1301 /* if no argument or dash option, check status of get */ |
1302 if (get && (arg == NULL || *arg == '-')) { |
1302 if (get && (arg == NULL || *arg == '-')) { |
1303 - bad[1] = "bpS"[get - 1]; |
1303 - bad[1] = "bpS"[get - 1]; |
1304 + bad[1] = "bpSX"[get - 1]; |
1304 + bad[1] = "bpSX"[get - 1]; |
1305 bail("missing parameter after ", bad); |
1305 bail("missing parameter after ", bad); |
1306 } |
1306 } |
1307 if (arg == NULL) |
1307 if (arg == NULL) |
1308 @@ -3503,6 +4269,7 @@ |
1308 @@ -3504,6 +4269,7 @@ local int option(char *arg) |
1309 case 'R': rsync = 1; break; |
1309 case 'R': rsync = 1; break; |
1310 case 'S': get = 3; break; |
1310 case 'S': get = 3; break; |
1311 case 'V': fputs(VERSION, stderr); exit(0); |
1311 case 'V': fputs(VERSION, stderr); exit(0); |
1312 + case 'X': setdict = 0; get = 4; break; |
1312 + case 'X': setdict = 0; get = 4; break; |
1313 case 'Z': |
1313 case 'Z': |
1314 bail("invalid option: LZW output not supported: ", bad); |
1314 bail("invalid option: LZW output not supported: ", bad); |
1315 case 'a': |
1315 case 'a': |
1316 @@ -3530,7 +4297,7 @@ |
1316 @@ -3531,7 +4297,7 @@ local int option(char *arg) |
1317 return 0; |
1317 return 0; |
1318 } |
1318 } |
1319 |
1319 |
1320 - /* process option parameter for -b, -p, or -S */ |
1320 - /* process option parameter for -b, -p, or -S */ |
1321 + /* process option parameter for -b, -p, -S, or -X */ |
1321 + /* process option parameter for -b, -p, -S, or -X */ |
1322 if (get) { |
1322 if (get) { |
1323 size_t n; |
1323 size_t n; |
1324 |
1324 |
1325 @@ -3543,7 +4310,7 @@ |
1325 @@ -3544,7 +4310,7 @@ local int option(char *arg) |
1326 OUTPOOL(size) < size || |
1326 OUTPOOL(size) < size || |
1327 (ssize_t)OUTPOOL(size) < 0 || |
1327 (ssize_t)OUTPOOL(size) < 0 || |
1328 size > (1UL << 22)) |
1328 size > (1UL << 22)) |
1329 - bail("block size too large: ", arg); |
1329 - bail("block size too large: ", arg); |
1330 + bail("block size too large:", arg); |
1330 + bail("block size too large:", arg); |
1331 new_opts(); |
1331 new_opts(); |
1332 } |
1332 } |
1333 else if (get == 2) { |
1333 else if (get == 2) { |
1334 @@ -3561,6 +4328,9 @@ |
1334 @@ -3562,6 +4328,9 @@ local int option(char *arg) |
1335 } |
1335 } |
1336 else if (get == 3) |
1336 else if (get == 3) |
1337 sufx = arg; /* gz suffix */ |
1337 sufx = arg; /* gz suffix */ |
1338 + else if (get == 4) |
1338 + else if (get == 4) |
1339 + index = arg; /* index file */ |
1339 + index = arg; /* index file */ |