Skip to content

Commit

Permalink
Add support for multithreaded XZ compression.
Browse files Browse the repository at this point in the history
  • Loading branch information
dennis95 committed Mar 1, 2022
1 parent 7e2120e commit c6fce6c
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 5 deletions.
4 changes: 3 additions & 1 deletion algorithm.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (c) 2020 Dennis Wölfing
/* Copyright (c) 2020, 2022 Dennis Wölfing
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
Expand Down Expand Up @@ -65,6 +65,8 @@ extern const struct algorithm algoDeflate;
extern const struct algorithm algoLzw;
extern const struct algorithm algoXz;

/* Upper bound on the number of compression threads, set by the -T option.
 * -1 means the option was not given (the XZ encoder then picks a count based
 * on CPU count and available memory); 0 means one thread per CPU as reported
 * by liblzma. Defined in main.c. */
extern int maxThreads;

int openOutputFile(const char* outputName, struct outputinfo* oinfo);
ssize_t writeAll(int fd, const void* buffer, size_t size);

Expand Down
24 changes: 23 additions & 1 deletion compress.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.Dd October 30, 2020
.Dd March 01, 2022
.Dt COMPRESS 1
.Os Dennix
.Sh NAME
Expand All @@ -13,6 +13,7 @@
.Op Fl m Ar algo
.Op Fl o Ar filename
.Op Fl S Ar suffix
.Op Fl T Ar threads
.Op Ar files...
.Nm uncompress
.Op Fl cfklnNqrtv
Expand Down Expand Up @@ -281,6 +282,23 @@ The algorithm for decompression is then determined by the file contents instead
of by the suffix.
.It Fl t , -test
Test the integrity of compressed files without changing any files.
.It Fl T Ar threads , Fl -threads Ns = Ns Ar threads
Use up to
.Ar threads
threads for compression.
Multithreading is currently only supported for XZ compression.
When
.Ar threads
is 0,
.Nm
will use one thread per CPU core.
.Pp
If the
.Fl T
option is not used,
.Nm
will use a number of threads determined by the number of CPUs and the amount of
available memory.
.It Fl v , -verbose
For each file print the size reduction or expansion of the file.
Undo the effects of any previously specified
Expand Down Expand Up @@ -348,3 +366,7 @@ implementation that implemented all new requirements from POSIX.1-202x draft 1.
It was also the first
.Nm
implementation that supported XZ compression.
.Pp
Support for multithreaded XZ compression and the
.Fl T
option were added in dxcompress 1.1.
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ AC_ARG_WITH([liblzma], [AS_HELP_STRING([--without-liblzma],
AS_IF([test "$with_liblzma" != no],
[DX_PKG_CONFIG_LIB([liblzma],
[AC_DEFINE([WITH_LIBLZMA], [1], [Define to 1 if building with liblzma.])
AC_CHECK_FUNCS([lzma_stream_encoder_mt])
])])

AC_ARG_WITH([zlib], [AS_HELP_STRING([--without-zlib],
Expand Down
14 changes: 13 additions & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ static bool force = false;
static const char* givenOutputName = NULL;
static bool keep = false;
static int level = -1;
// Thread limit from -T/--threads; -1 means the option was not given.
// Not static: declared extern in algorithm.h so the algorithms can read it.
int maxThreads = -1;
static int mode = MODE_COMPRESS;
static bool restoreName = false;
static bool saveName = true;
Expand Down Expand Up @@ -106,6 +107,7 @@ int main(int argc, char* argv[]) {
{ "stdout", no_argument, 0, 'c' },
{ "suffix", required_argument, 0, 'S' },
{ "test", no_argument, 0, 't' },
{ "threads", required_argument, 0, 'T' },
{ "to-stdout", no_argument, 0, 'c' },
{ "uncompress", no_argument, 0, 'd' },
{ "verbose", no_argument, 0, 'v' },
Expand All @@ -116,7 +118,7 @@ int main(int argc, char* argv[]) {
const char* algorithmName = NULL;

int c;
const char* opts = "0123456789ab:cdfghklm:nNo:OqrS:tvV";
const char* opts = "0123456789ab:cdfghklm:nNo:OqrS:tT:vV";
while ((c = getopt_long(argc, argv, opts, longopts, NULL)) != -1) {
switch (c) {
case 1: // undocumented --argv0 option for internal use only
Expand Down Expand Up @@ -169,6 +171,7 @@ int main(int argc, char* argv[]) {
" -r, --recursive recursively (de)compress files in directories\n"
" -S, --suffix=SUFFIX use SUFFIX as suffix for compressed files\n"
" -t, --test check file integrity\n"
" -T, --threads=THREADS use up to the given number of threads\n"
" -v, --verbose print filenames and compression ratios\n"
" -V, --version display version info\n",
argv[0]);
Expand Down Expand Up @@ -210,6 +213,15 @@ argv[0]);
case 't':
mode = MODE_TEST;
break;
case 'T': {
char* end;
unsigned long value = strtoul(optarg, &end, 10);
if (value > INT_MAX || *end) {
printWarning("invalid number of threads: '%s'", optarg);
return 1;
}
maxThreads = value;
} break;
case 'v':
quiet = false;
verbose = true;
Expand Down
42 changes: 40 additions & 2 deletions xz.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (c) 2020 Dennis Wölfing
/* Copyright (c) 2020, 2022 Dennis Wölfing
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
Expand Down Expand Up @@ -49,10 +49,48 @@ static bool xzProbe(const unsigned char* buffer, size_t bufferSize) {
return bufferSize >= 6 && memcmp(buffer, XZMAGIC, 6) == 0;
}

#if WITH_LIBLZMA
/* Initializes stream as an XZ encoder for the given preset level, preferring
 * the multithreaded encoder when liblzma provides one.
 *
 * The thread count is derived from the global maxThreads:
 *   > 0   use up to that many threads (capped at LZMA_THREADS_MAX),
 *   -1    (no -T given) start from lzma_cputhreads() and lower the count
 *         until the estimated encoder memory usage is at most one third of
 *         the physical memory,
 *   else  use the number of threads reported by lzma_cputhreads().
 *
 * Falls back to the single-threaded lzma_easy_encoder() when at most one
 * thread would be used or when the multithreaded encoder cannot be set up.
 *
 * Returns the liblzma status of the encoder that was finally initialized.
 */
static lzma_ret createEncoder(lzma_stream* stream, int level) {
#if HAVE_LZMA_STREAM_ENCODER_MT
    lzma_mt mt = {0};
    mt.preset = level;
    mt.check = LZMA_CHECK_CRC64;

    if (maxThreads > 0) {
        mt.threads = maxThreads;
    } else {
        // May be 0 if the CPU count is unknown; we then end up using the
        // single-threaded encoder below.
        mt.threads = lzma_cputhreads();
    }

    // liblzma rejects thread counts above LZMA_THREADS_MAX with
    // LZMA_OPTIONS_ERROR, which would silently drop a large -T value down to
    // a single thread. Clamp to the maximum the library supports instead.
    if (mt.threads > LZMA_THREADS_MAX) {
        mt.threads = LZMA_THREADS_MAX;
    }

    if (maxThreads == -1) {
        // When the -T option was not given we still want to use multiple
        // threads but we should limit the number of threads to avoid high
        // memory usage. We try to limit our memory usage to one third of the
        // available memory.
        // NOTE(review): lzma_physmem() is documented to return 0 when the
        // amount of RAM is unknown; the loop then reduces to one thread.
        uint64_t memoryAvailable = lzma_physmem();
        uint64_t memoryUsage = lzma_stream_encoder_mt_memusage(&mt);

        while (memoryUsage > memoryAvailable / 3 && mt.threads > 1) {
            mt.threads--;
            memoryUsage = lzma_stream_encoder_mt_memusage(&mt);
        }
    }

    if (mt.threads > 1) {
        if (lzma_stream_encoder_mt(stream, &mt) == LZMA_OK) {
            return LZMA_OK;
        }
        // Any failure here (e.g. out of memory for the worker threads) is
        // not fatal: retry with the cheaper single-threaded encoder.
    }
#endif

    return lzma_easy_encoder(stream, level, LZMA_CHECK_CRC64);
}
#endif

static int xzCompress(int input, int output, int level, struct fileinfo* info) {
#if WITH_LIBLZMA
lzma_stream stream = LZMA_STREAM_INIT;
lzma_ret status = lzma_easy_encoder(&stream, level, LZMA_CHECK_CRC64);
lzma_ret status = createEncoder(&stream, level);
if (status == LZMA_MEM_ERROR) return RESULT_OUT_OF_MEMORY;
if (status != LZMA_OK) return RESULT_UNKNOWN_ERROR;

Expand Down

0 comments on commit c6fce6c

Please sign in to comment.