From ce2300b1163cbcab13b478902ee8e2f7a515ceca Mon Sep 17 00:00:00 2001
From: Martin Michelsen
Date: Sun, 24 Mar 2024 21:59:28 -0700
Subject: [PATCH] add pessimal compression

Add prs_compress_pessimal(), which emits each input byte as either a literal
or an extended copy, and select it with the new --pessimal option. Also run
the --optimal and --pessimal modes through the compress/decompress/diff
round trip in tests/compression.sh.

---
Reviewer notes (text here is not part of the commit message):
* This patch was reconstructed from a copy whose line breaks, indentation and
  <...> spans had been stripped. The reinterpret_cast target type and the
  args.get<T> template arguments are inferred from the declared variable
  types, and two-space indentation is assumed -- confirm against the tree
  before applying.
* The author's e-mail address was lost with the other <...> spans.
* The extended-copy offset is now shifted as uint16_t; left-shifting a
  negative signed value is undefined before C++20. The emitted bytes are the
  same.
* NOTE(review): the visible part of the test script's clean-up step is not
  touched by this patch; check that the new .lo/.lp outputs get removed.

 src/Compression.cc   | 39 +++++++++++++++++++++++++++++++++++++++
 src/Compression.hh   |  4 ++++
 src/Main.cc          |  3 +++
 tests/compression.sh | 12 ++++++++++++
 4 files changed, 58 insertions(+)

diff --git a/src/Compression.cc b/src/Compression.cc
index 75262373..6d88ea6f 100644
--- a/src/Compression.cc
+++ b/src/Compression.cc
@@ -438,6 +438,45 @@ string prs_compress_optimal(const string& data, ProgressCallback progress_fn) {
   return prs_compress_optimal(data.data(), data.size(), progress_fn);
 }
 
+string prs_compress_pessimal(const void* vdata, size_t size) {
+  const uint8_t* in_data = reinterpret_cast<const uint8_t*>(vdata);
+
+  // The worst possible encoding we can do is a literal byte when no byte with
+  // the same value is within the window, or an extended copy if there is a byte
+  // with the same value in the window.
+  WindowIndex<0x1FFF, 1> window(in_data, size);
+  LZSSInterleavedWriter w;
+  for (size_t z = 0; z < size; z++) {
+    auto match = window.get_best_match();
+    if (match.second >= 1) {
+      // Write extended copy: control bits 0, 1; (offset << 3) as two bytes, low byte first; then a zero byte
+      int16_t offset = match.first - window.offset;
+      w.write_control(false);
+      w.flush_if_ready();
+      w.write_control(true);
+      uint16_t a = static_cast<uint16_t>(offset) << 3; // shift as unsigned; offset may be negative
+      w.write_data(a & 0xFF);
+      w.write_data(a >> 8);
+      w.write_data(0);
+    } else {
+      // Write literal
+      w.write_control(true);
+      w.write_data(in_data[z]);
+    }
+    w.flush_if_ready();
+    window.advance();
+  }
+
+  // Write stop command: control bits 0, 1 followed by two zero bytes
+  w.write_control(false);
+  w.flush_if_ready();
+  w.write_control(true);
+  w.write_data(0);
+  w.write_data(0);
+
+  return std::move(w.close());
+}
+
 PRSCompressor::PRSCompressor(
     ssize_t compression_level, ProgressCallback progress_fn)
     : compression_level(compression_level),
diff --git a/src/Compression.hh b/src/Compression.hh
index 684691a5..efbd46a9 100644
--- a/src/Compression.hh
+++ b/src/Compression.hh
@@ -177,6 +177,10 @@ std::string prs_compress_indexed(
 std::string prs_compress_optimal(const void* vdata, size_t size, ProgressCallback progress_fn = nullptr);
 std::string prs_compress_optimal(const std::string& data, ProgressCallback progress_fn = nullptr);
 
+// Compresses data using PRS to the LARGEST possible output size. There is no
+// practical use for this function except for amusement.
+std::string prs_compress_pessimal(const void* vdata, size_t size);
+
 // Decompresses PRS-compressed data.
 struct PRSDecompressResult {
   std::string data;
diff --git a/src/Main.cc b/src/Main.cc
index 0ff3803d..1bf0b2b3 100644
--- a/src/Main.cc
+++ b/src/Main.cc
@@ -266,6 +266,7 @@ static void a_compress_decompress_fn(Arguments& args) {
   bool is_decompress = starts_with(action, "decompress-");
   bool is_big_endian = args.get<bool>("big-endian");
   bool is_optimal = args.get<bool>("optimal");
+  bool is_pessimal = args.get<bool>("pessimal");
   int8_t compression_level = args.get<int8_t>("compression-level", 0);
   size_t bytes = args.get<size_t>("bytes", 0);
   string seed = args.get<string>("seed");
@@ -298,6 +299,8 @@ static void a_compress_decompress_fn(Arguments& args) {
   if (!is_decompress && (is_prs || is_pr2 || is_prc)) {
     if (is_optimal) {
       data = prs_compress_optimal(data.data(), data.size(), optimal_progress_fn);
+    } else if (is_pessimal) {
+      data = prs_compress_pessimal(data.data(), data.size());
     } else {
       data = prs_compress(data, compression_level, progress_fn);
     }
diff --git a/tests/compression.sh b/tests/compression.sh
index ebe8e228..1ef63939 100755
--- a/tests/compression.sh
+++ b/tests/compression.sh
@@ -20,6 +20,10 @@ echo "... compress with level=0"
 $EXECUTABLE compress-$SCHEME --compression-level=0 $BASENAME.mnrd $BASENAME.mnrd.$SCHEME.l0
 echo "... compress with level=1"
 $EXECUTABLE compress-$SCHEME --compression-level=1 $BASENAME.mnrd $BASENAME.mnrd.$SCHEME.l1
+echo "... compress optimally"
+$EXECUTABLE compress-$SCHEME --optimal $BASENAME.mnrd $BASENAME.mnrd.$SCHEME.lo
+echo "... compress pessimally"
+$EXECUTABLE compress-$SCHEME --pessimal $BASENAME.mnrd $BASENAME.mnrd.$SCHEME.lp
 
 echo "... decompress from level=-1 (no compression)"
 $EXECUTABLE decompress-$SCHEME $BASENAME.mnrd.$SCHEME.lN $BASENAME.mnrd.$SCHEME.lN.dec
@@ -27,6 +31,10 @@ echo "... decompress from level=0"
 $EXECUTABLE decompress-$SCHEME $BASENAME.mnrd.$SCHEME.l0 $BASENAME.mnrd.$SCHEME.l0.dec
 echo "... decompress from level=1"
 $EXECUTABLE decompress-$SCHEME $BASENAME.mnrd.$SCHEME.l1 $BASENAME.mnrd.$SCHEME.l1.dec
+echo "... decompress from optimal"
+$EXECUTABLE decompress-$SCHEME $BASENAME.mnrd.$SCHEME.lo $BASENAME.mnrd.$SCHEME.lo.dec
+echo "... decompress from pessimal"
+$EXECUTABLE decompress-$SCHEME $BASENAME.mnrd.$SCHEME.lp $BASENAME.mnrd.$SCHEME.lp.dec
 
 echo "... check result from level=-1 (no compression)"
 diff $BASENAME.mnrd $BASENAME.mnrd.$SCHEME.lN.dec
@@ -34,6 +42,10 @@ echo "... check result from level=0"
 diff $BASENAME.mnrd $BASENAME.mnrd.$SCHEME.l0.dec
 echo "... check result from level=1"
 diff $BASENAME.mnrd $BASENAME.mnrd.$SCHEME.l1.dec
+echo "... check result from optimal"
+diff $BASENAME.mnrd $BASENAME.mnrd.$SCHEME.lo.dec
+echo "... check result from pessimal"
+diff $BASENAME.mnrd $BASENAME.mnrd.$SCHEME.lp.dec
 
 echo "... clean up"
 rm $BASENAME.mnrd \