fix inefficiency in prs_compress_indexed

This commit is contained in:
Martin Michelsen
2023-08-26 21:00:16 -07:00
parent d93866146a
commit 897ff4c9ff
+111 -78
View File
@@ -29,7 +29,7 @@ const char* name_for_enum<CompressPhase>(CompressPhase v) {
}
}
template <size_t WindowLength, size_t MaxMatchLength, bool UseLatestBestMatch = false>
template <size_t WindowLength, size_t MaxMatchLength>
struct WindowIndex {
const uint8_t* data;
size_t size;
@@ -95,29 +95,22 @@ struct WindowIndex {
// default.
size_t match_offset = 0;
size_t match_size = 0;
auto start_it = this->index.upper_bound(this->offset);
for (auto it = start_it; it != this->index.end(); it++) {
auto it = this->index.upper_bound(this->offset);
if (it != this->index.end()) {
size_t new_match_offset = *it;
size_t new_match_size = this->get_match_length(new_match_offset);
if ((new_match_size > match_size) || (new_match_size == match_size && new_match_offset > match_offset)) {
match_offset = new_match_offset;
match_size = new_match_size;
} else if (!UseLatestBestMatch || (new_match_size < match_size)) {
// In PRS, using the latest of a set of equivalent matches may be
// advantageous because it may be possible to encode it with fewer bits.
// All backreferences are the same length in BC0, so this doesn't apply.
break;
}
}
for (auto it = start_it; it != this->index.begin();) {
if (it != this->index.begin()) {
it--;
size_t new_match_offset = *it;
size_t new_match_size = this->get_match_length(new_match_offset);
if ((new_match_size > match_size) || (new_match_size == match_size && new_match_offset > match_offset)) {
match_offset = new_match_offset;
match_size = new_match_size;
} else if (!UseLatestBestMatch || (new_match_size < match_size)) {
break;
}
}
return make_pair(match_offset, match_size);
@@ -668,82 +661,122 @@ string prs_compress_indexed(
const uint8_t* in_data = reinterpret_cast<const uint8_t*>(in_data_v);
LZSSInterleavedWriter w;
WindowIndex<0x1FFF, 0x100, true> window(in_data_v, in_size);
WindowIndex<0x100, 5> w_short(in_data_v, in_size);
WindowIndex<0x1FFF, 9> w_long(in_data_v, in_size);
WindowIndex<0x1FFF, 0x100> w_extended(in_data_v, in_size);
size_t last_progress_fn_call_offset = 0;
while (window.offset < in_size) {
if (progress_fn && ((last_progress_fn_call_offset & ~0xFFF) != (window.offset & ~0xFFF))) {
last_progress_fn_call_offset = window.offset;
progress_fn(CompressPhase::GENERATE_RESULT, window.offset, in_size, w.size());
while (w_short.offset < in_size) {
if (progress_fn && ((last_progress_fn_call_offset & ~0xFFF) != (w_short.offset & ~0xFFF))) {
last_progress_fn_call_offset = w_short.offset;
progress_fn(CompressPhase::GENERATE_RESULT, w_short.offset, in_size, w.size());
}
auto match = window.get_best_match();
// Look ahead by 1 literal to see if there's a significantly better match.
window.advance();
auto advanced_match = window.get_best_match();
if (advanced_match.second > match.second + 1) {
match.second = 1;
}
// If there is a suitable match, write a backreference; otherwise, write a
// literal. The backreference should be encoded:
// - As a short copy if offset in [-0x100, -1] and size in [2, 5]
// - As a long copy if offset in [-0x1FFF, -1] and size in [3, 9]
// - As an extended copy if offset in [-0x1FFF, -1] and size in [10, 0x100]
// Technically an extended copy can be used for sizes 1-9 as well, but if
// size is 1 or 2, writing literals is better (since it uses fewer data
// bytes and control bits), and a long copy can cover sizes 3-9 (and also
// uses fewer data bytes and control bits).
ssize_t backreference_offset = match.first - (window.offset - 1);
if (match.second < 2) {
// The match is too small; a literal would use fewer bits
w.write_control(true);
w.write_data(in_data[window.offset - 1]);
match.second = 1;
} else if ((backreference_offset >= -0x100) && (match.second <= 5)) {
uint8_t encoded_size = match.second - 2;
w.write_control(false);
w.flush_if_ready();
w.write_control(false);
w.flush_if_ready();
w.write_control(encoded_size & 2);
w.flush_if_ready();
w.write_control(encoded_size & 1);
w.write_data(backreference_offset & 0xFF);
} else if (match.second < 3) {
// We can't use a long copy for size 2, and it's not worth it to use an
// extended copy for this either (as noted above), so write a literal
w.write_control(true);
w.write_data(in_data[window.offset - 1]);
match.second = 1;
} else if ((backreference_offset >= -0x1FFF) && (match.second <= 9)) {
w.write_control(false);
w.flush_if_ready();
w.write_control(true);
uint16_t a = (backreference_offset << 3) | (match.second - 2);
w.write_data(a & 0xFF);
w.write_data(a >> 8);
} else if ((backreference_offset >= -0x1FFF) && (match.second <= 0x100)) {
w.write_control(false);
w.flush_if_ready();
w.write_control(true);
uint16_t a = (backreference_offset << 3);
w.write_data(a & 0xFF);
w.write_data(a >> 8);
w.write_data(match.second - 1);
auto m_short = w_short.get_best_match();
auto m_long = w_long.get_best_match();
auto m_extended = w_extended.get_best_match();
// Write the command that covers the most input bytes per compressed bit
// emitted. To do this without floating-point math, we multiply the number of
// input bytes each command type would cover by 468 / (command_bits), since
// 468 is the least common multiple of the number of bits for each command
// type (literal = 9, short copy = 12, long copy = 18, extended copy = 26,
// counting control bits plus data bytes). The command type with the highest
// score is the one we'll use, breaking ties by choosing the shorter command
// type. Note that the size of any copy type can be zero if no match was
// found; if no matches were found at all, then we can always write a literal.
size_t score_literal = 52;
size_t score_short = m_short.second * 39;
size_t score_long = m_long.second * 26;
size_t score_extended = m_extended.second * 18;
PRSPathNode::CommandType command_type = PRSPathNode::CommandType::NONE;
if (score_literal < score_short) {
if (score_short < score_long) {
if (score_long < score_extended) {
command_type = PRSPathNode::CommandType::EXTENDED_COPY;
} else {
command_type = PRSPathNode::CommandType::LONG_COPY;
}
} else {
if (score_short < score_extended) {
command_type = PRSPathNode::CommandType::EXTENDED_COPY;
} else {
command_type = PRSPathNode::CommandType::SHORT_COPY;
}
}
} else {
throw logic_error("invalid best match");
if (score_literal < score_long) {
if (score_long < score_extended) {
command_type = PRSPathNode::CommandType::EXTENDED_COPY;
} else {
command_type = PRSPathNode::CommandType::LONG_COPY;
}
} else {
if (score_literal < score_extended) {
command_type = PRSPathNode::CommandType::EXTENDED_COPY;
} else {
command_type = PRSPathNode::CommandType::LITERAL;
}
}
}
size_t bytes_consumed = 0;
switch (command_type) {
case PRSPathNode::CommandType::LITERAL:
w.write_control(true);
w.write_data(in_data[w_short.offset]);
bytes_consumed = 1;
break;
case PRSPathNode::CommandType::SHORT_COPY: {
ssize_t backreference_offset = m_short.first - w_short.offset;
uint8_t encoded_size = m_short.second - 2;
w.write_control(false);
w.flush_if_ready();
w.write_control(false);
w.flush_if_ready();
w.write_control(encoded_size & 2);
w.flush_if_ready();
w.write_control(encoded_size & 1);
w.write_data(backreference_offset & 0xFF);
bytes_consumed = m_short.second;
break;
}
case PRSPathNode::CommandType::LONG_COPY: {
ssize_t backreference_offset = m_long.first - w_long.offset;
w.write_control(false);
w.flush_if_ready();
w.write_control(true);
uint16_t a = (backreference_offset << 3) | (m_long.second - 2);
w.write_data(a & 0xFF);
w.write_data(a >> 8);
bytes_consumed = m_long.second;
break;
}
case PRSPathNode::CommandType::EXTENDED_COPY: {
ssize_t backreference_offset = m_extended.first - w_extended.offset;
w.write_control(false);
w.flush_if_ready();
w.write_control(true);
uint16_t a = (backreference_offset << 3);
w.write_data(a & 0xFF);
w.write_data(a >> 8);
w.write_data(m_extended.second - 1);
bytes_consumed = m_extended.second;
break;
}
case PRSPathNode::CommandType::NONE:
default:
throw logic_error("invalid command type");
}
w.flush_if_ready();
for (size_t z = 1; z < match.second; z++) {
window.advance();
if (bytes_consumed == 0) {
throw logic_error("no input data was consumed");
}
for (size_t z = 0; z < bytes_consumed; z++) {
w_short.advance();
w_long.advance();
w_extended.advance();
}
}