#include "Compression.hh"

// NOTE(review): the names of the seven angle-bracket headers below were lost
// from this file (only bare `#include` tokens remained). They have been
// reconstructed from the identifiers this file uses - confirm them against the
// project before merging.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>

#include <phosg/Encoding.hh>
#include <phosg/Filesystem.hh>
#include <phosg/Strings.hh>

#include "Text.hh"

using namespace std;

// Creates a compressor. compression_level bounds how many leading literals
// advance() will consider before each backreference; progress_fn (which may be
// empty) is called periodically with (input bytes consumed, output size).
PRSCompressor::PRSCompressor(
    size_t compression_level,
    function<void(size_t, size_t)> progress_fn)
    : compression_level(compression_level),
      progress_fn(progress_fn),
      closed(false),
      control_byte_offset(0),
      pending_control_bits(0),
      input_bytes(0) {
  // Reserve space for the first control byte (at control_byte_offset == 0); it
  // is filled in later by write_control or flush_control
  this->output.put_u8(0);
}

// Appends size bytes of uncompressed input. Throws logic_error if close() has
// already been called.
void PRSCompressor::add(const void* data, size_t size) {
  if (this->closed) {
    throw logic_error("compressor is closed");
  }

  StringReader r(data, size);
  while (!r.eof()) {
    this->add_byte(r.get_u8());
  }
}

// Convenience overload of add() for string input.
void PRSCompressor::add(const string& data) {
  this->add(data.data(), data.size());
}

// Appends a single input byte to the forward log. If the forward log has no
// room left for it, one command is emitted first (via advance()) to consume
// some of the pending input.
void PRSCompressor::add_byte(uint8_t v) {
  if (this->reverse_log.end_offset() + this->forward_log.data.size() <= this->input_bytes) {
    this->advance();
  }
  this->forward_log.at(this->input_bytes) = v;
  this->input_bytes++;
}

// Emits the best command(s) for the data at the current compression offset:
// zero or more literals, followed by either a backreference or one more
// literal. Throws logic_error if the chosen match cannot be encoded.
void PRSCompressor::advance() {
  // Search for a match in the decompressed data history
  size_t best_match_size = 0;
  size_t best_match_offset = 0;
  size_t best_match_literals = 0;
  for (size_t num_literals = 0; num_literals < this->compression_level; num_literals++) {
    // Temporarily pretend that num_literals bytes have already been written as
    // literals; this is undone at the end of this iteration
    for (size_t z = 0; z < num_literals; z++) {
      this->reverse_log.push_back(this->forward_log.at(this->reverse_log.end_offset()));
    }

    size_t compression_offset = reverse_log.end_offset();
    uint8_t first_v = this->forward_log.at(compression_offset);
    const auto& start_offsets = this->reverse_log.find(first_v);
    for (auto it = start_offsets.begin();
         (it != start_offsets.end()) && (best_match_size < 0x100);
         it++) {
      size_t match_offset = *it;
      // Skip candidates that are too far back to be encoded in 13 bits
      if (match_offset + 0x2000 <= compression_offset) {
        continue;
      }
      size_t match_size = 0;
      // A match may run past the current end of the history; in that case the
      // referenced bytes repeat with this period
      size_t match_loop_bytes = compression_offset - match_offset;
      while ((match_size < 0x100) &&
          (compression_offset + match_size < this->input_bytes) &&
          (this->reverse_log.at(match_offset + (match_size % match_loop_bytes)) ==
              this->forward_log.at(compression_offset + match_size))) {
        match_size++;
      }

      // If there are multiple matches of the longest length, use the latest one,
      // since it's more likely that it can be expressed as a short copy instead
      // of a long copy.
      if (match_size >= (best_match_size + best_match_literals)) {
        best_match_offset = match_offset;
        best_match_size = match_size;
        best_match_literals = num_literals;
      }
    }

    for (size_t z = 0; z < num_literals; z++) {
      this->reverse_log.pop_back();
    }
  }

  // If the best match has literals preceding it, write those literals
  for (size_t z = 0; z < best_match_literals; z++) {
    this->advance_literal();
  }

  // If there is a suitable match, write a backreference; otherwise, write a
  // literal. The backreference should be encoded:
  // - As a short copy if offset in [-0x100, -1] and size in [2, 5]
  // - As a long copy if offset in [-0x1FFF, -1] and size in [3, 9]
  // - As an extended copy if offset in [-0x1FFF, -1] and size in [10, 0x100]
  // Technically an extended copy can be used for sizes 1-9 as well, but if
  // size is 1 or 2, writing literals is better (since it uses fewer data
  // bytes and control bits), and a long copy can cover sizes 3-9 (and also
  // uses fewer data bytes and control bits).
  ssize_t backreference_offset = best_match_offset - this->reverse_log.end_offset();
  if (best_match_size < 2) {
    // The match is too small; a literal would use fewer bits
    this->advance_literal();

  } else if ((backreference_offset >= -0x100) && (best_match_size <= 5)) {
    this->advance_short_copy(backreference_offset, best_match_size);

  } else if (best_match_size < 3) {
    // We can't use a long copy for size 2, and it's not worth it to use an
    // extended copy for this either (as noted above), so write a literal
    this->advance_literal();

  } else if ((backreference_offset >= -0x1FFF) && (best_match_size <= 9)) {
    this->advance_long_copy(backreference_offset, best_match_size);

  } else if ((backreference_offset >= -0x1FFF) && (best_match_size <= 0x100)) {
    this->advance_extended_copy(backreference_offset, best_match_size);

  } else {
    throw logic_error("invalid best match");
  }
}

// Moves size bytes from the forward log (pending input) to the reverse log
// (history), calling progress_fn each time the history offset reaches a
// multiple of 0x1000.
void PRSCompressor::move_forward_data_to_reverse_log(size_t size) {
  for (; size > 0; size--) {
    this->reverse_log.push_back(this->forward_log.at(this->reverse_log.end_offset()));
    if (this->progress_fn && ((this->reverse_log.end_offset() & 0xFFF) == 0)) {
      this->progress_fn(this->reverse_log.end_offset(), this->output.size());
    }
  }
}

// Writes a literal command (control bit 1, then the byte itself) and consumes
// one input byte.
void PRSCompressor::advance_literal() {
  this->write_control(true);
  this->output.put_u8(this->forward_log.at(this->reverse_log.end_offset()));
  this->move_forward_data_to_reverse_log(1);
}

// Writes a short copy (control bits 00, two size bits, one offset byte) and
// consumes size input bytes. The caller ensures offset is in [-0x100, -1] and
// size is in [2, 5].
void PRSCompressor::advance_short_copy(ssize_t offset, size_t size) {
  uint8_t encoded_size = size - 2;
  this->write_control(false);
  this->write_control(false);
  this->write_control(encoded_size & 2);
  this->write_control(encoded_size & 1);
  this->output.put_u8(offset & 0xFF);
  this->move_forward_data_to_reverse_log(size);
}

// Writes a long copy (control bits 01, then a little-endian 16-bit value
// holding the offset in the top 13 bits and size - 2 in the low 3 bits) and
// consumes size input bytes. The caller ensures offset is in [-0x1FFF, -1] and
// size is in [3, 9].
void PRSCompressor::advance_long_copy(ssize_t offset, size_t size) {
  this->write_control(false);
  this->write_control(true);
  // offset is negative, and left-shifting a negative signed value is undefined
  // before C++20, so shift its unsigned representation instead. The low 16
  // bits of the result are the same.
  uint16_t a = static_cast<uint16_t>((static_cast<size_t>(offset) << 3) | (size - 2));
  this->output.put_u8(a & 0xFF);
  this->output.put_u8(a >> 8);
  this->move_forward_data_to_reverse_log(size);
}

// Writes an extended copy (a long copy whose size bits are all zero, followed
// by a byte containing size - 1) and consumes size input bytes. The caller
// ensures offset is in [-0x1FFF, -1] and size is at most 0x100.
void PRSCompressor::advance_extended_copy(ssize_t offset, size_t size) {
  this->write_control(false);
  this->write_control(true);
  // See advance_long_copy for why the shift is done on an unsigned value
  uint16_t a = static_cast<uint16_t>(static_cast<size_t>(offset) << 3);
  this->output.put_u8(a & 0xFF);
  this->output.put_u8(a >> 8);
  this->output.put_u8(size - 1);
  this->move_forward_data_to_reverse_log(size);
}

// Compresses any remaining input, writes the stop command, and returns a
// reference to the compressed output. Calling close() again returns the same
// output without writing anything further.
string& PRSCompressor::close() {
  if (!this->closed) {
    // Advance until all input is consumed
    while (this->reverse_log.end_offset() < this->input_bytes) {
      this->advance();
    }
    // Write stop command
    this->write_control(false);
    this->write_control(true);
    this->output.put_u8(0);
    this->output.put_u8(0);
    // Write remaining control bits
    this->flush_control();
    this->closed = true;
  }
  return this->output.str();
}

// Appends one bit to the control stream. The low byte of pending_control_bits
// holds the bits collected so far (the newest bit enters at bit 7 and older
// bits shift toward bit 0); the high byte gains one marker bit per collected
// bit, so 0x0100 becomes set exactly when 8 bits have been collected.
void PRSCompressor::write_control(bool z) {
  if (this->pending_control_bits & 0x0100) {
    // The current control byte is full: store it in its reserved position,
    // then reserve a new control byte at the end of the output
    this->output.pput_u8(
        this->control_byte_offset, this->pending_control_bits & 0xFF);
    this->control_byte_offset = this->output.size();
    this->output.put_u8(0);
    this->pending_control_bits = z ? 0x8080 : 0x8000;
  } else {
    this->pending_control_bits = (this->pending_control_bits >> 1) | (z ? 0x8080 : 0x8000);
  }
}

// Writes out the final (possibly partial) control byte. If no control bits are
// pending, the reserved control byte is removed from the output instead;
// throws logic_error if that byte is not the last byte of the output.
void PRSCompressor::flush_control() {
  if (this->pending_control_bits & 0xFF00) {
    // Shift the collected bits down so that the first bit written is bit 0
    while (!(this->pending_control_bits & 0x0100)) {
      this->pending_control_bits >>= 1;
    }
    this->output.pput_u8(
        this->control_byte_offset, this->pending_control_bits & 0xFF);
  } else {
    if (this->control_byte_offset != this->output.size() - 1) {
      throw logic_error("data written without control bits");
    }
    this->output.str().resize(this->output.str().size() - 1);
  }
}

// Compresses size bytes at vdata in one call and returns the PRS-compressed
// data.
string prs_compress(
    const void* vdata,
    size_t size,
    size_t compression_level,
    function<void(size_t, size_t)> progress_fn) {
  PRSCompressor prs(compression_level, progress_fn);
  prs.add(vdata, size);
  // close() returns a reference into prs, so an explicit move is needed to
  // avoid copying the output
  return std::move(prs.close());
}

// Convenience overload of prs_compress() for string input.
string prs_compress(
    const string& data,
    size_t compression_level,
    function<void(size_t, size_t)> progress_fn) {
  return prs_compress(data.data(), data.size(), compression_level, progress_fn);
}

// Reads the PRS control stream one bit at a time, pulling a new control byte
// from the underlying reader whenever the previous one is exhausted.
class ControlStreamReader {
public:
  ControlStreamReader(StringReader& r)
      : r(r),
        bits(0x0000) {}

  // Returns the next control bit. The low byte of bits holds the unread
  // control bits (next bit in bit 0); the high byte holds marker bits that
  // shift down with each read, so 0x0100 is clear when no bits remain.
  bool read() {
    if (!(this->bits & 0x0100)) {
      this->bits = 0xFF00 | this->r.get_u8();
    }
    bool ret = this->bits & 1;
    this->bits >>= 1;
    return ret;
  }

  // Returns the number of control bits that have been read from the input but
  // not yet returned by read().
  uint8_t buffered_bits() const {
    uint16_t z = this->bits;
    uint8_t ret = 0;
    for (; z & 0x0100; z >>= 1, ret++) {
    }
    return ret;
  }

private:
  StringReader& r;
  uint16_t bits;
};

// Decompresses PRS-compressed data. If max_output_size is nonzero, throws
// runtime_error when the output would exceed that many bytes. Also throws
// runtime_error if a backreference points before the beginning of the output.
string prs_decompress(const void* data, size_t size, size_t max_output_size) {
  // PRS is an LZ77-based compression algorithm. Compressed data is split into
  // two streams: a control stream and a data stream. The control stream is read
  // one bit at a time, and the data stream is read one byte at a time. The
  // streams are interleaved such that the decompressor never has to move
  // backward in the input stream - when the decompressor needs a control bit
  // and there are no unused bits from the previous byte of the control stream,
  // it reads a byte from the input and treats it as the next 8 control bits.

  // There are 3 distinct commands in PRS, labeled here with their control bits:
  // 1 - Literal byte. The decompressor copies one byte from the input data
  //     stream to the output.
  // 00 - Short backreference. The decompressor reads two control bits and adds
  //     2 to this value to determine the number of bytes to copy, then reads
  //     one byte from the data stream to determine how far back in the output
  //     to copy from. This byte is treated as an 8-bit negative number - so
  //     0xF7, for example, means to start copying data from 9 bytes before the
  //     end of the output. The range must start before the end of the output,
  //     but the end of the range may be beyond the end of the output. In this
  //     case, the bytes between the beginning of the range and original end of
  //     the output are simply repeated.
  // 01 - Long backreference. The decompressor reads two bytes from the data and
  //     byteswaps the resulting 16-bit value (that is, the low byte is read
  //     first). The start offset (again, as a negative number) is the top 13
  //     bits of this value; the size is the low 3 bits of this value, plus 2.
  //     If the size bits are all zero, an additional byte is read from the
  //     data stream and 1 is added to it to determine the backreference size
  //     (we call this an extended backreference). Therefore, the maximum
  //     backreference size is 256 bytes.
  // Decompression ends when either there are no more input bytes to read, or
  // when a long backreference is read with all zeroes in its offset field. The
  // original implementation stops decompression successfully when any attempt
  // to read from the input encounters the end of the stream, but newserv's
  // implementation only allows this at the end of an opcode - if end-of-stream
  // is encountered partway through an opcode, we throw instead, because it's
  // likely the input has been truncated or is malformed in some way.

  StringWriter w;
  StringReader r(data, size);
  ControlStreamReader cr(r);

  while (!r.eof()) {
    // Control 1 = literal byte
    if (cr.read()) {
      if (max_output_size && w.size() == max_output_size) {
        throw runtime_error("maximum output size exceeded");
      }
      w.put_u8(r.get_u8());

    } else {
      ssize_t offset;
      size_t count;

      // Control 01 = long backreference
      if (cr.read()) {
        // The bits stored in the data stream are AAAAABBBCCCCCCCC, which we
        // rearrange into offset = CCCCCCCCAAAAA and size = BBB.
        uint16_t a = r.get_u8();
        a |= (r.get_u8() << 8);
        offset = (a >> 3) | (~0x1FFF);
        // If offset is zero, it's a stop opcode
        if (offset == ~0x1FFF) {
          break;
        }
        // If the size field is zero, it's an extended backreference (size comes
        // from another byte in the data stream)
        count = (a & 7) ? ((a & 7) + 2) : (r.get_u8() + 1);

        // Control 00 = short backreference
      } else {
        // Count comes from 2 bits in the control stream instead of from the
        // data stream (and 2 is added). Importantly, the control stream bits
        // are read first - this may involve reading another control stream
        // byte, which happens before the offset is read from the data stream.
        count = cr.read() << 1;
        count = (count | cr.read()) + 2;
        offset = r.get_u8() | (~0xFF);
      }

      // Copy bytes from the referenced location in the output. Importantly,
      // copy only one byte at a time, in order to support ranges that cover the
      // current end of the output.
      // offset is negative, so if it reaches before the beginning of the
      // output, this unsigned sum wraps around and the check below fails.
      size_t read_offset = w.size() + offset;
      if (read_offset >= w.size()) {
        throw runtime_error("backreference offset beyond beginning of output");
      }
      for (size_t z = 0; z < count; z++) {
        if (max_output_size && w.size() == max_output_size) {
          throw runtime_error("maximum output size exceeded");
        }
        w.put_u8(w.str()[read_offset + z]);
      }
    }
  }

  // w.str() is a reference to the writer's buffer, so move it out explicitly
  return std::move(w.str());
}

// Convenience overload of prs_decompress() for string input.
string prs_decompress(const string& data, size_t max_output_size) {
  return prs_decompress(data.data(), data.size(), max_output_size);
}

// Returns the number of bytes that prs_decompress() would produce for this
// input, without building the output. Throws runtime_error under the same
// conditions as prs_decompress().
size_t prs_decompress_size(const void* data, size_t size, size_t max_output_size) {
  size_t ret = 0;
  StringReader r(data, size);
  ControlStreamReader cr(r);

  while (!r.eof()) {
    if (cr.read()) {
      // Literal: one output byte; the data byte itself is skipped
      ret++;
      r.get_u8();

    } else {
      ssize_t offset;
      size_t count;

      if (cr.read()) {
        // Long or extended backreference, or stop opcode
        uint16_t a = r.get_u8();
        a |= (r.get_u8() << 8);
        offset = (a >> 3) | (~0x1FFF);
        if (offset == ~0x1FFF) {
          break;
        }
        count = (a & 7) ? ((a & 7) + 2) : (r.get_u8() + 1);

      } else {
        // Short backreference
        count = cr.read() << 1;
        count = (count | cr.read()) + 2;
        offset = r.get_u8() | (~0xFF);
      }

      size_t read_offset = ret + offset;
      if (read_offset >= ret) {
        throw runtime_error("backreference offset beyond beginning of output");
      }
      ret += count;
    }

    if (max_output_size && ret > max_output_size) {
      throw runtime_error("maximum output size exceeded");
    }
  }

  return ret;
}

// Convenience overload of prs_decompress_size() for string input.
size_t prs_decompress_size(const string& data, size_t max_output_size) {
  return prs_decompress_size(data.data(), data.size(), max_output_size);
}

// Writes a human-readable listing of the commands in PRS-compressed data to
// stream. Each line starts with the input byte offset, the input bit offset,
// and the output offset at which the command begins. Throws runtime_error
// (after printing the offending command) if a backreference points before the
// beginning of the output.
void prs_disassemble(FILE* stream, const void* data, size_t size) {
  size_t output_bytes = 0;
  StringReader r(data, size);
  ControlStreamReader cr(r);

  while (!r.eof()) {
    size_t r_offset = r.where();
    uint8_t buffered_bits = cr.buffered_bits();
    size_t input_bits = 8 * r_offset + (buffered_bits ? (8 - buffered_bits) : 0);

    if (cr.read()) {
      fprintf(stream, "[%zX / %zX => %zX] literal %02hhX\n",
          r_offset, input_bits, output_bytes, r.get_u8());
      output_bytes++;

    } else {
      ssize_t offset;
      size_t count;
      bool is_long_copy = cr.read();

      if (is_long_copy) {
        uint16_t a = r.get_u8();
        a |= (r.get_u8() << 8);
        offset = (a >> 3) | (~0x1FFF);
        if (offset == ~0x1FFF) {
          fprintf(stream, "[%zX / %zX => %zX] end\n",
              r_offset, input_bits, output_bytes);
          break;
        }
        count = (a & 7) ? ((a & 7) + 2) : (r.get_u8() + 1);

      } else {
        count = cr.read() << 1;
        count = (count | cr.read()) + 2;
        offset = r.get_u8() | (~0xFF);
      }

      size_t read_offset = output_bytes + offset;
      fprintf(stream, "[%zX / %zX => %zX] %s copy -%zX (from %zX) %zX\n",
          r_offset, input_bits, output_bytes, is_long_copy ? "long" : "short",
          -offset, read_offset, count);
      if (read_offset >= output_bytes) {
        throw runtime_error("backreference offset beyond beginning of output");
      }
      output_bytes += count;
    }
  }
}

// Convenience overload of prs_disassemble() for string input.
void prs_disassemble(FILE* stream, const std::string& data) {
  return prs_disassemble(stream, data.data(), data.size());
}

// BC0 is a compression algorithm fairly similar to PRS, but with a simpler set
// of commands. Like PRS, there is a control stream, indicating when to copy a
// literal byte from the input and when to copy from a backreference; unlike
// PRS, there is only one type of backreference. Also, there is no stop opcode;
// the decompressor simply stops when there are no more input bytes to read.

// TODO: bc0_compress produces slightly larger output than Sega's compressor.
// Reverse-engineer their implementation and fix this.
template struct LZSSInterleavedWriter { StringWriter w; parray buf; size_t buf_offset; uint8_t next_control_bit; LZSSInterleavedWriter() : buf(0), buf_offset(1), next_control_bit(1) {} void flush_if_ready() { if (this->next_control_bit == 0) { this->w.write(this->buf.data(), this->buf_offset); this->buf[0] = 0; this->buf_offset = 1; this->next_control_bit = 1; } } std::string&& close() { if (this->buf_offset > 1 || this->next_control_bit != 1) { this->w.write(this->buf.data(), this->buf_offset); } return std::move(this->w.str()); } void write_control(bool v) { if (this->next_control_bit == 0) { throw logic_error("write_control called with no space to write"); } if (v) { this->buf[0] |= this->next_control_bit; } this->next_control_bit <<= 1; } void write_data(uint8_t v) { this->buf[this->buf_offset++] = v; } size_t size() const { return this->w.size() + this->buf_offset; } }; string bc0_compress(const string& data, function progress_fn) { return bc0_compress(data.data(), data.size(), progress_fn); } string bc0_compress(const void* in_data_v, size_t in_size, function progress_fn) { const uint8_t* in_data = reinterpret_cast(in_data_v); LZSSInterleavedWriter<2> w; size_t read_offset = 0; // The data structure we want is a binaary-searchable set of all strings // starting at all possible offsets within the sliding window, and we need // to be able to search lexicographically but insert and delete by offset. // A std::map would accomplish this, but would be // horrendously inefficient: we'd have to copy strings far too much. We can // solve this by instead storing the offset of each string as keys in a set // and using a custom comparator to treat them as references to binary // strings within the data. 
auto set_comparator = [&](size_t a, size_t b) -> bool { size_t max_length = min(0x12, in_size - max(a, b)); size_t end_a = a + max_length; for (; a < end_a; a++, b++) { uint8_t data_a = static_cast(in_data[a]); uint8_t data_b = static_cast(in_data[b]); if (data_a < data_b) { return true; // a comes before b lexicographically } else if (data_a > data_b) { return false; // a comes after b lexicographically } } return a < b; // Maximum-length match; order them by offset }; multiset> window_index(set_comparator); auto get_match_length = [&](size_t a, size_t b) -> size_t { size_t ret = 0; while ((ret < 0x12) && (a + ret < in_size) && (b + ret < in_size) && (in_data[a + ret] == in_data[b + ret])) { ret++; } return ret; }; size_t last_progress_fn_call_offset = 0; while (read_offset < in_size) { if (progress_fn && ((last_progress_fn_call_offset & ~0xFFF) != (read_offset & ~0xFFF))) { last_progress_fn_call_offset = read_offset; progress_fn(read_offset, w.size()); } // Find the best match from the index. It's unlikely that we'll get an // exact match, so check the entry before the lower_bound result too. 
size_t match_offset = SIZE_T_MAX; size_t match_size = 0; // string hex_search_data = format_data_string(data.substr(read_offset, 0x12)); // fprintf(stderr, "[%zX] match SEARCH %s\n", read_offset, hex_search_data.c_str()); auto match_it = window_index.lower_bound(read_offset); if (match_it != window_index.end()) { match_offset = *match_it; match_size = get_match_length(read_offset, match_offset); // fprintf(stderr, "[%zX] match AFTER %zX %zX\n", read_offset, match_offset, match_size); } if (match_it != window_index.begin()) { match_it--; size_t before_match_offset = *match_it; size_t before_match_size = get_match_length(read_offset, before_match_offset); // fprintf(stderr, "[%zX] match BEFORE %zX %zX\n", read_offset, before_match_offset, before_match_size); if (before_match_size > match_size) { match_offset = before_match_offset; match_size = before_match_size; } } // fprintf(stderr, "[%zX] match OVERALL %zX %zX\n", read_offset, match_offset, match_size); if (match_size < 3) { match_size = 1; } // Write a backreference if a match was found; otherwise, write a literal if (match_size >= 3) { w.write_control(false); size_t memo_offset = match_offset - 0x12; w.write_data(memo_offset & 0xFF); w.write_data(((memo_offset >> 4) & 0xF0) | (match_size - 3)); // fprintf(stderr, "[%zX] backreference %03zX %zX\n", read_offset, memo_offset, match_size); } else { w.write_control(true); w.write_data(in_data[read_offset]); // fprintf(stderr, "[%zX] literal %02hhX\n", read_offset, data[read_offset]); } w.flush_if_ready(); // Update the index and advance read_offset for (size_t z = 0; z < match_size; z++, read_offset++) { if (read_offset >= 0x1000) { window_index.erase(read_offset - 0x1000); } window_index.emplace(read_offset); // fprintf(stderr, "[%zX] Index state updated (%zX):\n", read_offset, window_index.size()); // for (size_t it : window_index) { // string index_data = data.substr(it, 0x12); // string hex_data = format_data_string(index_data); // fprintf(stderr, "[%zX] %05zX => 
%s\n", read_offset, it, hex_data.c_str()); // } } } return std::move(w.close()); } // The BC0 decompression implementation in PSO GC is vulnerable to overflow // attacks - there is no bounds checking on the output buffer. It is unlikely // that this can be usefully exploited (e.g. for RCE) because the output pointer // is loaded from memory before every byte is written, so we cannot change the // output pointer to any arbitrary address. string bc0_decompress(const string& data) { return bc0_decompress(data.data(), data.size()); } string bc0_decompress(const void* data, size_t size) { StringReader r(data, size); StringWriter w; // Unlike PRS, BC0 uses a memo which "rolls over" every 0x1000 bytes. The // boundaries of these "memo pages" are offset by -0x12 bytes for some reason, // so the first output byte corresponds to position 0xFEE on the first memo // page. Backreferences refer to offsets based on the start of memo pages; for // example, if the current output offset is 0x1234, a backreference with // offset 0x123 refers to the byte that was written at offset 0x1112 (because // that byte is at offset 0x112 in the memo, because the memo rolls over every // 0x1000 bytes and the first memo byte was 0x12 bytes before the beginning of // the next page). The memo is initially zeroed from 0 to 0xFEE; it seems PSO // GC doesn't initialize the last 0x12 bytes of the first memo page. For this // reason, we avoid generating backreferences that refer to those bytes. parray memo; uint16_t memo_offset = 0x0FEE; // The low byte of this value contains the control stream data; the high bits // specify which low bits are valid. When the last 1 is shifted out of the // high byte, we need to read a new control stream byte to get the next set of // control bits. 
uint16_t control_stream_bits = 0x0000; while (!r.eof()) { // Read control stream bits if needed control_stream_bits >>= 1; if ((control_stream_bits & 0x100) == 0) { control_stream_bits = 0xFF00 | r.get_u8(); if (r.eof()) { break; } } // Control bit 0 means to perform a backreference copy. The offset and // size are stored in two bytes in the input stream, laid out as follows: // a1 = 0bBBBBBBBB // a2 = 0bAAAACCCC // The offset is the concatenation of bits AAAABBBBBBBB, which refers to a // position in the memo; the number of bytes to copy is (CCCC + 3). The // decompressor copies that many bytes from that offset in the memo, and // writes them to the output and to the current position in the memo. if ((control_stream_bits & 1) == 0) { uint8_t a1 = r.get_u8(); if (r.eof()) { break; } uint8_t a2 = r.get_u8(); size_t count = (a2 & 0x0F) + 3; size_t backreference_offset = a1 | ((a2 << 4) & 0xF00); for (size_t z = 0; z < count; z++) { uint8_t v = memo[(backreference_offset + z) & 0x0FFF]; w.put_u8(v); memo[memo_offset] = v; memo_offset = (memo_offset + 1) & 0x0FFF; } // Control bit 1 means to write a byte directly from the input to the // output. As above, the byte is also written to the memo. 
} else { uint8_t v = r.get_u8(); w.put_u8(v); memo[memo_offset] = v; memo_offset = (memo_offset + 1) & 0x0FFF; } } return std::move(w.str()); } void bc0_disassemble(FILE* stream, const string& data) { bc0_disassemble(stream, data.data(), data.size()); } void bc0_disassemble(FILE* stream, const void* data, size_t size) { StringReader r(data, size); uint16_t control_stream_bits = 0x0000; size_t output_bytes = 0; while (!r.eof()) { // size_t opcode_offset = r.where(); control_stream_bits >>= 1; if ((control_stream_bits & 0x100) == 0) { control_stream_bits = 0xFF00 | r.get_u8(); if (r.eof()) { break; } } if ((control_stream_bits & 1) == 0) { uint8_t a1 = r.get_u8(); if (r.eof()) { break; } (void)a1; uint8_t a2 = r.get_u8(); size_t count = (a2 & 0x0F) + 3; // size_t backreference_offset = a1 | ((a2 << 4) & 0xF00); fprintf(stream, "[%zX] backreference %02zX\n", output_bytes, count); output_bytes += count; } else { fprintf(stream, "[%zX] literal %02hhX\n", output_bytes, r.get_u8()); output_bytes++; } } }