diff --git a/src/Compression.cc b/src/Compression.cc index 80243481..ace24499 100644 --- a/src/Compression.cc +++ b/src/Compression.cc @@ -375,14 +375,44 @@ size_t prs_decompress_size(const string& data, size_t max_output_size) { +// BC0 is a compression algorithm fairly similar to PRS, but with a simpler set +// of commands. Like PRS, there is a control stream, indicating when to copy a +// literal byte from the input and when to copy from a backreference; unlike +// PRS, there is only one type of backreference. Also, there is no stop opcode; +// the decompressor simply stops when there are no more input bytes to read. + +// The BC0 decompression implementation in PSO GC is vulnerable to overflow +// attacks - there is no bounds checking on the output buffer. It is unlikely +// that this can be usefully exploited (e.g. for RCE) because the output pointer +// is checked before every byte is written, so we cannot change the output +// pointer to any arbitrary address. + string bc0_decompress(const string& data) { StringReader r(data); StringWriter w; + // Unlike PRS, BC0 uses a memo which "rolls over" every 0x1000 bytes. The + // boundaries of these "memo pages" are offset by -0x12 bytes for some reason, + // so the first output byte corresponds to position 0xFEE on the first memo + // page. Backreferences refer to offsets based on the start of memo pages; for + // example, if the current output offset is 0x1234, a backreference with + // offset 0x123 refers to the byte that was written at offset 0x1112 (because + // that byte is at offset 0x112 in the memo, because the memo rolls over every + // 0x1000 bytes and the first memo byte was 0x12 bytes before the beginning of + // the next page). The memo is initially zeroed from 0 to 0xFEE; it seems PSO + // GC doesn't initialize the last 0x12 bytes of the first memo page. (Here, we + // implicitly initialize them with zeroes.) 
parray memo; uint16_t memo_offset = 0x0FEE; + + // The low byte of this value contains the control stream data; the high byte + // (loaded as 0xFF) tracks how many low bits are still valid. Once bit 8 + // (0x100) is clear after the shift below, every control bit has been used and + // we need to read a new control stream byte to get the next set of control bits. uint16_t control_stream_bits = 0x0000; + while (!r.eof()) { + // Read control stream bits if needed control_stream_bits >>= 1; if ((control_stream_bits & 0x100) == 0) { control_stream_bits = 0xFF00 | r.get_u8(); @@ -390,19 +420,32 @@ string bc0_decompress(const string& data) { break; } } + + // Control bit 0 means to perform a backreference copy. The offset and + // length are stored in two bytes in the input stream, laid out as follows: + // a1 = 0bBBBBBBBB + // a2 = 0bAAAACCCC + // The offset is the concatenation of bits AAAABBBBBBBB, which refers to a + // position in the memo; the number of bytes to copy is (C + 3). The + // decompressor copies that many bytes from that offset in the memo, and + // writes them to the output and to the current position in the memo. if ((control_stream_bits & 1) == 0) { uint8_t a1 = r.get_u8(); if (r.eof()) { break; } uint8_t a2 = r.get_u8(); - size_t count = (a2 & 0x0F) + 2; - for (size_t z = 0; z <= count; z++) { - uint8_t v = memo[((a1 | ((a2 << 4) & 0xF00)) + z) & 0x0FFF]; + size_t count = (a2 & 0x0F) + 3; + size_t backreference_offset = a1 | ((a2 << 4) & 0xF00); + for (size_t z = 0; z < count; z++) { + uint8_t v = memo[(backreference_offset + z) & 0x0FFF]; w.put_u8(v); memo[memo_offset] = v; memo_offset = (memo_offset + 1) & 0x0FFF; } + + // Control bit 1 means to write a byte directly from the input to the + // output. As above, the byte is also written to the memo. 
} else { uint8_t v = r.get_u8(); w.put_u8(v); @@ -413,3 +456,91 @@ string bc0_decompress(const string& data) { return move(w.str()); } + + + +string bc0_compress(const string& data) { + StringReader r(data); + StringWriter w; + + parray memo; + uint16_t memo_offset = 0x0FEE; + + size_t next_control_byte_offset = w.size(); + w.put_u8(0); + uint16_t pending_control_bits = 0x0000; + + parray match_buf; + while (!r.eof()) { + // Search in the memo for the longest string matching the upcoming data, of + // length 3-17 bytes (NOTE(review): the decoder accepts up to 18, since its count is (a2 & 0x0F) + 3; confirm match_buf can hold 18 bytes before raising this) + size_t best_match_offset = 0; + size_t best_match_length = 0; + size_t max_match_length = min(r.remaining(), 17); + r.readx(match_buf.data(), max_match_length, false); + for (size_t match_length = 3; match_length <= max_match_length; match_length++) { + + // Forbid matches that span the current memo position, or that cover the + // uninitialized part of the memo when the client decompresses + size_t start_offset = (r.where() < 0x12) ? 0 : memo_offset; + size_t end_offset = (memo_offset - match_length + 1) & 0xFFF; + + for (size_t offset = start_offset; offset != end_offset; offset = (offset + 1) & 0xFFF) { + bool match_found = true; + for (size_t z = 0; z < match_length; z++) { + if (match_buf[z] != memo[(offset + z) & 0xFFF]) { + match_found = false; + break; + } + } + // If a match was found at this length, don't bother looking for other + // matches of the same length - one will suffice + if (match_found) { + best_match_length = match_length; + best_match_offset = offset; + break; + } + } + // If no matches were found at the current length, don't bother looking + // for longer matches + if (best_match_length < match_length) { + break; + } + } + + // Write a backreference (control bit 0) if a match was found; otherwise write a literal (control bit 1). The 0x8000 bit shifted in each time marks how many control bits are pending + if (best_match_length >= 3) { + pending_control_bits = (pending_control_bits >> 1) | 0x8000; + w.put_u8(best_match_offset & 0xFF); + w.put_u8(((best_match_offset >> 4) & 0xF0) | (best_match_length - 3)); + for (size_t z = 0; z < 
best_match_length; z++) { + memo[memo_offset] = r.get_u8(); + memo_offset = (memo_offset + 1) & 0xFFF; + } + } else { + pending_control_bits = (pending_control_bits >> 1) | 0x8080; + uint8_t v = r.get_u8(); + w.put_u8(v); + memo[memo_offset] = v; + memo_offset = (memo_offset + 1) & 0xFFF; + } + + // Once eight control bits are pending (the first marker bit reaches 0x0100), write them at the reserved control byte offset and reserve a placeholder for the next control byte + if (pending_control_bits & 0x0100) { + w.pput_u8(next_control_byte_offset, pending_control_bits & 0xFF); + next_control_byte_offset = w.size(); + w.put_u8(0); + pending_control_bits = 0x0000; + } + } + + // Write the final, partially-filled control byte if needed: shift the pending bits down until the first marker bit reaches 0x0100 so they occupy the low bits + if (pending_control_bits & 0xFF00) { + while (!(pending_control_bits & 0x0100)) { + pending_control_bits >>= 1; + } + w.pput_u8(next_control_byte_offset, pending_control_bits & 0xFF); + } + + return move(w.str()); +} diff --git a/src/Compression.hh b/src/Compression.hh index 200cf014..1435d6d2 100644 --- a/src/Compression.hh +++ b/src/Compression.hh @@ -14,3 +14,4 @@ std::string prs_decompress(const std::string& data, size_t max_output_size = 0); size_t prs_decompress_size(const std::string& data, size_t max_output_size = 0); std::string bc0_decompress(const std::string& data); +std::string bc0_compress(const std::string& data); diff --git a/src/Main.cc b/src/Main.cc index 46d5ad5d..4a277bfa 100644 --- a/src/Main.cc +++ b/src/Main.cc @@ -231,11 +231,10 @@ similarly, if output-filename is missing or is '-', newserv writes to stdout.\n\ The options are:\n\ --compress-prs\n\ --decompress-prs\n\ - Compress or decompress data using the PRS algorithm. Both input-filename\n\ - and output-filename may be specified.\n\ + --compress-bc0 [input-filename [output-filename]]\n\ --decompress-bc0 [input-filename [output-filename]]\n\ - Decompress data using the BC0 algorithm. Both input-filename and\n\ - output-filename may be specified.\n\ + Compress or decompress data using the PRS or BC0 algorithms. 
Both\n\ + input-filename and output-filename may be specified.\n\ --encrypt-data\n\ --decrypt-data\n\ Encrypt or decrypt data using PSO's standard network protocol encryption.\n\ @@ -312,6 +311,7 @@ enum class Behavior { RUN_SERVER = 0, COMPRESS_PRS, DECOMPRESS_PRS, + COMPRESS_BC0, DECOMPRESS_BC0, ENCRYPT_DATA, DECRYPT_DATA, @@ -327,6 +327,7 @@ enum class Behavior { static bool behavior_takes_input_filename(Behavior b) { return (b == Behavior::COMPRESS_PRS) || (b == Behavior::DECOMPRESS_PRS) || + (b == Behavior::COMPRESS_BC0) || (b == Behavior::DECOMPRESS_BC0) || (b == Behavior::ENCRYPT_DATA) || (b == Behavior::DECRYPT_DATA) || @@ -339,6 +340,7 @@ static bool behavior_takes_input_filename(Behavior b) { static bool behavior_takes_output_filename(Behavior b) { return (b == Behavior::COMPRESS_PRS) || (b == Behavior::DECOMPRESS_PRS) || + (b == Behavior::COMPRESS_BC0) || (b == Behavior::DECOMPRESS_BC0) || (b == Behavior::ENCRYPT_DATA) || (b == Behavior::DECRYPT_DATA) || @@ -378,6 +380,8 @@ int main(int argc, char** argv) { behavior = Behavior::COMPRESS_PRS; } else if (!strcmp(argv[x], "--decompress-prs")) { behavior = Behavior::DECOMPRESS_PRS; + } else if (!strcmp(argv[x], "--compress-bc0")) { + behavior = Behavior::COMPRESS_BC0; } else if (!strcmp(argv[x], "--decompress-bc0")) { behavior = Behavior::DECOMPRESS_BC0; } else if (!strcmp(argv[x], "--encrypt-data")) { @@ -479,6 +483,7 @@ int main(int argc, char** argv) { switch (behavior) { case Behavior::COMPRESS_PRS: case Behavior::DECOMPRESS_PRS: + case Behavior::COMPRESS_BC0: case Behavior::DECOMPRESS_BC0: { string data = read_input_data(); if (parse_data) { @@ -490,6 +495,8 @@ int main(int argc, char** argv) { data = prs_compress(data); } else if (behavior == Behavior::DECOMPRESS_PRS) { data = prs_decompress(data); + } else if (behavior == Behavior::COMPRESS_BC0) { + data = bc0_compress(data); } else if (behavior == Behavior::DECOMPRESS_BC0) { data = bc0_decompress(data); } else {