write bc0 compressor

This commit is contained in:
Martin Michelsen
2022-10-10 12:20:31 -07:00
parent 0033cb2eda
commit 4f63d3672e
3 changed files with 146 additions and 7 deletions
+134 -3
View File
@@ -375,14 +375,44 @@ size_t prs_decompress_size(const string& data, size_t max_output_size) {
// BC0 is a compression algorithm fairly similar to PRS, but with a simpler set
// of commands. Like PRS, there is a control stream, indicating when to copy a
// literal byte from the input and when to copy from a backreference; unlike
// PRS, there is only one type of backreference. Also, there is no stop opcode;
// the decompressor simply stops when there are no more input bytes to read.
// The BC0 decompression implementation in PSO GC is vulnerable to overflow
// attacks - there is no bounds checking on the output buffer. It is unlikely
// that this can be usefully exploited (e.g. for RCE) because the output pointer
// is checked before every byte is written, so we cannot change the output
// pointer to any arbitrary address.
string bc0_decompress(const string& data) {
StringReader r(data);
StringWriter w;
// Unlike PRS, BC0 uses a memo which "rolls over" every 0x1000 bytes. The
// boundaries of these "memo pages" are offset by -0x12 bytes for some reason,
// so the first output byte corresponds to position 0xFEE on the first memo
// page. Backreferences refer to offsets based on the start of memo pages; for
// example, if the current output offset is 0x1234, a backreference with
// offset 0x123 refers to the byte that was written at output offset 0x1135
// (because the memo rolls over every 0x1000 bytes and the first output byte
// was written 0x12 bytes before the beginning of the next memo page, so memo
// offset 0x123 on the current page holds output offset 0x1000 + 0x12 + 0x123).
// The memo is initially zeroed from 0 to 0xFEE; it seems PSO
// GC doesn't initialize the last 0x12 bytes of the first memo page. (Here, we
// implicitly initialize them with zeroes.)
parray<uint8_t, 0x1000> memo;
uint16_t memo_offset = 0x0FEE;
// The low byte of this value contains the control stream data; the high bits
// specify which low bits are valid. When the last 1 bit is shifted out of the
// high byte, we need to read a new control stream byte to get the next set of
// control bits.
uint16_t control_stream_bits = 0x0000;
while (!r.eof()) {
// Read control stream bits if needed
control_stream_bits >>= 1;
if ((control_stream_bits & 0x100) == 0) {
control_stream_bits = 0xFF00 | r.get_u8();
@@ -390,19 +420,32 @@ string bc0_decompress(const string& data) {
break;
}
}
// Control bit 0 means to perform a backreference copy. The offset and
// length are stored in two bytes in the input stream, laid out as follows:
// a1 = 0bBBBBBBBB
// a2 = 0bAAAACCCC
// The offset is the concatenation of bits AAAABBBBBBBB, which refers to a
// position in the memo; the number of bytes to copy is (C + 3). The
// decompressor copies that many bytes from that offset in the memo, and
// writes them to the output and to the current position in the memo.
if ((control_stream_bits & 1) == 0) {
uint8_t a1 = r.get_u8();
if (r.eof()) {
break;
}
uint8_t a2 = r.get_u8();
size_t count = (a2 & 0x0F) + 2;
for (size_t z = 0; z <= count; z++) {
uint8_t v = memo[((a1 | ((a2 << 4) & 0xF00)) + z) & 0x0FFF];
size_t count = (a2 & 0x0F) + 3;
size_t backreference_offset = a1 | ((a2 << 4) & 0xF00);
for (size_t z = 0; z < count; z++) {
uint8_t v = memo[(backreference_offset + z) & 0x0FFF];
w.put_u8(v);
memo[memo_offset] = v;
memo_offset = (memo_offset + 1) & 0x0FFF;
}
// Control bit 1 means to write a byte directly from the input to the
// output. As above, the byte is also written to the memo.
} else {
uint8_t v = r.get_u8();
w.put_u8(v);
@@ -413,3 +456,91 @@ string bc0_decompress(const string& data) {
return move(w.str());
}
// Compresses data using the BC0 algorithm and returns the compressed stream.
//
// The output is a sequence of groups. Each group starts with one control byte
// followed by up to eight commands; the control bits are consumed starting
// from the least-significant bit. A 1 bit means the command is a single
// literal byte. A 0 bit means the command is a two-byte backreference:
//   byte 0 = low 8 bits of the memo offset
//   byte 1 = (bits 8-11 of the memo offset) << 4 | (match length - 3)
// There is no stop opcode; the stream simply ends after the last command. A
// control byte is only emitted when at least one command follows it, so empty
// input produces empty output and the stream never ends with a dangling
// control byte.
//
// The compressor maintains the same 0x1000-byte rolling memo as the
// decompressor (zero-filled, with the first output byte stored at 0xFEE), so
// every backreference it emits refers to bytes the decompressor will have at
// the same memo offsets.
std::string bc0_compress(const std::string& data) {
  constexpr size_t MEMO_SIZE = 0x1000;
  constexpr size_t MEMO_MASK = MEMO_SIZE - 1;
  // The decompressing client does not initialize the last 0x12 bytes of the
  // first memo page, so they must not be referenced before they are written.
  constexpr size_t UNINITIALIZED_TAIL_SIZE = 0x12;
  constexpr size_t MIN_MATCH_LENGTH = 3;
  // NOTE(review): the 4-bit length field can encode lengths up to 18; this
  // encoder has always capped matches at 17. Confirm that 18-byte matches are
  // safe for the client before raising this.
  constexpr size_t MAX_MATCH_LENGTH = 17;

  uint8_t memo[MEMO_SIZE] = {};
  size_t memo_offset = MEMO_SIZE - UNINITIALIZED_TAIL_SIZE;

  std::string out;
  // Position in out of the control byte for the current group, and the index
  // (0-7) of the next command within that group.
  size_t control_byte_offset = 0;
  unsigned command_index = 0;

  const size_t size = data.size();
  size_t pos = 0;
  while (pos < size) {
    const size_t max_match_length = std::min<size_t>(size - pos, MAX_MATCH_LENGTH);

    // Candidate matches are scanned in memo order starting at first_offset
    // and may cover only the candidate_span bytes that follow it, which stop
    // just short of the current memo position. During the first 0x12 input
    // bytes the scan starts at offset 0 so that the not-yet-written tail of
    // the first memo page is never referenced; afterwards the entire memo is
    // valid and the scan starts at the current position (the oldest data).
    size_t first_offset;
    size_t candidate_span;
    if (pos < UNINITIALIZED_TAIL_SIZE) {
      first_offset = 0;
      candidate_span = memo_offset;
    } else {
      first_offset = memo_offset;
      candidate_span = MEMO_SIZE;
    }

    // Single pass over the candidates: keep the first offset (in scan order)
    // that yields the longest match, and stop as soon as a match of the
    // maximum possible length is found.
    size_t best_match_offset = 0;
    size_t best_match_length = 0;
    if (max_match_length >= MIN_MATCH_LENGTH) {
      for (size_t i = 0;
           (i + MIN_MATCH_LENGTH <= candidate_span) && (best_match_length < max_match_length);
           i++) {
        const size_t offset = (first_offset + i) & MEMO_MASK;
        // A match starting here may not run into the current memo position
        const size_t limit = std::min<size_t>(max_match_length, candidate_span - i);
        size_t length = 0;
        while ((length < limit) &&
               (memo[(offset + length) & MEMO_MASK] == static_cast<uint8_t>(data[pos + length]))) {
          length++;
        }
        if ((length >= MIN_MATCH_LENGTH) && (length > best_match_length)) {
          best_match_length = length;
          best_match_offset = offset;
        }
      }
    }

    // Start a new group (and reserve its control byte) if needed. All control
    // bits start as 0 (backreference) and are set to 1 for literals.
    if (command_index == 0) {
      control_byte_offset = out.size();
      out.push_back('\0');
    }

    // Write a backreference if a match was found; otherwise write a literal
    size_t consumed;
    if (best_match_length >= MIN_MATCH_LENGTH) {
      out.push_back(static_cast<char>(best_match_offset & 0xFF));
      out.push_back(static_cast<char>(
          ((best_match_offset >> 4) & 0xF0) | (best_match_length - MIN_MATCH_LENGTH)));
      consumed = best_match_length;
    } else {
      const uint8_t control = static_cast<uint8_t>(out[control_byte_offset]);
      out[control_byte_offset] = static_cast<char>(control | (1u << command_index));
      out.push_back(data[pos]);
      consumed = 1;
    }
    command_index = (command_index + 1) & 7;

    // Mirror what the decompressor will do: every output byte is also written
    // to the memo at the current position.
    for (size_t z = 0; z < consumed; z++) {
      memo[memo_offset] = static_cast<uint8_t>(data[pos + z]);
      memo_offset = (memo_offset + 1) & MEMO_MASK;
    }
    pos += consumed;
  }

  return out;
}
+1
View File
@@ -14,3 +14,4 @@ std::string prs_decompress(const std::string& data, size_t max_output_size = 0);
size_t prs_decompress_size(const std::string& data, size_t max_output_size = 0);
// Decompresses BC0-compressed data and returns the decompressed bytes.
std::string bc0_decompress(const std::string& data);
// Compresses data using the BC0 algorithm and returns the compressed bytes.
std::string bc0_compress(const std::string& data);
+11 -4
View File
@@ -231,11 +231,10 @@ similarly, if output-filename is missing or is '-', newserv writes to stdout.\n\
The options are:\n\
--compress-prs\n\
--decompress-prs\n\
Compress or decompress data using the PRS algorithm. Both input-filename\n\
and output-filename may be specified.\n\
--compress-bc0 [input-filename [output-filename]]\n\
--decompress-bc0 [input-filename [output-filename]]\n\
Decompress data using the BC0 algorithm. Both input-filename and\n\
output-filename may be specified.\n\
Compress or decompress data using the PRS or BC0 algorithms. Both\n\
input-filename and output-filename may be specified.\n\
--encrypt-data\n\
--decrypt-data\n\
Encrypt or decrypt data using PSO's standard network protocol encryption.\n\
@@ -312,6 +311,7 @@ enum class Behavior {
RUN_SERVER = 0,
COMPRESS_PRS,
DECOMPRESS_PRS,
COMPRESS_BC0,
DECOMPRESS_BC0,
ENCRYPT_DATA,
DECRYPT_DATA,
@@ -327,6 +327,7 @@ enum class Behavior {
static bool behavior_takes_input_filename(Behavior b) {
return (b == Behavior::COMPRESS_PRS) ||
(b == Behavior::DECOMPRESS_PRS) ||
(b == Behavior::COMPRESS_BC0) ||
(b == Behavior::DECOMPRESS_BC0) ||
(b == Behavior::ENCRYPT_DATA) ||
(b == Behavior::DECRYPT_DATA) ||
@@ -339,6 +340,7 @@ static bool behavior_takes_input_filename(Behavior b) {
static bool behavior_takes_output_filename(Behavior b) {
return (b == Behavior::COMPRESS_PRS) ||
(b == Behavior::DECOMPRESS_PRS) ||
(b == Behavior::COMPRESS_BC0) ||
(b == Behavior::DECOMPRESS_BC0) ||
(b == Behavior::ENCRYPT_DATA) ||
(b == Behavior::DECRYPT_DATA) ||
@@ -378,6 +380,8 @@ int main(int argc, char** argv) {
behavior = Behavior::COMPRESS_PRS;
} else if (!strcmp(argv[x], "--decompress-prs")) {
behavior = Behavior::DECOMPRESS_PRS;
} else if (!strcmp(argv[x], "--compress-bc0")) {
behavior = Behavior::COMPRESS_BC0;
} else if (!strcmp(argv[x], "--decompress-bc0")) {
behavior = Behavior::DECOMPRESS_BC0;
} else if (!strcmp(argv[x], "--encrypt-data")) {
@@ -479,6 +483,7 @@ int main(int argc, char** argv) {
switch (behavior) {
case Behavior::COMPRESS_PRS:
case Behavior::DECOMPRESS_PRS:
case Behavior::COMPRESS_BC0:
case Behavior::DECOMPRESS_BC0: {
string data = read_input_data();
if (parse_data) {
@@ -490,6 +495,8 @@ int main(int argc, char** argv) {
data = prs_compress(data);
} else if (behavior == Behavior::DECOMPRESS_PRS) {
data = prs_decompress(data);
} else if (behavior == Behavior::COMPRESS_BC0) {
data = bc0_compress(data);
} else if (behavior == Behavior::DECOMPRESS_BC0) {
data = bc0_decompress(data);
} else {