write bc0 compressor
This commit is contained in:
+134
-3
@@ -375,14 +375,44 @@ size_t prs_decompress_size(const string& data, size_t max_output_size) {
|
||||
|
||||
|
||||
|
||||
// BC0 is a compression algorithm fairly similar to PRS, but with a simpler set
|
||||
// of commands. Like PRS, there is a control stream, indicating when to copy a
|
||||
// literal byte from the input and when to copy from a backreference; unlike
|
||||
// PRS, there is only one type of backreference. Also, there is no stop opcode;
|
||||
// the decompressor simply stops when there are no more input bytes to read.
|
||||
|
||||
// The BC0 decompression implementation in PSO GC is vulnerable to overflow
|
||||
// attacks - there is no bounds checking on the output buffer. It is unlikely
|
||||
// that this can be usefully exploited (e.g. for RCE) because the output pointer
|
||||
// is checked before every byte is written, so we cannot change the output
|
||||
// pointer to any arbitrary address.
|
||||
|
||||
string bc0_decompress(const string& data) {
|
||||
StringReader r(data);
|
||||
StringWriter w;
|
||||
|
||||
// Unlike PRS, BC0 uses a memo which "rolls over" every 0x1000 bytes. The
|
||||
// boundaries of these "memo pages" are offset by -0x12 bytes for some reason,
|
||||
// so the first output byte corresponds to position 0xFEE on the first memo
|
||||
// page. Backreferences refer to offsets based on the start of memo pages; for
|
||||
// example, if the current output offset is 0x1234, a backreference with
|
||||
// offset 0x123 refers to the byte that was written at offset 0x1112 (because
|
||||
// that byte is at offset 0x112 in the memo, because the memo rolls over every
|
||||
// 0x1000 bytes and the first memo byte was 0x12 bytes before the beginning of
|
||||
// the next page). The memo is initially zeroed from 0 to 0xFEE; it seems PSO
|
||||
// GC doesn't initialize the last 0x12 bytes of the first memo page. (Here, we
|
||||
// implicitly initialize them with zeroes.)
|
||||
parray<uint8_t, 0x1000> memo;
|
||||
uint16_t memo_offset = 0x0FEE;
|
||||
|
||||
// The low byte of this value contains the control stream data; the high bits
|
||||
// specify which low bits are valid. When the last 1 is shifted out of the
|
||||
// high byte, we need to read a new control stream byte to get the next set of
|
||||
// control bits.
|
||||
uint16_t control_stream_bits = 0x0000;
|
||||
|
||||
while (!r.eof()) {
|
||||
// Read control stream bits if needed
|
||||
control_stream_bits >>= 1;
|
||||
if ((control_stream_bits & 0x100) == 0) {
|
||||
control_stream_bits = 0xFF00 | r.get_u8();
|
||||
@@ -390,19 +420,32 @@ string bc0_decompress(const string& data) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Control bit 0 means to perform a backreference copy. The offset and
|
||||
// length are stored in two bytes in the input stream, laid out as follows:
|
||||
// a1 = 0bBBBBBBBB
|
||||
// a2 = 0bAAAACCCC
|
||||
// The offset is the concatenation of bits AAAABBBBBBBB, which refers to a
|
||||
// position in the memo; the number of bytes to copy is (C + 3). The
|
||||
// decompressor copies that many bytes from that offset in the memo, and
|
||||
// writes them to the output and to the current position in the memo.
|
||||
if ((control_stream_bits & 1) == 0) {
|
||||
uint8_t a1 = r.get_u8();
|
||||
if (r.eof()) {
|
||||
break;
|
||||
}
|
||||
uint8_t a2 = r.get_u8();
|
||||
size_t count = (a2 & 0x0F) + 2;
|
||||
for (size_t z = 0; z <= count; z++) {
|
||||
uint8_t v = memo[((a1 | ((a2 << 4) & 0xF00)) + z) & 0x0FFF];
|
||||
size_t count = (a2 & 0x0F) + 3;
|
||||
size_t backreference_offset = a1 | ((a2 << 4) & 0xF00);
|
||||
for (size_t z = 0; z < count; z++) {
|
||||
uint8_t v = memo[(backreference_offset + z) & 0x0FFF];
|
||||
w.put_u8(v);
|
||||
memo[memo_offset] = v;
|
||||
memo_offset = (memo_offset + 1) & 0x0FFF;
|
||||
}
|
||||
|
||||
// Control bit 1 means to write a byte directly from the input to the
|
||||
// output. As above, the byte is also written to the memo.
|
||||
} else {
|
||||
uint8_t v = r.get_u8();
|
||||
w.put_u8(v);
|
||||
@@ -413,3 +456,91 @@ string bc0_decompress(const string& data) {
|
||||
|
||||
return move(w.str());
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Compresses data into the BC0 format.
//
// The output is a sequence of groups. Each group begins with a control byte
// whose bits, consumed from least to most significant, describe up to eight
// commands that follow it: a 1 bit means a literal byte copied verbatim, and a
// 0 bit means a two-byte backreference (memo offset and length) into the
// 0x1000-byte rolling memo. There is no stop command; the stream simply ends.
// If the input is empty or ends exactly on an eight-command boundary, the
// output ends with an unused zero control byte.
//
// Arguments:
//   data: the uncompressed bytes.
// Returns:
//   the compressed stream.
string bc0_compress(const string& data) {
  // A backreference stores (length - 3) in four bits, so the format can encode
  // copies of 3 to 18 bytes. Capping the search below 18 would leave the
  // largest length code unused and compress long runs worse than necessary.
  constexpr size_t min_match_length = 3;
  constexpr size_t max_encodable_match_length = 18;

  StringReader r(data);
  StringWriter w;

  // Local mirror of the memo the decompressor will build. The first output
  // byte lands at memo position 0xFEE, so both sides start there.
  // NOTE(review): this relies on parray zero-initializing its contents, so
  // that the untouched part of the memo matches the decompressor's zeroed
  // memo - confirm against parray's definition.
  parray<uint8_t, 0x1000> memo;
  uint16_t memo_offset = 0x0FEE;

  // Reserve a placeholder for the first control byte; it is patched once the
  // bits for its commands are known.
  size_t next_control_byte_offset = w.size();
  w.put_u8(0);
  // The low byte accumulates control bits. Every command also inserts a marker
  // at bit 0x8000; after eight commands the first marker has been shifted down
  // to bit 0x0100, which signals that the control byte is complete.
  uint16_t pending_control_bits = 0x0000;

  parray<uint8_t, max_encodable_match_length> match_buf;
  while (!r.eof()) {
    // Search in the memo for the longest string matching the upcoming data, of
    // length 3-18 bytes
    size_t best_match_offset = 0;
    size_t best_match_length = 0;
    size_t max_match_length = min<size_t>(r.remaining(), max_encodable_match_length);
    // Peek at the upcoming bytes. (The false argument presumably means "do not
    // advance"; the get_u8 calls below consume these same bytes.)
    r.readx(match_buf.data(), max_match_length, false);

    // Forbid matches that cover the uninitialized part of the memo when the
    // client decompresses: during the first 0x12 bytes, only search from the
    // start of the memo page. This does not depend on the candidate length, so
    // it is computed once per input position.
    size_t start_offset = (r.where() < 0x12) ? 0 : memo_offset;

    for (size_t match_length = min_match_length; match_length <= max_match_length; match_length++) {

      // Forbid matches that span the current memo position: the last allowed
      // candidate ends on the byte just before memo_offset.
      size_t end_offset = (memo_offset - match_length + 1) & 0xFFF;

      for (size_t offset = start_offset; offset != end_offset; offset = (offset + 1) & 0xFFF) {
        bool match_found = true;
        for (size_t z = 0; z < match_length; z++) {
          if (match_buf[z] != memo[(offset + z) & 0xFFF]) {
            match_found = false;
            break;
          }
        }
        // If a match was found at this length, don't bother looking for other
        // matches of the same length - one will suffice
        if (match_found) {
          best_match_length = match_length;
          best_match_offset = offset;
          break;
        }
      }
      // If no matches were found at the current length, don't bother looking
      // for longer matches
      if (best_match_length < match_length) {
        break;
      }
    }

    // Write a backreference if a match was found; otherwise write a literal
    if (best_match_length >= min_match_length) {
      // Control bit 0 (only the marker bit is inserted)
      pending_control_bits = (pending_control_bits >> 1) | 0x8000;
      // First byte: low 8 bits of the memo offset. Second byte: high 4 bits of
      // the offset in the upper nibble, (length - 3) in the lower nibble.
      w.put_u8(best_match_offset & 0xFF);
      w.put_u8(((best_match_offset >> 4) & 0xF0) | (best_match_length - min_match_length));
      // Consume the matched bytes and record them in the memo, exactly as the
      // decompressor will when it performs the copy
      for (size_t z = 0; z < best_match_length; z++) {
        memo[memo_offset] = r.get_u8();
        memo_offset = (memo_offset + 1) & 0xFFF;
      }
    } else {
      // Control bit 1 (marker bit plus the new top bit of the low byte)
      pending_control_bits = (pending_control_bits >> 1) | 0x8080;
      uint8_t v = r.get_u8();
      w.put_u8(v);
      memo[memo_offset] = v;
      memo_offset = (memo_offset + 1) & 0xFFF;
    }

    // Write control byte if needed
    if (pending_control_bits & 0x0100) {
      w.pput_u8(next_control_byte_offset, pending_control_bits & 0xFF);
      next_control_byte_offset = w.size();
      w.put_u8(0);
      pending_control_bits = 0x0000;
    }
  }

  // Write the final control byte if needed. Fewer than eight commands are
  // pending here, so shift until the first marker reaches bit 0x0100; this
  // aligns the first pending command with bit 0 and leaves the unused high
  // bits of the control byte as zero.
  if (pending_control_bits & 0xFF00) {
    while (!(pending_control_bits & 0x0100)) {
      pending_control_bits >>= 1;
    }
    w.pput_u8(next_control_byte_offset, pending_control_bits & 0xFF);
  }

  return move(w.str());
}
|
||||
|
||||
@@ -14,3 +14,4 @@ std::string prs_decompress(const std::string& data, size_t max_output_size = 0);
|
||||
size_t prs_decompress_size(const std::string& data, size_t max_output_size = 0);
|
||||
|
||||
std::string bc0_decompress(const std::string& data);
|
||||
std::string bc0_compress(const std::string& data);
|
||||
|
||||
+11
-4
@@ -231,11 +231,10 @@ similarly, if output-filename is missing or is '-', newserv writes to stdout.\n\
|
||||
The options are:\n\
|
||||
--compress-prs\n\
|
||||
--decompress-prs\n\
|
||||
Compress or decompress data using the PRS algorithm. Both input-filename\n\
|
||||
and output-filename may be specified.\n\
|
||||
--compress-bc0 [input-filename [output-filename]]\n\
|
||||
--decompress-bc0 [input-filename [output-filename]]\n\
|
||||
Decompress data using the BC0 algorithm. Both input-filename and\n\
|
||||
output-filename may be specified.\n\
|
||||
Compress or decompress data using the PRS or BC0 algorithms. Both\n\
|
||||
input-filename and output-filename may be specified.\n\
|
||||
--encrypt-data\n\
|
||||
--decrypt-data\n\
|
||||
Encrypt or decrypt data using PSO's standard network protocol encryption.\n\
|
||||
@@ -312,6 +311,7 @@ enum class Behavior {
|
||||
RUN_SERVER = 0,
|
||||
COMPRESS_PRS,
|
||||
DECOMPRESS_PRS,
|
||||
COMPRESS_BC0,
|
||||
DECOMPRESS_BC0,
|
||||
ENCRYPT_DATA,
|
||||
DECRYPT_DATA,
|
||||
@@ -327,6 +327,7 @@ enum class Behavior {
|
||||
static bool behavior_takes_input_filename(Behavior b) {
|
||||
return (b == Behavior::COMPRESS_PRS) ||
|
||||
(b == Behavior::DECOMPRESS_PRS) ||
|
||||
(b == Behavior::COMPRESS_BC0) ||
|
||||
(b == Behavior::DECOMPRESS_BC0) ||
|
||||
(b == Behavior::ENCRYPT_DATA) ||
|
||||
(b == Behavior::DECRYPT_DATA) ||
|
||||
@@ -339,6 +340,7 @@ static bool behavior_takes_input_filename(Behavior b) {
|
||||
static bool behavior_takes_output_filename(Behavior b) {
|
||||
return (b == Behavior::COMPRESS_PRS) ||
|
||||
(b == Behavior::DECOMPRESS_PRS) ||
|
||||
(b == Behavior::COMPRESS_BC0) ||
|
||||
(b == Behavior::DECOMPRESS_BC0) ||
|
||||
(b == Behavior::ENCRYPT_DATA) ||
|
||||
(b == Behavior::DECRYPT_DATA) ||
|
||||
@@ -378,6 +380,8 @@ int main(int argc, char** argv) {
|
||||
behavior = Behavior::COMPRESS_PRS;
|
||||
} else if (!strcmp(argv[x], "--decompress-prs")) {
|
||||
behavior = Behavior::DECOMPRESS_PRS;
|
||||
} else if (!strcmp(argv[x], "--compress-bc0")) {
|
||||
behavior = Behavior::COMPRESS_BC0;
|
||||
} else if (!strcmp(argv[x], "--decompress-bc0")) {
|
||||
behavior = Behavior::DECOMPRESS_BC0;
|
||||
} else if (!strcmp(argv[x], "--encrypt-data")) {
|
||||
@@ -479,6 +483,7 @@ int main(int argc, char** argv) {
|
||||
switch (behavior) {
|
||||
case Behavior::COMPRESS_PRS:
|
||||
case Behavior::DECOMPRESS_PRS:
|
||||
case Behavior::COMPRESS_BC0:
|
||||
case Behavior::DECOMPRESS_BC0: {
|
||||
string data = read_input_data();
|
||||
if (parse_data) {
|
||||
@@ -490,6 +495,8 @@ int main(int argc, char** argv) {
|
||||
data = prs_compress(data);
|
||||
} else if (behavior == Behavior::DECOMPRESS_PRS) {
|
||||
data = prs_decompress(data);
|
||||
} else if (behavior == Behavior::COMPRESS_BC0) {
|
||||
data = bc0_compress(data);
|
||||
} else if (behavior == Behavior::DECOMPRESS_BC0) {
|
||||
data = bc0_decompress(data);
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user