diff --git a/src/Compression.cc b/src/Compression.cc index 5303a124..fc4e6421 100644 --- a/src/Compression.cc +++ b/src/Compression.cc @@ -216,8 +216,9 @@ string prs_compress( class ControlStreamReader { public: - ControlStreamReader(StringReader& r) : r(r), - bits(0x0000) {} + ControlStreamReader(StringReader& r) + : r(r), + bits(0x0000) {} bool read() { if (!(this->bits & 0x0100)) { diff --git a/src/FunctionCompiler.cc b/src/FunctionCompiler.cc index b6f58047..4da02cea 100644 --- a/src/FunctionCompiler.cc +++ b/src/FunctionCompiler.cc @@ -4,6 +4,7 @@ #include #include +#include #include #ifdef HAVE_RESOURCE_FILE @@ -11,6 +12,7 @@ #endif #include "CommandFormats.hh" +#include "Compression.hh" #include "Loggers.hh" using namespace std; @@ -247,7 +249,8 @@ bool FunctionCodeIndex::patch_menu_empty(uint32_t specific_version) const { return true; } -DOLFileIndex::DOLFileIndex(const string& directory) { +DOLFileIndex::DOLFileIndex(const string& directory, bool compress) + : files_compressed(compress) { if (!function_compiler_available()) { function_compiler_log.info("Function compiler is not available"); return; @@ -274,16 +277,63 @@ DOLFileIndex::DOLFileIndex(const string& directory) { dol->name = name; string path = directory + "/" + filename; - dol->data = load_file(path); + string file_data = load_file(path); + + string description; + if (this->files_compressed) { + uint64_t start = now(); + string compressed = prs_compress(file_data); + StringWriter w; + if (compressed.size() >= file_data.size()) { + w.put_u32b(0); + w.put_u32b(file_data.size()); + w.write(file_data); + } else { + w.put_u32b(compressed.size()); + w.put_u32b(file_data.size()); + w.write(compressed); + } + while (w.size() & 3) { + w.put_u8(0); + } + dol->data = std::move(w.str()); + uint64_t diff = now() - start; + + string orig_size_str = format_size(file_data.size()); + string compressed_size_str = format_size(dol->data.size()); + string time_str = format_duration(diff); + + if (compressed.size() 
>= file_data.size()) { + function_compiler_log.info("Loaded and compressed DOL file %s (%s -> %s, %s) (inefficient compression; using uncompressed version)", + dol->name.c_str(), orig_size_str.c_str(), compressed_size_str.c_str(), time_str.c_str()); + description = string_printf("$C6%s$C7\n%s", dol->name.c_str(), orig_size_str.c_str()); + } else { + function_compiler_log.info("Loaded and compressed DOL file %s (%s -> %s, %s)", + dol->name.c_str(), orig_size_str.c_str(), compressed_size_str.c_str(), time_str.c_str()); + description = string_printf("$C6%s$C7\n%s\n%s (orig)", dol->name.c_str(), compressed_size_str.c_str(), orig_size_str.c_str()); + } + + } else { + StringWriter w; + w.put_u32b(0); + w.put_u32b(file_data.size()); + w.write(file_data); + while (w.size() & 3) { + w.put_u8(0); + } + dol->data = std::move(w.str()); + + string orig_size_str = format_size(dol->data.size()); + function_compiler_log.info("Loaded DOL file %s (%s)", filename.c_str(), orig_size_str.c_str()); + + description = string_printf("$C6%s$C7\n%s", dol->name.c_str(), orig_size_str.c_str()); + } this->name_to_file.emplace(dol->name, dol); this->item_id_to_file.emplace_back(dol); - string size_str = format_size(dol->data.size()); - string description = string_printf("$C6%s$C7\n%s", dol->name.c_str(), size_str.c_str()); menu->items.emplace_back(dol->menu_item_id, decode_sjis(dol->name), decode_sjis(description), MenuItem::Flag::REQUIRES_SEND_FUNCTION_CALL); - function_compiler_log.info("Loaded DOL file %s", filename.c_str()); } catch (const exception& e) { function_compiler_log.warning("Failed to load DOL file %s: %s", filename.c_str(), e.what()); diff --git a/src/FunctionCompiler.hh b/src/FunctionCompiler.hh index e6f5524f..8e265f71 100644 --- a/src/FunctionCompiler.hh +++ b/src/FunctionCompiler.hh @@ -76,12 +76,13 @@ struct DOLFileIndex { std::string data; }; + bool files_compressed; std::vector> item_id_to_file; std::map> name_to_file; std::shared_ptr menu; DOLFileIndex() = default; - 
explicit DOLFileIndex(const std::string& directory); + DOLFileIndex(const std::string& directory, bool compress); inline bool empty() const { return this->name_to_file.empty() && this->item_id_to_file.empty(); diff --git a/src/ServerState.cc b/src/ServerState.cc index 132b4c40..0ff12294 100644 --- a/src/ServerState.cc +++ b/src/ServerState.cc @@ -24,6 +24,7 @@ ServerState::ServerState(const char* config_filename, bool is_replay) allow_saving(true), item_tracking_enabled(true), episode_3_send_function_call_enabled(false), + enable_dol_compression(false), catch_handler_exceptions(true), ep3_behavior_flags(0), run_shell_behavior(RunShellBehavior::DEFAULT), @@ -654,6 +655,12 @@ void ServerState::parse_config(shared_ptr config_json) { this->episode_3_send_function_call_enabled = false; } + try { + this->enable_dol_compression = d.at("CompressDOLFiles")->as_bool(); + } catch (const out_of_range&) { + this->enable_dol_compression = false; + } + try { this->catch_handler_exceptions = d.at("CatchHandlerExceptions")->as_bool(); } catch (const out_of_range&) { @@ -860,5 +867,5 @@ void ServerState::compile_functions() { void ServerState::load_dol_files() { config_log.info("Loading DOL files"); - this->dol_file_index.reset(new DOLFileIndex("system/dol")); + this->dol_file_index.reset(new DOLFileIndex("system/dol", this->enable_dol_compression)); } diff --git a/src/ServerState.hh b/src/ServerState.hh index 1b5a548d..2385ed24 100644 --- a/src/ServerState.hh +++ b/src/ServerState.hh @@ -55,6 +55,7 @@ struct ServerState { bool allow_saving; bool item_tracking_enabled; bool episode_3_send_function_call_enabled; + bool enable_dol_compression; bool catch_handler_exceptions; uint32_t ep3_behavior_flags; RunShellBehavior run_shell_behavior; diff --git a/system/config.example.json b/system/config.example.json index b5defb01..034fe511 100644 --- a/system/config.example.json +++ b/system/config.example.json @@ -309,6 +309,11 @@ // enable Episode 3 patches by default; it only does so if 
this option is on. // "EnableEpisode3SendFunctionCall": true, + // If this setting is enabled, newserv will compress DOL files at startup + // time. This makes startup time slower but makes clients' loading time much + // faster. + "CompressDOLFiles": true, + // By default, the server keeps track of items in all games, even for versions // other than Blue Burst. This enables use of the $what command, as well as // protection against item duplication cheats (the cheater is disconnected diff --git a/system/ppc/PRSDecompress.inc.s b/system/ppc/PRSDecompress.inc.s new file mode 100644 index 00000000..a2400462 --- /dev/null +++ b/system/ppc/PRSDecompress.inc.s @@ -0,0 +1,103 @@ +prs_decompress__start: + # r3 = dest ptr (used as write ptr) + subi r3, r3, 1 + # r4 = src ptr (used as read ptr) + subi r4, r4, 1 + # r5 = dest size (converted to ptr to last valid output byte) + add r5, r5, r3 + # r6 = src size (converted to ptr to last valid input byte) + add r6, r6, r4 + # r7 = control bits + guard bits + li r7, 0 + # r8 = second-level saved LR, temp for offset/copy src ptr + # r9 = original dest ptr - 1 (used for computing return value) + mr r9, r3 + # r10 = temp for reading/writing data + # r11 = temp for count + # r12 = saved LR + mflr r12 + +prs_decompress__next_opcode: + bl prs_decompress__cmp_control_bit_and_return_in_r10 + beq prs_decompress__control_0 + +prs_decompress__control_1: + bl prs_decompress__read_byte_to_r10 + bl prs_decompress__write_byte_from_r10 + b prs_decompress__next_opcode + +prs_decompress__control_0: + bl prs_decompress__cmp_control_bit_and_return_in_r10 + beq prs_decompress__control_00 + +prs_decompress__control_01: + bl prs_decompress__read_byte_to_r10 + rlwinm r8, r10, 29, 27, 31 # low 5 bits of offset + rlwinm r11, r10, 0, 29, 31 # size + addi r11, r11, 2 + bl prs_decompress__read_byte_to_r10 + rlwimi. 
r8, r10, 5, 19, 26 # high 8 bits of offset + bne prs_decompress__control_01_not_stop_opcode + sub r3, r3, r9 + mtlr r12 + blr +prs_decompress__control_01_not_stop_opcode: + ori r8, r8, 0xE000 + cmplwi r11, 2 + bne prs_decompress__control_01_not_extended_count + bl prs_decompress__read_byte_to_r10 + addi r11, r10, 1 +prs_decompress__control_01_not_extended_count: + mtctr r11 + b prs_decompress__control_00_01_copy + +prs_decompress__control_00: + bl prs_decompress__cmp_control_bit_and_return_in_r10 + rlwinm r11, r10, 1, 30, 30 + bl prs_decompress__cmp_control_bit_and_return_in_r10 + rlwimi r11, r10, 0, 31, 31 + addi r11, r11, 2 + mtctr r11 + bl prs_decompress__read_byte_to_r10 + ori r8, r10, 0xFF00 + +prs_decompress__control_00_01_copy: + # r8 = src offset (negative 16-bit value) + # ctr = byte count to copy + oris r8, r8, 0xFFFF + add r8, r8, r3 # r8 = copy src ptr (minus 1, for lbzu) +prs_decompress__control_00_01_copy_again: + lbzu r10, [r8 + 1] + bl prs_decompress__write_byte_from_r10 + bdnz prs_decompress__control_00_01_copy_again + b prs_decompress__next_opcode + +prs_decompress__cmp_control_bit_and_return_in_r10: + andi. r10, r7, 0x0100 + bne prs_decompress__skip_read + mflr r8 + bl prs_decompress__read_byte_to_r10 + mtlr r8 + mr r7, r10 + ori r7, r7, 0xFF00 +prs_decompress__skip_read: + andi. r10, r7, 1 + rlwinm r7, r7, 31, 17, 31 + blr + +prs_decompress__read_byte_to_r10: + cmp r4, r6 + bge prs_decompress__return_error + lbzu r10, [r4 + 1] + blr + +prs_decompress__write_byte_from_r10: + cmp r3, r5 + bge prs_decompress__return_error + stbu [r3 + 1], r10 + blr + +prs_decompress__return_error: + li r3, -1 + mtlr r12 + blr diff --git a/system/ppc/RunDOL.s b/system/ppc/RunDOL.s index 445ffed7..3265478b 100644 --- a/system/ppc/RunDOL.s +++ b/system/ppc/RunDOL.s @@ -53,8 +53,25 @@ copy_code_to_low_memory__again: run_dol: - lwz r30, [r31 + 0x10] # r30 = DOL base ptr + lwz r30, [r31 + 0x10] # r30 = data base ptr + # Decompress the file first. 
If the compressed size is zero, then skip this + # step (the file is not compressed). The header consists of two fields: + # compressed size followed by decompressed size. + lwz r6, [r30] + cmplwi r6, 0 + beq run_dol__not_compressed + lwz r5, [r30 + 4] + addi r4, r30, 8 # Compressed data immediately follows the 2 header fields + sub r3, r30, r5 # Decompress to immediately before the compressed data + mr r30, r3 # Save DOL header pointer for after decompression + bl prs_decompress + b run_dol__decompressed + +run_dol__not_compressed: + addi r30, r30, 8 + +run_dol__decompressed: # DOL files are very simple: they have up to 7 text sections, up to 11 data # sections, and a BSS section and an entrypoint. No imports or other fancy # things to do - we just have to move a bunch of bytes around. @@ -119,6 +136,11 @@ flush_cached_code_writes: +prs_decompress: + .include PRSDecompress + + + return_end_ptr: mflr r3 bctr