From b58f354c416e8403203ea2a9f5e16092839183e6 Mon Sep 17 00:00:00 2001 From: Martin Michelsen Date: Thu, 23 Mar 2023 23:09:15 -0700 Subject: [PATCH] add extract-bml action --- CMakeLists.txt | 1 + README.md | 1 + src/BMLArchive.cc | 105 ++++++++++++++++++++++++++++++++++++++++++++++ src/BMLArchive.hh | 38 +++++++++++++++++ src/Main.cc | 52 ++++++++++++++++++----- 5 files changed, 187 insertions(+), 10 deletions(-) create mode 100644 src/BMLArchive.cc create mode 100644 src/BMLArchive.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index f1c6d233..e13e8ff3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ find_package(resource_file QUIET) # Executable definition add_executable(newserv + src/BMLArchive.cc src/CatSession.cc src/Channel.cc src/ChatCommands.cc diff --git a/README.md b/README.md index d9aa05a2..f9d688a0 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ Current known issues / missing features / things to do: - Implement trade window - Fix some edge cases on the BB proxy server (e.g. make sure Change Ship does the right thing, which is not the same as what it should do on other versions). - There is a function that encodes QST files, but there's no corresponding CLI option. +- Figure out what controls BML file data segment alignment. - PSOX is not tested at all. - Memory patches currently are platform-specific but not version-specific. This makes them quite a bit harder to write and use properly. - Find a way to silence audio in RunDOL.s. Some old DOLs don't reset audio systems at load time and it's annoying to hear the crash buzz when the GC hasn't actually crashed. 
diff --git a/src/BMLArchive.cc b/src/BMLArchive.cc new file mode 100644 index 00000000..f07bafed --- /dev/null +++ b/src/BMLArchive.cc @@ -0,0 +1,105 @@ +#include "BMLArchive.hh" + +#include +#include +#include + +#include "Text.hh" + +using namespace std; + + + +template +struct BMLHeader { /* Archive header as read from the start of the file; load_t() only interprets num_entries. */ + parray unknown_a1; + LongT num_entries; + parray unknown_a2; +} __attribute__((packed)); + +template +struct BMLHeaderEntry { /* Per-file record; these are read one after another immediately following the BMLHeader. */ + ptext filename; + LongT compressed_size; + parray unknown_a1; + LongT decompressed_size; + LongT compressed_gvm_size; + LongT decompressed_gvm_size; + parray unknown_a2; +} __attribute__((packed)); + +template +void BMLArchive::load_t() { /* Parses the header and entry records, assigning each file's data segment and GVM segment consecutive offsets that start at 0x800 and are each rounded up to a 0x20-byte boundary. Throws runtime_error if a segment would extend past the end of the buffer. */ + StringReader r(*this->data); + + const auto& header = r.get>(); + + size_t offset = 0x800; + for (size_t z = 0; z < header.num_entries; z++) { /* Count records explicitly: emplace() below inserts nothing for a repeated filename, so entries.size() is not a reliable loop bound and would let the parser run past the entry table. */ + const auto& entry = r.get>(); + + if (offset + entry.compressed_size > this->data->size()) { + throw runtime_error("BML data entry extends beyond end of data"); + } + size_t data_offset = offset; + offset = (offset + entry.compressed_size + 0x1F) & (~0x1F); + + if (offset + entry.compressed_gvm_size > this->data->size()) { + throw runtime_error("BML GVM entry extends beyond end of data"); + } + size_t gvm_offset = offset; + offset = (offset + entry.compressed_gvm_size + 0x1F) & (~0x1F); + + this->entries.emplace(entry.filename, Entry{ + data_offset, entry.compressed_size, gvm_offset, entry.compressed_gvm_size}); + } +} + +BMLArchive::BMLArchive(shared_ptr data, bool big_endian) + : data(data) { /* The index is built immediately; big_endian selects which load_t instantiation parses the header. */ + if (big_endian) { + this->load_t(); + } else { + this->load_t(); + } +} + +const unordered_map BMLArchive::all_entries() const { /* Returns the filename -> Entry index. */ + return this->entries; +} + +pair BMLArchive::get(const std::string& name) const { /* Returns (pointer, size) for the file's stored bytes; the pointer refers into the archive buffer. Throws out_of_range if name is not in the archive. */ + try { + const auto& entry = this->entries.at(name); + return make_pair(this->data->data() + entry.offset, entry.size); + } catch (const out_of_range&) { + throw out_of_range("BML does not contain file: " + name); + } +} + +pair BMLArchive::get_gvm(const std::string& 
name) const { /* Same as get(), but for the entry's GVM segment (gvm_offset / gvm_size). */ + try { + const auto& entry = this->entries.at(name); + return make_pair(this->data->data() + entry.gvm_offset, entry.gvm_size); + } catch (const out_of_range&) { + throw out_of_range("BML does not contain file: " + name); + } +} + +string BMLArchive::get_copy(const string& name) const { /* Returns a copy of the file's stored bytes; throws out_of_range if name is not in the archive. */ + try { + const auto& entry = this->entries.at(name); + return this->data->substr(entry.offset, entry.size); + } catch (const out_of_range&) { + throw out_of_range("BML does not contain file: " + name); + } +} + +StringReader BMLArchive::get_reader(const string& name) const { /* Returns a StringReader constructed from the pointer and size of the file's stored bytes; throws out_of_range if name is not in the archive. */ + try { + const auto& entry = this->entries.at(name); + return StringReader(this->data->data() + entry.offset, entry.size); + } catch (const out_of_range&) { + throw out_of_range("BML does not contain file: " + name); + } +} diff --git a/src/BMLArchive.hh b/src/BMLArchive.hh new file mode 100644 index 00000000..bc7b5a87 --- /dev/null +++ b/src/BMLArchive.hh @@ -0,0 +1,38 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include + + + +class BMLArchive { /* Index over a BML archive held in memory; lookups return ranges of the shared data buffer. */ +public: + BMLArchive(std::shared_ptr data, bool big_endian); + ~BMLArchive() = default; + + struct Entry { /* Byte offsets into the archive buffer and stored sizes for a file's data segment and its GVM segment. */ + uint64_t offset; + uint32_t size; + uint64_t gvm_offset; + uint32_t gvm_size; + }; + const std::unordered_map all_entries() const; + + std::pair get(const std::string& name) const; + std::pair get_gvm(const std::string& name) const; + std::string get_copy(const std::string& name) const; + StringReader get_reader(const std::string& name) const; + +private: + template + void load_t(); + + std::shared_ptr data; + + std::unordered_map entries; +}; diff --git a/src/Main.cc b/src/Main.cc index 8cb3acc3..7d227a23 100644 --- a/src/Main.cc +++ b/src/Main.cc @@ -13,9 +13,11 @@ #include #include +#include "BMLArchive.hh" #include "CatSession.hh" #include "Compression.hh" #include "DNSServer.hh" +#include "GSLArchive.hh" #include "IPStackSimulator.hh" #include "Loggers.hh" #include "NetworkAddresses.hh" @@ -168,10 +170,11 @@ The 
actions are:\n\ sure client sessions are repeatable and code changes don\'t affect existing\n\ (working) functionality.\n\ extract-gsl [INPUT-FILENAME] [--big-endian]\n\ - Extract all files from a GSL archive into the current directory.\n\ + extract-bml [INPUT-FILENAME] [--big-endian]\n\ + Extract all files from a GSL or BML archive into the current directory.\n\ input-filename may be specified. If output-filename is specified, then it\n\ is treated as a prefix which is prepended to the filename of each file\n\ - contained in the GSL archive. If --big-endian is given, the GSL header is\n\ + contained in the archive. If --big-endian is given, the archive header is\n\ read in GameCube format; otherwise it is read in PC/BB format.\n\ \n\ A few options apply to multiple modes described above:\n\ @@ -198,6 +201,7 @@ enum class Behavior { DECODE_QUEST_FILE, DECODE_SJIS, EXTRACT_GSL, + EXTRACT_BML, FORMAT_ITEMRT_ENTRY, FORMAT_ITEMRT_REL, SHOW_EP3_DATA, @@ -221,6 +225,7 @@ static bool behavior_takes_input_filename(Behavior b) { (b == Behavior::FORMAT_ITEMRT_ENTRY) || (b == Behavior::FORMAT_ITEMRT_REL) || (b == Behavior::EXTRACT_GSL) || + (b == Behavior::EXTRACT_BML) || (b == Behavior::PARSE_OBJECT_GRAPH) || (b == Behavior::REPLAY_LOG) || (b == Behavior::CAT_CLIENT); @@ -234,7 +239,9 @@ static bool behavior_takes_output_filename(Behavior b) { (b == Behavior::ENCRYPT_DATA) || (b == Behavior::DECRYPT_DATA) || (b == Behavior::DECRYPT_TRIVIAL_DATA) || - (b == Behavior::DECODE_SJIS); + (b == Behavior::DECODE_SJIS) || + (b == Behavior::EXTRACT_GSL) || + (b == Behavior::EXTRACT_BML); } enum class QuestFileFormat { @@ -360,6 +367,8 @@ int main(int argc, char** argv) { behavior = Behavior::REPLAY_LOG; } else if (!strcmp(argv[x], "extract-gsl")) { behavior = Behavior::EXTRACT_GSL; + } else if (!strcmp(argv[x], "extract-bml")) { + behavior = Behavior::EXTRACT_BML; } else { throw invalid_argument(string_printf("unknown command: %s (try --help)", argv[x])); } @@ -672,7 +681,8 @@ int 
main(int argc, char** argv) { break; } - case Behavior::EXTRACT_GSL: { + case Behavior::EXTRACT_GSL: + case Behavior::EXTRACT_BML: { if (!output_filename) { output_filename = ""; } else if (!strcmp(output_filename, "-")) { @@ -680,13 +690,35 @@ int main(int argc, char** argv) { } string data = read_input_data(); - shared_ptr data_shared(new string(move(data))); - GSLArchive gsl(data_shared, big_endian); - for (const auto& entry_it : gsl.all_entries()) { - auto e = gsl.get(entry_it.first); - save_file(output_filename + entry_it.first, e.first, e.second); - fprintf(stderr, "... %s\n", entry_it.first.c_str()); + + if (behavior == Behavior::EXTRACT_GSL) { + GSLArchive arch(data_shared, big_endian); + for (const auto& entry_it : arch.all_entries()) { + auto e = arch.get(entry_it.first); + string out_file = output_filename + entry_it.first; + save_file(out_file.c_str(), e.first, e.second); + fprintf(stderr, "... %s\n", out_file.c_str()); + } + } else { + BMLArchive arch(data_shared, big_endian); + for (const auto& entry_it : arch.all_entries()) { + { + auto e = arch.get(entry_it.first); + string data = prs_decompress(e.first, e.second); + string out_file = output_filename + entry_it.first; + save_file(out_file, data); + fprintf(stderr, "... %s\n", out_file.c_str()); + } + + auto gvm_e = arch.get_gvm(entry_it.first); + if (gvm_e.second) { + string data = prs_decompress(gvm_e.first, gvm_e.second); + string out_file = output_filename + entry_it.first + ".gvm"; + save_file(out_file, data); + fprintf(stderr, "... %s\n", out_file.c_str()); + } + } } break; }