diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d3bc4c1..a876c7fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,6 +103,7 @@ add_executable(newserv src/Shell.cc src/StaticGameData.cc src/Text.cc + src/TextArchive.cc src/Version.cc ) target_include_directories(newserv PUBLIC ${LIBEVENT_INCLUDE_DIR}) diff --git a/README.md b/README.md index 0b400f32..a76dc7ee 100644 --- a/README.md +++ b/README.md @@ -397,6 +397,7 @@ newserv has many CLI options, which can be used to access functionality other th * Decode Shift-JIS text to UTF-16 (`decode-sjis`) * Convert quests in .gci, .vms, .dlq, or .qst format to .bin/.dat format (`decode-gci`, `decode-vms`, `decode-dlq`, `decode-qst`) * Convert quests in .bin/.dat to .qst format (`encode-qst`) +* Convert text archives (e.g. TextEnglish.pr2) to JSON and vice versa (`decode-text-archive`, `encode-text-archive`) * Disassemble quest scripts (`disassemble-quest-script`) * Format Episode 3 game data in a human-readable manner (`show-ep3-maps`, `show-ep3-cards`) * Convert item data to a human-readable description, or vice versa (`describe-item`, `encode-item`) diff --git a/src/Main.cc b/src/Main.cc index a28230a2..616a37f6 100644 --- a/src/Main.cc +++ b/src/Main.cc @@ -36,6 +36,7 @@ #include "ServerState.hh" #include "StaticGameData.hh" #include "Text.hh" +#include "TextArchive.hh" /* declares TextArchive, used by the decode-text-archive and encode-text-archive actions */ using namespace std; @@ -241,6 +242,10 @@ The actions are:\n\ is treated as a prefix which is prepended to the filename of each file\n\ contained in the archive. If --big-endian is given, the archive header is\n\ read in GameCube format; otherwise it is read in PC/BB format.\n\ + decode-text-archive [INPUT-FILENAME [OUTPUT-FILENAME]]\n\ + encode-text-archive [INPUT-FILENAME [OUTPUT-FILENAME]]\n\ + Decode a text archive (e.g. TextEnglish.pr2) to JSON for easy editing, or\n\ + encode a JSON file to a text archive.\n\ format-rare-item-set [--json] [INPUT-FILENAME]\n\ Print the contents of a rare item table in a human-readable format. 
If\n\ --json is given, the input is parsed as a JSON rare item set (see\n\ @@ -306,6 +311,8 @@ enum class Behavior { DECODE_SJIS, EXTRACT_GSL, EXTRACT_BML, + DECODE_TEXT_ARCHIVE, /* text archive (pr2) to JSON */ + ENCODE_TEXT_ARCHIVE, /* JSON to text archive (pr2 and pr3 files) */ FORMAT_RARE_ITEM_SET, CONVERT_ITEMRT_REL_TO_JSON, SHOW_EP3_MAPS, @@ -357,6 +364,8 @@ static bool behavior_takes_input_filename(Behavior b) { (b == Behavior::CONVERT_ITEMRT_REL_TO_JSON) || (b == Behavior::EXTRACT_GSL) || (b == Behavior::EXTRACT_BML) || + (b == Behavior::DECODE_TEXT_ARCHIVE) || + (b == Behavior::ENCODE_TEXT_ARCHIVE) || (b == Behavior::DESCRIBE_ITEM) || (b == Behavior::ENCODE_ITEM) || (b == Behavior::PARSE_OBJECT_GRAPH) || @@ -392,7 +401,9 @@ static bool behavior_takes_output_filename(Behavior b) { (b == Behavior::CONVERT_ITEMRT_REL_TO_JSON) || (b == Behavior::DECODE_SJIS) || (b == Behavior::EXTRACT_GSL) || - (b == Behavior::EXTRACT_BML); + (b == Behavior::EXTRACT_BML) || + (b == Behavior::DECODE_TEXT_ARCHIVE) || + (b == Behavior::ENCODE_TEXT_ARCHIVE); } int main(int argc, char** argv) { @@ -620,6 +631,10 @@ int main(int argc, char** argv) { behavior = Behavior::EXTRACT_GSL; } else if (!strcmp(argv[x], "extract-bml")) { behavior = Behavior::EXTRACT_BML; + } else if (!strcmp(argv[x], "decode-text-archive")) { + behavior = Behavior::DECODE_TEXT_ARCHIVE; + } else if (!strcmp(argv[x], "encode-text-archive")) { + behavior = Behavior::ENCODE_TEXT_ARCHIVE; } else if (!strcmp(argv[x], "generate-dc-serial-number")) { behavior = Behavior::GENERATE_DC_SERIAL_NUMBER; } else if (!strcmp(argv[x], "generate-all-dc-serial-numbers")) { @@ -699,6 +714,8 @@ int main(int argc, char** argv) { filename += ".bmp"; } else if (behavior == Behavior::ENCODE_GVM) { filename += ".gvm"; + } else if (behavior == Behavior::DECODE_TEXT_ARCHIVE) { + filename += ".json"; /* the decode action emits JSON, so the generated name gets a .json suffix */ } else if (behavior == Behavior::DISASSEMBLE_QUEST_SCRIPT) { filename += ".txt"; } else if (behavior == Behavior::CONVERT_ITEMRT_REL_TO_JSON) { @@ -734,19 +751,9 @@ int main(int argc, char** argv) { size_t 
pr2_expected_size = 0; if (behavior == Behavior::DECOMPRESS_PR2) { - if (data.size() < 8) { - throw runtime_error("not enough data for PR2 header"); - } - data.resize((data.size() + 3) & (~3)); - StringReader r(data); - pr2_expected_size = big_endian ? r.get_u32b() : r.get_u32l(); - PSOV2Encryption crypt(big_endian ? r.get_u32b() : r.get_u32l()); - if (big_endian) { - crypt.encrypt_big_endian(data.data() + 8, data.size() - 8); - } else { - crypt.decrypt(data.data() + 8, data.size() - 8); - } - data = data.substr(8); + auto decrypted = big_endian ? decrypt_pr2_data(data) : decrypt_pr2_data(data); /* NOTE(review): both arms read identically in this text; the template arguments selecting the big-endian or little-endian variant appear to be missing and should be confirmed */ + pr2_expected_size = decrypted.decompressed_size; + data = std::move(decrypted.compressed_data); } size_t input_bytes = data.size(); @@ -799,25 +806,9 @@ int main(int argc, char** argv) { log_warning("Result data size (%zu bytes) does not match expected size from PR2 header (%zu bytes)", data.size(), pr2_expected_size); } else if (behavior == Behavior::COMPRESS_PR2) { uint32_t pr2_seed = seed.empty() ? random_object() : stoul(seed, nullptr, 16); - size_t orig_size = data.size(); - data.resize((data.size() + 3) & (~3)); - PSOV2Encryption crypt(pr2_seed); - if (big_endian) { - crypt.encrypt_big_endian(data.data(), data.size()); - } else { - crypt.encrypt(data.data(), data.size()); - } - data.resize(orig_size); - StringWriter w; - if (big_endian) { - w.put_u32b(input_bytes); - w.put_u32b(pr2_seed); - } else { - w.put_u32l(input_bytes); - w.put_u32l(pr2_seed); - } - w.write(data); - data = std::move(w.str()); + data = big_endian + ? 
encrypt_pr2_data(data, input_bytes, pr2_seed) + : encrypt_pr2_data(data, input_bytes, pr2_seed); } write_output_data(data.data(), data.size()); @@ -1438,6 +1429,40 @@ int main(int argc, char** argv) { break; } + case Behavior::DECODE_TEXT_ARCHIVE: { /* load a text archive from the input and write it out as formatted JSON */ + string data = read_input_data(); + TextArchive a(data, big_endian); + JSON j = a.json(); + string out_data = j.serialize(JSON::SerializeOption::FORMAT); + write_output_data(out_data.data(), out_data.size()); + break; + } + case Behavior::ENCODE_TEXT_ARCHIVE: { /* parse JSON from the input and save the resulting pr2 and pr3 files */ + auto json = JSON::parse(read_input_data()); + TextArchive a(json); + auto result = a.serialize(big_endian); + if (!output_filename) { /* no output name given: both output names are derived from the input filename */ + if (!input_filename || !strcmp(input_filename, "-")) { + throw runtime_error("encoded text archive cannot be written to stdout"); + } + save_file(string_printf("%s.pr2", input_filename), result.first); + save_file(string_printf("%s.pr3", input_filename), result.second); + } else if (!strcmp(output_filename, "-")) { /* test the output name: it is non-null in this branch, whereas input_filename may be null here */ + throw runtime_error("encoded text archive cannot be written to stdout"); + } else { + string out_filename = output_filename; + if (ends_with(out_filename, ".pr2")) { + save_file(out_filename, result.first); + out_filename[out_filename.size() - 1] = '3'; /* turn the trailing .pr2 into .pr3 for the second file */ + save_file(out_filename, result.second); + } else { + save_file(out_filename + ".pr2", result.first); + save_file(out_filename + ".pr3", result.second); + } + } + break; + } + case Behavior::CAT_CLIENT: { shared_ptr key; if (cli_version == GameVersion::BB) { diff --git a/src/PSOEncryption.hh b/src/PSOEncryption.hh index c07112f4..50819478 100644 --- a/src/PSOEncryption.hh +++ b/src/PSOEncryption.hh @@ -280,3 +280,47 @@ std::u16string encrypt_challenge_rank_text(const ptext& data) { } std::string decrypt_v2_registry_value(const void* data, size_t size); + +struct DecryptedPR2 { + std::string compressed_data; /* bytes after the 8-byte header, decrypted but still compressed */ + size_t decompressed_size; /* first header field; Main.cc compares it with the size of the decompressed result */ +}; + +template +DecryptedPR2 decrypt_pr2_data(const std::string& data) { + using U32T = std::conditional_t; + + if (data.size() < 
8) { + throw std::runtime_error("not enough data for PR2 header"); + } + StringReader r(data); + DecryptedPR2 ret = { + .compressed_data = data.substr(8), + .decompressed_size = r.get()}; + PSOV2Encryption crypt(r.get()); /* the second header field seeds the cipher. NOTE(review): the Main.cc code this helper replaces padded the buffer to a multiple of 4 bytes before decrypting; no padding happens here, so confirm PSOV2Encryption accepts other sizes */ + if (IsBigEndian) { + crypt.encrypt_big_endian(ret.compressed_data.data(), ret.compressed_data.size()); + } else { + crypt.decrypt(ret.compressed_data.data(), ret.compressed_data.size()); + } + return ret; +} + +template +std::string encrypt_pr2_data(const std::string& data, size_t decompressed_size, uint32_t seed) { /* writes decompressed_size and seed as the 8-byte header, appends data, then encrypts everything after the header with a cipher seeded by seed */ + using U32T = std::conditional_t; + + StringWriter w; + w.put(decompressed_size); + w.put(seed); + w.write(data); + + std::string ret = std::move(w.str()); + PSOV2Encryption crypt(seed); /* NOTE(review): the replaced Main.cc code padded the payload to a multiple of 4 bytes before encrypting and truncated it afterwards; that is not done here - confirm it is unnecessary */ + if (IsBigEndian) { + crypt.encrypt_big_endian(ret.data() + 8, ret.size() - 8); + } else { + crypt.encrypt(ret.data() + 8, ret.size() - 8); + } + return ret; +} diff --git a/src/TextArchive.cc b/src/TextArchive.cc new file mode 100644 index 00000000..e557f4ca --- /dev/null +++ b/src/TextArchive.cc @@ -0,0 +1,302 @@ +#include "TextArchive.hh" + +#include +#include +#include +#include +#include + +#include "Compression.hh" +#include "PSOEncryption.hh" +#include "Text.hh" + +using namespace std; + +TextArchive::TextArchive(const string& pr2_data, bool big_endian) { /* loads from encrypted, compressed pr2 file contents via load_t */ + if (big_endian) { + this->load_t(pr2_data); + } else { + this->load_t(pr2_data); + } +} + +TextArchive::TextArchive(const JSON& json) { /* rebuilds collections, keyboards and keyboard_selector_width from a dict with the same keys that json() emits */ + for (const auto& collection_json : json.at("collections").as_list()) { + auto& collection = this->collections.emplace_back(); + for (const auto& string_json : collection_json->as_list()) { + collection.emplace_back(string_json->as_string()); + } + } + + for (const auto& keyboard_json : json.at("keyboards").as_list()) { + auto& keyboard = this->keyboards.emplace_back(new Keyboard()); + for (size_t y = 0; y < keyboard->size(); y++) { + auto& row = keyboard->at(y); + const auto& row_json = keyboard_json->at(y); + for (size_t x = 0; x < row.size(); x++) { 
row[x] = row_json.at(x).as_int(); + } + } + } + + this->keyboard_selector_width = json.at("keyboard_selector_width").as_int(); +} + +JSON TextArchive::json() const { /* returns a dict with three keys: collections (list of lists of strings), keyboards (one list of rows per keyboard) and keyboard_selector_width */ + auto collections_json = JSON::list(); + for (const auto& collection : this->collections) { + auto collection_json = JSON::list(); + for (const auto& s : collection) { + collection_json.emplace_back(s); + } + collections_json.emplace_back(std::move(collection_json)); + } + auto keyboards_json = JSON::list(); + for (const auto& kb : this->keyboards) { + JSON keyboard_json = JSON::list(); + for (size_t y = 0; y < kb->size(); y++) { + const auto& row = kb->at(y); + JSON row_json = JSON::list(); + for (size_t x = 0; x < row.size(); x++) { + row_json.emplace_back(row[x]); + } + keyboard_json.emplace_back(std::move(row_json)); + } + keyboards_json.emplace_back(std::move(keyboard_json)); + } + return JSON::dict({ + {"collections", std::move(collections_json)}, + {"keyboards", std::move(keyboards_json)}, + {"keyboard_selector_width", this->keyboard_selector_width}, + }); +} + +const string& TextArchive::get_string(size_t collection_index, size_t index) const { /* both lookups go through at(), so no entries are created by reading */ + return this->collections.at(collection_index).at(index); +} + +void TextArchive::set_string(size_t collection_index, size_t index, const string& data) { /* grows the list of collections and the target collection as needed so that the slot exists, then copies data into it */ + if (collection_index >= this->collections.size()) { + this->collections.resize(collection_index + 1); + } + auto& coll = this->collections[collection_index]; + if (index >= coll.size()) { + coll.resize(index + 1); + } + coll[index] = data; +} + +void TextArchive::set_string(size_t collection_index, size_t index, string&& data) { /* same as the overload above, but moves data into the slot */ + if (collection_index >= this->collections.size()) { + this->collections.resize(collection_index + 1); + } + auto& coll = this->collections[collection_index]; + if (index >= coll.size()) { + coll.resize(index + 1); + } + coll[index] = std::move(data); +} + +void TextArchive::resize_collection(size_t collection_index, size_t size) { /* resizes one collection, creating it (and any collections before it) if it does not exist yet */ + if (collection_index >= 
this->collections.size()) { + this->collections.resize(collection_index + 1); + } + this->collections[collection_index].resize(size); +} + +void TextArchive::resize_collection(size_t num_collections) { /* NOTE(review): despite the singular name, this overload resizes the list of collections itself */ + this->collections.resize(num_collections); +} + +TextArchive::Keyboard TextArchive::get_keyboard(size_t kb_index) const { /* returns a copy of the keyboard. NOTE(review): set_keyboard and resize_keyboards can grow the vector with default-constructed entries; if those are null pointers, dereferencing one here (or in json and serialize_t) looks unsafe - confirm */ + return *this->keyboards.at(kb_index); +} + +void TextArchive::set_keyboard(size_t kb_index, const Keyboard& kb) { /* stores a freshly allocated copy of kb, growing the vector first when kb_index is past the end */ + if (kb_index >= this->keyboards.size()) { + this->keyboards.resize(kb_index + 1); + } + this->keyboards[kb_index].reset(new Keyboard(kb)); +} + +void TextArchive::resize_keyboards(size_t num_keyboards) { + this->keyboards.resize(num_keyboards); +} + +pair TextArchive::serialize(bool big_endian) const { /* runtime dispatch to serialize_t. NOTE(review): both branches read identically in this text; the template arguments appear to be missing */ + if (big_endian) { + return this->serialize_t(); + } else { + return this->serialize_t(); + } +} + +template +void TextArchive::load_t(const string& pr2_data) { + using U32T = std::conditional_t; + using U16T = std::conditional_t; + + // The structure is as follows: + // Footer: + // U32T keyboard_index_offset ->: + // U8 num_keyboards + // U8 keyboard_selector_width + // U8 unused[2] + // U32T keyboards_offset ->: + // U32T keyboard_offset[num_keyboards] ->: + // U16T key_defs[7][16] + // U32T collections_offset ->: + // U32T[...] strings_offset ->: + // U32T[...] string_offset ->: + // char string[...\0] + // + + auto pr2_decrypted = decrypt_pr2_data(pr2_data); + auto decompressed = prs_decompress(pr2_decrypted.compressed_data); + StringReader r(decompressed); + + // Annoyingly, there doesn't appear to be any bounds-checking on the language + // functions, so there are no counts of strings in each collection. We have to + // figure out where each collection ends by collecting all the relevant + // offsets in the file instead. 
+ set used_offsets; + used_offsets.emplace(r.size() - 8); /* the footer occupies the last 8 bytes: keyboard index offset, then collections offset */ + + uint32_t keyboard_index_offset = r.pget(r.size() - 8); + used_offsets.emplace(keyboard_index_offset); + size_t num_keyboards = r.pget_u8(keyboard_index_offset); + this->keyboard_selector_width = r.pget_u8(keyboard_index_offset + 1); + uint32_t keyboards_offset = r.pget(keyboard_index_offset + 4); + used_offsets.emplace(keyboards_offset); + while (this->keyboards.size() < num_keyboards) { /* the number of keyboards loaded so far doubles as the index into the keyboard offset table */ + uint32_t keyboard_offset = r.pget(keyboards_offset + 4 * this->keyboards.size()); + used_offsets.emplace(keyboard_offset); + auto& kb = this->keyboards.emplace_back(new Keyboard()); + auto key_r = r.sub(keyboard_offset, sizeof(Keyboard)); + for (size_t y = 0; y < kb->size(); y++) { + auto& row = kb->at(y); + for (size_t x = 0; x < row.size(); x++) { + row[x] = key_r.get(); + } + } + } + + uint32_t collections_offset = r.pget(r.size() - 4); + for (uint32_t offset = collections_offset; !used_offsets.count(offset); offset += 4) { /* first pass: record where the string offset table of each collection begins, stopping at the first offset that is already known */ + used_offsets.emplace(r.pget(offset)); + } + used_offsets.emplace(collections_offset); + + for (uint32_t offset = collections_offset; (offset == collections_offset) || !used_offsets.count(offset); offset += 4) { /* second pass: collections_offset is itself in the set by now, hence the explicit allowance for the first iteration */ + auto& collection = this->collections.emplace_back(); + uint32_t first_string_offset_offset = r.pget(offset); + for (uint32_t string_offset_offset = first_string_offset_offset; + (string_offset_offset == first_string_offset_offset) || !used_offsets.count(string_offset_offset); + string_offset_offset += 4) { + collection.emplace_back(r.pget_cstr(r.pget(string_offset_offset))); + } + } +} + +template +pair TextArchive::serialize_t() const { /* builds the uncompressed pr2 image in w and records every position where an offset is written; those positions later become the pr3 relocation data */ + using U32T = std::conditional_t; + using U16T = std::conditional_t; + + StringWriter w; + set relocation_offsets; + auto put_offset_u32 = [&](uint32_t v) { + relocation_offsets.emplace(w.size()); + w.put(v); + }; + + uint32_t collections_offset; + { + unordered_map string_to_offset; + for (const 
auto& s : collection) { + if (string_to_offset.emplace(s, w.size()).second) { /* each distinct string is written once, followed by a zero byte and zero padding up to a 4-byte boundary */ + w.write(s); + w.put_u8(0); + while (w.size() & 3) { + w.put_u8(0); + } + } + } + } + + vector collection_offsets; + for (const auto& collection : this->collections) { + collection_offsets.emplace_back(w.size()); + for (const auto& s : collection) { + put_offset_u32(string_to_offset.at(s)); + } + } + + collections_offset = w.size(); + for (uint32_t collection_offset : collection_offsets) { + put_offset_u32(collection_offset); + } + } + + uint32_t keyboard_index_offset; + { + vector keyboard_offsets; + for (const auto& keyboard : this->keyboards) { + keyboard_offsets.emplace_back(w.size()); + for (size_t y = 0; y < keyboard->size(); y++) { + const auto& row = keyboard->at(y); + for (size_t x = 0; x < row.size(); x++) { + w.put(row[x]); + } + } + } + + uint32_t keyboards_offset = w.size(); + for (uint32_t keyboard_offset : keyboard_offsets) { + put_offset_u32(keyboard_offset); + } + + keyboard_index_offset = w.size(); + w.put_u8(keyboard_offsets.size()); + w.put_u8(this->keyboard_selector_width); + w.put_u16(0); + put_offset_u32(keyboards_offset); + } + + put_offset_u32(keyboard_index_offset); /* these two offsets form the 8-byte footer that load_t reads from the end of the data */ + put_offset_u32(collections_offset); + + StringWriter reloc_w; /* pr3 contents: a fixed header, then one entry per recorded offset giving its distance in 4-byte words from the previous one */ + reloc_w.put_u32(0); + reloc_w.put(relocation_offsets.size()); + reloc_w.put_u64(0); + reloc_w.put(w.size() - 8); + reloc_w.put_u32(0); + reloc_w.put_u64(0); + { + size_t offset = 0; + for (size_t reloc_offset : relocation_offsets) { + if (reloc_offset & 3) { + throw logic_error("misaligned relocation"); + } + size_t num_words = (reloc_offset - offset) >> 2; + if (num_words > 0xFFFF) { + throw runtime_error("relocation offset too far away"); + } + reloc_w.put(num_words); + offset = reloc_offset; + } + } + + const string& pr2_data = w.str(); + const string& pr3_data = reloc_w.str(); + + string pr2_compressed = prs_compress_optimal(pr2_data.data(), pr2_data.size()); + string pr3_compressed = 
prs_compress_optimal(pr3_data.data(), pr3_data.size()); + + string pr2_ret = encrypt_pr2_data(pr2_compressed, pr2_data.size(), random_object()); + string pr3_ret = encrypt_pr2_data(pr3_compressed, pr3_data.size(), random_object()); + + return make_pair(std::move(pr2_ret), std::move(pr3_ret)); +} diff --git a/src/TextArchive.hh b/src/TextArchive.hh new file mode 100644 index 00000000..f99f7f2b --- /dev/null +++ b/src/TextArchive.hh @@ -0,0 +1,51 @@ +#pragma once + +#include + +#include +#include +#include +#include + +#include "Text.hh" + +// This class implements loading and saving of text archives, commonly found in +// PSO games with filenames like TextEnglish.pr2 and TextEnglish.pr3. The game +// requires both files, but newserv needs only the pr2 file to load a text +// archive. When saving (serializing), both pr2 and pr3 files are generated. +class TextArchive { +public: + using Keyboard = parray, 7>; + + explicit TextArchive(const JSON& json); + TextArchive(const std::string& pr2_data, bool big_endian); + ~TextArchive() = default; + + JSON json() const; + + const std::string& get_string(size_t collection_index, size_t index) const; + void set_string(size_t collection_index, size_t index, const std::string& data); + void set_string(size_t collection_index, size_t index, std::string&& data); + void resize_collection(size_t collection_index, size_t size); + void resize_collection(size_t num_collections); + + Keyboard get_keyboard(size_t kb_index) const; + void set_keyboard(size_t kb_index, const Keyboard& kb); + void resize_keyboards(size_t num_keyboards); + + uint8_t get_keyboard_selector_width() const { return this->keyboard_selector_width; } /* defined inline: TextArchive.cc contains no out-of-line definition for these two accessors */ + void set_keyboard_selector_width(uint8_t width) { this->keyboard_selector_width = width; } + + // Returns (pr2_data, pr3_data) + std::pair serialize(bool big_endian) const; + +private: + template + void load_t(const std::string& pr2_data); + template + std::pair serialize_t() const; + + std::vector> collections; + std::vector> keyboards; 
+ uint8_t keyboard_selector_width; +};