diff --git a/src/Main.cc b/src/Main.cc index 08363ce7..44e12ec2 100644 --- a/src/Main.cc +++ b/src/Main.cc @@ -1453,13 +1453,14 @@ Action a_decode_text_archive( expects the input not to have a REL footer.\n", +[](Arguments& args) { string data = read_input_data(args); + bool is_sjis = args.get("japanese"); unique_ptr ts; size_t collection_count = args.get("collections", 0); if (collection_count) { - ts = make_unique(data, collection_count, !args.get("has-pr3")); + ts = make_unique(data, collection_count, !args.get("has-pr3"), is_sjis); } else { - ts = make_unique(data, args.get("big-endian")); + ts = make_unique(data, args.get("big-endian"), is_sjis); } JSON j = ts->json(); string out_data = j.serialize(JSON::SerializeOption::FORMAT | JSON::SerializeOption::ESCAPE_CONTROLS_ONLY); @@ -1472,10 +1473,11 @@ Action a_encode_text_archive( +[](Arguments& args) { const string& input_filename = args.get(1, false); const string& output_filename = args.get(2, false); + bool is_sjis = args.get("japanese"); auto json = JSON::parse(read_input_data(args)); BinaryTextAndKeyboardsSet a(json); - auto result = a.serialize(args.get("big-endian")); + auto result = a.serialize(args.get("big-endian"), is_sjis); if (output_filename.empty()) { if (input_filename.empty() || (input_filename == "-")) { throw runtime_error("encoded text archive cannot be written to stdout"); diff --git a/src/TextIndex.cc b/src/TextIndex.cc index c42c0320..e23ba11e 100644 --- a/src/TextIndex.cc +++ b/src/TextIndex.cc @@ -167,7 +167,7 @@ string UnicodeTextSet::serialize() const { return prs_compress_optimal(header_w.str()); } -BinaryTextSet::BinaryTextSet(const std::string& pr2_data, size_t collection_count, bool has_rel_footer) { +BinaryTextSet::BinaryTextSet(const std::string& pr2_data, size_t collection_count, bool has_rel_footer, bool is_sjis) { auto pr2_decrypted = decrypt_pr2_data(pr2_data); auto decompressed = prs_decompress(pr2_decrypted.compressed_data); StringReader r(decompressed); @@ -187,6 +187,8 @@ BinaryTextSet::BinaryTextSet(const std::string& pr2_data, size_t collection_coun } used_offsets.emplace(root_offset); + auto& tt = is_sjis ? tt_sjis_to_utf8 : tt_8859_to_utf8; + collection_offsets_r.go(0); while (!collection_offsets_r.eof()) { auto& collection = this->collections.emplace_back(); @@ -197,18 +199,18 @@ BinaryTextSet::BinaryTextSet(const std::string& pr2_data, size_t collection_coun for (uint32_t string_offset_offset = first_string_offset_offset; (string_offset_offset == first_string_offset_offset) || !used_offsets.count(string_offset_offset); string_offset_offset += 4) { - collection.emplace_back(r.pget_cstr(r.pget_u32l(string_offset_offset))); + collection.emplace_back(tt(r.pget_cstr(r.pget_u32l(string_offset_offset)))); } } catch (const out_of_range&) { } } } -BinaryTextAndKeyboardsSet::BinaryTextAndKeyboardsSet(const string& pr2_data, bool big_endian) { +BinaryTextAndKeyboardsSet::BinaryTextAndKeyboardsSet(const string& pr2_data, bool big_endian, bool is_sjis) { if (big_endian) { - this->parse_t(pr2_data); + this->parse_t(pr2_data, is_sjis); } else { - this->parse_t(pr2_data); + this->parse_t(pr2_data, is_sjis); } } @@ -271,19 +273,21 @@ void BinaryTextAndKeyboardsSet::resize_keyboards(size_t num_keyboards) { this->keyboards.resize(num_keyboards); } -pair BinaryTextAndKeyboardsSet::serialize(bool big_endian) const { +pair BinaryTextAndKeyboardsSet::serialize(bool big_endian, bool is_sjis) const { if (big_endian) { - return this->serialize_t(); + return this->serialize_t(is_sjis); } else { - return this->serialize_t(); + return this->serialize_t(is_sjis); } } template -void BinaryTextAndKeyboardsSet::parse_t(const string& pr2_data) { +void BinaryTextAndKeyboardsSet::parse_t(const string& pr2_data, bool is_sjis) { using U32T = std::conditional_t; using U16T = std::conditional_t; + auto& tt = is_sjis ? tt_sjis_to_utf8 : tt_8859_to_utf8; + // The structure is as follows: // Footer: // U32T keyboard_index_offset ->: @@ -341,16 +345,18 @@ void BinaryTextAndKeyboardsSet::parse_t(const string& pr2_data) { for (uint32_t string_offset_offset = first_string_offset_offset; (string_offset_offset == first_string_offset_offset) || !used_offsets.count(string_offset_offset); string_offset_offset += 4) { - collection.emplace_back(r.pget_cstr(r.pget(string_offset_offset))); + collection.emplace_back(tt(r.pget_cstr(r.pget(string_offset_offset)))); } } } template -pair BinaryTextAndKeyboardsSet::serialize_t() const { +pair BinaryTextAndKeyboardsSet::serialize_t(bool is_sjis) const { using U32T = std::conditional_t; using U16T = std::conditional_t; + auto& tt = is_sjis ? tt_utf8_to_sjis : tt_utf8_to_8859; + StringWriter w; ::set relocation_offsets; auto put_offset_u32 = [&](uint32_t v) { @@ -364,7 +370,7 @@ pair BinaryTextAndKeyboardsSet::serialize_t() const { for (const auto& collection : this->collections) { for (const auto& s : collection) { if (string_to_offset.emplace(s, w.size()).second) { - w.write(s); + w.write(tt(s)); w.put_u8(0); while (w.size() & 3) { w.put_u8(0); @@ -439,13 +445,10 @@ pair BinaryTextAndKeyboardsSet::serialize_t() const { const string& pr2_data = w.str(); const string& pr3_data = reloc_w.str(); - print_data(stderr, pr2_data); string pr2_compressed = prs_compress_optimal(pr2_data.data(), pr2_data.size()); string pr3_compressed = prs_compress_optimal(pr3_data.data(), pr3_data.size()); - print_data(stderr, pr2_compressed); string pr2_ret = encrypt_pr2_data(pr2_compressed, pr2_data.size(), random_object()); string pr3_ret = encrypt_pr2_data(pr3_compressed, pr3_data.size(), random_object()); - print_data(stderr, pr2_ret); return make_pair(std::move(pr2_ret), std::move(pr3_ret)); } @@ -454,7 +457,7 @@ TextIndex::TextIndex( function(Version, const string&)> get_patch_file) : log("[TextIndex] ", static_game_data_log.min_level) { if (!directory.empty()) { - auto add_version = [&](Version version, const string& subdirectory, function(const string&)> make_set) -> void { + auto add_version = [&](Version version, const string& subdirectory, function(const string&, bool)> make_set) -> void { static const map bintext_filenames({ {"TextJapanese.pr2", 0x00}, {"TextEnglish.pr2", 0x01}, @@ -484,7 +487,7 @@ TextIndex::TextIndex( this->add_set(version, it.second, make_shared(JSON::parse(load_file(json_path)))); } else if (isfile(file_path)) { this->log.info("Loading %s %c binary text set from %s", name_for_enum(version), char_for_language_code(it.second), file_path.c_str()); - this->add_set(version, it.second, make_set(load_file(file_path))); + this->add_set(version, it.second, make_set(load_file(file_path), it.second == 0)); } } } else { @@ -498,11 +501,11 @@ TextIndex::TextIndex( auto patch_file = get_patch_file ? get_patch_file(version, it.first) : nullptr; if (patch_file) { this->log.info("Loading %s %c Unicode text set from %s in patch tree", name_for_enum(version), char_for_language_code(it.second), it.first.c_str()); - this->add_set(version, it.second, make_set(*patch_file)); + this->add_set(version, it.second, make_set(*patch_file, it.second == 0)); } else { if (isfile(file_path)) { this->log.info("Loading %s %c Unicode text set from %s", name_for_enum(version), char_for_language_code(it.second), file_path.c_str()); - this->add_set(version, it.second, make_set(load_file(file_path))); + this->add_set(version, it.second, make_set(load_file(file_path), it.second == 0)); } } } @@ -510,12 +513,12 @@ TextIndex::TextIndex( } }; - auto make_binary_dc112000 = +[](const string& data) { return make_shared(data, 21, true); }; - auto make_binary_dcnte_dcv1 = +[](const string& data) { return make_shared(data, 26, true); }; - auto make_binary_dcv2 = +[](const string& data) { return make_shared(data, 37, false); }; - auto make_binary_gc = +[](const string& data) { return make_shared(data, true); }; - auto make_binary_xb = +[](const string& data) { return make_shared(data, false); }; - auto make_unitxt = +[](const string& data) { return make_shared(data); }; + auto make_binary_dc112000 = +[](const string& data, bool is_sjis) { return make_shared(data, 21, true, is_sjis); }; + auto make_binary_dcnte_dcv1 = +[](const string& data, bool is_sjis) { return make_shared(data, 26, true, is_sjis); }; + auto make_binary_dcv2 = +[](const string& data, bool is_sjis) { return make_shared(data, 37, false, is_sjis); }; + auto make_binary_gc = +[](const string& data, bool is_sjis) { return make_shared(data, true, is_sjis); }; + auto make_binary_xb = +[](const string& data, bool is_sjis) { return make_shared(data, false, is_sjis); }; + auto make_unitxt = +[](const string& data, bool) { return make_shared(data); }; add_version(Version::DC_NTE, "dc-nte", make_binary_dcnte_dcv1); add_version(Version::DC_V1_11_2000_PROTOTYPE, "dc-11-2000", make_binary_dc112000); diff --git a/src/TextIndex.hh b/src/TextIndex.hh index f6465def..f4bc5577 100644 --- a/src/TextIndex.hh +++ b/src/TextIndex.hh @@ -53,7 +53,7 @@ class BinaryTextSet : public TextSet { public: explicit BinaryTextSet(const JSON& json) : TextSet(json) {} explicit BinaryTextSet(JSON&& json) : TextSet(json) {} - BinaryTextSet(const std::string& pr2_data, size_t collection_count, bool has_rel_footer); + BinaryTextSet(const std::string& pr2_data, size_t collection_count, bool has_rel_footer, bool is_sjis); ~BinaryTextSet() = default; // TODO: Implement serialize functions }; @@ -64,7 +64,7 @@ public: explicit BinaryTextAndKeyboardsSet(const JSON& json); explicit BinaryTextAndKeyboardsSet(JSON&& json); - BinaryTextAndKeyboardsSet(const std::string& pr2_data, bool big_endian); + BinaryTextAndKeyboardsSet(const std::string& pr2_data, bool big_endian, bool is_sjis); ~BinaryTextAndKeyboardsSet() = default; virtual JSON json() const; @@ -77,13 +77,13 @@ public: void set_keyboard_selector_width(uint8_t width); // Returns (pr2_data, pr3_data) - std::pair serialize(bool big_endian) const; + std::pair serialize(bool big_endian, bool is_sjis) const; protected: template - void parse_t(const std::string& pr2_data); + void parse_t(const std::string& pr2_data, bool is_sjis); template - std::pair serialize_t() const; + std::pair serialize_t(bool is_sjis) const; std::vector> keyboards; uint8_t keyboard_selector_width;