Add Shift-JIS (SJIS) case for text set encode/decode
This commit is contained in:
+5
-3
@@ -1453,13 +1453,14 @@ Action a_decode_text_archive(
|
||||
expects the input not to have a REL footer.\n",
|
||||
+[](Arguments& args) {
|
||||
string data = read_input_data(args);
|
||||
bool is_sjis = args.get<bool>("japanese");
|
||||
|
||||
unique_ptr<TextSet> ts;
|
||||
size_t collection_count = args.get<size_t>("collections", 0);
|
||||
if (collection_count) {
|
||||
ts = make_unique<BinaryTextSet>(data, collection_count, !args.get<bool>("has-pr3"));
|
||||
ts = make_unique<BinaryTextSet>(data, collection_count, !args.get<bool>("has-pr3"), is_sjis);
|
||||
} else {
|
||||
ts = make_unique<BinaryTextAndKeyboardsSet>(data, args.get<bool>("big-endian"));
|
||||
ts = make_unique<BinaryTextAndKeyboardsSet>(data, args.get<bool>("big-endian"), is_sjis);
|
||||
}
|
||||
JSON j = ts->json();
|
||||
string out_data = j.serialize(JSON::SerializeOption::FORMAT | JSON::SerializeOption::ESCAPE_CONTROLS_ONLY);
|
||||
@@ -1472,10 +1473,11 @@ Action a_encode_text_archive(
|
||||
+[](Arguments& args) {
|
||||
const string& input_filename = args.get<string>(1, false);
|
||||
const string& output_filename = args.get<string>(2, false);
|
||||
bool is_sjis = args.get<bool>("japanese");
|
||||
|
||||
auto json = JSON::parse(read_input_data(args));
|
||||
BinaryTextAndKeyboardsSet a(json);
|
||||
auto result = a.serialize(args.get<bool>("big-endian"));
|
||||
auto result = a.serialize(args.get<bool>("big-endian"), is_sjis);
|
||||
if (output_filename.empty()) {
|
||||
if (input_filename.empty() || (input_filename == "-")) {
|
||||
throw runtime_error("encoded text archive cannot be written to stdout");
|
||||
|
||||
+28
-25
@@ -167,7 +167,7 @@ string UnicodeTextSet::serialize() const {
|
||||
return prs_compress_optimal(header_w.str());
|
||||
}
|
||||
|
||||
BinaryTextSet::BinaryTextSet(const std::string& pr2_data, size_t collection_count, bool has_rel_footer) {
|
||||
BinaryTextSet::BinaryTextSet(const std::string& pr2_data, size_t collection_count, bool has_rel_footer, bool is_sjis) {
|
||||
auto pr2_decrypted = decrypt_pr2_data<false>(pr2_data);
|
||||
auto decompressed = prs_decompress(pr2_decrypted.compressed_data);
|
||||
StringReader r(decompressed);
|
||||
@@ -187,6 +187,8 @@ BinaryTextSet::BinaryTextSet(const std::string& pr2_data, size_t collection_coun
|
||||
}
|
||||
used_offsets.emplace(root_offset);
|
||||
|
||||
auto& tt = is_sjis ? tt_sjis_to_utf8 : tt_8859_to_utf8;
|
||||
|
||||
collection_offsets_r.go(0);
|
||||
while (!collection_offsets_r.eof()) {
|
||||
auto& collection = this->collections.emplace_back();
|
||||
@@ -197,18 +199,18 @@ BinaryTextSet::BinaryTextSet(const std::string& pr2_data, size_t collection_coun
|
||||
for (uint32_t string_offset_offset = first_string_offset_offset;
|
||||
(string_offset_offset == first_string_offset_offset) || !used_offsets.count(string_offset_offset);
|
||||
string_offset_offset += 4) {
|
||||
collection.emplace_back(r.pget_cstr(r.pget_u32l(string_offset_offset)));
|
||||
collection.emplace_back(tt(r.pget_cstr(r.pget_u32l(string_offset_offset))));
|
||||
}
|
||||
} catch (const out_of_range&) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BinaryTextAndKeyboardsSet::BinaryTextAndKeyboardsSet(const string& pr2_data, bool big_endian) {
|
||||
BinaryTextAndKeyboardsSet::BinaryTextAndKeyboardsSet(const string& pr2_data, bool big_endian, bool is_sjis) {
|
||||
if (big_endian) {
|
||||
this->parse_t<true>(pr2_data);
|
||||
this->parse_t<true>(pr2_data, is_sjis);
|
||||
} else {
|
||||
this->parse_t<false>(pr2_data);
|
||||
this->parse_t<false>(pr2_data, is_sjis);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -271,19 +273,21 @@ void BinaryTextAndKeyboardsSet::resize_keyboards(size_t num_keyboards) {
|
||||
this->keyboards.resize(num_keyboards);
|
||||
}
|
||||
|
||||
pair<string, string> BinaryTextAndKeyboardsSet::serialize(bool big_endian) const {
|
||||
pair<string, string> BinaryTextAndKeyboardsSet::serialize(bool big_endian, bool is_sjis) const {
|
||||
if (big_endian) {
|
||||
return this->serialize_t<true>();
|
||||
return this->serialize_t<true>(is_sjis);
|
||||
} else {
|
||||
return this->serialize_t<false>();
|
||||
return this->serialize_t<false>(is_sjis);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool IsBigEndian>
|
||||
void BinaryTextAndKeyboardsSet::parse_t(const string& pr2_data) {
|
||||
void BinaryTextAndKeyboardsSet::parse_t(const string& pr2_data, bool is_sjis) {
|
||||
using U32T = std::conditional_t<IsBigEndian, be_uint32_t, le_uint32_t>;
|
||||
using U16T = std::conditional_t<IsBigEndian, be_uint16_t, le_uint16_t>;
|
||||
|
||||
auto& tt = is_sjis ? tt_sjis_to_utf8 : tt_8859_to_utf8;
|
||||
|
||||
// The structure is as follows:
|
||||
// Footer:
|
||||
// U32T keyboard_index_offset ->:
|
||||
@@ -341,16 +345,18 @@ void BinaryTextAndKeyboardsSet::parse_t(const string& pr2_data) {
|
||||
for (uint32_t string_offset_offset = first_string_offset_offset;
|
||||
(string_offset_offset == first_string_offset_offset) || !used_offsets.count(string_offset_offset);
|
||||
string_offset_offset += 4) {
|
||||
collection.emplace_back(r.pget_cstr(r.pget<U32T>(string_offset_offset)));
|
||||
collection.emplace_back(tt(r.pget_cstr(r.pget<U32T>(string_offset_offset))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool IsBigEndian>
|
||||
pair<string, string> BinaryTextAndKeyboardsSet::serialize_t() const {
|
||||
pair<string, string> BinaryTextAndKeyboardsSet::serialize_t(bool is_sjis) const {
|
||||
using U32T = std::conditional_t<IsBigEndian, be_uint32_t, le_uint32_t>;
|
||||
using U16T = std::conditional_t<IsBigEndian, be_uint16_t, le_uint16_t>;
|
||||
|
||||
auto& tt = is_sjis ? tt_utf8_to_sjis : tt_utf8_to_8859;
|
||||
|
||||
StringWriter w;
|
||||
::set<size_t> relocation_offsets;
|
||||
auto put_offset_u32 = [&](uint32_t v) {
|
||||
@@ -364,7 +370,7 @@ pair<string, string> BinaryTextAndKeyboardsSet::serialize_t() const {
|
||||
for (const auto& collection : this->collections) {
|
||||
for (const auto& s : collection) {
|
||||
if (string_to_offset.emplace(s, w.size()).second) {
|
||||
w.write(s);
|
||||
w.write(tt(s));
|
||||
w.put_u8(0);
|
||||
while (w.size() & 3) {
|
||||
w.put_u8(0);
|
||||
@@ -439,13 +445,10 @@ pair<string, string> BinaryTextAndKeyboardsSet::serialize_t() const {
|
||||
|
||||
const string& pr2_data = w.str();
|
||||
const string& pr3_data = reloc_w.str();
|
||||
print_data(stderr, pr2_data);
|
||||
string pr2_compressed = prs_compress_optimal(pr2_data.data(), pr2_data.size());
|
||||
string pr3_compressed = prs_compress_optimal(pr3_data.data(), pr3_data.size());
|
||||
print_data(stderr, pr2_compressed);
|
||||
string pr2_ret = encrypt_pr2_data<IsBigEndian>(pr2_compressed, pr2_data.size(), random_object<uint32_t>());
|
||||
string pr3_ret = encrypt_pr2_data<IsBigEndian>(pr3_compressed, pr3_data.size(), random_object<uint32_t>());
|
||||
print_data(stderr, pr2_ret);
|
||||
return make_pair(std::move(pr2_ret), std::move(pr3_ret));
|
||||
}
|
||||
|
||||
@@ -454,7 +457,7 @@ TextIndex::TextIndex(
|
||||
function<shared_ptr<const string>(Version, const string&)> get_patch_file)
|
||||
: log("[TextIndex] ", static_game_data_log.min_level) {
|
||||
if (!directory.empty()) {
|
||||
auto add_version = [&](Version version, const string& subdirectory, function<shared_ptr<TextSet>(const string&)> make_set) -> void {
|
||||
auto add_version = [&](Version version, const string& subdirectory, function<shared_ptr<TextSet>(const string&, bool)> make_set) -> void {
|
||||
static const map<string, uint8_t> bintext_filenames({
|
||||
{"TextJapanese.pr2", 0x00},
|
||||
{"TextEnglish.pr2", 0x01},
|
||||
@@ -484,7 +487,7 @@ TextIndex::TextIndex(
|
||||
this->add_set(version, it.second, make_shared<BinaryTextSet>(JSON::parse(load_file(json_path))));
|
||||
} else if (isfile(file_path)) {
|
||||
this->log.info("Loading %s %c binary text set from %s", name_for_enum(version), char_for_language_code(it.second), file_path.c_str());
|
||||
this->add_set(version, it.second, make_set(load_file(file_path)));
|
||||
this->add_set(version, it.second, make_set(load_file(file_path), it.second == 0));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -498,11 +501,11 @@ TextIndex::TextIndex(
|
||||
auto patch_file = get_patch_file ? get_patch_file(version, it.first) : nullptr;
|
||||
if (patch_file) {
|
||||
this->log.info("Loading %s %c Unicode text set from %s in patch tree", name_for_enum(version), char_for_language_code(it.second), it.first.c_str());
|
||||
this->add_set(version, it.second, make_set(*patch_file));
|
||||
this->add_set(version, it.second, make_set(*patch_file, it.second == 0));
|
||||
} else {
|
||||
if (isfile(file_path)) {
|
||||
this->log.info("Loading %s %c Unicode text set from %s", name_for_enum(version), char_for_language_code(it.second), file_path.c_str());
|
||||
this->add_set(version, it.second, make_set(load_file(file_path)));
|
||||
this->add_set(version, it.second, make_set(load_file(file_path), it.second == 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -510,12 +513,12 @@ TextIndex::TextIndex(
|
||||
}
|
||||
};
|
||||
|
||||
auto make_binary_dc112000 = +[](const string& data) { return make_shared<BinaryTextSet>(data, 21, true); };
|
||||
auto make_binary_dcnte_dcv1 = +[](const string& data) { return make_shared<BinaryTextSet>(data, 26, true); };
|
||||
auto make_binary_dcv2 = +[](const string& data) { return make_shared<BinaryTextSet>(data, 37, false); };
|
||||
auto make_binary_gc = +[](const string& data) { return make_shared<BinaryTextAndKeyboardsSet>(data, true); };
|
||||
auto make_binary_xb = +[](const string& data) { return make_shared<BinaryTextAndKeyboardsSet>(data, false); };
|
||||
auto make_unitxt = +[](const string& data) { return make_shared<UnicodeTextSet>(data); };
|
||||
auto make_binary_dc112000 = +[](const string& data, bool is_sjis) { return make_shared<BinaryTextSet>(data, 21, true, is_sjis); };
|
||||
auto make_binary_dcnte_dcv1 = +[](const string& data, bool is_sjis) { return make_shared<BinaryTextSet>(data, 26, true, is_sjis); };
|
||||
auto make_binary_dcv2 = +[](const string& data, bool is_sjis) { return make_shared<BinaryTextSet>(data, 37, false, is_sjis); };
|
||||
auto make_binary_gc = +[](const string& data, bool is_sjis) { return make_shared<BinaryTextAndKeyboardsSet>(data, true, is_sjis); };
|
||||
auto make_binary_xb = +[](const string& data, bool is_sjis) { return make_shared<BinaryTextAndKeyboardsSet>(data, false, is_sjis); };
|
||||
auto make_unitxt = +[](const string& data, bool) { return make_shared<UnicodeTextSet>(data); };
|
||||
|
||||
add_version(Version::DC_NTE, "dc-nte", make_binary_dcnte_dcv1);
|
||||
add_version(Version::DC_V1_11_2000_PROTOTYPE, "dc-11-2000", make_binary_dc112000);
|
||||
|
||||
+5
-5
@@ -53,7 +53,7 @@ class BinaryTextSet : public TextSet {
|
||||
public:
|
||||
explicit BinaryTextSet(const JSON& json) : TextSet(json) {}
|
||||
explicit BinaryTextSet(JSON&& json) : TextSet(json) {}
|
||||
BinaryTextSet(const std::string& pr2_data, size_t collection_count, bool has_rel_footer);
|
||||
BinaryTextSet(const std::string& pr2_data, size_t collection_count, bool has_rel_footer, bool is_sjis);
|
||||
~BinaryTextSet() = default;
|
||||
// TODO: Implement serialize functions
|
||||
};
|
||||
@@ -64,7 +64,7 @@ public:
|
||||
|
||||
explicit BinaryTextAndKeyboardsSet(const JSON& json);
|
||||
explicit BinaryTextAndKeyboardsSet(JSON&& json);
|
||||
BinaryTextAndKeyboardsSet(const std::string& pr2_data, bool big_endian);
|
||||
BinaryTextAndKeyboardsSet(const std::string& pr2_data, bool big_endian, bool is_sjis);
|
||||
~BinaryTextAndKeyboardsSet() = default;
|
||||
|
||||
virtual JSON json() const;
|
||||
@@ -77,13 +77,13 @@ public:
|
||||
void set_keyboard_selector_width(uint8_t width);
|
||||
|
||||
// Returns (pr2_data, pr3_data)
|
||||
std::pair<std::string, std::string> serialize(bool big_endian) const;
|
||||
std::pair<std::string, std::string> serialize(bool big_endian, bool is_sjis) const;
|
||||
|
||||
protected:
|
||||
template <bool IsBigEndian>
|
||||
void parse_t(const std::string& pr2_data);
|
||||
void parse_t(const std::string& pr2_data, bool is_sjis);
|
||||
template <bool IsBigEndian>
|
||||
std::pair<std::string, std::string> serialize_t() const;
|
||||
std::pair<std::string, std::string> serialize_t(bool is_sjis) const;
|
||||
|
||||
std::vector<std::unique_ptr<Keyboard>> keyboards;
|
||||
uint8_t keyboard_selector_width;
|
||||
|
||||
Reference in New Issue
Block a user