diff --git a/src/Main.cc b/src/Main.cc index 94bac908..e9ec0974 100644 --- a/src/Main.cc +++ b/src/Main.cc @@ -1244,6 +1244,60 @@ Action a_encode_unicode_text_set( write_output_data(args, encoded.data(), encoded.size(), "prs"); }); +Action a_decode_word_select_set( + "decode-word-select-set", "\ + decode-word-select-set [INPUT-FILENAME]\n\ + Decode a Word Select data file and print all the tokens. A version option\n\ + (e.g. --gc-ep3) is required. If the Word Select set is for PC or BB, the\n\ + --unitxt option is also required, and must point to a unitxt file in prs\n\ + or JSON format. For PC (V2), the unitxt_e.prs file should be used; for BB,\n\ + the unitxt_ws_e.prs file should be used.\n", + +[](Arguments& args) { + auto version = get_cli_version(args); + + string unitxt_filename = args.get("unitxt"); + vector unitxt_collection; + if (!unitxt_filename.empty()) { + vector> unitxt_data; + if (ends_with(unitxt_filename, ".prs")) { + unitxt_data = parse_unicode_text_set(load_file(unitxt_filename)); + } else if (ends_with(unitxt_filename, ".json")) { + auto json = JSON::parse(load_file(unitxt_filename)); + for (const auto& coll_it : json.as_list()) { + auto& coll = unitxt_data.emplace_back(); + for (const auto& str_it : coll_it->as_list()) { + coll.emplace_back(str_it->as_string()); + } + } + } else { + throw runtime_error("unitxt filename must end in .prs or .json"); + } + if (version == Version::BB_V4) { + unitxt_collection = std::move(unitxt_data.at(0)); + } else { + unitxt_collection = std::move(unitxt_data.at(35)); + } + } + + WordSelectSet ws(read_input_data(args), version, &unitxt_collection, args.get("japanese")); + ws.print(stdout); + }); + +Action a_generate_word_select_table( + "generate-word-select-table", nullptr, +[](Arguments& args) { + auto table = ServerState::load_word_select_table_from_system(); + Version v = Version::UNKNOWN; + try { + v = get_cli_version(args); + } catch (const runtime_error&) {} + + if (v != Version::UNKNOWN) { + table->print_index(stdout, v); + } else { + table->print(stdout); + } + }); + Action a_cat_client( "cat-client", "\ cat-client ADDR:PORT\n\ diff --git a/src/PSOEncryption.hh b/src/PSOEncryption.hh index 8bec3046..2de82293 100644 --- a/src/PSOEncryption.hh +++ b/src/PSOEncryption.hh @@ -5,10 +5,12 @@ #include #include +#include #include #include -#include "Text.hh" // for parray +#include "Compression.hh" +#include "Text.hh" class PSOEncryption { public: @@ -277,6 +279,16 @@ DecryptedPR2 decrypt_pr2_data(const std::string& data) { return ret; } +template +std::string decrypt_and_decompress_pr2_data(const std::string& data) { + auto decrypted = decrypt_pr2_data(data); + std::string decompressed = prs_decompress(decrypted.compressed_data); + if (decompressed.size() != decrypted.decompressed_size) { + throw std::runtime_error("decompressed size does not match expected size"); + } + return decompressed; +} + template std::string encrypt_pr2_data(const std::string& data, size_t decompressed_size, uint32_t seed) { using U32T = std::conditional_t; diff --git a/src/ServerState.cc b/src/ServerState.cc index 2a4dc44f..69bf428f 100644 --- a/src/ServerState.cc +++ b/src/ServerState.cc @@ -14,6 +14,7 @@ #include "NetworkAddresses.hh" #include "SendCommands.hh" #include "Text.hh" +#include "UnicodeTextSet.hh" using namespace std; @@ -1106,9 +1107,59 @@ void ServerState::load_level_table() { this->level_table = make_shared(*this->load_bb_file("PlyLevelTbl.prs"), true); } +shared_ptr ServerState::load_word_select_table_from_system() { + vector> name_alias_lists; + auto json = JSON::parse(load_file("system/word-select/name-alias-lists.json")); + for (const auto& coll_it : json.as_list()) { + auto& coll = name_alias_lists.emplace_back(); + for (const auto& str_it : coll_it->as_list()) { + coll.emplace_back(str_it->as_string()); + } + } + + config_log.info("(Word select) Loading pc_unitxt.prs"); + vector> pc_unitxt_data = parse_unicode_text_set(load_file("system/word-select/pc_unitxt.prs")); + config_log.info("(Word select) Loading bb_unitxt_ws.prs"); + vector> bb_unitxt_data = parse_unicode_text_set(load_file("system/word-select/bb_unitxt_ws.prs")); + vector pc_unitxt_collection = std::move(pc_unitxt_data.at(35)); + vector bb_unitxt_collection = std::move(bb_unitxt_data.at(0)); + + config_log.info("(Word select) Loading DC_NTE data"); + WordSelectSet dc_nte_ws(load_file("system/word-select/dc_nte_ws_data.bin"), Version::DC_NTE, nullptr, true); + config_log.info("(Word select) Loading DC_V1_11_2000_PROTOTYPE data"); + WordSelectSet dc_112000_ws(load_file("system/word-select/dc_112000_ws_data.bin"), Version::DC_V1_11_2000_PROTOTYPE, nullptr, false); + config_log.info("(Word select) Loading DC_V1 data"); + WordSelectSet dc_v1_ws(load_file("system/word-select/dcv1_ws_data.bin"), Version::DC_V1, nullptr, false); + config_log.info("(Word select) Loading DC_V2 data"); + WordSelectSet dc_v2_ws(load_file("system/word-select/dcv2_ws_data.bin"), Version::DC_V2, nullptr, false); + config_log.info("(Word select) Loading PC_NTE data"); + WordSelectSet pc_nte_ws(load_file("system/word-select/pc_nte_ws_data.bin"), Version::PC_NTE, &pc_unitxt_collection, false); + config_log.info("(Word select) Loading PC_V2 data"); + WordSelectSet pc_v2_ws(load_file("system/word-select/pc_ws_data.bin"), Version::PC_V2, &pc_unitxt_collection, false); + config_log.info("(Word select) Loading GC_NTE data"); + WordSelectSet gc_nte_ws(load_file("system/word-select/gc_nte_ws_data.bin"), Version::GC_NTE, nullptr, false); + config_log.info("(Word select) Loading GC_V3 data"); + WordSelectSet gc_v3_ws(load_file("system/word-select/gc_ws_data.bin"), Version::GC_V3, nullptr, false); + config_log.info("(Word select) Loading GC_EP3_NTE data"); + WordSelectSet gc_ep3_nte_ws(load_file("system/word-select/gc_ep3_nte_ws_data.bin"), Version::GC_EP3_NTE, nullptr, false); + config_log.info("(Word select) Loading GC_EP3 data"); + WordSelectSet gc_ep3_ws(load_file("system/word-select/gc_ep3_ws_data.bin"), Version::GC_EP3, nullptr, false); + config_log.info("(Word select) Loading XB_V3 data"); + WordSelectSet xb_v3_ws(load_file("system/word-select/xb_ws_data.bin"), Version::XB_V3, nullptr, false); + config_log.info("(Word select) Loading BB_V4 data"); + WordSelectSet bb_v4_ws(load_file("system/word-select/bb_ws_data.bin"), Version::BB_V4, &bb_unitxt_collection, false); + + config_log.info("(Word select) Generating table"); + return make_shared( + dc_nte_ws, dc_112000_ws, dc_v1_ws, dc_v2_ws, + pc_nte_ws, pc_v2_ws, gc_nte_ws, gc_v3_ws, + gc_ep3_nte_ws, gc_ep3_ws, xb_v3_ws, bb_v4_ws, + name_alias_lists); +} + void ServerState::load_word_select_table() { config_log.info("Loading Word Select table"); - this->word_select_table = make_shared(JSON::parse(load_file("system/word-select-table.json"))); + this->word_select_table = this->load_word_select_table_from_system(); } void ServerState::load_item_name_index() { diff --git a/src/ServerState.hh b/src/ServerState.hh index 18642aa3..2103ce0c 100644 --- a/src/ServerState.hh +++ b/src/ServerState.hh @@ -284,6 +284,7 @@ struct ServerState : public std::enable_shared_from_this { void load_level_table(); void load_item_name_index(); void load_item_tables(); + static std::shared_ptr load_word_select_table_from_system(); void load_word_select_table(); void load_ep3_data(); void resolve_ep3_card_names(); diff --git a/src/WordSelectTable.cc b/src/WordSelectTable.cc index ae986cb4..0067379d 100644 --- a/src/WordSelectTable.cc +++ b/src/WordSelectTable.cc @@ -5,68 +5,276 @@ #include #include +#include "Compression.hh" + using namespace std; -static void index_add(vector& index, uint16_t position, size_t value) { - if (position != 0xFFFF) { - if (index.size() <= position) { - index.resize(position + 1); +template +static vector read_direct_table(const StringReader& base_r, size_t offset, size_t count) { + vector ret; + auto entries_r = base_r.sub(offset, count * sizeof(ReadT)); + while (!entries_r.eof()) { + ret.emplace_back(entries_r.get()); + } + return ret; +} + +template +static vector> read_indirect_table(const StringReader& base_r, size_t offset, size_t count) { + vector> ret; + auto pointers_r = base_r.sub(offset, sizeof(OffsetT) * 2 * count); + while (!pointers_r.eof()) { + uint32_t sub_offset = pointers_r.get(); + uint32_t sub_count = pointers_r.get(); + ret.emplace_back(read_direct_table(base_r, sub_offset, sub_count)); + } + return ret; +} + +template +struct NonWindowsRoot { + using U32T = typename std::conditional::type; + U32T strings_table; + U32T table1; + U32T table2; + U32T token_id_to_string_id_table; + U32T table4; + U32T article_types_table; + U32T table6; +} __attribute__((packed)); + +struct PCV2Root { + le_uint32_t unknown_a1; + le_uint32_t unknown_a2; + le_uint32_t table1; + le_uint32_t table2; + le_uint32_t token_id_to_string_id_table; + le_uint32_t table4; + le_uint32_t article_types_table; + le_uint32_t table6; +} __attribute__((packed)); + +struct BBRoot { + le_uint32_t table1; + le_uint32_t table2; + le_uint32_t token_id_to_string_id_table; + le_uint32_t table4; + le_uint32_t article_types_table; + le_uint32_t table6; +} __attribute__((packed)); + +template +void WordSelectSet::parse_non_windows_t(const std::string& data, bool use_sjis) { + using U32T = typename std::conditional::type; + using U16T = typename std::conditional::type; + + StringReader r(data); + uint32_t root_offset = r.pget(r.size() - 0x10); + const auto& root = r.pget>(root_offset); + + { + auto string_offset_r = r.sub(root.strings_table, sizeof(U32T) * StringTableCount); + while (!string_offset_r.eof()) { + string raw_s = r.pget_cstr(string_offset_r.template get()); + this->strings.emplace_back(use_sjis ? tt_sjis_to_utf8(raw_s) : tt_8859_to_utf8(raw_s)); } - index[position] = value; } + + // this->table1 = read_indirect_table(r, root.table1, Table1Count); + // this->table2 = read_indirect_table(r, root.table2, Table2Count); + this->token_id_to_string_id = read_direct_table(r, root.token_id_to_string_id_table, TokenCount); + // this->table4 = read_indirect_table(r, root.table4, Table4Count); + // this->article_types = read_direct_table(r, root.article_types_table, ArticleTypesCount); + // this->table6 = read_indirect_table(r, root.table6, Table6Count); } -WordSelectTable::WordSelectTable(const JSON& json) { - this->tokens.reserve(json.size()); - for (const auto& item : json.as_list()) { - JSON dc_value_json = item->at(0); - JSON pc_value_json = item->at(1); - JSON gc_value_json = item->at(2); - JSON ep3_value_json = item->at(3); - JSON bb_value_json = item->at(4); - uint16_t dc_value = dc_value_json.is_null() ? 0xFFFF : dc_value_json.as_int(); - uint16_t pc_value = pc_value_json.is_null() ? 0xFFFF : pc_value_json.as_int(); - uint16_t gc_value = gc_value_json.is_null() ? 0xFFFF : gc_value_json.as_int(); - uint16_t ep3_value = ep3_value_json.is_null() ? 0xFFFF : ep3_value_json.as_int(); - uint16_t bb_value = bb_value_json.is_null() ? 0xFFFF : bb_value_json.as_int(); - this->tokens.emplace_back(Token{ - .dc_value = dc_value, - .pc_value = pc_value, - .gc_value = gc_value, - .ep3_value = ep3_value, - .bb_value = bb_value, - }); - index_add(this->dc_index, dc_value, this->tokens.size() - 1); - index_add(this->pc_index, pc_value, this->tokens.size() - 1); - index_add(this->gc_index, gc_value, this->tokens.size() - 1); - index_add(this->ep3_index, ep3_value, this->tokens.size() - 1); - index_add(this->bb_index, bb_value, this->tokens.size() - 1); +template +void WordSelectSet::parse_windows_t(const std::string& data, const std::vector* unitxt_collection) { + if (!unitxt_collection) { + throw runtime_error("a unitxt collection is required"); } + + StringReader r(data); + uint32_t root_offset = r.pget(r.size() - 0x10); + const auto& root = r.pget(root_offset); + this->strings = *unitxt_collection; + // this->table1 = read_indirect_table(r, root.table1, Table1Count); + // this->table2 = read_indirect_table(r, root.table2, Table2Count); + this->token_id_to_string_id = read_direct_table(r, root.token_id_to_string_id_table, TokenCount); + // this->table4 = read_indirect_table(r, root.table4, Table4Count); + // this->article_types = read_direct_table(r, root.article_types_table, ArticleTypesCount); + // this->table6 = read_indirect_table(r, root.table6, Table6Count); } -uint16_t WordSelectTable::Token::value_for_version(Version version) const { +WordSelectSet::WordSelectSet(const string& data, Version version, const vector* unitxt_collection, bool use_sjis) { switch (version) { - case Version::DC_NTE: + case Version::DC_NTE: { + if (data.size() < 4) { + throw runtime_error("data is too small"); + } + string decrypted = data.substr(0, data.size() - 4); + uint32_t seed = *reinterpret_cast(data.data() + data.size() - 4); + PSOV2Encryption crypt(seed); + crypt.decrypt(decrypted); + this->parse_non_windows_t(decrypted, use_sjis); + break; + } case Version::DC_V1_11_2000_PROTOTYPE: + this->parse_non_windows_t(decrypt_and_decompress_pr2_data(data), use_sjis); + break; case Version::DC_V1: case Version::DC_V2: - return this->dc_value; + this->parse_non_windows_t(decrypt_and_decompress_pr2_data(data), use_sjis); + break; case Version::PC_NTE: case Version::PC_V2: - return this->pc_value; + this->parse_windows_t(decrypt_and_decompress_pr2_data(data), unitxt_collection); + break; case Version::GC_NTE: - case Version::GC_V3: - case Version::XB_V3: - // TODO: Which index does GC_NTE use? Here we presume it's the same as GC, - // but this may not be true - return this->gc_value; + this->parse_non_windows_t(decrypt_and_decompress_pr2_data(data), use_sjis); + break; case Version::GC_EP3_NTE: + this->parse_non_windows_t(decrypt_and_decompress_pr2_data(data), use_sjis); + break; + case Version::GC_V3: case Version::GC_EP3: - return this->ep3_value; + this->parse_non_windows_t(decrypt_and_decompress_pr2_data(data), use_sjis); + break; + case Version::XB_V3: + this->parse_non_windows_t(decrypt_and_decompress_pr2_data(data), use_sjis); + break; case Version::BB_V4: - return this->bb_value; + this->parse_windows_t(decrypt_and_decompress_pr2_data(data), unitxt_collection); + break; default: - throw logic_error("invalid word select version"); + throw runtime_error("unsupported word select data version"); + } +} + +const string& WordSelectSet::string_for_token(uint16_t token_id) const { + return this->strings.at(this->token_id_to_string_id.at(token_id)); +} + +void WordSelectSet::print(FILE* stream) const { + fprintf(stream, "strings:\n"); + for (size_t z = 0; z < this->strings.size(); z++) { + fprintf(stream, " [%04zX] \"%s\"\n", z, this->strings[z].c_str()); + } + fprintf(stream, "token_id_to_string_id:\n"); + for (size_t z = 0; z < this->token_id_to_string_id.size(); z++) { + fprintf(stream, " [%04zX] %04zX \"%s\"\n", z, this->token_id_to_string_id[z], this->string_for_token(z).c_str()); + } +} + +WordSelectTable::WordSelectTable( + const WordSelectSet& dc_nte_ws, + const WordSelectSet& dc_112000_ws, + const WordSelectSet& dc_v1_ws, + const WordSelectSet& dc_v2_ws, + const WordSelectSet& pc_nte_ws, + const WordSelectSet& pc_v2_ws, + const WordSelectSet& gc_nte_ws, + const WordSelectSet& gc_v3_ws, + const WordSelectSet& gc_ep3_nte_ws, + const WordSelectSet& gc_ep3_ws, + const WordSelectSet& xb_v3_ws, + const WordSelectSet& bb_v4_ws, + const vector>& name_alias_lists) { + + unordered_map name_to_canonical_name; + for (const auto& alias_list : name_alias_lists) { + if (alias_list.size() < 2) { + continue; + } + auto it = alias_list.begin(); + auto canonical_name = *it; + for (it++; it != alias_list.end(); it++) { + name_to_canonical_name.emplace(*it, canonical_name); + } + } + + vector> dynamic_tokens; + { + for (size_t z = 0; z < 12; z++) { + auto& token = dynamic_tokens.emplace_back(make_shared()); + token->canonical_name = string_printf("__PLAYER_%zu_NAME__", z); + this->name_to_token.emplace(token->canonical_name, token); + } + auto& token = dynamic_tokens.emplace_back(make_shared()); + token->canonical_name = "__BLANK__"; + this->name_to_token.emplace(token->canonical_name, token); + } + + array ws_sets = { + &dc_nte_ws, &dc_112000_ws, &dc_v1_ws, &dc_v2_ws, + &pc_nte_ws, &pc_v2_ws, &gc_nte_ws, &gc_v3_ws, + &gc_ep3_nte_ws, &gc_ep3_ws, &xb_v3_ws, &bb_v4_ws}; + + for (size_t s_version = 0; s_version < ws_sets.size(); s_version++) { + Version version = static_cast(static_cast(Version::DC_NTE) + s_version); + const auto& ws_set = *ws_sets[s_version]; + auto& index = this->tokens_by_version.at(s_version); + + index.reserve(ws_set.num_tokens()); + for (size_t token_id = 0; token_id < ws_set.num_tokens(); token_id++) { + const string& str = ws_set.string_for_token(token_id); + + string canonical_name; + try { + canonical_name = name_to_canonical_name.at(str); + } catch (const out_of_range&) { + canonical_name = str; + } + + auto token_it = this->name_to_token.find(canonical_name); + if (token_it == this->name_to_token.end()) { + token_it = this->name_to_token.emplace(canonical_name, make_shared()).first; + token_it->second->canonical_name = std::move(canonical_name); + } + token_it->second->slot_for_version(version) = token_id; + index.emplace_back(token_it->second); + } + + size_t dynamic_token_base_id = ws_set.num_tokens(); + for (size_t z = 0; z < dynamic_tokens.size(); z++) { + auto& token = dynamic_tokens[z]; + token->slot_for_version(version) = dynamic_token_base_id + z; + index.emplace_back(token); + } + } +} + +void WordSelectTable::print(FILE* stream) const { + fprintf(stream, "DCN DC11 DCv1 DCv2 PCN PCv2 GCN GCv3 Ep3N Ep3 XBv3 BBv4 CANONICAL-NAME\n"); + for (const auto& it : this->name_to_token) { + const auto& token = it.second; + for (size_t z = 0; z < 12; z++) { + if (token->values_by_version[z] == 0xFFFF) { + fprintf(stream, " "); + } else { + fprintf(stream, "%04hX ", token->values_by_version[z]); + } + } + string serialized = JSON(token->canonical_name).serialize(); + fprintf(stream, "%s\n", serialized.c_str()); + } +} + +void WordSelectTable::print_index(FILE* stream, Version v) const { + fprintf(stream, " DCN DC11 DCv1 DCv2 PCN PCv2 GCN GCv3 Ep3N Ep3 XBv3 BBv4 CANONICAL-NAME\n"); + const auto& index = this->tokens_for_version(v); + for (size_t token_id = 0; token_id < index.size(); token_id++) { + const auto& token = index[token_id]; + fprintf(stream, "%04zX => ", token_id); + for (size_t z = 0; z < 12; z++) { + if (token->values_by_version[z] == 0xFFFF) { + fprintf(stream, " "); + } else { + fprintf(stream, "%04hX ", token->values_by_version[z]); + } + } + string serialized = JSON(token->canonical_name).serialize(); + fprintf(stream, "%s\n", serialized.c_str()); } } @@ -74,42 +282,18 @@ WordSelectMessage WordSelectTable::translate( const WordSelectMessage& msg, Version from_version, Version to_version) const { - const std::vector* index; - switch (from_version) { - case Version::DC_NTE: - case Version::DC_V1_11_2000_PROTOTYPE: - case Version::DC_V1: - case Version::DC_V2: - index = &this->dc_index; - break; - case Version::PC_NTE: - case Version::PC_V2: - index = &this->pc_index; - break; - case Version::GC_NTE: - case Version::GC_V3: - case Version::XB_V3: - // TODO: Which index does GC_NTE use? Here we presume it's the same as GC, - // but this may not be true - index = &this->gc_index; - break; - case Version::GC_EP3_NTE: - case Version::GC_EP3: - index = &this->ep3_index; - break; - case Version::BB_V4: - index = &this->bb_index; - break; - default: - throw logic_error("invalid word select version"); - } + const auto& index = this->tokens_for_version(from_version); WordSelectMessage ret; for (size_t z = 0; z < ret.tokens.size(); z++) { if (msg.tokens[z] == 0xFFFF) { ret.tokens[z] = 0xFFFF; } else { - ret.tokens[z] = this->tokens.at(index->at(msg.tokens[z])).value_for_version(to_version); + const auto& token = index.at(msg.tokens[z]); + if (!token) { + throw runtime_error(string_printf("token %04hX does not exist in the index", msg.tokens[z].load())); + } + ret.tokens[z] = token->slot_for_version(to_version); if (ret.tokens[z] == 0xFFFF) { throw runtime_error(string_printf("token %04hX has no translation", msg.tokens[z].load())); } @@ -121,3 +305,9 @@ WordSelectMessage WordSelectTable::translate( ret.unknown_a4 = msg.unknown_a4; return ret; } + +WordSelectTable::Token::Token() { + for (size_t z = 0; z < this->values_by_version.size(); z++) { + this->values_by_version[z] = 0xFFFF; + } +} diff --git a/src/WordSelectTable.hh b/src/WordSelectTable.hh index 132ad66e..d9e54f79 100644 --- a/src/WordSelectTable.hh +++ b/src/WordSelectTable.hh @@ -2,16 +2,84 @@ #include -#include #include #include #include "CommandFormats.hh" #include "QuestScript.hh" +class WordSelectSet { +public: + WordSelectSet(const std::string& data, Version version, const std::vector* unitxt_collection, bool use_sjis); + ~WordSelectSet() = default; + + inline size_t num_strings() const { + return this->strings.size(); + } + inline size_t num_tokens() const { + return this->token_id_to_string_id.size(); + } + + const std::string& string_for_token(uint16_t token_id) const; + + void print(FILE* stream) const; + +protected: + template + void parse_non_windows_t(const std::string& data, bool use_sjis); + template + void parse_windows_t(const std::string& data, const std::vector* unitxt_collection); + + std::vector strings; + std::vector token_id_to_string_id; + // Note: PC NTE and PC have exactly the same parameters + // => DC NTE DC112000 DCv1 DCv2 PCNTE/PC GC NTE GC XB Ep3 NTE Ep3 USA BB + // root: => 000074DC 000072A4 0000755C 0000755C 00002B50 0000AB04 0000BCAC 0000B620 0000B648 0000B914 0000B5FC + // u32 ???: => 00002A9C + // TODO + // u32 ???: => 00002B14 + // TODO + // u32 strings_table: => 00006338 0000612C 000063C0 000063C0 (unitxt) 00009208 00009C9C 00009C34 00009C5C 00009904 (unitxt) + // u32 string_offset[COUNT]: => 469 45E 467 467 (unitxt) 63F 804 67B 67C 804 (unitxt) + // char string[...\0] + // u32 table1: => 00000B90 00000B54 00000D3C 00000D3C 00001018 0000100C 000012F0 000012F0 000012F0 000011D0 000012F0 + // {u32 offset, u32 count}[COUNT]: => 94 122 93 93 F9 F9 126 126 126 17F 126 + // u16[count] + // u32 table2: => 00001178 00001108 00001300 00001300 000019D8 000019CC 00001EE8 00001EE8 00001EE8 00001DC8 00001EE8 + // {u32 offset, u32 count}[COUNT]: => 7 7 7 7 7 7 13 13 13 13 13 + // u16[count] + // u32 token_id_to_string_id_table => 000011B0 00001140 00001338 00001338 00001A10 00001A04 00001F80 00001F80 00001F80 00001E60 00001F80 + // u16[COUNT] string_id_for_token_id => 466 44B 457 457 645 693 68C 68C 68C 68C 68C + // u32 table4: => 00001A5C 00001B08 00001D1C 00001D1C 000027D0 000027C4 00002DCC 00002DCC 00002DCC 00002CAC 00002DCC + // (non-NTE) {u32 offset, u32 count}[COUNT]: => 2 2 2 2 2 2 2 2 2 2 + // u16[count] + // (NTE) u16[COUNT] => E1 + // u32 article_types_table: => 00001C1E 00001B18 00001D2C 00001D2C 000027E0 000027D4 00002DDC 00002DDC 00002DDC 00002CBC 00002DDC + // u8[COUNT] article_types => 1C8 166 166 166 266 266 28A 28A 28A 28A 266 + // u32 table6: => 00001E28 00001CBC 00001ED0 00001ED0 00002A84 00002A78 000030A4 000030A4 000030A4 00002F84 00003080 + // {u32 offset, u32 count}[3]: + // u16[count] +}; + class WordSelectTable { public: - explicit WordSelectTable(const JSON& json); + WordSelectTable( + const WordSelectSet& dc_nte_ws, + const WordSelectSet& dc_112000_ws, + const WordSelectSet& dc_v1_ws, + const WordSelectSet& dc_v2_ws, + const WordSelectSet& pc_nte_ws, + const WordSelectSet& pc_v2_ws, + const WordSelectSet& gc_nte_ws, + const WordSelectSet& gc_v3_ws, + const WordSelectSet& gc_ep3_nte_ws, + const WordSelectSet& gc_ep3_ws, + const WordSelectSet& xb_v3_ws, + const WordSelectSet& bb_v4_ws, + const std::vector>& name_alias_lists); + + void print(FILE* stream) const; + void print_index(FILE* stream, Version v) const; WordSelectMessage translate( const WordSelectMessage& msg, @@ -20,18 +88,23 @@ public: private: struct Token { - uint16_t dc_value; - uint16_t pc_value; - uint16_t gc_value; - uint16_t ep3_value; - uint16_t bb_value; + std::array values_by_version; + std::string canonical_name; - uint16_t value_for_version(Version version) const; + Token(); + + inline uint16_t& slot_for_version(Version version) { + return this->values_by_version.at(static_cast(version) - static_cast(Version::DC_NTE)); + } + inline uint16_t slot_for_version(Version version) const { + return this->values_by_version.at(static_cast(version) - static_cast(Version::DC_NTE)); + } }; - std::vector dc_index; - std::vector pc_index; - std::vector gc_index; - std::vector ep3_index; - std::vector bb_index; - std::vector tokens; + + std::map> name_to_token; + std::array>, 12> tokens_by_version; + + inline const std::vector>& tokens_for_version(Version version) const { + return this->tokens_by_version.at(static_cast(version) - static_cast(Version::DC_NTE)); + } }; diff --git a/system/word-select/bb_unitxt_ws.prs b/system/word-select/bb_unitxt_ws.prs new file mode 100644 index 00000000..5e095828 Binary files /dev/null and b/system/word-select/bb_unitxt_ws.prs differ diff --git a/system/word-select/bb_ws_data.bin b/system/word-select/bb_ws_data.bin new file mode 100644 index 00000000..02513f29 Binary files /dev/null and b/system/word-select/bb_ws_data.bin differ diff --git a/system/word-select/dc_112000_ws_data.bin b/system/word-select/dc_112000_ws_data.bin new file mode 100644 index 00000000..af0961b7 Binary files /dev/null and b/system/word-select/dc_112000_ws_data.bin differ diff --git a/system/word-select/dc_nte_ws_data.bin b/system/word-select/dc_nte_ws_data.bin new file mode 100644 index 00000000..f17cb007 Binary files /dev/null and b/system/word-select/dc_nte_ws_data.bin differ diff --git a/system/word-select/dcv1_ws_data.bin b/system/word-select/dcv1_ws_data.bin new file mode 100644 index 00000000..755615f6 Binary files /dev/null and b/system/word-select/dcv1_ws_data.bin differ diff --git a/system/word-select/dcv2_ws_data.bin b/system/word-select/dcv2_ws_data.bin new file mode 100644 index 00000000..079afbb6 Binary files /dev/null and b/system/word-select/dcv2_ws_data.bin differ diff --git a/system/word-select/gc_ep3_nte_ws_data.bin b/system/word-select/gc_ep3_nte_ws_data.bin new file mode 100755 index 00000000..d67b9d77 Binary files /dev/null and b/system/word-select/gc_ep3_nte_ws_data.bin differ diff --git a/system/word-select/gc_ep3_ws_data.bin b/system/word-select/gc_ep3_ws_data.bin new file mode 100755 index 00000000..12526b8d Binary files /dev/null and b/system/word-select/gc_ep3_ws_data.bin differ diff --git a/system/word-select/gc_nte_ws_data.bin b/system/word-select/gc_nte_ws_data.bin new file mode 100755 index 00000000..67bb4cfb Binary files /dev/null and b/system/word-select/gc_nte_ws_data.bin differ diff --git a/system/word-select/gc_ws_data.bin b/system/word-select/gc_ws_data.bin new file mode 100755 index 00000000..635bc2fe Binary files /dev/null and b/system/word-select/gc_ws_data.bin differ diff --git a/system/word-select/name-alias-lists.json b/system/word-select/name-alias-lists.json new file mode 100644 index 00000000..714b58d1 --- /dev/null +++ b/system/word-select/name-alias-lists.json @@ -0,0 +1,256 @@ +[ + ["CADUCEUS", "CADUSEUS"], + ["AREA:MINE", "AREA:MACHINE MINE"], + ["AREA:RUINS", "AREA:ANCIENT"], + ["ARMS", "arms"], + ["EVADE MATERIAL", "AVOID MATERIAL"], + ["Android,\nlay down a trap for me.", "Android, \nlay down a trap for me."], + ["Are you ready to\ngo on an adventure?", "Are you ready to\ngo adventure?", "Are you ready to go adventure?"], + ["CHAINSAWD", "CHAINSWORD"], + ["CHRISTMAS PRESENT", "CHIRISTMAS PRESENT"], + ["PIONEER 2", "CITY"], + ["CUSTOM BARRIER ver.00", "CUSTOM BARRIER ver.OO"], + ["CUSTOM RAY ver.00", "CUSTOM RAY", "CUSTOM RAY ver.OO"], + ["Can I join in?\nI'm a beginner.", "Can I join in? I'm a beginner."], + ["Can you come again\nwhen you have\nmore experience?", "Can you come again when you have more experience?"], + ["Can you come over\nto see me?", "Can you come over to see me?"], + ["Can you play some\nmore?", "Can you play some more?"], + ["Can you wait for me\nfor just a little while\nlonger?", "Can you wait for me for just a little while longer?"], + ["DAL RA LIE", "DAL LA LIE"], + ["DARK BELRA", "DARTH BELRA"], + ["DULGER", "DURGA"], + ["Did we come to this\nplace before?", "Did we come to this place before?"], + ["Did you restore\nyour HP?", "Did you restore your HP?"], + ["Do you want\nanything from me?", "Do you want anything\nfrom me?", "Do you want anything from me?"], // TODO CHECK + ["Do you want to chat\nwith me?", "Do you want to chat with me?"], + ["Drop a Mag for me!", "Drop a Mag!"], + ["Drop a weapon for me!", "Drop a weapon!"], + ["Drop some armor for me!", "Drop some armor!"], + ["Forget about them.\nLet's move.", "Forget about them. Let's move."], // TODO CHECK + ["Give me some armor.", "Give me some armors."], + ["Give me your GUILD CARD.", "Give me your Guild Card."], + ["Go into the\nTELEPORTER.", "Go into the\nTRANSPORTER.", "Go into the TRANSPORTER."], + ["Good to see you.\nGot time to talk?", "Good to see you. Got time to talk?"], + ["H&S25 JUSTICE", "HS25 JUSTICE"], + ["Hang on a second.\nI'll be there.", "Hang on a second. I'll be there."], + ["Hello!? Are you\nwith me?", "Hello!? Are you with me?"], + ["Help me out! I can't\nrun away!", "Help me out! I can't run away!"], + ["Help, I have almost\nno HP!", "Help, I have almost no HP!"], + ["Here's a MONOFLUID.", "Here's a MONOFLUIO."], + ["Here's a MONOMATE.", "Here's a MONOMAIT."], + ["Here's my GUILD CARD.", "Here's my Guild Card."], + ["Hey, what happened?", "Hey, what happened??"], + ["Hi. Can you go\nadventuring with me?", "Hi. Can you go adventuring with me?"], + ["How about going\nto the east?", "How about going to the east?"], + ["How about going\nto the north?", "How about going to the north?"], + ["How about going\nto the south?", "How about going to the south?"], + ["How about going\nto the west?", "How about going to the west?"], + ["How about the day\nafter tomorrow?", "How about the day after tomorrow?"], + ["I also want to go\nadventure.", "I also want to go adventure."], + ["I can't tell right\nnow.", "I can't tell right now."], + ["I don't understand\nwhat you mean.", "I don't understand\nthe meaning.", "I don't understand the meaning."], + ["I doubt you've heard\nof my country.\nIt's very small.", "I doubt you've heard of my country. It's very small."], + ["I have to leave\nnow.", "I have to leave now."], + ["I prefer Chatting\nrather than going\non an adventure.", "I prefer Chatting rather than going on an adventure."], + ["I prefer going on\na free adventure,\nnot a QUEST.", "I prefer going on\na free adventure,\nnot in a QUEST.", "I prefer going on a free adventure, not in a QUEST."], + ["I returned to the\nprevious area.", "I returned to the\nprevious room.", "I returned to the previous room."], + ["I think it's about time\nto stop playing...", "I think it's about time to stop playing..."], + ["I think we should\ngo together.", "I think we should go together."], + ["I want to continue\nthe adventure.", "I want to continue the adventure."], + ["I want to go on\na free adventure.", "I want to go on a free adventure."], + ["I'll cast ANTI on\nyou.", "I'll cast ANTI on you."], + ["I'll cast DEBAND on\nyou.", "I'll cast DEBAND on you."], + ["I'll cast RESTA on\nyou.", "I'll cast RESTA on you."], + ["I'll cast RYUKER.", "I'll cast REUKER on\nyou.", "I'll cast REUKER on you."], + ["I'll cast SHIFTA on\nyou.", "I'll cast SHIFTA on you."], + ["I'll generate the\nPHOTON BLAST!", "I'll generate the PHOTON BLAST!"], + ["I'll get closer to\nyou.", "I'll get closer to you."], + ["I'll lead the enemies\nover here. Somebody\nback me up.", "I'll lead the enemies\nover here. Somebody\nsupport me.", "I'll lead the enemies over here. Somebody support me."], + ["I'll take care of\nthis!", "I'll take care of this!"], + ["I'll turn on the\nswitch.", "I'll turn on the switch."], + ["I'm coming to\nsee you.", "I'm coiming to see you."], + ["I'm coming to save\nyou. Hold on.", "I'm coming to save you. Hold on."], + ["I'm coming to save you,\nbut it will take time.", "I'm coming to save you, but it will take time."], + ["I'm going ahead of\nyou.", "I'm going ahead of you."], + ["I'm going in.\nBack me up, OK?", "I'm going in. Back me up, OK?"], + ["I'm going to be\nkilled.", "I'm going to be killed."], + ["I'm not ready.\nHold on.", "I'm not ready. Hold on."], + ["Is this the right\ndirection?", "Is this the right direction?"], + ["It's OK! We can do\nit!", "It's OK! We can do it!"], + ["It's a waste of time.\nCome later.", "It's a waste of time. Come later."], + ["JUSTY-23ST", "JUSTY'23ST"], + ["Just joking.", "Jusy joking."], + ["L&K14 COMBAT", "LK14 COMBAT"], + ["Let me take care\nof them.", "Let me take care of them."], + ["Let's exchange\nGUILD CARDS.", "Let's exchange\nGUILD-CARDs", "Let's exchange COM-CARDs"], + ["Let's get out of\nhere.", "Let's get out of here."], + ["Let's go back to\nthe previous room.", "Let's go back to the previous room."], + ["Let's move to the\nnext area.", "Let's move to the next area."], + ["Let's split up into\ntwo groups.", "Let's split up into two groups."], + ["Let's trade\nsomething.", "Let's trade something."], + ["Long time no see.", "Long time no see you."], + ["M&A60 VISE", "MA60 VISE"], + ["MADHU", "MADU"], + ["MASTER/ABILITY", "MASTER/ABILYTY"], + ["MELQUEEK", "MELQEEK"], + ["MERLAN", "MELRAN"], + ["Nice playing with\nyou.", "Nice playing with you."], + ["No problem. I know\nthe route.", "No problem. I know the route."], + ["No way! I'll help\nyou!", "No way! I'll help you!"], + ["No, I want to play\nsome more!", "No, I want to play some more!"], + ["No, I'll go there.\nWhere are you?", "No, I'll go there. Where are you?"], + ["No, I've played this\ngame for some time.", "No, I've played this game for some time."], + ["OK!", "OK"], + ["OK, I'll catch up\nwith you later.", "OK, I'll catch up with you later."], + ["OK, I'll wait\nfor you here.", "OK, I'll wait for you here."], + ["OK, let me create\nit.", "OK, let me create it."], + ["OK, what do you\nwant?", "OK, what do you want?"], + ["OK, where shall\nwe go?", "OK, where shall we go?"], + ["OK, where shall\nwe meet?", "OK, where shall we meet?"], + ["OK.\nBut wait a minute.", "OK. I'll be right\nback."], + ["Okay,\nI'm going to throw the switch.\nEverybody get ready to run like mad!", "Okay,I'm going to throw the switch.\nEverybody get ready to run like mad!"], + ["P-ARM'S ARMS", "P-ARM's ARMs"], + ["PSYCHO", "PSYCHO WAND"], + ["Please cast ANTI\non me!", "Please cast ANTI on me!"], + ["Please cast RESTA\non me!", "Please cast RESTA on me!"], + ["Please clear the\ntrap.", "Please clear the trap."], + ["Please create a\nTEAM.", "Please create a TEAM."], + ["Please send me\nyour SIMPLE MAIL again\na little later.", "Please send me your\nSIMPLE MAIL again\na little later.", "Please send me your SHORT MAIL again a little later."], + ["Please take care\nof them!", "Please take care of them!"], + ["Please use\nWORD SELECT.", "Please use WORD SELECT."], + ["Please wait for\nme there.", "Please wait for me there."], + ["RUDRA", "RUDORA"], + ["Really?", "Really\u00EF\u00BC\u009F"], + ["Russia", "Rossiya"], + ["SHIELD", "Shield"], + ["STANDSTILL SHIELD", "STANDDTILL SHIELD"], + ["DRAW", "SUCK"], + ["Select Form.", "Select form."], + ["Select Subject.", "Select subject."], + ["Select Target.", "Select target."], + ["Select Topic.", "Select topic."], + ["Shall we go and get\na QUEST first?", "Shall we go and get\na QUEST?", "Shall we go and get a QUEST?"], + ["Shall we go back\nto PIONEER 2?", "Shall we go back\nto the Morgue?", "Shall we go back\nto PIONEER 2", "Shall we go back\nto the CITY?", "Shall we go back to the CITY?"], + ["Shall we go back ?", "Shall we go back?"], + ["Shall we go to the\nCHECK ROOM first?", "Shall we go to the CHECK ROOM first?"], + ["Shall we go to the\nMEDICAL ROOM first?", "Shall we go to the MEDICAL ROOM first?"], + ["Shall we go to the\nPRINCIPAL'S ROOM first?", "Shall we go to\nthe PRINCIPAL'S ROOM first?", "Shall we go to the PRINCIPAL'S ROOM first?"], + ["Shall we go to the\nSHOPS first?", "Shall we go to the\nshops first?", "Shall we go to the shops first?"], + ["Shall we go to the\nTEKKER first?", "Shall we go to the TEKKER first?"], + ["Shall we go to\nthe VISUAL LOBBY first?", "Shall we go to the\nVISUAL LOBBY first?", "Shall we go to the VISUAL LOBBY first?"], + ["Shall we retreat\nnow?", "Shall we retreat now?"], + ["Somebody go and\nhelp.", "Somebody go and help."], + ["Somebody please\nrestore my HP!", "Somebody please restore my HP!"], + ["Somebody please\nrestore my TP!", "Somebody please restore my TP!"], + ["Sorry to keep you\nwaiting.", "Sorry to keep you waiting."], + ["Sorry, I can't save\nyou.", "Sorry, I can't save you."], + ["Sorry, I can't!", "Sorry, I can_t."], + ["Sorry, I don't have\nany good items.", "Sorry, I don't have any good items."], + ["Sorry, I have an\nappointment.", "Sorry,\nI have an appointment.", "Sorry, I have an appointment."], + ["Sorry, I have no\nTechnique to save\nyou.", "Sorry, I have no Technique to save you."], + ["Sorry, I have no\ntime.", "Sorry, I have no time."], + ["Sorry, I have nothing\nright now.", "Sorry, I have nothing right now."], + ["Sorry, I have nothing\nthat'd help you right now.", "Sorry, I have nothing that'd help you right now."], + ["Sorry, I wasn't paying\nattention.", "Sorry, I wasn't paying attention."], + ["Sorry, I'm waiting\nfor my friends.", "Sorry,\nI'm waiting\nfor my friends.", "Sorry, I'm waiting for my friends."], + ["Sorry,I can't help\nyou.", "Sorry,I can't help you."], + ["Sure thing!", "Sure,"], + ["Sure, go ahead.", "Sure, go ahead"], + ["Sure, let me handle\nthis!", "Sure, let me handle this!"], + ["TOPIC: MAG", "TOPIC: BARRIER/MAG Card (FORCE)"], + ["TOPIC: TECHNIQUE", "TOPIC: TECHINIQUE", "TOPIC: Technique Card"], + ["Thanks for your\nhelp.", "Thanks for your help."], + ["The enemies here\nare too strong.\nLet's leave.", "The enemies here are too strong. Let's leave."], + ["There are some\nmore enemies left.", "There are some more enemies left."], + ["There are still enemies\nto defeat.", "There are still enemies to defeat."], + ["There are still items\nto get.", "There are still items to get."], + ["There must be a\nswitch.", "There must be a switch."], + ["This door won't\nopen.", "This door won't open."], + ["This is the first time\nI've played the game.", "This is the first time I've played the game."], + ["This way.\nFollow me.", "This way. Follow\nme.", "This way. Follow me."], + ["U.S.A.", "U.S.A"], + ["Mexico", "United Mexican States"], + ["VISK-235W", "VISK'235W"], + ["VOL OPT ver.2", "VOL OPT Ver2"], + ["WALS-MK2", "WALS'MK2"], + ["Want me to lend you a MAG?", "Want me to lend you a Mag?"], + ["Watch out for the\nenemies!", "Watch out for the enemies!"], + ["Watch out.\nThere's a trap here.", "Watch out. There's a trap here."], + ["We can't move unless\nwe kill all of them.", "We can't move unless we kill all of them."], + ["We don't need to.\nLet's go.", "We don't need to. Let's go."], + ["What is our TEAM\nname?", "What is our TEAM name?"], + ["What is the name\nof your TEAM?", "What is the name of your TEAM?"], + ["What is your\nlanguage setting?", "What is\nyour language setting?", "What is your language setting?"], + ["What level are\nyou?", "What level are\nyou ?", "What level are you ?", "What level are you?"], + ["What's wrong with\nyou?", "What's wrong with you?"], + ["When can we meet\nagain?", "When can we meet again?"], + ["Which AREA do you\nwant to go to?", "Which AREA do you want to go to?"], + ["Which QUEST do you\nwant to play?", "Which QUEST do you want to play?", "Which TEAM\nwill you join?"], + ["Which TEAM will\nyou join?", "Which TEAM will you\njoin in?", "Which TEAM will you join in?", "Which country\ndo you live in?"], + ["Which country do\nyou live in?", "Which country do you live in?", "Which direction shall\nwe go?", "Which direction shall we go?"], + ["Which lobby are\nyou in now?", "Which lobby are you in now?"], + ["Who is creating a TEAM?", "Who creating a TEAM?"], + ["Why don't we take\na break?", "Why don't we\ntake a break?", "Why don't we take\na rest?", "Why don't we take a rest?"], + ["Will you go with\nme?", "Will you go with me?"], + ["Will you still be\nstaying here?", "Will you still stay\nhere?", "Will you still stay here?"], + ["YASMINKOV 2000H", "YASMINKOV 2000"], + ["YASMINKOV 3000R", "YASMINKOV 3000"], + ["YELLOWBOZE", "YELLOWBOOZE"], + ["You create a TEAM,\nplease.", "You create a TEAM, please."], + ["You create a Team,\nplease.", "You create a Team, please."], + ["are you going to meet", "are you meeting with"], + ["are you headed for", "are you headed"], + ["are you venturing", "are you traveling"], + ["art appreciation", "art"], + ["bad fortune", "bad luck"], + ["body boarding", "body boad"], + ["camping", "camp"], + ["can equip with", "can equip"], + ["can't equip with", "can't equip"], + ["canoeing", "canoe"], + ["collecting things", "collection"], + ["commanding the PC", "using computers"], + ["don't stay", "don't stay in"], + ["driving a car", "drive a car"], + ["bonsai trees", "dwarf tree"], + ["equiped with", "equipped"], + ["film appreciation", "animated film"], + ["go move on toward", "go move on towards"], + ["going to the movies", "appreciateing movie"], + ["is exiting the game", "is quiting the game"], + ["is leading", "is heading"], + ["is returning", "returning"], + ["listening to music", "appreciateing music"], + ["looked older", "looking older"], + ["making art", "art"], + ["running marathons", "marathon"], + ["mountain biking", "riding a mountain bike"], + ["must stay", "must stay in"], + ["playing American football", "playing american football"], + ["playing ice hockey", "playing Ice hockey"], + ["playing piano", "playing a piano"], + ["playing video games", "playing a video game"], + ["playing basketball", "pleying basketball"], + ["playing darts", "playing dart"], + ["powered down with \"DEF DOWN\"", "powered up with \"DE DOWN\""], + ["powered up with \"DEF UP\"", "powered up with \"DE UP\""], + ["playing online games", "ON-LINE GAME"], + ["puzzles", "puzzle"], + ["reading comics", "reading a cartoon"], + ["rollerblading", "roller blade"], + ["snowboarding", "snow board"], + ["sports", "sport"], + ["stay", "stay in"], + ["taking photographs", "takeing a photograph"], + ["taking effect of \"SHOCK\"", "taking effect of \"SHORT\""], + ["the next room", "the next loom"], + ["travelling", "travel"], + ["was taken", "did you lose"], + ["watching TV", "watching the television"], + ["will equip with", "will equip"], + ["will gather", "will gather at"], + ["will move on toward", "will move on towards"], + ["will you bet on", "will you bet"], + ["will you exit", "will you quit"], + ["will you take with you", "will you take with"], +] diff --git a/system/word-select/pc_nte_unitxt.prs b/system/word-select/pc_nte_unitxt.prs new file mode 100644 index 00000000..fcc8e0b3 Binary files /dev/null and b/system/word-select/pc_nte_unitxt.prs differ diff --git a/system/word-select/pc_nte_ws_data.bin b/system/word-select/pc_nte_ws_data.bin new file mode 100644 index 00000000..719e8266 Binary files /dev/null and b/system/word-select/pc_nte_ws_data.bin differ diff --git a/system/word-select/pc_unitxt.prs b/system/word-select/pc_unitxt.prs new file mode 100644 index 00000000..29a60fa9 Binary files /dev/null and b/system/word-select/pc_unitxt.prs differ diff --git a/system/word-select/pc_ws_data.bin b/system/word-select/pc_ws_data.bin new file mode 100644 index 00000000..baf9dd49 Binary files /dev/null and b/system/word-select/pc_ws_data.bin differ diff --git a/system/word-select/xb_ws_data.bin b/system/word-select/xb_ws_data.bin new file mode 100644 index 00000000..50248ea0 Binary files /dev/null and b/system/word-select/xb_ws_data.bin differ