rewrite word select table to support all versions
This commit is contained in:
+54
@@ -1244,6 +1244,60 @@ Action a_encode_unicode_text_set(
|
||||
write_output_data(args, encoded.data(), encoded.size(), "prs");
|
||||
});
|
||||
|
||||
// CLI action "decode-word-select-set": parses a Word Select data file for the
// version given on the command line and prints its strings and token table to
// stdout.
//
// If --unitxt is given, the file is loaded either as a PRS unitxt file or as a
// JSON list of lists of strings (chosen by filename extension), and one
// collection from it is used as the string table: collection 0 for BB_V4,
// collection 35 for every other version.
//
// If --unitxt is not given, a null collection pointer is passed to
// WordSelectSet. Versions that need a unitxt collection then fail with the
// explicit "a unitxt collection is required" error from the parser, instead of
// parsing against an empty string table and failing later while printing.
Action a_decode_word_select_set(
    "decode-word-select-set", "\
decode-word-select-set [INPUT-FILENAME]\n\
Decode a Word Select data file and print all the tokens. A version option\n\
(e.g. --gc-ep3) is required. If the Word Select set is for PC or BB, the\n\
--unitxt option is also required, and must point to a unitxt file in prs\n\
or JSON format. For PC (V2), the unitxt_e.prs file should be used; for BB,\n\
the unitxt_ws_e.prs file should be used.\n",
    +[](Arguments& args) {
      auto version = get_cli_version(args);

      string unitxt_filename = args.get<string>("unitxt");
      vector<string> unitxt_collection;
      if (!unitxt_filename.empty()) {
        vector<vector<string>> unitxt_data;
        if (ends_with(unitxt_filename, ".prs")) {
          unitxt_data = parse_unicode_text_set(load_file(unitxt_filename));
        } else if (ends_with(unitxt_filename, ".json")) {
          auto json = JSON::parse(load_file(unitxt_filename));
          for (const auto& coll_it : json.as_list()) {
            auto& coll = unitxt_data.emplace_back();
            for (const auto& str_it : coll_it->as_list()) {
              coll.emplace_back(str_it->as_string());
            }
          }
        } else {
          throw runtime_error("unitxt filename must end in .prs or .json");
        }
        // at() throws if the unitxt file has too few collections
        if (version == Version::BB_V4) {
          unitxt_collection = std::move(unitxt_data.at(0));
        } else {
          unitxt_collection = std::move(unitxt_data.at(35));
        }
      }

      // Only hand a collection to the parser if the user actually supplied one
      const vector<string>* unitxt_collection_ptr = unitxt_filename.empty() ? nullptr : &unitxt_collection;
      WordSelectSet ws(read_input_data(args), version, unitxt_collection_ptr, args.get<bool>("japanese"));
      ws.print(stdout);
    });
|
||||
|
||||
// CLI action "generate-word-select-table": builds the Word Select translation
// table from the files under system/ and prints it to stdout. If a version
// option can be obtained from the command line, the table is printed as that
// version's token index; otherwise the whole table is printed.
Action a_generate_word_select_table(
    "generate-word-select-table", nullptr, +[](Arguments& args) {
      auto ws_table = ServerState::load_word_select_table_from_system();

      // The version is optional for this action, so a runtime_error from
      // get_cli_version is deliberately ignored and UNKNOWN is kept
      Version requested_version = Version::UNKNOWN;
      try {
        requested_version = get_cli_version(args);
      } catch (const runtime_error&) {
      }

      if (requested_version == Version::UNKNOWN) {
        ws_table->print(stdout);
        return;
      }
      ws_table->print_index(stdout, requested_version);
    });
|
||||
|
||||
Action a_cat_client(
|
||||
"cat-client", "\
|
||||
cat-client ADDR:PORT\n\
|
||||
|
||||
+13
-1
@@ -5,10 +5,12 @@
|
||||
|
||||
#include <memory>
|
||||
#include <phosg/Encoding.hh>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Text.hh" // for parray
|
||||
#include "Compression.hh"
|
||||
#include "Text.hh"
|
||||
|
||||
class PSOEncryption {
|
||||
public:
|
||||
@@ -277,6 +279,16 @@ DecryptedPR2 decrypt_pr2_data(const std::string& data) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <bool IsBigEndian>
|
||||
std::string decrypt_and_decompress_pr2_data(const std::string& data) {
|
||||
auto decrypted = decrypt_pr2_data<IsBigEndian>(data);
|
||||
std::string decompressed = prs_decompress(decrypted.compressed_data);
|
||||
if (decompressed.size() != decrypted.decompressed_size) {
|
||||
throw std::runtime_error("decompressed size does not match expected size");
|
||||
}
|
||||
return decompressed;
|
||||
}
|
||||
|
||||
template <bool IsBigEndian>
|
||||
std::string encrypt_pr2_data(const std::string& data, size_t decompressed_size, uint32_t seed) {
|
||||
using U32T = std::conditional_t<IsBigEndian, be_uint32_t, le_uint32_t>;
|
||||
|
||||
+52
-1
@@ -14,6 +14,7 @@
|
||||
#include "NetworkAddresses.hh"
|
||||
#include "SendCommands.hh"
|
||||
#include "Text.hh"
|
||||
#include "UnicodeTextSet.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@@ -1106,9 +1107,59 @@ void ServerState::load_level_table() {
|
||||
this->level_table = make_shared<LevelTableV4>(*this->load_bb_file("PlyLevelTbl.prs"), true);
|
||||
}
|
||||
|
||||
// Builds the Word Select translation table from the files in
// system/word-select/: the name alias lists (JSON), the PC and BB unitxt files,
// and one Word Select data file per supported game version.
shared_ptr<WordSelectTable> ServerState::load_word_select_table_from_system() {
  // The alias file is a JSON list of lists of strings
  auto alias_lists_json = JSON::parse(load_file("system/word-select/name-alias-lists.json"));
  vector<vector<string>> name_alias_lists;
  for (const auto& list_json : alias_lists_json.as_list()) {
    vector<string> aliases;
    for (const auto& name_json : list_json->as_list()) {
      aliases.emplace_back(name_json->as_string());
    }
    name_alias_lists.emplace_back(std::move(aliases));
  }

  config_log.info("(Word select) Loading pc_unitxt.prs");
  vector<vector<string>> pc_unitxt_data = parse_unicode_text_set(load_file("system/word-select/pc_unitxt.prs"));
  config_log.info("(Word select) Loading bb_unitxt_ws.prs");
  vector<vector<string>> bb_unitxt_data = parse_unicode_text_set(load_file("system/word-select/bb_unitxt_ws.prs"));
  // PC uses collection 35 of its unitxt file; BB uses collection 0
  vector<string> pc_unitxt_collection = std::move(pc_unitxt_data.at(35));
  vector<string> bb_unitxt_collection = std::move(bb_unitxt_data.at(0));

  // Loads and parses a single version's data file
  auto read_set = [](const char* path, Version version, const vector<string>* unitxt, bool use_sjis) {
    return WordSelectSet(load_file(path), version, unitxt, use_sjis);
  };

  config_log.info("(Word select) Loading DC_NTE data");
  WordSelectSet dc_nte_ws = read_set("system/word-select/dc_nte_ws_data.bin", Version::DC_NTE, nullptr, true);
  config_log.info("(Word select) Loading DC_V1_11_2000_PROTOTYPE data");
  WordSelectSet dc_112000_ws = read_set("system/word-select/dc_112000_ws_data.bin", Version::DC_V1_11_2000_PROTOTYPE, nullptr, false);
  config_log.info("(Word select) Loading DC_V1 data");
  WordSelectSet dc_v1_ws = read_set("system/word-select/dcv1_ws_data.bin", Version::DC_V1, nullptr, false);
  config_log.info("(Word select) Loading DC_V2 data");
  WordSelectSet dc_v2_ws = read_set("system/word-select/dcv2_ws_data.bin", Version::DC_V2, nullptr, false);
  config_log.info("(Word select) Loading PC_NTE data");
  WordSelectSet pc_nte_ws = read_set("system/word-select/pc_nte_ws_data.bin", Version::PC_NTE, &pc_unitxt_collection, false);
  config_log.info("(Word select) Loading PC_V2 data");
  WordSelectSet pc_v2_ws = read_set("system/word-select/pc_ws_data.bin", Version::PC_V2, &pc_unitxt_collection, false);
  config_log.info("(Word select) Loading GC_NTE data");
  WordSelectSet gc_nte_ws = read_set("system/word-select/gc_nte_ws_data.bin", Version::GC_NTE, nullptr, false);
  config_log.info("(Word select) Loading GC_V3 data");
  WordSelectSet gc_v3_ws = read_set("system/word-select/gc_ws_data.bin", Version::GC_V3, nullptr, false);
  config_log.info("(Word select) Loading GC_EP3_NTE data");
  WordSelectSet gc_ep3_nte_ws = read_set("system/word-select/gc_ep3_nte_ws_data.bin", Version::GC_EP3_NTE, nullptr, false);
  config_log.info("(Word select) Loading GC_EP3 data");
  WordSelectSet gc_ep3_ws = read_set("system/word-select/gc_ep3_ws_data.bin", Version::GC_EP3, nullptr, false);
  config_log.info("(Word select) Loading XB_V3 data");
  WordSelectSet xb_v3_ws = read_set("system/word-select/xb_ws_data.bin", Version::XB_V3, nullptr, false);
  config_log.info("(Word select) Loading BB_V4 data");
  WordSelectSet bb_v4_ws = read_set("system/word-select/bb_ws_data.bin", Version::BB_V4, &bb_unitxt_collection, false);

  config_log.info("(Word select) Generating table");
  return make_shared<WordSelectTable>(
      dc_nte_ws, dc_112000_ws, dc_v1_ws, dc_v2_ws,
      pc_nte_ws, pc_v2_ws, gc_nte_ws, gc_v3_ws,
      gc_ep3_nte_ws, gc_ep3_ws, xb_v3_ws, bb_v4_ws,
      name_alias_lists);
}
|
||||
|
||||
// Loads the Word Select translation table into this->word_select_table.
// The table is generated from the per-version data files by
// load_word_select_table_from_system. The JSON-based table is no longer loaded
// here, since its result was overwritten immediately.
void ServerState::load_word_select_table() {
  config_log.info("Loading Word Select table");
  this->word_select_table = this->load_word_select_table_from_system();
}
|
||||
|
||||
void ServerState::load_item_name_index() {
|
||||
|
||||
@@ -284,6 +284,7 @@ struct ServerState : public std::enable_shared_from_this<ServerState> {
|
||||
void load_level_table();
|
||||
void load_item_name_index();
|
||||
void load_item_tables();
|
||||
static std::shared_ptr<WordSelectTable> load_word_select_table_from_system();
|
||||
void load_word_select_table();
|
||||
void load_ep3_data();
|
||||
void resolve_ep3_card_names();
|
||||
|
||||
+262
-72
@@ -5,68 +5,276 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Compression.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
static void index_add(vector<size_t>& index, uint16_t position, size_t value) {
|
||||
if (position != 0xFFFF) {
|
||||
if (index.size() <= position) {
|
||||
index.resize(position + 1);
|
||||
template <typename RetT, typename ReadT>
|
||||
static vector<RetT> read_direct_table(const StringReader& base_r, size_t offset, size_t count) {
|
||||
vector<RetT> ret;
|
||||
auto entries_r = base_r.sub(offset, count * sizeof(ReadT));
|
||||
while (!entries_r.eof()) {
|
||||
ret.emplace_back(entries_r.get<ReadT>());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename RetT, typename ReadT, typename OffsetT>
|
||||
static vector<vector<RetT>> read_indirect_table(const StringReader& base_r, size_t offset, size_t count) {
|
||||
vector<vector<RetT>> ret;
|
||||
auto pointers_r = base_r.sub(offset, sizeof(OffsetT) * 2 * count);
|
||||
while (!pointers_r.eof()) {
|
||||
uint32_t sub_offset = pointers_r.get<OffsetT>();
|
||||
uint32_t sub_count = pointers_r.get<OffsetT>();
|
||||
ret.emplace_back(read_direct_table<RetT, ReadT>(base_r, sub_offset, sub_count));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <bool IsBigEndian>
|
||||
struct NonWindowsRoot {
|
||||
using U32T = typename std::conditional<IsBigEndian, be_uint32_t, le_uint32_t>::type;
|
||||
U32T strings_table;
|
||||
U32T table1;
|
||||
U32T table2;
|
||||
U32T token_id_to_string_id_table;
|
||||
U32T table4;
|
||||
U32T article_types_table;
|
||||
U32T table6;
|
||||
} __attribute__((packed));
|
||||
|
||||
// Root structure of a Word Select data file as read by
// parse_windows_t<PCV2Root, ...> (the PC_NTE and PC_V2 cases of the
// WordSelectSet constructor). All fields are little-endian 32-bit values, and
// the struct is packed. There is no strings_table field: the strings for this
// format come from the unitxt collection passed to the parser.
struct PCV2Root {
|
||||
// Purpose of the two leading fields is unknown
le_uint32_t unknown_a1;
|
||||
le_uint32_t unknown_a2;
|
||||
le_uint32_t table1;
|
||||
le_uint32_t table2;
|
||||
// Byte offset of the table of 16-bit string IDs, indexed by token ID
le_uint32_t token_id_to_string_id_table;
|
||||
le_uint32_t table4;
|
||||
le_uint32_t article_types_table;
|
||||
le_uint32_t table6;
|
||||
} __attribute__((packed));
|
||||
|
||||
// Root structure of a Word Select data file as read by
// parse_windows_t<BBRoot, ...> (the BB_V4 case of the WordSelectSet
// constructor). All fields are little-endian 32-bit values, and the struct is
// packed. There is no strings_table field: the strings for this format come
// from the unitxt collection passed to the parser.
struct BBRoot {
|
||||
le_uint32_t table1;
|
||||
le_uint32_t table2;
|
||||
// Byte offset of the table of 16-bit string IDs, indexed by token ID
le_uint32_t token_id_to_string_id_table;
|
||||
le_uint32_t table4;
|
||||
le_uint32_t article_types_table;
|
||||
le_uint32_t table6;
|
||||
} __attribute__((packed));
|
||||
|
||||
template <bool IsBigEndian, size_t StringTableCount, size_t TokenCount>
|
||||
void WordSelectSet::parse_non_windows_t(const std::string& data, bool use_sjis) {
|
||||
using U32T = typename std::conditional<IsBigEndian, be_uint32_t, le_uint32_t>::type;
|
||||
using U16T = typename std::conditional<IsBigEndian, be_uint16_t, le_uint16_t>::type;
|
||||
|
||||
StringReader r(data);
|
||||
uint32_t root_offset = r.pget<U32T>(r.size() - 0x10);
|
||||
const auto& root = r.pget<NonWindowsRoot<IsBigEndian>>(root_offset);
|
||||
|
||||
{
|
||||
auto string_offset_r = r.sub(root.strings_table, sizeof(U32T) * StringTableCount);
|
||||
while (!string_offset_r.eof()) {
|
||||
string raw_s = r.pget_cstr(string_offset_r.template get<U32T>());
|
||||
this->strings.emplace_back(use_sjis ? tt_sjis_to_utf8(raw_s) : tt_8859_to_utf8(raw_s));
|
||||
}
|
||||
index[position] = value;
|
||||
}
|
||||
|
||||
// this->table1 = read_indirect_table<uint16_t, U16T, U32T>(r, root.table1, Table1Count);
|
||||
// this->table2 = read_indirect_table<uint16_t, U16T, U32T>(r, root.table2, Table2Count);
|
||||
this->token_id_to_string_id = read_direct_table<size_t, U16T>(r, root.token_id_to_string_id_table, TokenCount);
|
||||
// this->table4 = read_indirect_table<uint16_t, U16T, U32T>(r, root.table4, Table4Count);
|
||||
// this->article_types = read_direct_table<uint8_t, uint8_t>(r, root.article_types_table, ArticleTypesCount);
|
||||
// this->table6 = read_indirect_table<uint16_t, U16T, U32T>(r, root.table6, Table6Count);
|
||||
}
|
||||
|
||||
WordSelectTable::WordSelectTable(const JSON& json) {
|
||||
this->tokens.reserve(json.size());
|
||||
for (const auto& item : json.as_list()) {
|
||||
JSON dc_value_json = item->at(0);
|
||||
JSON pc_value_json = item->at(1);
|
||||
JSON gc_value_json = item->at(2);
|
||||
JSON ep3_value_json = item->at(3);
|
||||
JSON bb_value_json = item->at(4);
|
||||
uint16_t dc_value = dc_value_json.is_null() ? 0xFFFF : dc_value_json.as_int();
|
||||
uint16_t pc_value = pc_value_json.is_null() ? 0xFFFF : pc_value_json.as_int();
|
||||
uint16_t gc_value = gc_value_json.is_null() ? 0xFFFF : gc_value_json.as_int();
|
||||
uint16_t ep3_value = ep3_value_json.is_null() ? 0xFFFF : ep3_value_json.as_int();
|
||||
uint16_t bb_value = bb_value_json.is_null() ? 0xFFFF : bb_value_json.as_int();
|
||||
this->tokens.emplace_back(Token{
|
||||
.dc_value = dc_value,
|
||||
.pc_value = pc_value,
|
||||
.gc_value = gc_value,
|
||||
.ep3_value = ep3_value,
|
||||
.bb_value = bb_value,
|
||||
});
|
||||
index_add(this->dc_index, dc_value, this->tokens.size() - 1);
|
||||
index_add(this->pc_index, pc_value, this->tokens.size() - 1);
|
||||
index_add(this->gc_index, gc_value, this->tokens.size() - 1);
|
||||
index_add(this->ep3_index, ep3_value, this->tokens.size() - 1);
|
||||
index_add(this->bb_index, bb_value, this->tokens.size() - 1);
|
||||
template <typename RootT, size_t TokenCount>
|
||||
void WordSelectSet::parse_windows_t(const std::string& data, const std::vector<std::string>* unitxt_collection) {
|
||||
if (!unitxt_collection) {
|
||||
throw runtime_error("a unitxt collection is required");
|
||||
}
|
||||
|
||||
StringReader r(data);
|
||||
uint32_t root_offset = r.pget<le_uint32_t>(r.size() - 0x10);
|
||||
const auto& root = r.pget<RootT>(root_offset);
|
||||
this->strings = *unitxt_collection;
|
||||
// this->table1 = read_indirect_table<uint16_t, le_uint16_t, le_uint32_t>(r, root.table1, Table1Count);
|
||||
// this->table2 = read_indirect_table<uint16_t, le_uint16_t, le_uint32_t>(r, root.table2, Table2Count);
|
||||
this->token_id_to_string_id = read_direct_table<size_t, le_uint16_t>(r, root.token_id_to_string_id_table, TokenCount);
|
||||
// this->table4 = read_indirect_table<uint16_t, le_uint16_t, le_uint32_t>(r, root.table4, Table4Count);
|
||||
// this->article_types = read_direct_table<uint8_t, uint8_t>(r, root.article_types_table, ArticleTypesCount);
|
||||
// this->table6 = read_indirect_table<uint16_t, le_uint16_t, le_uint32_t>(r, root.table6, Table6Count);
|
||||
}
|
||||
|
||||
uint16_t WordSelectTable::Token::value_for_version(Version version) const {
|
||||
// Parses a Word Select data file for the given game version.
//
// Arguments:
//   data: raw file contents, exactly as stored on disk
//   version: which game version's format to parse
//   unitxt_collection: the string collection for versions whose strings are
//     not embedded in the file (PC_NTE, PC_V2 and BB_V4); ignored for all
//     other versions
//   use_sjis: for versions with embedded strings, whether to decode them as
//     Shift-JIS (true) or ISO 8859 (false)
//
// The numeric template arguments below are the string table entry count and
// token count for each version; they match the per-version layout notes in the
// WordSelectSet class declaration.
//
// Throws runtime_error if the data is too small (DC_NTE), if a required unitxt
// collection is missing, or if the version has no Word Select data format.
WordSelectSet::WordSelectSet(const string& data, Version version, const vector<string>* unitxt_collection, bool use_sjis) {
  switch (version) {
    case Version::DC_NTE: {
      // Unlike the other versions, DC NTE data is not in PR2 format: the last
      // 4 bytes are a little-endian PSOV2Encryption seed, and everything
      // before them is the encrypted (uncompressed) data
      if (data.size() < 4) {
        throw runtime_error("data is too small");
      }
      string decrypted = data.substr(0, data.size() - 4);
      uint32_t seed = *reinterpret_cast<const le_uint32_t*>(data.data() + data.size() - 4);
      PSOV2Encryption crypt(seed);
      crypt.decrypt(decrypted);
      this->parse_non_windows_t<false, 0x469, 0x466>(decrypted, use_sjis);
      break;
    }
    case Version::DC_V1_11_2000_PROTOTYPE:
      this->parse_non_windows_t<false, 0x45E, 0x44B>(decrypt_and_decompress_pr2_data<false>(data), use_sjis);
      break;
    case Version::DC_V1:
    case Version::DC_V2:
      this->parse_non_windows_t<false, 0x467, 0x457>(decrypt_and_decompress_pr2_data<false>(data), use_sjis);
      break;
    case Version::PC_NTE:
    case Version::PC_V2:
      this->parse_windows_t<PCV2Root, 0x645>(decrypt_and_decompress_pr2_data<false>(data), unitxt_collection);
      break;
    case Version::GC_NTE:
      this->parse_non_windows_t<true, 0x63F, 0x693>(decrypt_and_decompress_pr2_data<true>(data), use_sjis);
      break;
    case Version::GC_EP3_NTE:
      this->parse_non_windows_t<true, 0x67C, 0x68C>(decrypt_and_decompress_pr2_data<true>(data), use_sjis);
      break;
    case Version::GC_V3:
    case Version::GC_EP3:
      this->parse_non_windows_t<true, 0x804, 0x68C>(decrypt_and_decompress_pr2_data<true>(data), use_sjis);
      break;
    case Version::XB_V3:
      this->parse_non_windows_t<false, 0x67B, 0x68C>(decrypt_and_decompress_pr2_data<false>(data), use_sjis);
      break;
    case Version::BB_V4:
      this->parse_windows_t<BBRoot, 0x68C>(decrypt_and_decompress_pr2_data<false>(data), unitxt_collection);
      break;
    default:
      throw runtime_error("unsupported word select data version");
  }
}
|
||||
|
||||
// Returns the string for the given token ID.
// Throws out_of_range if the token ID is not in the token table, or if the
// string ID it maps to is not in the string table.
const string& WordSelectSet::string_for_token(uint16_t token_id) const {
  const size_t string_id = this->token_id_to_string_id.at(token_id);
  return this->strings.at(string_id);
}
|
||||
|
||||
void WordSelectSet::print(FILE* stream) const {
|
||||
fprintf(stream, "strings:\n");
|
||||
for (size_t z = 0; z < this->strings.size(); z++) {
|
||||
fprintf(stream, " [%04zX] \"%s\"\n", z, this->strings[z].c_str());
|
||||
}
|
||||
fprintf(stream, "token_id_to_string_id:\n");
|
||||
for (size_t z = 0; z < this->token_id_to_string_id.size(); z++) {
|
||||
fprintf(stream, " [%04zX] %04zX \"%s\"\n", z, this->token_id_to_string_id[z], this->string_for_token(z).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the cross-version translation table from one WordSelectSet per
// version.
//
// Tokens from different versions are considered the same token when their
// strings have the same canonical name. name_alias_lists defines the canonical
// names: in each list, the first entry is the canonical name and all later
// entries are aliases for it. Lists with fewer than two entries are ignored. A
// string that is not an alias is its own canonical name.
//
// In every version, 13 dynamic tokens (12 player-name placeholders and one
// blank) are appended after that version's own tokens.
WordSelectTable::WordSelectTable(
    const WordSelectSet& dc_nte_ws,
    const WordSelectSet& dc_112000_ws,
    const WordSelectSet& dc_v1_ws,
    const WordSelectSet& dc_v2_ws,
    const WordSelectSet& pc_nte_ws,
    const WordSelectSet& pc_v2_ws,
    const WordSelectSet& gc_nte_ws,
    const WordSelectSet& gc_v3_ws,
    const WordSelectSet& gc_ep3_nte_ws,
    const WordSelectSet& gc_ep3_ws,
    const WordSelectSet& xb_v3_ws,
    const WordSelectSet& bb_v4_ws,
    const vector<vector<string>>& name_alias_lists) {

  // If the same alias appears in multiple lists, the first list wins
  unordered_map<string, string> name_to_canonical_name;
  for (const auto& alias_list : name_alias_lists) {
    if (alias_list.size() < 2) {
      continue;
    }
    const string& canonical_name = alias_list.front();
    for (auto it = alias_list.begin() + 1; it != alias_list.end(); ++it) {
      name_to_canonical_name.emplace(*it, canonical_name);
    }
  }

  vector<shared_ptr<Token>> dynamic_tokens;
  {
    for (size_t z = 0; z < 12; z++) {
      auto& token = dynamic_tokens.emplace_back(make_shared<Token>());
      token->canonical_name = string_printf("__PLAYER_%zu_NAME__", z);
      this->name_to_token.emplace(token->canonical_name, token);
    }
    auto& token = dynamic_tokens.emplace_back(make_shared<Token>());
    token->canonical_name = "__BLANK__";
    this->name_to_token.emplace(token->canonical_name, token);
  }

  array<const WordSelectSet*, 12> ws_sets = {
      &dc_nte_ws, &dc_112000_ws, &dc_v1_ws, &dc_v2_ws,
      &pc_nte_ws, &pc_v2_ws, &gc_nte_ws, &gc_v3_ws,
      &gc_ep3_nte_ws, &gc_ep3_ws, &xb_v3_ws, &bb_v4_ws};

  for (size_t s_version = 0; s_version < ws_sets.size(); s_version++) {
    // NOTE(review): this relies on the Version enum values from DC_NTE onward
    // being contiguous and in the same order as ws_sets - confirm if the enum
    // changes
    Version version = static_cast<Version>(static_cast<size_t>(Version::DC_NTE) + s_version);
    const auto& ws_set = *ws_sets[s_version];
    auto& index = this->tokens_by_version.at(s_version);

    index.reserve(ws_set.num_tokens() + dynamic_tokens.size());
    for (size_t token_id = 0; token_id < ws_set.num_tokens(); token_id++) {
      const string& str = ws_set.string_for_token(token_id);

      // Most strings are not aliases, so look them up with find() instead of
      // paying for an exception on every miss
      auto alias_it = name_to_canonical_name.find(str);
      const string& canonical_name = (alias_it != name_to_canonical_name.end()) ? alias_it->second : str;

      auto token_it = this->name_to_token.find(canonical_name);
      if (token_it == this->name_to_token.end()) {
        auto new_token = make_shared<Token>();
        new_token->canonical_name = canonical_name;
        token_it = this->name_to_token.emplace(canonical_name, std::move(new_token)).first;
      }
      token_it->second->slot_for_version(version) = token_id;
      index.emplace_back(token_it->second);
    }

    // The dynamic tokens directly follow the version's own tokens
    size_t dynamic_token_base_id = ws_set.num_tokens();
    for (size_t z = 0; z < dynamic_tokens.size(); z++) {
      auto& token = dynamic_tokens[z];
      token->slot_for_version(version) = dynamic_token_base_id + z;
      index.emplace_back(token);
    }
  }
}
|
||||
|
||||
void WordSelectTable::print(FILE* stream) const {
|
||||
fprintf(stream, "DCN DC11 DCv1 DCv2 PCN PCv2 GCN GCv3 Ep3N Ep3 XBv3 BBv4 CANONICAL-NAME\n");
|
||||
for (const auto& it : this->name_to_token) {
|
||||
const auto& token = it.second;
|
||||
for (size_t z = 0; z < 12; z++) {
|
||||
if (token->values_by_version[z] == 0xFFFF) {
|
||||
fprintf(stream, " ");
|
||||
} else {
|
||||
fprintf(stream, "%04hX ", token->values_by_version[z]);
|
||||
}
|
||||
}
|
||||
string serialized = JSON(token->canonical_name).serialize();
|
||||
fprintf(stream, "%s\n", serialized.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void WordSelectTable::print_index(FILE* stream, Version v) const {
|
||||
fprintf(stream, " DCN DC11 DCv1 DCv2 PCN PCv2 GCN GCv3 Ep3N Ep3 XBv3 BBv4 CANONICAL-NAME\n");
|
||||
const auto& index = this->tokens_for_version(v);
|
||||
for (size_t token_id = 0; token_id < index.size(); token_id++) {
|
||||
const auto& token = index[token_id];
|
||||
fprintf(stream, "%04zX => ", token_id);
|
||||
for (size_t z = 0; z < 12; z++) {
|
||||
if (token->values_by_version[z] == 0xFFFF) {
|
||||
fprintf(stream, " ");
|
||||
} else {
|
||||
fprintf(stream, "%04hX ", token->values_by_version[z]);
|
||||
}
|
||||
}
|
||||
string serialized = JSON(token->canonical_name).serialize();
|
||||
fprintf(stream, "%s\n", serialized.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,42 +282,18 @@ WordSelectMessage WordSelectTable::translate(
|
||||
const WordSelectMessage& msg,
|
||||
Version from_version,
|
||||
Version to_version) const {
|
||||
const std::vector<size_t>* index;
|
||||
switch (from_version) {
|
||||
case Version::DC_NTE:
|
||||
case Version::DC_V1_11_2000_PROTOTYPE:
|
||||
case Version::DC_V1:
|
||||
case Version::DC_V2:
|
||||
index = &this->dc_index;
|
||||
break;
|
||||
case Version::PC_NTE:
|
||||
case Version::PC_V2:
|
||||
index = &this->pc_index;
|
||||
break;
|
||||
case Version::GC_NTE:
|
||||
case Version::GC_V3:
|
||||
case Version::XB_V3:
|
||||
// TODO: Which index does GC_NTE use? Here we presume it's the same as GC,
|
||||
// but this may not be true
|
||||
index = &this->gc_index;
|
||||
break;
|
||||
case Version::GC_EP3_NTE:
|
||||
case Version::GC_EP3:
|
||||
index = &this->ep3_index;
|
||||
break;
|
||||
case Version::BB_V4:
|
||||
index = &this->bb_index;
|
||||
break;
|
||||
default:
|
||||
throw logic_error("invalid word select version");
|
||||
}
|
||||
const auto& index = this->tokens_for_version(from_version);
|
||||
|
||||
WordSelectMessage ret;
|
||||
for (size_t z = 0; z < ret.tokens.size(); z++) {
|
||||
if (msg.tokens[z] == 0xFFFF) {
|
||||
ret.tokens[z] = 0xFFFF;
|
||||
} else {
|
||||
ret.tokens[z] = this->tokens.at(index->at(msg.tokens[z])).value_for_version(to_version);
|
||||
const auto& token = index.at(msg.tokens[z]);
|
||||
if (!token) {
|
||||
throw runtime_error(string_printf("token %04hX does not exist in the index", msg.tokens[z].load()));
|
||||
}
|
||||
ret.tokens[z] = token->slot_for_version(to_version);
|
||||
if (ret.tokens[z] == 0xFFFF) {
|
||||
throw runtime_error(string_printf("token %04hX has no translation", msg.tokens[z].load()));
|
||||
}
|
||||
@@ -121,3 +305,9 @@ WordSelectMessage WordSelectTable::translate(
|
||||
ret.unknown_a4 = msg.unknown_a4;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Creates a token that does not exist in any version: every per-version ID is
// set to 0xFFFF, which is the value treated as "no token" elsewhere in this
// class.
WordSelectTable::Token::Token() {
  this->values_by_version.fill(0xFFFF);
}
|
||||
|
||||
+87
-14
@@ -2,16 +2,84 @@
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <phosg/JSON.hh>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "CommandFormats.hh"
|
||||
#include "QuestScript.hh"
|
||||
|
||||
class WordSelectSet {
|
||||
public:
|
||||
WordSelectSet(const std::string& data, Version version, const std::vector<std::string>* unitxt_collection, bool use_sjis);
|
||||
~WordSelectSet() = default;
|
||||
|
||||
inline size_t num_strings() const {
|
||||
return this->strings.size();
|
||||
}
|
||||
inline size_t num_tokens() const {
|
||||
return this->token_id_to_string_id.size();
|
||||
}
|
||||
|
||||
const std::string& string_for_token(uint16_t token_id) const;
|
||||
|
||||
void print(FILE* stream) const;
|
||||
|
||||
protected:
|
||||
template <bool IsBigEndian, size_t StringTableCount, size_t TokenCount>
|
||||
void parse_non_windows_t(const std::string& data, bool use_sjis);
|
||||
template <typename RootT, size_t TokenCount>
|
||||
void parse_windows_t(const std::string& data, const std::vector<std::string>* unitxt_collection);
|
||||
|
||||
std::vector<std::string> strings;
|
||||
std::vector<size_t> token_id_to_string_id;
|
||||
// Note: PC NTE and PC have exactly the same parameters
|
||||
// => DC NTE DC112000 DCv1 DCv2 PCNTE/PC GC NTE GC XB Ep3 NTE Ep3 USA BB
|
||||
// root: => 000074DC 000072A4 0000755C 0000755C 00002B50 0000AB04 0000BCAC 0000B620 0000B648 0000B914 0000B5FC
|
||||
// u32 ???: => 00002A9C
|
||||
// TODO
|
||||
// u32 ???: => 00002B14
|
||||
// TODO
|
||||
// u32 strings_table: => 00006338 0000612C 000063C0 000063C0 (unitxt) 00009208 00009C9C 00009C34 00009C5C 00009904 (unitxt)
|
||||
// u32 string_offset[COUNT]: => 469 45E 467 467 (unitxt) 63F 804 67B 67C 804 (unitxt)
|
||||
// char string[...\0]
|
||||
// u32 table1: => 00000B90 00000B54 00000D3C 00000D3C 00001018 0000100C 000012F0 000012F0 000012F0 000011D0 000012F0
|
||||
// {u32 offset, u32 count}[COUNT]: => 94 122 93 93 F9 F9 126 126 126 17F 126
|
||||
// u16[count]
|
||||
// u32 table2: => 00001178 00001108 00001300 00001300 000019D8 000019CC 00001EE8 00001EE8 00001EE8 00001DC8 00001EE8
|
||||
// {u32 offset, u32 count}[COUNT]: => 7 7 7 7 7 7 13 13 13 13 13
|
||||
// u16[count]
|
||||
// u32 token_id_to_string_id_table => 000011B0 00001140 00001338 00001338 00001A10 00001A04 00001F80 00001F80 00001F80 00001E60 00001F80
|
||||
// u16[COUNT] string_id_for_token_id => 466 44B 457 457 645 693 68C 68C 68C 68C 68C
|
||||
// u32 table4: => 00001A5C 00001B08 00001D1C 00001D1C 000027D0 000027C4 00002DCC 00002DCC 00002DCC 00002CAC 00002DCC
|
||||
// (non-NTE) {u32 offset, u32 count}[COUNT]: => 2 2 2 2 2 2 2 2 2 2
|
||||
// u16[count]
|
||||
// (NTE) u16[COUNT] => E1
|
||||
// u32 article_types_table: => 00001C1E 00001B18 00001D2C 00001D2C 000027E0 000027D4 00002DDC 00002DDC 00002DDC 00002CBC 00002DDC
|
||||
// u8[COUNT] article_types => 1C8 166 166 166 266 266 28A 28A 28A 28A 266
|
||||
// u32 table6: => 00001E28 00001CBC 00001ED0 00001ED0 00002A84 00002A78 000030A4 000030A4 000030A4 00002F84 00003080
|
||||
// {u32 offset, u32 count}[3]:
|
||||
// u16[count]
|
||||
};
|
||||
|
||||
class WordSelectTable {
|
||||
public:
|
||||
explicit WordSelectTable(const JSON& json);
|
||||
WordSelectTable(
|
||||
const WordSelectSet& dc_nte_ws,
|
||||
const WordSelectSet& dc_112000_ws,
|
||||
const WordSelectSet& dc_v1_ws,
|
||||
const WordSelectSet& dc_v2_ws,
|
||||
const WordSelectSet& pc_nte_ws,
|
||||
const WordSelectSet& pc_v2_ws,
|
||||
const WordSelectSet& gc_nte_ws,
|
||||
const WordSelectSet& gc_v3_ws,
|
||||
const WordSelectSet& gc_ep3_nte_ws,
|
||||
const WordSelectSet& gc_ep3_ws,
|
||||
const WordSelectSet& xb_v3_ws,
|
||||
const WordSelectSet& bb_v4_ws,
|
||||
const std::vector<std::vector<std::string>>& name_alias_lists);
|
||||
|
||||
void print(FILE* stream) const;
|
||||
void print_index(FILE* stream, Version v) const;
|
||||
|
||||
WordSelectMessage translate(
|
||||
const WordSelectMessage& msg,
|
||||
@@ -20,18 +88,23 @@ public:
|
||||
|
||||
private:
|
||||
struct Token {
|
||||
uint16_t dc_value;
|
||||
uint16_t pc_value;
|
||||
uint16_t gc_value;
|
||||
uint16_t ep3_value;
|
||||
uint16_t bb_value;
|
||||
std::array<uint16_t, 12> values_by_version;
|
||||
std::string canonical_name;
|
||||
|
||||
uint16_t value_for_version(Version version) const;
|
||||
Token();
|
||||
|
||||
inline uint16_t& slot_for_version(Version version) {
|
||||
return this->values_by_version.at(static_cast<size_t>(version) - static_cast<size_t>(Version::DC_NTE));
|
||||
}
|
||||
inline uint16_t slot_for_version(Version version) const {
|
||||
return this->values_by_version.at(static_cast<size_t>(version) - static_cast<size_t>(Version::DC_NTE));
|
||||
}
|
||||
};
|
||||
std::vector<size_t> dc_index;
|
||||
std::vector<size_t> pc_index;
|
||||
std::vector<size_t> gc_index;
|
||||
std::vector<size_t> ep3_index;
|
||||
std::vector<size_t> bb_index;
|
||||
std::vector<Token> tokens;
|
||||
|
||||
std::map<std::string, std::shared_ptr<Token>> name_to_token;
|
||||
std::array<std::vector<std::shared_ptr<Token>>, 12> tokens_by_version;
|
||||
|
||||
inline const std::vector<std::shared_ptr<Token>>& tokens_for_version(Version version) const {
|
||||
return this->tokens_by_version.at(static_cast<size_t>(version) - static_cast<size_t>(Version::DC_NTE));
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user