add text archive encoder/decoder

This commit is contained in:
Martin Michelsen
2023-10-08 23:00:18 -07:00
parent aa76631073
commit a674721727
6 changed files with 457 additions and 33 deletions
+1
View File
@@ -103,6 +103,7 @@ add_executable(newserv
src/Shell.cc
src/StaticGameData.cc
src/Text.cc
src/TextArchive.cc
src/Version.cc
)
target_include_directories(newserv PUBLIC ${LIBEVENT_INCLUDE_DIR})
+1
View File
@@ -397,6 +397,7 @@ newserv has many CLI options, which can be used to access functionality other th
* Decode Shift-JIS text to UTF-16 (`decode-sjis`)
* Convert quests in .gci, .vms, .dlq, or .qst format to .bin/.dat format (`decode-gci`, `decode-vms`, `decode-dlq`, `decode-qst`)
* Convert quests in .bin/.dat to .qst format (`encode-qst`)
* Convert text archives (e.g. TextEnglish.pr2) to JSON and vice versa (`decode-text-archive`, `encode-text-archive`)
* Disassemble quest scripts (`disassemble-quest-script`)
* Format Episode 3 game data in a human-readable manner (`show-ep3-maps`, `show-ep3-cards`)
* Convert item data to a human-readable description, or vice versa (`describe-item`, `encode-item`)
+58 -33
View File
@@ -36,6 +36,7 @@
#include "ServerState.hh"
#include "StaticGameData.hh"
#include "Text.hh"
#include "TextArchive.hh"
using namespace std;
@@ -241,6 +242,10 @@ The actions are:\n\
is treated as a prefix which is prepended to the filename of each file\n\
contained in the archive. If --big-endian is given, the archive header is\n\
read in GameCube format; otherwise it is read in PC/BB format.\n\
decode-text-archive [INPUT-FILENAME [OUTPUT-FILENAME]]\n\
encode-text-archive [INPUT-FILENAME [OUTPUT-FILENAME]]\n\
Decode a text archive (e.g. TextEnglish.pr2) to JSON for easy editing, or\n\
encode a JSON file to a text archive.\n\
format-rare-item-set [--json] [INPUT-FILENAME]\n\
Print the contents of a rare item table in a human-readable format. If\n\
--json is given, the input is parsed as a JSON rare item set (see\n\
@@ -306,6 +311,8 @@ enum class Behavior {
DECODE_SJIS,
EXTRACT_GSL,
EXTRACT_BML,
DECODE_TEXT_ARCHIVE,
ENCODE_TEXT_ARCHIVE,
FORMAT_RARE_ITEM_SET,
CONVERT_ITEMRT_REL_TO_JSON,
SHOW_EP3_MAPS,
@@ -357,6 +364,8 @@ static bool behavior_takes_input_filename(Behavior b) {
(b == Behavior::CONVERT_ITEMRT_REL_TO_JSON) ||
(b == Behavior::EXTRACT_GSL) ||
(b == Behavior::EXTRACT_BML) ||
(b == Behavior::DECODE_TEXT_ARCHIVE) ||
(b == Behavior::ENCODE_TEXT_ARCHIVE) ||
(b == Behavior::DESCRIBE_ITEM) ||
(b == Behavior::ENCODE_ITEM) ||
(b == Behavior::PARSE_OBJECT_GRAPH) ||
@@ -392,7 +401,9 @@ static bool behavior_takes_output_filename(Behavior b) {
(b == Behavior::CONVERT_ITEMRT_REL_TO_JSON) ||
(b == Behavior::DECODE_SJIS) ||
(b == Behavior::EXTRACT_GSL) ||
(b == Behavior::EXTRACT_BML);
(b == Behavior::EXTRACT_BML) ||
(b == Behavior::DECODE_TEXT_ARCHIVE) ||
(b == Behavior::ENCODE_TEXT_ARCHIVE);
}
int main(int argc, char** argv) {
@@ -620,6 +631,10 @@ int main(int argc, char** argv) {
behavior = Behavior::EXTRACT_GSL;
} else if (!strcmp(argv[x], "extract-bml")) {
behavior = Behavior::EXTRACT_BML;
} else if (!strcmp(argv[x], "decode-text-archive")) {
behavior = Behavior::DECODE_TEXT_ARCHIVE;
} else if (!strcmp(argv[x], "encode-text-archive")) {
behavior = Behavior::ENCODE_TEXT_ARCHIVE;
} else if (!strcmp(argv[x], "generate-dc-serial-number")) {
behavior = Behavior::GENERATE_DC_SERIAL_NUMBER;
} else if (!strcmp(argv[x], "generate-all-dc-serial-numbers")) {
@@ -699,6 +714,8 @@ int main(int argc, char** argv) {
filename += ".bmp";
} else if (behavior == Behavior::ENCODE_GVM) {
filename += ".gvm";
} else if (behavior == Behavior::DECODE_TEXT_ARCHIVE) {
filename += ".json";
} else if (behavior == Behavior::DISASSEMBLE_QUEST_SCRIPT) {
filename += ".txt";
} else if (behavior == Behavior::CONVERT_ITEMRT_REL_TO_JSON) {
@@ -734,19 +751,9 @@ int main(int argc, char** argv) {
size_t pr2_expected_size = 0;
if (behavior == Behavior::DECOMPRESS_PR2) {
if (data.size() < 8) {
throw runtime_error("not enough data for PR2 header");
}
data.resize((data.size() + 3) & (~3));
StringReader r(data);
pr2_expected_size = big_endian ? r.get_u32b() : r.get_u32l();
PSOV2Encryption crypt(big_endian ? r.get_u32b() : r.get_u32l());
if (big_endian) {
crypt.encrypt_big_endian(data.data() + 8, data.size() - 8);
} else {
crypt.decrypt(data.data() + 8, data.size() - 8);
}
data = data.substr(8);
auto decrypted = big_endian ? decrypt_pr2_data<true>(data) : decrypt_pr2_data<false>(data);
pr2_expected_size = decrypted.decompressed_size;
data = std::move(decrypted.compressed_data);
}
size_t input_bytes = data.size();
@@ -799,25 +806,9 @@ int main(int argc, char** argv) {
log_warning("Result data size (%zu bytes) does not match expected size from PR2 header (%zu bytes)", data.size(), pr2_expected_size);
} else if (behavior == Behavior::COMPRESS_PR2) {
uint32_t pr2_seed = seed.empty() ? random_object<uint32_t>() : stoul(seed, nullptr, 16);
size_t orig_size = data.size();
data.resize((data.size() + 3) & (~3));
PSOV2Encryption crypt(pr2_seed);
if (big_endian) {
crypt.encrypt_big_endian(data.data(), data.size());
} else {
crypt.encrypt(data.data(), data.size());
}
data.resize(orig_size);
StringWriter w;
if (big_endian) {
w.put_u32b(input_bytes);
w.put_u32b(pr2_seed);
} else {
w.put_u32l(input_bytes);
w.put_u32l(pr2_seed);
}
w.write(data);
data = std::move(w.str());
data = big_endian
? encrypt_pr2_data<true>(data, input_bytes, pr2_seed)
: encrypt_pr2_data<false>(data, input_bytes, pr2_seed);
}
write_output_data(data.data(), data.size());
@@ -1438,6 +1429,40 @@ int main(int argc, char** argv) {
break;
}
case Behavior::DECODE_TEXT_ARCHIVE: {
  // Parse the input as a PR2 text archive (honoring --big-endian) and write
  // its contents to the output as formatted JSON.
  string input_data = read_input_data();
  TextArchive archive(input_data, big_endian);
  string serialized = archive.json().serialize(JSON::SerializeOption::FORMAT);
  write_output_data(serialized.data(), serialized.size());
  break;
}
case Behavior::ENCODE_TEXT_ARCHIVE: {
  // Build a text archive from the JSON input and write it out as a pair of
  // files (.pr2 and .pr3). Because two files are produced, the result can
  // never be written to stdout.
  auto json = JSON::parse(read_input_data());
  TextArchive a(json);
  auto result = a.serialize(big_endian);
  if (!output_filename) {
    // No output name given: derive both names from the input filename, which
    // therefore must be a real file rather than stdin.
    if (!input_filename || !strcmp(input_filename, "-")) {
      throw runtime_error("encoded text archive cannot be written to stdout");
    }
    save_file(string_printf("%s.pr2", input_filename), result.first);
    save_file(string_printf("%s.pr3", input_filename), result.second);
  } else if (!strcmp(output_filename, "-")) {
    // The check must be on the OUTPUT filename; input_filename may be null
    // here (reading from stdin with an explicit output name is valid).
    throw runtime_error("encoded text archive cannot be written to stdout");
  } else {
    string out_filename = output_filename;
    if (ends_with(out_filename, ".pr2")) {
      // The name already ends in .pr2; the .pr3 name differs only in the last
      // character.
      save_file(out_filename, result.first);
      out_filename[out_filename.size() - 1] = '3';
      save_file(out_filename, result.second);
    } else {
      save_file(out_filename + ".pr2", result.first);
      save_file(out_filename + ".pr3", result.second);
    }
  }
  break;
}
case Behavior::CAT_CLIENT: {
shared_ptr<PSOBBEncryption::KeyFile> key;
if (cli_version == GameVersion::BB) {
+44
View File
@@ -280,3 +280,47 @@ std::u16string encrypt_challenge_rank_text(const ptext<char16_t, Size>& data) {
}
std::string decrypt_v2_registry_value(const void* data, size_t size);
// Result of decrypt_pr2_data: the decrypted payload of a PR2 file along with
// the size field read from its header.
struct DecryptedPR2 {
  // Decrypted contents of the file after the 8-byte header. This data is still
  // compressed.
  std::string compressed_data;
  // First 32-bit header field: the size the data is expected to have after
  // decompression. Zero-initialized so a default-constructed value is not
  // indeterminate.
  size_t decompressed_size = 0;
};
// Decrypts the contents of a PR2/PR3 file.
//
// The file starts with an 8-byte header (two 32-bit integers in the
// endianness selected by IsBigEndian): the decompressed size, then the
// encryption seed. Everything after the header is encrypted with
// PSOV2Encryption using that seed.
//
// Returns the decrypted (still compressed) payload and the decompressed size
// from the header. Throws std::runtime_error if data is shorter than the
// header.
template <bool IsBigEndian>
DecryptedPR2 decrypt_pr2_data(const std::string& data) {
  using U32T = std::conditional_t<IsBigEndian, be_uint32_t, le_uint32_t>;
  if (data.size() < 8) {
    throw std::runtime_error("not enough data for PR2 header");
  }
  StringReader r(data);
  // Braced initializers are evaluated in order, so the size field is read
  // before the seed below.
  DecryptedPR2 ret = {
      .compressed_data = data.substr(8),
      .decompressed_size = r.get<U32T>()};
  PSOV2Encryption crypt(r.get<U32T>());

  // The payload length is not necessarily a multiple of 4, but the cipher is
  // applied in 32-bit units (the previous inline implementation padded the
  // buffer for this reason). Pad for decryption, then trim the padding off.
  const size_t orig_size = ret.compressed_data.size();
  ret.compressed_data.resize((orig_size + 3) & (~static_cast<size_t>(3)));
  if constexpr (IsBigEndian) {
    crypt.encrypt_big_endian(ret.compressed_data.data(), ret.compressed_data.size());
  } else {
    crypt.decrypt(ret.compressed_data.data(), ret.compressed_data.size());
  }
  ret.compressed_data.resize(orig_size);
  return ret;
}
// Builds the contents of a PR2/PR3 file from already-compressed data.
//
// The output is an 8-byte header (decompressed_size, then seed, both 32-bit
// integers in the endianness selected by IsBigEndian) followed by data
// encrypted with PSOV2Encryption using seed. The returned string is always
// data.size() + 8 bytes long.
template <bool IsBigEndian>
std::string encrypt_pr2_data(const std::string& data, size_t decompressed_size, uint32_t seed) {
  using U32T = std::conditional_t<IsBigEndian, be_uint32_t, le_uint32_t>;
  StringWriter w;
  w.put<U32T>(decompressed_size);
  w.put<U32T>(seed);
  w.write(data);
  std::string ret = std::move(w.str());

  // The cipher is applied in 32-bit units (the previous inline implementation
  // padded the buffer for this reason), so pad the payload to a multiple of 4
  // bytes before encrypting and trim the padding afterward. The header is 8
  // bytes, so rounding the total size also rounds the payload size.
  const size_t orig_size = ret.size();
  ret.resize((orig_size + 3) & (~static_cast<size_t>(3)));
  PSOV2Encryption crypt(seed);
  if constexpr (IsBigEndian) {
    crypt.encrypt_big_endian(ret.data() + 8, ret.size() - 8);
  } else {
    crypt.encrypt(ret.data() + 8, ret.size() - 8);
  }
  ret.resize(orig_size);
  return ret;
}
+302
View File
@@ -0,0 +1,302 @@
#include "TextArchive.hh"
#include <phosg/Encoding.hh>
#include <phosg/Filesystem.hh>
#include <phosg/Random.hh>
#include <set>
#include <stdexcept>
#include "Compression.hh"
#include "PSOEncryption.hh"
#include "Text.hh"
using namespace std;
// Loads a text archive from the raw (encrypted and compressed) contents of a
// .pr2 file. big_endian selects which endianness the file is parsed with.
TextArchive::TextArchive(const string& pr2_data, bool big_endian) {
  if (!big_endian) {
    this->load_t<false>(pr2_data);
    return;
  }
  this->load_t<true>(pr2_data);
}
// Builds a text archive from the JSON representation produced by
// TextArchive::json(). The JSON must be a dict with these keys:
//   "collections": list of lists of strings
//   "keyboards": list of keyboards; each is a list of rows, each row a list
//     of integer key codes (rows/columns are read up to the fixed Keyboard
//     dimensions)
//   "keyboard_selector_width": integer
// Missing keys or entries surface as whatever exception the JSON accessors
// throw.
TextArchive::TextArchive(const JSON& json) {
  for (const auto& collection_json : json.at("collections").as_list()) {
    auto& collection = this->collections.emplace_back();
    for (const auto& string_json : collection_json->as_list()) {
      collection.emplace_back(string_json->as_string());
    }
  }

  for (const auto& keyboard_json : json.at("keyboards").as_list()) {
    // make_unique (rather than a raw new expression) ensures the keyboard is
    // not leaked if growing the vector throws.
    auto& keyboard = this->keyboards.emplace_back(make_unique<Keyboard>());
    for (size_t y = 0; y < keyboard->size(); y++) {
      auto& row = keyboard->at(y);
      const auto& row_json = keyboard_json->at(y);
      for (size_t x = 0; x < row.size(); x++) {
        row[x] = row_json.at(x).as_int();
      }
    }
  }

  this->keyboard_selector_width = json.at("keyboard_selector_width").as_int();
}
// Converts the archive to JSON: a dict with "collections" (list of lists of
// strings), "keyboards" (list of keyboards, each a list of rows of key codes),
// and "keyboard_selector_width".
JSON TextArchive::json() const {
  auto all_keyboards = JSON::list();
  for (const auto& keyboard : this->keyboards) {
    auto rows_out = JSON::list();
    for (size_t row_index = 0; row_index < keyboard->size(); row_index++) {
      const auto& keys = keyboard->at(row_index);
      auto keys_out = JSON::list();
      for (size_t column = 0; column < keys.size(); column++) {
        keys_out.emplace_back(keys[column]);
      }
      rows_out.emplace_back(std::move(keys_out));
    }
    all_keyboards.emplace_back(std::move(rows_out));
  }

  auto all_collections = JSON::list();
  for (const auto& strings : this->collections) {
    auto strings_out = JSON::list();
    for (const auto& text : strings) {
      strings_out.emplace_back(text);
    }
    all_collections.emplace_back(std::move(strings_out));
  }

  return JSON::dict({
      {"collections", std::move(all_collections)},
      {"keyboards", std::move(all_keyboards)},
      {"keyboard_selector_width", this->keyboard_selector_width},
  });
}
// Returns the string at the given position. Both lookups are bounds-checked
// via at(), so an invalid index throws instead of reading out of range.
const string& TextArchive::get_string(size_t collection_index, size_t index) const {
  const auto& collection = this->collections.at(collection_index);
  return collection.at(index);
}
// Stores a copy of data at the given position, growing the list of
// collections and/or the target collection (with empty strings) if needed.
void TextArchive::set_string(size_t collection_index, size_t index, const string& data) {
  if (this->collections.size() <= collection_index) {
    this->collections.resize(collection_index + 1);
  }
  auto& target = this->collections[collection_index];
  if (target.size() <= index) {
    target.resize(index + 1);
  }
  target[index] = data;
}
// Same as the const-reference overload, but moves data into place instead of
// copying it.
void TextArchive::set_string(size_t collection_index, size_t index, string&& data) {
  if (this->collections.size() <= collection_index) {
    this->collections.resize(collection_index + 1);
  }
  auto& target = this->collections[collection_index];
  if (target.size() <= index) {
    target.resize(index + 1);
  }
  target[index] = std::move(data);
}
// Sets the number of strings in one collection, creating that collection (and
// any before it) if it does not exist yet.
void TextArchive::resize_collection(size_t collection_index, size_t size) {
  if (this->collections.size() <= collection_index) {
    this->collections.resize(collection_index + 1);
  }
  auto& target = this->collections[collection_index];
  target.resize(size);
}
// Despite the singular name, this overload sets the number of COLLECTIONS in
// the archive (not the size of any one collection).
void TextArchive::resize_collection(size_t num_collections) {
  auto& all_collections = this->collections;
  all_collections.resize(num_collections);
}
// Returns a copy of the keyboard at kb_index. Throws std::out_of_range if the
// index is past the end of the keyboard list or if the slot exists but holds
// no keyboard (slots can be empty after the list has been grown without
// assigning every entry).
TextArchive::Keyboard TextArchive::get_keyboard(size_t kb_index) const {
  const auto& kb = this->keyboards.at(kb_index);
  if (!kb) {
    throw out_of_range("keyboard is not present");
  }
  return *kb;
}
void TextArchive::set_keyboard(size_t kb_index, const Keyboard& kb) {
if (kb_index >= this->keyboards.size()) {
this->keyboards.resize(kb_index + 1);
}
this->keyboards[kb_index].reset(new Keyboard(kb));
}
void TextArchive::resize_keyboards(size_t num_keyboards) {
this->keyboards.resize(num_keyboards);
}
// Encodes the archive and returns (pr2_data, pr3_data), using the endianness
// selected by big_endian.
pair<string, string> TextArchive::serialize(bool big_endian) const {
  return big_endian ? this->serialize_t<true>() : this->serialize_t<false>();
}
template <bool IsBigEndian>
void TextArchive::load_t(const string& pr2_data) {
using U32T = std::conditional_t<IsBigEndian, be_uint32_t, le_uint32_t>;
using U16T = std::conditional_t<IsBigEndian, be_uint16_t, le_uint16_t>;
// The structure is as follows:
// Footer:
// U32T keyboard_index_offset ->:
// U8 num_keyboards
// U8 keyboard_selector_width
// U8 unused[2]
// U32T keyboards_offset ->:
// U32T keyboard_offset[num_keyboards] ->:
// U16T key_defs[7][16]
// U32T collections_offset ->:
// U32T[...] strings_offset ->:
// U32T[...] string_offset ->:
// char string[...\0]
// <EOF>
auto pr2_decrypted = decrypt_pr2_data<IsBigEndian>(pr2_data);
auto decompressed = prs_decompress(pr2_decrypted.compressed_data);
StringReader r(decompressed);
// Annoyingly, there doesn't appear to be any bounds-checking on the language
// functions, so there are no counts of strings in each collection. We have to
// figure out where each collection ends by collecting all the relevant
// offsets in the file instead.
set<uint32_t> used_offsets;
used_offsets.emplace(r.size() - 8);
uint32_t keyboard_index_offset = r.pget<U32T>(r.size() - 8);
used_offsets.emplace(keyboard_index_offset);
size_t num_keyboards = r.pget_u8(keyboard_index_offset);
this->keyboard_selector_width = r.pget_u8(keyboard_index_offset + 1);
uint32_t keyboards_offset = r.pget<U32T>(keyboard_index_offset + 4);
used_offsets.emplace(keyboards_offset);
while (this->keyboards.size() < num_keyboards) {
uint32_t keyboard_offset = r.pget<U32T>(keyboards_offset + 4 * this->keyboards.size());
used_offsets.emplace(keyboard_offset);
auto& kb = this->keyboards.emplace_back(new Keyboard());
auto key_r = r.sub(keyboard_offset, sizeof(Keyboard));
for (size_t y = 0; y < kb->size(); y++) {
auto& row = kb->at(y);
for (size_t x = 0; x < row.size(); x++) {
row[x] = key_r.get<U16T>();
}
}
}
uint32_t collections_offset = r.pget<U32T>(r.size() - 4);
for (uint32_t offset = collections_offset; !used_offsets.count(offset); offset += 4) {
used_offsets.emplace(r.pget<U32T>(offset));
}
used_offsets.emplace(collections_offset);
for (uint32_t offset = collections_offset; (offset == collections_offset) || !used_offsets.count(offset); offset += 4) {
auto& collection = this->collections.emplace_back();
uint32_t first_string_offset_offset = r.pget<U32T>(offset);
for (uint32_t string_offset_offset = first_string_offset_offset;
(string_offset_offset == first_string_offset_offset) || !used_offsets.count(string_offset_offset);
string_offset_offset += 4) {
collection.emplace_back(r.pget_cstr(r.pget<U32T>(string_offset_offset)));
}
}
}
// Encodes the archive into the (pr2_data, pr3_data) pair of file contents.
//
// The pr2 data contains, in order: the unique strings (each NUL-terminated
// and padded to a 4-byte boundary), each collection's table of string
// offsets, the table of collection offsets, the keyboard key definitions, the
// table of keyboard offsets, the keyboard index, and finally the 8-byte
// footer. The pr3 data lists the positions of every offset field written to
// the pr2 data. Both are PRS-compressed and then encrypted with a random seed.
//
// Throws runtime_error if the archive cannot be represented (too many
// keyboards, a missing keyboard, or relocations too far apart).
template <bool IsBigEndian>
pair<string, string> TextArchive::serialize_t() const {
  using U32T = std::conditional_t<IsBigEndian, be_uint32_t, le_uint32_t>;
  using U16T = std::conditional_t<IsBigEndian, be_uint16_t, le_uint16_t>;

  StringWriter w;
  // Positions (within w) of every field that holds an offset; these become the
  // relocation list.
  set<size_t> relocation_offsets;
  auto put_offset_u32 = [&](uint32_t v) {
    relocation_offsets.emplace(w.size());
    w.put<U32T>(v);
  };

  uint32_t collections_offset;
  {
    // Identical strings are written once and shared by all references.
    unordered_map<string, uint32_t> string_to_offset;
    for (const auto& collection : this->collections) {
      for (const auto& s : collection) {
        if (string_to_offset.emplace(s, w.size()).second) {
          w.write(s);
          w.put_u8(0);
          // Keep everything 4-byte aligned; relocations must be aligned.
          while (w.size() & 3) {
            w.put_u8(0);
          }
        }
      }
    }

    vector<uint32_t> collection_offsets;
    for (const auto& collection : this->collections) {
      collection_offsets.emplace_back(w.size());
      for (const auto& s : collection) {
        put_offset_u32(string_to_offset.at(s));
      }
    }

    collections_offset = w.size();
    for (uint32_t collection_offset : collection_offsets) {
      put_offset_u32(collection_offset);
    }
  }

  uint32_t keyboard_index_offset;
  {
    // The keyboard count is stored in a single byte.
    if (this->keyboards.size() > 0xFF) {
      throw runtime_error("too many keyboards");
    }

    vector<uint32_t> keyboard_offsets;
    for (const auto& keyboard : this->keyboards) {
      if (!keyboard) {
        throw runtime_error("keyboard is missing");
      }
      keyboard_offsets.emplace_back(w.size());
      for (size_t y = 0; y < keyboard->size(); y++) {
        const auto& row = keyboard->at(y);
        for (size_t x = 0; x < row.size(); x++) {
          w.put<U16T>(row[x]);
        }
      }
    }

    uint32_t keyboards_offset = w.size();
    for (uint32_t keyboard_offset : keyboard_offsets) {
      put_offset_u32(keyboard_offset);
    }

    keyboard_index_offset = w.size();
    w.put_u8(keyboard_offsets.size());
    w.put_u8(this->keyboard_selector_width);
    w.put_u16(0);
    put_offset_u32(keyboards_offset);
  }

  // Footer: the last 8 bytes of the pr2 data.
  put_offset_u32(keyboard_index_offset);
  put_offset_u32(collections_offset);

  // The pr3 data starts with a 32-byte header holding the relocation count (at
  // offset 4) and the offset of the pr2 footer (at offset 0x10); the other
  // header fields are written as zero.
  StringWriter reloc_w;
  reloc_w.put_u32(0);
  reloc_w.put<U32T>(relocation_offsets.size());
  reloc_w.put_u64(0);
  reloc_w.put<U32T>(w.size() - 8);
  reloc_w.put_u32(0);
  reloc_w.put_u64(0);
  {
    // Each relocation is stored as the distance, in 32-bit words, from the
    // previous relocation (or from the start of the data for the first one).
    size_t offset = 0;
    for (size_t reloc_offset : relocation_offsets) {
      if (reloc_offset & 3) {
        throw logic_error("misaligned relocation");
      }
      size_t num_words = (reloc_offset - offset) >> 2;
      if (num_words > 0xFFFF) {
        throw runtime_error("relocation offset too far away");
      }
      reloc_w.put<U16T>(num_words);
      offset = reloc_offset;
    }
  }

  const string& pr2_data = w.str();
  const string& pr3_data = reloc_w.str();
  string pr2_compressed = prs_compress_optimal(pr2_data.data(), pr2_data.size());
  string pr3_compressed = prs_compress_optimal(pr3_data.data(), pr3_data.size());
  string pr2_ret = encrypt_pr2_data<IsBigEndian>(pr2_compressed, pr2_data.size(), random_object<uint32_t>());
  string pr3_ret = encrypt_pr2_data<IsBigEndian>(pr3_compressed, pr3_data.size(), random_object<uint32_t>());
  return make_pair(std::move(pr2_ret), std::move(pr3_ret));
}
+51
View File
@@ -0,0 +1,51 @@
#pragma once
#include <stdint.h>

#include <memory>
#include <phosg/JSON.hh>
#include <string>
#include <utility>
#include <vector>

#include "Text.hh"
// This class implements loading and saving of text archives, commonly found in
// PSO games with filenames like TextEnglish.pr2 and TextEnglish.pr3. The game
// requires both files, but newserv needs only the pr2 file to load a text
// archive. When saving (serializing), both pr2 and pr3 files are generated.
class TextArchive {
public:
  // A keyboard is a grid of 7 rows of 16 16-bit key codes.
  using Keyboard = parray<parray<uint16_t, 0x10>, 7>;

  // Builds an archive from the JSON representation returned by json().
  explicit TextArchive(const JSON& json);
  // Loads an archive from the raw contents of a .pr2 file.
  TextArchive(const std::string& pr2_data, bool big_endian);
  ~TextArchive() = default;

  // Returns a dict with the keys "collections", "keyboards", and
  // "keyboard_selector_width".
  JSON json() const;

  // get_string throws if either index is out of range. set_string grows the
  // archive as needed to make the given position exist.
  const std::string& get_string(size_t collection_index, size_t index) const;
  void set_string(size_t collection_index, size_t index, const std::string& data);
  void set_string(size_t collection_index, size_t index, std::string&& data);
  // The two-argument form sets the number of strings in one collection; the
  // one-argument form sets the number of collections.
  void resize_collection(size_t collection_index, size_t size);
  void resize_collection(size_t num_collections);

  Keyboard get_keyboard(size_t kb_index) const;
  void set_keyboard(size_t kb_index, const Keyboard& kb);
  void resize_keyboards(size_t num_keyboards);

  // These accessors are defined inline; they have no out-of-line definition
  // in TextArchive.cc.
  uint8_t get_keyboard_selector_width() const {
    return this->keyboard_selector_width;
  }
  void set_keyboard_selector_width(uint8_t width) {
    this->keyboard_selector_width = width;
  }

  // Returns (pr2_data, pr3_data)
  std::pair<std::string, std::string> serialize(bool big_endian) const;

private:
  template <bool IsBigEndian>
  void load_t(const std::string& pr2_data);
  template <bool IsBigEndian>
  std::pair<std::string, std::string> serialize_t() const;

  std::vector<std::vector<std::string>> collections;
  std::vector<std::unique_ptr<Keyboard>> keyboards;
  // Stored as the second byte of the keyboard index in the pr2 data.
  uint8_t keyboard_selector_width = 0;
};