219 lines
5.3 KiB
C++
219 lines
5.3 KiB
C++
#include "Text.hh"
|
|
|
|
#include <stdarg.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <vector>
|
|
#include <phosg/Filesystem.hh>
|
|
#include <phosg/Strings.hh>
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
int char16cmp(const char16_t* s1, const char16_t* s2, size_t count) {
|
|
return char_traits<char16_t>::compare(s1, s2, count);
|
|
}
|
|
|
|
char16_t* char16cpy(char16_t* dest, const char16_t* src, size_t count) {
|
|
return char_traits<char16_t>::copy(dest, src, count);
|
|
}
|
|
|
|
size_t char16len(const char16_t* s) {
|
|
return char_traits<char16_t>::length(s);
|
|
}
|
|
|
|
|
|
|
|
static vector<char16_t> unicode_to_sjis_table_data;
|
|
static vector<char16_t> sjis_to_unicode_table_data;
|
|
|
|
static void load_sjis_tables() {
|
|
unicode_to_sjis_table_data.resize(0x10000);
|
|
sjis_to_unicode_table_data.resize(0x10000);
|
|
|
|
// TODO: this is inefficient; it makes multiple copies of the string
|
|
auto file_contents = load_file("system/sjis-table.ini");
|
|
auto lines = split(file_contents, '\n');
|
|
for (auto line : lines) {
|
|
auto tokens = split(line, '\t');
|
|
if (tokens.size() < 2) {
|
|
continue;
|
|
}
|
|
char16_t sjis_char = stoul(tokens[0], NULL, 0);
|
|
char16_t unicode_char = stoul(tokens[1], NULL, 0);
|
|
|
|
unicode_to_sjis_table_data[unicode_char] = sjis_char;
|
|
sjis_to_unicode_table_data[sjis_char] = unicode_char;
|
|
}
|
|
}
|
|
|
|
static const vector<char16_t>& sjis_to_unicode_table() {
|
|
if (sjis_to_unicode_table_data.empty()) {
|
|
load_sjis_tables();
|
|
}
|
|
return sjis_to_unicode_table_data;
|
|
}
|
|
|
|
static const vector<char16_t>& unicode_to_sjis_table() {
|
|
if (unicode_to_sjis_table_data.empty()) {
|
|
load_sjis_tables();
|
|
}
|
|
return unicode_to_sjis_table_data;
|
|
}
|
|
|
|
void encode_sjis(char* dest, const char16_t* source, size_t max) {
|
|
const auto& table = unicode_to_sjis_table();
|
|
while (*source && (--max)) {
|
|
*(dest++) = table[*(source++)];
|
|
};
|
|
*dest = 0;
|
|
}
|
|
|
|
void decode_sjis(char16_t* dest, const char* source, size_t max) {
|
|
const auto& table = sjis_to_unicode_table();
|
|
while (*source && (--max)) {
|
|
char16_t src_char = *(source++);
|
|
if (src_char & 0x80) {
|
|
src_char = (src_char << 8) | *(source++);
|
|
if ((src_char & 0xFF) == 0) {
|
|
return;
|
|
}
|
|
}
|
|
*(dest++) = table[src_char];
|
|
};
|
|
*dest = 0;
|
|
}
|
|
|
|
std::string encode_sjis(const char16_t* source) {
|
|
const auto& table = unicode_to_sjis_table();
|
|
string ret;
|
|
while (*source) {
|
|
ret.push_back(table[*(source++)]);
|
|
};
|
|
return ret;
|
|
}
|
|
|
|
std::u16string decode_sjis(const char* source) {
|
|
const auto& table = sjis_to_unicode_table();
|
|
u16string ret;
|
|
while (*source) {
|
|
char16_t src_char = *(source++);
|
|
if (src_char & 0x80) {
|
|
src_char = (src_char << 8) | *(source++);
|
|
if ((src_char & 0xFF) == 0) {
|
|
return ret;
|
|
}
|
|
}
|
|
ret.push_back(table[src_char]);
|
|
};
|
|
return ret;
|
|
}
|
|
|
|
std::string encode_sjis(const std::u16string& source) {
|
|
const auto& table = unicode_to_sjis_table();
|
|
string ret;
|
|
for (char16_t ch : source) {
|
|
ret.push_back(table[ch]);
|
|
};
|
|
return ret;
|
|
}
|
|
|
|
std::u16string decode_sjis(const std::string& source) {
|
|
const auto& table = sjis_to_unicode_table();
|
|
u16string ret;
|
|
for (size_t x = 0; x < source.size(); x++) {
|
|
char16_t src_char = source[x];
|
|
if (src_char & 0x80) {
|
|
src_char = (src_char << 8) | source[++x];
|
|
if ((src_char & 0xFF) == 0) {
|
|
return ret;
|
|
}
|
|
}
|
|
ret.push_back(table[src_char]);
|
|
};
|
|
return ret;
|
|
}
|
|
|
|
|
|
|
|
void add_language_marker_inplace(char* a, char e, size_t dest_count) {
|
|
if ((a[0] == '\t') && (a[1] != 'C')) {
|
|
return;
|
|
}
|
|
|
|
size_t existing_count = strlen(a);
|
|
if (existing_count > dest_count - 3) {
|
|
existing_count = dest_count - 3;
|
|
}
|
|
memmove(&a[2], a, (existing_count + 1) * sizeof(char));
|
|
a[0] = '\t';
|
|
a[1] = e;
|
|
a[existing_count + 2] = 0;
|
|
}
|
|
|
|
void add_language_marker_inplace(char16_t* a, char16_t e, size_t dest_count) {
|
|
if ((a[0] == '\t') && (a[1] != 'C')) {
|
|
return;
|
|
}
|
|
|
|
size_t existing_count = char16len(a);
|
|
if (existing_count > dest_count - 3) {
|
|
existing_count = dest_count - 3;
|
|
}
|
|
memmove(&a[2], a, (existing_count + 1) * sizeof(char16_t));
|
|
a[0] = '\t';
|
|
a[1] = e;
|
|
a[existing_count + 2] = 0;
|
|
}
|
|
|
|
void remove_language_marker_inplace(char* a) {
|
|
if ((a[0] == '\t') && (a[1] != 'C')) {
|
|
strcpy(a, &a[2]);
|
|
}
|
|
}
|
|
|
|
void remove_language_marker_inplace(char16_t* a) {
|
|
if ((a[0] == '\t') && (a[1] != 'C')) {
|
|
char16cpy(a, &a[2], char16len(a) - 2);
|
|
}
|
|
}
|
|
|
|
std::string add_language_marker(const std::string& s, char marker) {
|
|
if ((s.size() >= 2) && (s[0] == '\t') && (s[1] != 'C')) {
|
|
return s;
|
|
}
|
|
|
|
string ret;
|
|
ret.push_back('\t');
|
|
ret.push_back(marker);
|
|
return ret + s;
|
|
}
|
|
|
|
std::u16string add_language_marker(const std::u16string& s, char16_t marker) {
|
|
if ((s.size() >= 2) && (s[0] == L'\t') && (s[1] != L'C')) {
|
|
return s;
|
|
}
|
|
|
|
u16string ret;
|
|
ret.push_back(L'\t');
|
|
ret.push_back(marker);
|
|
return ret + s;
|
|
}
|
|
|
|
std::string remove_language_marker(const std::string& s) {
|
|
if ((s.size() < 2) || (s[0] != '\t') || (s[1] == 'C')) {
|
|
return s;
|
|
}
|
|
return s.substr(2);
|
|
}
|
|
|
|
std::u16string remove_language_marker(const std::u16string& s) {
|
|
if ((s.size() < 2) || (s[0] != L'\t') || (s[1] == L'C')) {
|
|
return s;
|
|
}
|
|
return s.substr(2);
|
|
}
|