Files
psopeeps-newserv/src/Text.cc
T
2023-04-16 15:44:12 -07:00

182 lines
5.0 KiB
C++

#include "Text.hh"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <phosg/Filesystem.hh>
#include <phosg/Strings.hh>
#include <vector>
using namespace std;
// Compares two null-terminated char16_t strings in the manner of strncmp,
// examining at most count code units.
//
// Returns -1 if s1 orders before s2, 1 if it orders after, and 0 if the
// strings are equal within the first count units (or both end before that).
// No unit at index count or beyond is ever read, so a count of zero always
// yields 0.
int char16ncmp(const char16_t* s1, const char16_t* s2, size_t count) {
  for (size_t x = 0; x < count; x++) {
    if (s1[x] < s2[x]) {
      return -1;
    }
    if (s1[x] > s2[x]) {
      return 1;
    }
    // The units are equal here; if they are the terminator, both strings
    // ended at the same position and nothing after it may be inspected.
    if (s1[x] == 0) {
      return 0;
    }
  }
  // All count units matched; anything past the limit is out of scope.
  return 0;
}
// Character conversion tables, each indexed by a 16-bit source code and
// holding the corresponding code in the other encoding. Both are filled in by
// load_sjis_tables(); the accessor functions below treat an empty vector as
// "not loaded yet".
static vector<char16_t> unicode_to_sjis_table_data;
static vector<char16_t> sjis_to_unicode_table_data;
// Reads system/sjis-table.ini and fills both conversion tables from it.
//
// Each usable line holds at least two tab-separated tokens: the Shift-JIS
// code followed by the Unicode code. Both are parsed by stoul with base 0, so
// the numeric prefix selects the radix. Lines with fewer than two tokens are
// skipped. Entries not mentioned in the file remain 0.
//
// The tables are assembled in local vectors and only published to the globals
// once the whole file has been parsed. If reading or parsing throws, the
// globals are left as they were (empty on first use), so the accessors will
// attempt the load again instead of silently serving zero-filled tables.
static void load_sjis_tables() {
  vector<char16_t> unicode_to_sjis(0x10000, 0);
  vector<char16_t> sjis_to_unicode(0x10000, 0);

  // TODO: this is inefficient; it makes multiple copies of the string
  auto file_contents = load_file("system/sjis-table.ini");
  auto lines = split(file_contents, '\n');
  for (const auto& line : lines) {
    auto tokens = split(line, '\t');
    if (tokens.size() < 2) {
      continue;
    }
    // Values are narrowed to 16 bits, which keeps every index within the
    // 0x10000-entry tables.
    char16_t sjis_char = static_cast<char16_t>(stoul(tokens[0], nullptr, 0));
    char16_t unicode_char = static_cast<char16_t>(stoul(tokens[1], nullptr, 0));
    unicode_to_sjis[unicode_char] = sjis_char;
    sjis_to_unicode[sjis_char] = unicode_char;
  }

  // Publish both tables only after parsing succeeded.
  unicode_to_sjis_table_data.swap(unicode_to_sjis);
  sjis_to_unicode_table_data.swap(sjis_to_unicode);
}
// Returns the Shift-JIS -> Unicode table, triggering a load of both tables
// first if this one has no entries yet.
static const vector<char16_t>& sjis_to_unicode_table() {
  const vector<char16_t>& table = sjis_to_unicode_table_data;
  if (table.size() == 0) {
    load_sjis_tables();
  }
  return table;
}
// Returns the Unicode -> Shift-JIS table, triggering a load of both tables
// first if this one has no entries yet.
static const vector<char16_t>& unicode_to_sjis_table() {
  const vector<char16_t>& table = unicode_to_sjis_table_data;
  if (table.size() == 0) {
    load_sjis_tables();
  }
  return table;
}
// Converts up to src_count UTF-16 code units from src into a Shift-JIS byte
// string, stopping early at the first null unit.
//
// Each unit is looked up in the Unicode -> Shift-JIS table. A table value
// with a nonzero high byte is emitted as two bytes (high byte first); any
// other nonzero value is emitted as a single byte.
//
// Throws runtime_error if a unit maps to 0 in the table.
std::string encode_sjis(const char16_t* src, size_t src_count) {
  const auto& table = unicode_to_sjis_table();

  string encoded;
  for (size_t index = 0; (index != src_count) && (src[index] != 0); index++) {
    const uint16_t unit = src[index];
    const uint16_t sjis = table[unit];
    if (sjis == 0) {
      throw runtime_error("untranslatable unicode character");
    }
    // Double-byte codes carry their lead byte in the upper half
    if (sjis & 0xFF00) {
      encoded.push_back((sjis >> 8) & 0xFF);
    }
    encoded.push_back(sjis & 0xFF);
  }
  return encoded;
}
// Converts UTF-16 code units from src into Shift-JIS bytes written to dest.
//
// dest/dest_count: output buffer and its size in bytes.
// src/src_count: input units; conversion stops at src_count units, at the
//   first null unit, or when the output space is used up.
// allow_skip_terminator: when false, one byte of dest is reserved for a null
//   terminator, which is always written. When true, the whole buffer may hold
//   character data and the terminator is written only if room remains.
//
// Returns the number of bytes of dest that were advanced over, including the
// terminator when one is written.
//
// Throws logic_error if dest_count is zero, and runtime_error if a unit maps
// to 0 in the Unicode -> Shift-JIS table.
size_t encode_sjis(
    char* dest,
    size_t dest_count,
    const char16_t* src,
    size_t src_count,
    bool allow_skip_terminator) {
  const auto& table = unicode_to_sjis_table();

  if (!dest_count) {
    throw logic_error("cannot encode into zero-length buffer");
  }

  // Number of bytes available for character data
  const size_t capacity = allow_skip_terminator ? dest_count : (dest_count - 1);

  size_t written = 0;
  for (size_t read = 0;
       (written != capacity) && (read != src_count) && (src[read] != 0);
       read++) {
    const uint16_t unit = src[read];
    const uint16_t sjis = table[unit];
    if (sjis == 0) {
      throw runtime_error("untranslatable unicode character");
    }

    if (!(sjis & 0xFF00)) {
      // Single-byte code
      dest[written++] = sjis & 0xFF;
      continue;
    }

    // Double-byte code: lead byte first
    dest[written++] = (sjis >> 8) & 0xFF;
    if (written == capacity) {
      // There is no room for the trail byte, so blank out the lead byte
      // rather than leave half a character; the loop condition then ends
      // the conversion.
      dest[written - 1] = 0;
    } else {
      dest[written++] = sjis & 0xFF;
    }
  }

  if (!allow_skip_terminator || (written != capacity)) {
    dest[written++] = 0;
  }
  return written;
}
// Converts up to src_count Shift-JIS bytes from src into a UTF-16 string,
// stopping early at the first null byte.
//
// A byte with its high bit set is treated as the lead byte of a two-byte
// code and is combined with the byte that follows it; any other byte is a
// single-byte code. The resulting code is looked up in the Shift-JIS ->
// Unicode table and the table value is appended as-is (codes absent from the
// table therefore produce a 0 unit rather than an error).
//
// Throws runtime_error if a lead byte is the last byte of the input or is
// followed by a null byte.
//
// NOTE(review): every byte >= 0x80 is treated as a lead byte, so the
// single-byte range 0xA1-0xDF of standard Shift-JIS (half-width katakana)
// cannot be decoded as single bytes - confirm this is intended.
std::u16string decode_sjis(const char* src, size_t src_count) {
  const auto& table = sjis_to_unicode_table();

  const char* src_end = src + src_count;
  u16string ret;
  while ((src != src_end) && *src) {
    // Bytes must be read as unsigned: where plain char is signed, a byte
    // >= 0x80 would otherwise sign-extend and corrupt the combined code.
    uint16_t src_char = static_cast<unsigned char>(*(src++));
    if (src_char & 0x80) {
      if (src == src_end) {
        throw runtime_error("incomplete extended character");
      }
      const uint16_t trail_byte = static_cast<unsigned char>(*(src++));
      if (trail_byte == 0) {
        throw runtime_error("incomplete extended character");
      }
      src_char = static_cast<uint16_t>((src_char << 8) | trail_byte);
    }
    ret.push_back(table[src_char]);
  }
  return ret;
}
// Converts Shift-JIS bytes from src into UTF-16 code units written to dest.
//
// dest/dest_count: output buffer and its size in char16_t units.
// src/src_count: input bytes; conversion stops at src_count bytes, at the
//   first null byte, or when the output space is used up.
// allow_skip_terminator: when false, one unit of dest is reserved for a null
//   terminator, which is always written. When true, the whole buffer may hold
//   character data and the terminator is written only if room remains.
//
// A byte with its high bit set is treated as the lead byte of a two-byte
// code and is combined with the byte that follows it. The resulting code is
// looked up in the Shift-JIS -> Unicode table and the table value is stored
// as-is (codes absent from the table therefore produce a 0 unit).
//
// Returns the number of units written to dest, including the terminator when
// one is written.
//
// Throws logic_error if dest_count is zero, and runtime_error if a lead byte
// is the last byte of the input or is followed by a null byte.
//
// NOTE(review): every byte >= 0x80 is treated as a lead byte, so the
// single-byte range 0xA1-0xDF of standard Shift-JIS (half-width katakana)
// cannot be decoded as single bytes - confirm this is intended.
size_t decode_sjis(
    char16_t* dest,
    size_t dest_count,
    const char* src,
    size_t src_count,
    bool allow_skip_terminator) {
  const auto& table = sjis_to_unicode_table();

  if (dest_count == 0) {
    throw logic_error("cannot decode into zero-length buffer");
  }

  const char16_t* dest_start = dest;
  const char* src_end = src + src_count;
  const char16_t* dest_end = dest + (allow_skip_terminator ? dest_count : (dest_count - 1));
  while ((dest != dest_end) && (src != src_end) && *src) {
    // Bytes must be read as unsigned: where plain char is signed, a byte
    // >= 0x80 would otherwise sign-extend and corrupt the combined code.
    uint16_t src_char = static_cast<unsigned char>(*(src++));
    if (src_char & 0x80) {
      if (src == src_end) {
        throw runtime_error("incomplete extended character");
      }
      const uint16_t trail_byte = static_cast<unsigned char>(*(src++));
      if (trail_byte == 0) {
        throw runtime_error("incomplete extended character");
      }
      src_char = static_cast<uint16_t>((src_char << 8) | trail_byte);
    }
    *(dest++) = table[src_char];
  }

  if (!allow_skip_terminator || (dest != dest_end)) {
    *(dest++) = 0;
  }
  return dest - dest_start;
}