use safe packed string types
This commit is contained in:
+75
-140
@@ -30,20 +30,14 @@ int char16ncmp(const char16_t* s1, const char16_t* s2, size_t count) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns the number of char16_t units in s before the first zero unit.
// s must point to a zero-terminated sequence.
size_t char16len(const char16_t* s) {
  const char16_t* cursor = s;
  while (*cursor != 0) {
    ++cursor;
  }
  return static_cast<size_t>(cursor - s);
}
|
||||
|
||||
|
||||
|
||||
static vector<char16_t> unicode_to_sjis_table_data;
|
||||
static vector<char16_t> sjis_to_unicode_table_data;
|
||||
|
||||
static void load_sjis_tables() {
|
||||
unicode_to_sjis_table_data.resize(0x10000);
|
||||
sjis_to_unicode_table_data.resize(0x10000);
|
||||
unicode_to_sjis_table_data.resize(0x10000, 0);
|
||||
sjis_to_unicode_table_data.resize(0x10000, 0);
|
||||
|
||||
// TODO: this is inefficient; it makes multiple copies of the string
|
||||
auto file_contents = load_file("system/sjis-table.ini");
|
||||
@@ -75,166 +69,107 @@ static const vector<char16_t>& unicode_to_sjis_table() {
|
||||
return unicode_to_sjis_table_data;
|
||||
}
|
||||
|
||||
// TODO: It looks like these functions are probably wrong. Specifically, we
|
||||
// don't write the high byte when encoding non-ASCII chars, do we?
|
||||
|
||||
void encode_sjis(char* dest, const char16_t* source, size_t max) {
|
||||
const auto& table = unicode_to_sjis_table();
|
||||
while (*source && (--max)) {
|
||||
*(dest++) = table[*(source++)];
|
||||
};
|
||||
*dest = 0;
|
||||
}
|
||||
|
||||
void decode_sjis(char16_t* dest, const char* source, size_t max) {
|
||||
const auto& table = sjis_to_unicode_table();
|
||||
while (*source && (--max)) {
|
||||
char16_t src_char = *(source++);
|
||||
if (src_char & 0x80) {
|
||||
src_char = (src_char << 8) | *(source++);
|
||||
if ((src_char & 0xFF) == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
*(dest++) = table[src_char];
|
||||
};
|
||||
*dest = 0;
|
||||
}
|
||||
|
||||
std::string encode_sjis(const char16_t* src, size_t src_count) {
|
||||
const auto& table = unicode_to_sjis_table();
|
||||
|
||||
const char16_t* src_end = src + src_count;
|
||||
string ret;
|
||||
for (; *src && (src_count > 0); src_count--) {
|
||||
ret.push_back(table[*(src++)]);
|
||||
while ((src != src_end) && *src) {
|
||||
uint16_t ch = *(src++);
|
||||
uint16_t translated_c = table[ch];
|
||||
if (translated_c == 0) {
|
||||
throw runtime_error("untranslatable unicode character");
|
||||
} else if (translated_c & 0xFF00) {
|
||||
ret.push_back((translated_c >> 8) & 0xFF);
|
||||
ret.push_back(translated_c & 0xFF);
|
||||
} else {
|
||||
ret.push_back(translated_c & 0xFF);
|
||||
}
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
void encode_sjis(
|
||||
char* dest,
|
||||
size_t dest_count,
|
||||
const char16_t* src,
|
||||
size_t src_count) {
|
||||
const auto& table = unicode_to_sjis_table();
|
||||
|
||||
if (dest_count == 0) {
|
||||
throw logic_error("cannot encode into zero-length buffer");
|
||||
}
|
||||
|
||||
const char16_t* src_end = src + src_count;
|
||||
const char* dest_end = dest + (dest_count - 1);
|
||||
while ((dest != dest_end) && (src != src_end) && *src) {
|
||||
uint16_t ch = *(src++);
|
||||
uint16_t translated_c = table[ch];
|
||||
if (translated_c == 0) {
|
||||
throw runtime_error("untranslatable unicode character");
|
||||
} else if (translated_c & 0xFF00) {
|
||||
*(dest++) = (translated_c >> 8) & 0xFF;
|
||||
// If the second byte of this character would cause the null to overrun
|
||||
// the buffer, erase the first byte instead and return early
|
||||
if (dest == dest_end) {
|
||||
*(dest - 1) = 0;
|
||||
} else {
|
||||
*(dest++) = translated_c & 0xFF;
|
||||
}
|
||||
} else {
|
||||
*(dest++) = translated_c & 0xFF;
|
||||
}
|
||||
}
|
||||
*dest = 0;
|
||||
}
|
||||
|
||||
std::u16string decode_sjis(const char* src, size_t src_count) {
|
||||
const auto& table = sjis_to_unicode_table();
|
||||
|
||||
const char* src_end = src + src_count;
|
||||
u16string ret;
|
||||
while (*src && (src_count > 0)) {
|
||||
char16_t src_char = *(src++);
|
||||
src_count--;
|
||||
while ((src != src_end) && *src) {
|
||||
uint16_t src_char = *(src++);
|
||||
if (src_char & 0x80) {
|
||||
if (src_count == 0) {
|
||||
return ret;
|
||||
if (src == src_end) {
|
||||
throw runtime_error("incomplete extended character");
|
||||
}
|
||||
src_char = (src_char << 8) | *(src++);
|
||||
if ((src_char & 0xFF) == 0) {
|
||||
return ret;
|
||||
throw runtime_error("incomplete extended character");
|
||||
}
|
||||
src_count--;
|
||||
}
|
||||
ret.push_back(table[src_char]);
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string encode_sjis(const std::u16string& source) {
|
||||
const auto& table = unicode_to_sjis_table();
|
||||
string ret;
|
||||
for (char16_t ch : source) {
|
||||
ret.push_back(table[ch]);
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::u16string decode_sjis(const std::string& source) {
|
||||
void decode_sjis(
|
||||
char16_t* dest,
|
||||
size_t dest_count,
|
||||
const char* src,
|
||||
size_t src_count) {
|
||||
const auto& table = sjis_to_unicode_table();
|
||||
u16string ret;
|
||||
for (size_t x = 0; x < source.size();) {
|
||||
char16_t src_char = source[x++];
|
||||
|
||||
if (dest_count == 0) {
|
||||
throw logic_error("cannot decode into zero-length buffer");
|
||||
}
|
||||
|
||||
const char* src_end = src + src_count;
|
||||
const char16_t* dest_end = dest + (dest_count - 1);
|
||||
while ((dest != dest_end) && (src != src_end) && *src) {
|
||||
uint16_t src_char = *(src++);
|
||||
if (src_char & 0x80) {
|
||||
if (x == source.size()) {
|
||||
return ret;
|
||||
if (src == src_end) {
|
||||
throw runtime_error("incomplete extended character");
|
||||
}
|
||||
src_char = (src_char << 8) | source[x++];
|
||||
src_char = (src_char << 8) | *(src++);
|
||||
if ((src_char & 0xFF) == 0) {
|
||||
return ret;
|
||||
throw runtime_error("incomplete extended character");
|
||||
}
|
||||
}
|
||||
ret.push_back(table[src_char]);
|
||||
*(dest++) = table[src_char];
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Prepends the two-character marker "\t" + e to the zero-terminated string in
// a, which lives in a buffer of dest_count chars. If the result would not fit,
// the tail of the existing text is dropped so that the marker, the text and
// the terminator all fit within dest_count.
//
// Does nothing if a already starts with '\t' followed by anything other than
// 'C'.
//
// Throws logic_error if dest_count < 3, since the marker plus terminator
// cannot fit.
void add_language_marker_inplace(char* a, char e, size_t dest_count) {
  if ((a[0] == '\t') && (a[1] != 'C')) {
    return;
  }

  // dest_count - 3 below would wrap around for tiny buffers, disabling the
  // truncation and overrunning the buffer.
  if (dest_count < 3) {
    throw std::logic_error("buffer too small for language marker");
  }

  const size_t max_text_count = dest_count - 3;
  size_t existing_count = strlen(a);
  if (existing_count > max_text_count) {
    existing_count = max_text_count;
  }

  // The ranges overlap, so memmove is required.
  memmove(&a[2], a, existing_count * sizeof(char));
  a[0] = '\t';
  a[1] = e;
  a[existing_count + 2] = 0;
}
|
||||
|
||||
void add_language_marker_inplace(char16_t* a, char16_t e, size_t dest_count) {
|
||||
if ((a[0] == '\t') && (a[1] != 'C')) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t existing_count = char16len(a);
|
||||
if (existing_count > dest_count - 3) {
|
||||
existing_count = dest_count - 3;
|
||||
}
|
||||
memmove(&a[2], a, (existing_count + 1) * sizeof(char16_t));
|
||||
a[0] = '\t';
|
||||
a[1] = e;
|
||||
a[existing_count + 2] = 0;
|
||||
}
|
||||
|
||||
// Strips a leading two-character language marker ('\t' followed by any
// character other than 'C') from the zero-terminated string in a, shifting
// the rest of the text to the front. Strings without such a marker, including
// a lone "\t", are left unchanged.
void remove_language_marker_inplace(char* a) {
  // a[1] == 0 means the string is just "\t": there is no marker character,
  // and &a[2] would lie past the terminator.
  if ((a[0] != '\t') || (a[1] == '\0') || (a[1] == 'C')) {
    return;
  }
  // Source and destination overlap, which strcpy does not permit; memmove
  // does. The + 1 carries the terminator along.
  memmove(a, &a[2], (strlen(&a[2]) + 1) * sizeof(char));
}
|
||||
|
||||
void remove_language_marker_inplace(char16_t* a) {
|
||||
if ((a[0] == '\t') && (a[1] != 'C')) {
|
||||
strcpy_z(a, &a[2], char16len(a) - 2);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns s prefixed with the two-character marker "\t" + marker. If s already
// starts with '\t' followed by anything other than 'C', it is returned as-is.
std::string add_language_marker(const std::string& s, char marker) {
  const bool already_marked =
      (s.size() >= 2) && (s[0] == '\t') && (s[1] != 'C');
  if (already_marked) {
    return s;
  }

  std::string marked;
  marked.reserve(s.size() + 2);
  marked += '\t';
  marked += marker;
  marked += s;
  return marked;
}
|
||||
|
||||
// Returns s prefixed with the two-unit marker u"\t" + marker. If s already
// starts with '\t' followed by anything other than 'C', it is returned as-is.
std::u16string add_language_marker(const std::u16string& s, char16_t marker) {
  const bool already_marked =
      (s.size() >= 2) && (s[0] == u'\t') && (s[1] != u'C');
  if (already_marked) {
    return s;
  }

  std::u16string marked;
  marked.reserve(s.size() + 2);
  marked += u'\t';
  marked += marker;
  marked += s;
  return marked;
}
|
||||
|
||||
// Returns s without its leading two-character language marker ('\t' followed
// by any character other than 'C'). Strings without such a marker are
// returned unchanged.
std::string remove_language_marker(const std::string& s) {
  const bool has_marker = (s.size() >= 2) && (s[0] == '\t') && (s[1] != 'C');
  return has_marker ? s.substr(2) : s;
}
|
||||
|
||||
// Returns s without its leading two-unit language marker ('\t' followed by
// any unit other than 'C'). Strings without such a marker are returned
// unchanged.
std::u16string remove_language_marker(const std::u16string& s) {
  const bool has_marker =
      (s.size() >= 2) && (s[0] == u'\t') && (s[1] != u'C');
  return has_marker ? s.substr(2) : s;
}
|
||||
|
||||
Reference in New Issue
Block a user