diff --git a/src/Main.cc b/src/Main.cc
index a9c0adbe..ea1eafc8 100644
--- a/src/Main.cc
+++ b/src/Main.cc
@@ -249,6 +249,10 @@ The actions are:\n\
   encode-text-archive [INPUT-FILENAME [OUTPUT-FILENAME]]\n\
     Decode a text archive (e.g. TextEnglish.pr2) to JSON for easy editing, or\n\
     encode a JSON file to a text archive.\n\
+  decode-unicode-text-set [INPUT-FILENAME [OUTPUT-FILENAME]]\n\
+  encode-unicode-text-set [INPUT-FILENAME [OUTPUT-FILENAME]]\n\
+    Decode a Unicode text set (e.g. unitxt_e.prs) to JSON for easy editing, or\n\
+    encode a JSON file to a Unicode text set.\n\
   format-rare-item-set [--json] [INPUT-FILENAME]\n\
     Print the contents of a rare item table in a human-readable format. If\n\
     --json is given, the input is parsed as a JSON rare item set (see\n\
@@ -1504,10 +1508,13 @@ int main(int argc, char** argv) {
       break;
     }
     case Behavior::DECODE_UNICODE_TEXT_SET: {
-      auto strings = parse_unicode_text_set(read_input_data());
+      auto collections = parse_unicode_text_set(read_input_data());
       JSON j = JSON::list();
-      for (const string& s : strings) {
-        j.emplace_back(s);
+      for (const auto& collection : collections) {
+        JSON& coll_j = j.emplace_back(JSON::list());
+        for (const auto& s : collection) {
+          coll_j.emplace_back(s);
+        }
       }
       string out_data = j.serialize(JSON::SerializeOption::FORMAT);
       write_output_data(out_data.data(), out_data.size());
@@ -1515,11 +1522,14 @@ int main(int argc, char** argv) {
     }
     case Behavior::ENCODE_UNICODE_TEXT_SET: {
       auto json = JSON::parse(read_input_data());
-      vector<string> strings;
-      for (const auto& s_json : json.as_list()) {
-        strings.emplace_back(s_json->as_string());
+      vector<vector<string>> collections;
+      for (const auto& coll_json : json.as_list()) {
+        auto& collection = collections.emplace_back();
+        for (const auto& s_json : coll_json->as_list()) {
+          collection.emplace_back(std::move(s_json->as_string()));
+        }
       }
-      string encoded = serialize_unicode_text_set(strings);
+      string encoded = serialize_unicode_text_set(collections);
       write_output_data(encoded.data(), encoded.size());
       break;
     }
diff --git a/src/UnicodeTextSet.cc b/src/UnicodeTextSet.cc
index 27283815..3972dc4f 100644
--- a/src/UnicodeTextSet.cc
+++ b/src/UnicodeTextSet.cc
@@ -8,35 +8,85 @@
 
 using namespace std;
 
-vector<string> parse_unicode_text_set(const string& prs_data) {
+// Parses a PRS-compressed Unicode text set (e.g. unitxt_e.prs) and returns its
+// strings as UTF-8, grouped by collection. As read here, the decompressed data
+// is laid out as:
+//   u32 num_collections
+//   u32 num_strings[num_collections]
+//   u32 string_offset[total number of strings]
+//   null-terminated strings of 16-bit little-endian code units
+// Each offset is passed to StringReader::sub; a string with no terminator is
+// presumably rejected by StringReader's bounds checks (TODO: confirm).
+vector<vector<string>> parse_unicode_text_set(const string& prs_data) {
   string data = prs_decompress(prs_data);
   StringReader r(data);
-  r.skip(4);
-  uint32_t count = r.get_u32l();
 
-  vector<string> ret;
-  while (ret.size() < count) {
-    u16string s(&r.pget<char16_t>(r.get_u32l()));
-    ret.emplace_back(tt_utf16_to_utf8(s.data(), s.size() * 2));
+  uint32_t num_collections = r.get_u32l();
+  vector<uint32_t> collection_sizes;
+  while (collection_sizes.size() < num_collections) {
+    collection_sizes.emplace_back(r.get_u32l());
+  }
+
+  vector<vector<string>> ret;
+  ret.reserve(collection_sizes.size());
+  for (uint32_t num_strings : collection_sizes) {
+    auto& strings = ret.emplace_back();
+    strings.reserve(num_strings);
+    while (strings.size() < num_strings) {
+      // The next offset table entry locates the string; copy code units up to
+      // (but not including) the null terminator, then convert to UTF-8
+      StringReader sub_r = r.sub(r.get_u32l());
+      StringWriter w;
+      for (uint16_t ch = sub_r.get_u16l(); ch != 0; ch = sub_r.get_u16l()) {
+        w.put_u16l(ch);
+      }
+      strings.emplace_back(tt_utf16_to_utf8(w.str()));
+    }
   }
   return ret;
 }
 
-string serialize_unicode_text_set(const vector<string>& strings) {
-  StringWriter w;
-  w.put_u32l(strings.size());
-  size_t string_offset = (strings.size() * 4) + 4; // Header size
-  for (const auto& s : strings) {
-    w.put_u32l(string_offset);
-    string_offset = (((s.size() + 1) << 1) + 3) & (~3);
+// Serializes collections of UTF-8 strings into the layout that
+// parse_unicode_text_set reads: the collection count, each collection's string
+// count, one offset per string (in collection order), then the string data.
+// Each distinct string is stored once, converted by tt_utf8_to_utf16, null-
+// terminated and padded to a 4-byte boundary; repeated strings share one
+// offset. The result is not PRS-compressed here.
+string serialize_unicode_text_set(const vector<vector<string>>& collections) {
+  StringWriter header_w;
+  StringWriter data_w;
+
+  size_t total_num_strings = 0;
+  header_w.put_u32l(collections.size());
+  for (const auto& collection : collections) {
+    header_w.put_u32l(collection.size());
+    total_num_strings += collection.size();
   }
-  for (const auto& s : strings) {
-    string s_utf16 = tt_utf8_to_utf16(s);
-    w.write(s_utf16.data(), s_utf16.size());
-    w.put_u16(0);
-    while (w.size() & 3) {
-      w.put_u8(0);
-    }
-  }
-  return std::move(w.str());
+
+  // Maps each already-encoded string to its offset in the output
+  unordered_map<string, uint32_t> encoded;
+
+  // String data begins immediately after the counts and the offset table
+  size_t data_base_offset = (total_num_strings * 4) + header_w.size();
+  for (const auto& collection : collections) {
+    for (const auto& s : collection) {
+      auto encoded_it = encoded.find(s);
+      if (encoded_it == encoded.end()) {
+        uint32_t offset = data_base_offset + data_w.size();
+        encoded_it = encoded.emplace(s, offset).first;
+        string s_utf16 = tt_utf8_to_utf16(s);
+        data_w.write(s_utf16.data(), s_utf16.size());
+        data_w.put_u16(0);
+        while (data_w.size() & 3) {
+          data_w.put_u8(0);
+        }
+      }
+      // Every string needs an offset table entry, including the first
+      // occurrence of each distinct string
+      header_w.put_u32l(encoded_it->second);
+    }
+  }
+
+  header_w.write(data_w.str());
+  return std::move(header_w.str());
 }
diff --git a/src/UnicodeTextSet.hh b/src/UnicodeTextSet.hh
index abc77134..99d4e0e0 100644
--- a/src/UnicodeTextSet.hh
+++ b/src/UnicodeTextSet.hh
@@ -3,5 +3,5 @@
 #include <string>
 #include <vector>
 
-std::vector<std::string> parse_unicode_text_set(const std::string& prs_data);
-std::string serialize_unicode_text_set(const std::vector<std::string>& strings);
+std::vector<std::vector<std::string>> parse_unicode_text_set(const std::string& prs_data);
+std::string serialize_unicode_text_set(const std::vector<std::vector<std::string>>& collections);