diff --git a/src/Main.cc b/src/Main.cc index 520eb684..49b6ed87 100644 --- a/src/Main.cc +++ b/src/Main.cc @@ -424,6 +424,51 @@ Action a_disassemble_bc0( bc0_disassemble(stdout, read_input_data(args)); }); +/* Self-test action: for each seed supplied by the parallel driver, compares PSOV2Encryption(seed).next() against PSOV2Encryption::single(seed), reports mismatches on stderr, and accumulates the time spent in each path. */ Action a_psov2_encrypt_single_test( + "psov2-encrypt-single-test", nullptr, + [](phosg::Arguments& args) { + size_t num_threads = args.get("threads", std::thread::hardware_concurrency()); + + /* One timing slot per worker, indexed by thread_index. NOTE(review): num_threads can be 0 (a threads argument of 0, or hardware_concurrency() reporting 0), leaving these vectors empty -- confirm parallel_range_blocks never passes thread_index >= num_threads. */ vector crypt_times(num_threads, 0); + vector single_times(num_threads, 0); + uint64_t num_mismatches = 0; + mutex output_lock; + auto thread_fn = [&](uint64_t seed, size_t thread_index) -> bool { + /* Time the construct-and-next path, then the PSOV2Encryption::single path, for the same seed. */ uint64_t start_t = phosg::now(); + uint32_t crypt_v = PSOV2Encryption(seed).next(); + uint64_t mid_t = phosg::now(); + uint32_t single_v = PSOV2Encryption::single(seed); + uint64_t end_t = phosg::now(); + + crypt_times[thread_index] += (mid_t - start_t); + single_times[thread_index] += (end_t - mid_t); + + if (crypt_v != single_v) { + /* output_lock guards both the stderr write and the shared mismatch counter. */ lock_guard g(output_lock); + phosg::fwrite_fmt(stderr, "Mismatched result on seed {:08X}: crypt={:08X}, single={:08X}\n", + seed, crypt_v, single_v); + num_mismatches++; + } + /* Always false, even after a mismatch. NOTE(review): presumably this tells parallel_range_blocks to keep scanning rather than stop at this seed -- confirm. */ return false; + }; + auto progress_fn = [&](uint64_t, uint64_t, uint64_t current_value, uint64_t) -> void { + /* Sums the per-thread totals before output_lock is taken. NOTE(review): workers may still be updating these slots while this runs -- confirm that is acceptable for progress output. */ uint64_t crypt_time = 0, single_time = 0; + for (uint64_t t : crypt_times) { + crypt_time += t; + } + for (uint64_t t : single_times) { + single_time += t; + } + lock_guard g(output_lock); + phosg::log_info_f("... 
{:08X} => {} mismatches, {} crypt, {} single ({:g}x)", + current_value, num_mismatches, phosg::format_duration(crypt_time), phosg::format_duration(single_time), + static_cast(crypt_time) / single_time); + }; + phosg::parallel_range_blocks(thread_fn, 0, 0x100000000, 0x1000, num_threads, progress_fn); + + progress_fn(0, 0, 0xFFFFFFFF, 0); + }); + static void a_encrypt_decrypt_fn(phosg::Arguments& args) { bool is_decrypt = (args.get(0) == "decrypt-data"); string seed = args.get("seed"); diff --git a/src/Map.cc b/src/Map.cc index ccff1f79..715bd294 100644 --- a/src/Map.cc +++ b/src/Map.cc @@ -6163,10 +6163,7 @@ void MapState::index_super_map(const FloorConfig& fc, shared_ptr((crypt.next() >> 16) & 0xFFFF) / 65536.0f); + det = (static_cast((PSOV2Encryption::single(seed) >> 16) & 0xFFFF) / 65536.0f); det_cache.emplace(seed, det); } diff --git a/src/PSOEncryption.cc b/src/PSOEncryption.cc index f2b71d3f..14d76530 100644 --- a/src/PSOEncryption.cc +++ b/src/PSOEncryption.cc @@ -103,6 +103,32 @@ PSOEncryption::Type PSOV2Encryption::type() const { return Type::V2; } +uint32_t PSOV2Encryption::single(uint32_t seed) { + // This function is an optimized implementation of `PSOV2Encryption(seed).next()`; that is, it allows the caller to + // get a single value from a PSOV2Encryption instance without actually constructing it. This method is 22x-100x + // faster (depending on build configuration) than constructing a PSOV2Encryption and calling .next() on it once. 
+ + // If fib(n) is the nth Fibonacci number (with fib(0) = fib(1) = 1, and fib(-1) = 0, fib(-2) = 1 going backward), then + // a closed form for the integer sequence generated by the first loop in PSOV2Encryption::PSOV2Encryption is: + // a(n) = (-1)^n * (fib(n) - fib(n-1) * seed) + // The recurrence used in that loop is a(n) = a(n-2) - a(n-1), which supplies the inductive step of the proof: + // a(n) = a(n-2) - a(n-1) + // a(n) = (-1)^(n-2) * (fib(n-2) - fib(n-3) * seed) - ((-1)^(n-1) * (fib(n-1) - fib(n-2) * seed)) + // a(n) = (-1)^(n-2) * (fib(n-2) - fib(n-3) * seed) + ((-1)^(n-2) * (fib(n-1) - fib(n-2) * seed)) + // a(n) = (-1)^(n-2) * (fib(n-2) - fib(n-3) * seed + fib(n-1) - fib(n-2) * seed) + // a(n) = (-1)^(n-2) * (fib(n-2) + fib(n-1) - (fib(n-3) + fib(n-2)) * seed) + // a(n) = (-1)^(n-2) * (fib(n) - fib(n-1) * seed) + // a(n) = (-1)^(n) * (fib(n) - fib(n-1) * seed) + // The base cases are a(-1) = seed (not generated by the loop, but used as its initial input, hence the negative + // index) and a(0) = 1, both of which match the closed form. Using it and these two values, we can eliminate all + // arithmetic done in the normal constructor that isn't necessary to produce the first result value. To do so, we + // trace backward from the result value, through the 5 update_stream calls and the initialization loop, to see which + // indexes within the stream are actually needed, and the expression to generate each one. We can then simplify the + // overall expression and truncate constants to 32 bits (the expression is linear in seed, so higher bits cannot affect + // the final 32-bit result). 
The full expression simplifies to: + return 0xC6DCAB76 * seed - 0x9E1977BA; +} + PSOV3Encryption::PSOV3Encryption(uint32_t seed) : PSOLFGEncryption(seed, STREAM_LENGTH, STREAM_LENGTH) { uint32_t x, y, basekey, source1, source2, source3; basekey = 0; diff --git a/src/PSOEncryption.hh b/src/PSOEncryption.hh index ecbdf0db..05b9a939 100644 --- a/src/PSOEncryption.hh +++ b/src/PSOEncryption.hh @@ -132,6 +132,9 @@ public: explicit PSOV2Encryption(uint32_t seed); virtual Type type() const; + // Optimized implementation of `PSOV2Encryption(seed).next()` for when the caller needs only the first value + static uint32_t single(uint32_t seed); + protected: virtual void update_stream();