add B2 patch support on PSO Plus

2024-06-22 21:38:24 -07:00
parent 998664d2fb
commit 862b3d27da
21 changed files with 465 additions and 96 deletions
@@ -0,0 +1,59 @@
+.version GC_V3
+.quest_num 88500
+.language 1
+.episode Episode1
+.name "GC v1.2 USA patch enabler"
+.short_desc ""
+.long_desc ""
+
+start:
+  // Overview: install temporary quest opcode handlers, copy the embedded
+  // native code (the bytes between the code and code_end labels) to
+  // 0x80004000, flush it, run it, then undo the handler-table changes.
+  //
+  // Create quest opcode handlers for F9FE to call flush_code and F9FF to call
+  // the copied code. Fortunately, quest_call_l leaves the byteswapped value of
+  // the opcode argument in r4 (the CPU register, not the script register r4),
+  // so as long as the address ends with 00, it will be valid as the size
+  // argument to flush_code. We'll end up flushing many more bytes than needed,
+  // but this isn't a problem.
+  leti               r3, 0x80004000  // dest addr
+  write4             0x804C81C8, 0x801F2A14  // quest_call_l
+  write4             0x804C81CC, 0x8000C274  // flush_code
+  write4             0x804C81D0, r3  // written code ptr
+
+  // Convert the code and code_end labels into absolute addresses by adding
+  // the value read from quest_script_base. NOTE(review): leto presumably
+  // yields the address of the label's offset entry, which read4 then
+  // dereferences - confirm against the quest assembler documentation.
+  read4              r0, 0x805D5E70  // quest_script_base
+  leto               r4, code
+  read4              r4, r4
+  add                r4, r0  // r4 = address of code label
+  leto               r5, code_end
+  read4              r5, r5
+  add                r5, r0  // r5 = address of code_end label
+
+  // Copy all data from [code, code_end) to 80004000
+  // Loop state: r3 = write cursor, r4 = read cursor, r5 = end of source.
+  // r0 is reused as the one-byte transfer register.
+copy_byte:
+  jmp_eq             r4, r5, copy_done
+  read1              r0, r4
+  write1             r3, r0
+  addi               r3, 1
+  addi               r4, 1
+  jmp                copy_byte
+
+copy_done:
+  // Call flush_code(0x80004000, 0x00400080) to commit it to memory
+  // (raw bytes: opcode F9FE followed by the argument 00 40 00 80)
+  .data              F9FE00400080
+  // Call the copied native code
+  .data              F9FF
+
+  // This script runs on the first frame during the quest loading procedure,
+  // but this procedure is started from the lobby overview, not from a game!
+  // To make the result of loading a quest sane, we need to set some extra
+  // state that will take effect when loading is done.
+  ba_initial_floor   17  // Make player spawn in lobby (for one frame)
+  write2             0x805D5CE8, 1  // Leave "game" immediately (sends 98)
+
+  // Clean up quest handler table
+  // (zeroes the same three entries that were written at the top of start)
+  write4             0x804C81C8, 0
+  write4             0x804C81CC, 0
+  write4             0x804C81D0, 0
+
+  ret
+
+  // Native payload assembled from q88500-gc.s; the copy loop above uses the
+  // code and code_end labels as the bounds of the region to copy.
+code:
+  .include_native    q88500-gc.s
+code_end:
@@ -0,0 +1,50 @@
+.version GC_V3
+.quest_num 88500
+.language 0
+.episode Episode1
+.name "GC v1.5 JP patch enabler"
+.short_desc ""
+.long_desc ""
+
+start:
+  // This script is identical to q88500-gc-e.bin.txt, except the addresses are
+  // changed to be suitable for JP v1.5.
+  // Overview: install temporary quest opcode handlers, copy the embedded
+  // native code (the bytes between the code and code_end labels) to
+  // 0x80004000, flush it, run it, then undo the handler-table changes.
+  leti               r3, 0x80004000  // dest addr for the copied native code
+
+  // Temporary handler-table entries. The roles below follow the order used
+  // in the US script; 0x8000C274 is the same flush_code address used there.
+  write4             0x804C88F0, 0x801F29C0  // JP counterpart of quest_call_l
+  write4             0x804C88F4, 0x8000C274  // flush_code
+  write4             0x804C88F8, r3  // written code ptr
+
+  // Convert the code and code_end labels into absolute addresses by adding
+  // the base value read from 0x805D6560 (JP counterpart of quest_script_base).
+  read4              r0, 0x805D6560
+  leto               r4, code
+  read4              r4, r4
+  add                r4, r0  // r4 = address of code label
+  leto               r5, code_end
+  read4              r5, r5
+  add                r5, r0  // r5 = address of code_end label
+
+  // Copy all data from [code, code_end) to 80004000.
+  // Loop state: r3 = write cursor, r4 = read cursor, r5 = end of source.
+  // r0 is reused as the one-byte transfer register.
+copy_byte:
+  jmp_eq             r4, r5, copy_done
+  read1              r0, r4
+  write1             r3, r0
+  addi               r3, 1
+  addi               r4, 1
+  jmp                copy_byte
+
+copy_done:
+  // Raw opcode bytes: F9FE with argument 00 40 00 80 (the flush step in the
+  // US script), then F9FF (runs the copied native code).
+  .data              F9FE00400080
+  .data              F9FF
+
+  // Same post-load state fixups as the US script, with the JP address for
+  // the 16-bit flag that is set to 1.
+  ba_initial_floor   17
+  write2             0x805D63D8, 1
+
+  // Clean up quest handler table
+  // (zeroes the same three entries that were written at the top of start)
+  write4             0x804C88F0, 0
+  write4             0x804C88F4, 0
+  write4             0x804C88F8, 0
+
+  ret
+
+  // Native payload assembled from q88500-gc.s; the copy loop above uses the
+  // code and code_end labels as the bounds of the region to copy.
+code:
+  .include_native    q88500-gc.s
+code_end:
@@ -0,0 +1,189 @@
+// This function copies an inline handler for the B2 command (function call)
+// to an unused area of memory, and inserts it into the game's command handler
+// table, thus making the B2 command fully functional as it is on most other
+// versions of the game.
+
+// We could do the code copy and callsite modification directly in the quest
+// script, but that would restrict us to only using addresses that end in 00.
+// Furthermore, doing it this way provides an example of how to embed native
+// code in a quest script and run it from within the script.
+
+start:
+  # Entry point of the native payload. The caller's return address is parked
+  # in r11; get_handle_B2_ptr moves it back into LR before its final blr, so
+  # control never comes back to the instruction after this bl.
+  # The bl is used only to capture an address: it leaves LR pointing at
+  # handle_B2 (the next instruction), which get_handle_B2_ptr reads with mflr.
+  mflr    r11
+  bl      get_handle_B2_ptr
+
+handle_B2:
+  # Handler for the B2 (function call) command. Optionally relocates and runs
+  # a code section embedded in the command, computes a CRC over a requested
+  # memory range, and replies with a 0x0C-byte B3 command.
+  #
+  # Arguments:
+  # r3 = TProtocol* proto (we use this to call the send function)
+  # r4 = void* data
+  # Returns: void
+  #
+  # Command layout as read below (header fields are byteswapped on load):
+  #   data[0x01] = flag byte, echoed back in the B3 header
+  #   data[0x04] = code section size
+  #   data[0x08] = checksum start address
+  #   data[0x0C] = checksum size
+  #   data[0x10] = start of code section (only used if size is nonzero)
+  # Code section footer (last 0x20 bytes of the section, read without
+  # byteswapping):
+  #   footer[0x00] = offset of relocations list from code base
+  #   footer[0x04] = number of relocations
+  #   footer[0x10] = offset (from code base) of the word holding the
+  #                  entrypoint address
+  # NOTE(review): a nonzero code size is not checked against the 0x20-byte
+  # footer size or the command length - presumably the sender is trusted.
+
+  # Prologue: allocate a 0x40-byte frame and save LR in the caller's frame
+  mflr    r0
+  stwu    [r1 - 0x40], r1
+  stw     [r1 + 0x44], r0
+
+  # Stack:
+  # [r1+08] = B3 XX 0C 00
+  # [r1+0C] = code section's return value
+  # [r1+10] = checksum
+  # [r1+14] = saved ctx argument
+  # [r1+18] = saved data argument
+  stw     [r1 + 0x14], r3
+  stw     [r1 + 0x18], r4
+
+  # Set up the reply header (B3 XX 0C 00, where XX comes from the B2 command)
+  lbz     r5, [r4 + 1]
+  rlwinm  r5, r5, 16, 8, 15
+  oris    r5, r5, 0xB300
+  ori     r5, r5, 0x0C00
+  stw     [r1 + 0x08], r5
+
+  # If there's no code section, skip it. We also write the code section size to
+  # the return value field (which will be overwritten later if the size is not
+  # zero). This is because I'm lazy and this gives the behavior we want: the
+  # code return value is always zero if the code section size is zero.
+  li      r6, 4
+  lwbrx   r5, [r4 + r6] # r5 = code_size
+  stw     [r1 + 0x0C], r5 # response.code_return_value = code_size
+  cmplwi  r5, 0
+  beq     handle_B2_skip_code
+
+  # Get the code section base and footer addresses
+  addi    r6, r4, 0x10 # r6 = code base address
+  add     r7, r6, r5
+  subi    r7, r7, 0x20 # r7 = footer address (code base + code size - 0x20)
+
+  # Check if there are relocations to do
+  lwz     r8, [r7 + 4] # r8 = num relocations
+  cmplwi  r8, 0
+  beq     handle_B2_skip_relocations
+
+  # Execute the relocations
+  # Each list entry is a 16-bit word delta from the previous relocation
+  # (starting at the code base); the word at each position gets the code base
+  # address added to it. CTR counts the remaining entries.
+  mtctr   r8
+  lwz     r8, [r7] # r8 = relocations list offset
+  add     r8, r8, r6 # r8 = relocations list address
+  subi    r8, r8, 2 # Back up one space so we can use lhzu in the loop
+  mr      r10, r6 # relocation pointer = code base address
+handle_B2_relocate_again:
+  lhzu    r9, [r8 + 2]
+  rlwinm  r9, r9, 2, 0, 29 # r9 = next_relocation_offset * 4
+  add     r10, r10, r9 # relocation pointer += next_relocation_offset * 4
+  lwz     r9, [r10]
+  add     r9, r9, r6
+  stw     [r10], r9 # (*relocation pointer) += code base address
+  bdnz    handle_B2_relocate_again
+handle_B2_skip_relocations:
+
+  # Invalidate the caches appropriately for the newly-copied code
+  lis     r0, 0x8000
+  ori     r0, r0, 0xC274
+  mr      r3, r6
+  mr      r4, r5
+  mtctr   r0
+  bctrl   # flush_code(code_base_addr, code_section_size)
+
+  # Call the code section and put the return value (byteswapped) on the stack
+  # Note: flush_code only uses r3, r4, and r5, so we don't need to reload r7
+  # after the above call
+  # (r6, the code base address, is relied on in the same way)
+  lwz     r8, [r7 + 0x10]
+  lwzx    r8, [r8 + r6]
+  mtctr   r8
+  bctrl
+  li      r8, 0x0C
+  stwbrx  [r1 + r8], r3
+handle_B2_skip_code:
+
+  # Get the checksum function args
+  # (data pointer is reloaded from the stack; registers are not assumed to
+  # survive the calls above)
+  lwz     r4, [r1 + 0x18]
+  li      r5, 0x08
+  lwbrx   r3, [r4 + r5] # checksum addr
+  li      r5, 0x0C
+  lwbrx   r4, [r4 + r5] # checksum size
+  bl      crc32   # crc32(checksum_addr, checksum_size)
+  li      r8, 0x10
+  stwbrx  [r1 + r8], r3
+
+  # Send the response (B3 command)
+  # The send function pointer is fetched from offset 0x28 of the table
+  # pointed to by proto + 0x18.
+  lwz     r3, [r1 + 0x14]
+  lwz     r4, [r3 + 0x18]
+  lwz     r4, [r4 + 0x28]
+  mtctr   r4
+  addi    r4, r1, 0x08
+  li      r5, 0x0C
+  bctrl   # TProtocol::send_command(ctx, &reply_data, 0x0C)
+
+  # Clean up stack and return
+  lwz     r0, [r1 + 0x44]
+  addi    r1, r1, 0x40
+  mtlr    r0
+  blr
+
+crc32:
+  # Bitwise, low-bit-first CRC over a byte range.
+  # Arguments:
+  # r3 = const void* data
+  # r4 = size in bytes (zero is allowed; the result is then 0)
+  # Returns: r3 = (running value ^ 0xFFFFFFFF)
+  # Clobbers: r4-r8, ctr, cr0
+  lis     r6, 0xEDB8
+  ori     r6, r6, 0x8320  # r6 = value xored in when a 1 bit is shifted out
+  li      r7, 8  # r7 = bit iterations per input byte
+  li      r8, -1  # r8 = running value, starts at 0xFFFFFFFF
+  subi    r3, r3, 1  # Pre-decrement so lbzu can advance and load in one step
+  add     r4, r3, r4  # r4 = value r3 will hold after the last byte is read
+
+crc32_next_byte:
+  cmpl    r3, r4
+  beq     crc32_finish
+
+  lbzu    r5, [r3 + 1]
+  xor     r8, r8, r5  # Fold the next input byte into the low 8 bits
+
+  mtctr   r7
+crc32_shift_bit:
+  rlwinm  r5, r8, 0, 31, 31  # r5 = bit about to be shifted out (0 or 1)
+  neg     r5, r5  # r5 = all ones if that bit was set, else zero
+  rlwinm  r8, r8, 31, 1, 31  # running value >>= 1 (logical)
+  and     r5, r5, r6
+  xor     r8, r8, r5  # Apply 0xEDB88320 only if a 1 bit was shifted out
+  bdnz    crc32_shift_bit
+  b       crc32_next_byte
+
+crc32_finish:
+  xoris   r3, r8, 0xFFFF
+  xori    r3, r3, 0xFFFF
+  blr     # return (running value ^ 0xFFFFFFFF)
+
+
+get_handle_B2_ptr:
+  # Installer, reached via bl from start (never called as a normal function).
+  # Copies everything between handle_B2 and this label to 0x8000B0E0, flushes
+  # the copy, patches the command handler table to point at it, then returns
+  # to start's caller.
+  # On entry:
+  # LR  = &handle_B2 (set by the bl in start)
+  # r11 = return address of start's caller
+  # Clobbers: r0, r3-r5, r7-r10, r12, ctr, lr
+  mflr    r9  # r9 = &handle_B2
+  bl      get_handle_B2_end_ptr
+get_handle_B2_end_ptr:
+  mflr    r10
+  # The mflr and bl above are 8 bytes, so backing up 8 gives the address of
+  # get_handle_B2_ptr, which is where the code to copy ends
+  subi    r10, r10, 8  # r10 = pointer to end of handle_B2
+
+  # Copy handle_B2 to 8000B0E0, which is normally unused by the game
+  lis     r12, 0x8000
+  ori     r12, r12, 0xB0E0  # r12 = 0x8000B0E0
+  sub     r7, r10, r9
+  rlwinm  r7, r7, 30, 2, 31  # r7 = number of words to copy
+  mtctr   r7
+  subi    r8, r12, 4  # r8 = r12 - 4 (so we can use stwu)
+  subi    r9, r9, 4  # r9 = r9 - 4 (so we can use lwzu)
+copy_handle_B2_word_again:
+  lwzu    r0, [r9 + 4]
+  stwu    [r8 + 4], r0
+  bdnz    copy_handle_B2_word_again
+
+  # Invalidate the caches appropriately for the newly-copied code
+  # r7 still holds the word count (the copy loop does not modify it), so the
+  # byte count is r7 << 2. Shifting right here instead would flush only a
+  # fraction of the copied handler.
+  lis     r9, 0x8000
+  ori     r9, r9, 0xC274
+  mtctr   r9
+  mr      r3, r12
+  rlwinm  r4, r7, 2, 0, 29  # r4 = number of words * 4 = bytes copied
+  bctrl   # flush_code(copied_B2_handler, copied_B2_handler_bytes)
+
+  # Replace the command handler table entry for command 0E (which is an unused
+  # legacy command and has very broken behavior) with our B2 implementation
+  # (r12 is relied on to survive the flush_code call above)
+  lis     r5, 0x804C
+  ori     r5, r5, 0x4E08
+  li      r0, 0x00B2
+  stw     [r5], r0  # Entry's command number becomes B2
+  stw     [r5 + 0x0C], r12  # Entry's handler pointer becomes the copied code
+
+  # Return to whoever called start
+  mtlr    r11
+  blr