rewrite client function compiler

2026-05-11 07:29:25 -07:00
parent 2f2a0bcf2b
commit e78e2ba887
174 changed files with 3931 additions and 5807 deletions
@@ -1,89 +0,0 @@
-start:
-    mflr     r7
-
-    # If this patch has already been run, then the opcode that led here will
-    # not be bctrl (4E800421). In that case, do nothing.
-    lis      r3, 0x4E80
-    ori      r3, r3, 0x0421
-    lwz      r4, [r7 - 4]
-    cmp      r3, r4
-    beq      apply_patch
-    blr
-apply_patch:
-
-    bl       patch_end
-    .offsetof patch
-    .offsetof patch_end
-patch:
-    mfctr    r6
-    mr       r3, r6
-    li       r4, 0x7C00
-    .include FlushCachedCode-GC
-    mtctr    r6
-    bctr
-patch_end:
-    mflr     r4
-
-    addi     r4, r4, 8
-    lwz      r3, [r4 - 8]
-    lwz      r5, [r4 - 4]
-    sub      r5, r5, r3
-
-    # At this point:
-    # r4 = address of patch label
-    # r5 = patch size in bytes
-    # r7 = saved LR
-
-    # Find a spot in the interrupt handlers with enough memory for the patch
-    lis      r3, 0x8000
-    ori      r3, r3, 0x0200
-    sub      r3, r3, r5
-
-check_location:
-    rlwinm   r0, r5, 30, 2, 31
-    mtctr    r0  # ctr = patch size in words
-    subi     r8, r3, 4
-check_location_next_word:
-    lwzu     r0, [r8 + 4]
-    cmpwi    r0, 0
-    beq      check_location_word_ok
-    addi     r3, r3, 0x0100
-    rlwinm   r0, r3, 0, 16, 31
-    cmpwi    r0, 0x1800
-    blt      check_location
-    # No suitable location was found - return null
-    li       r3, 0
-    mtlr     r7
-    blr
-
-check_location_word_ok:
-    bdnz     check_location_next_word
-
-location_ok:
-    mr       r6, r3
-    # Now:
-    # r3 = destination location
-    # r4 = patch src data
-    # r5 = patch size in bytes
-    # r6 = destination location
-    # r7 = saved LR
-    .include CopyCode-GC
-
-setup_branch:
-    # Replace the bctrl opcode that led to this call with a bl opcode that
-    # leads to the copied patch code
-    subi     r3, r7, 4
-    sub      r4, r6, r3
-    rlwinm   r4, r4, 0, 6, 31
-    oris     r4, r4, 0x4800
-    ori      r4, r4, 0x0001
-    stw      [r3], r4
-    dcbst    r0, r3
-    sync
-    icbi     r0, r3
-    isync
-
-    # Return the address that the patch was copied to
-    mr       r3, r6
-    mtlr     r7
-    blr
@@ -1,4 +0,0 @@
-entry_ptr:
-    .data     0x8000C274
-start:
-    .include  CacheClearFix
@@ -0,0 +1,6 @@
+.versions PPC
+
+entry_ptr:
+  .data     0x8000C274
+start:
+  .include  CacheClearFix
@@ -1,5 +0,0 @@
-entry_ptr:
-reloc0:
-    .offsetof start
-start:
-    .include  CacheClearFix
@@ -0,0 +1,7 @@
+.versions PPC
+
+entry_ptr:
+reloc0:
+  .offsetof start
+start:
+  .include  CacheClearFix
@@ -0,0 +1,61 @@
+.versions PPC
+
+start:
+  mflr      r7
+
+  # If this patch has already been run, then the opcode that led here will not be bctrl (4E800421). In that case, do
+  # nothing.
+  lis       r3, 0x4E80
+  ori       r3, r3, 0x0421
+  lwz       r4, [r7 - 4]
+  cmp       r3, r4
+  beq       apply_patch
+  blr
+apply_patch:
+
+  bl        patch_end
+  .offsetof patch
+  .offsetof patch_end
+patch:
+  mfctr     r6
+  mr        r3, r6
+  li        r4, 0x7C00
+  .include  FlushCachedCode
+  mtctr     r6
+  bctr
+patch_end:
+  mflr      r4
+
+  addi      r4, r4, 8
+  lwz       r3, [r4 - 8]
+  lwz       r5, [r4 - 4]
+  sub       r5, r5, r3
+
+  lis       r3, 0x8000
+  ori       r3, r3, 0x01BC
+  mr        r6, r3
+  # At this point:
+  # r3 = destination location (overwritten by CopyCode)
+  # r4 = patch src data (overwritten by CopyCode)
+  # r5 = patch size in bytes (overwritten by CopyCode)
+  # r6 = destination location
+  # r7 = saved LR
+  .include  CopyCode
+
+setup_branch:
+  # Replace the bctrl opcode that led to this call with a bl opcode that leads to the copied patch code
+  subi      r3, r7, 4
+  sub       r4, r6, r3
+  rlwinm    r4, r4, 0, 6, 31
+  oris      r4, r4, 0x4800
+  ori       r4, r4, 0x0001
+  stw       [r3], r4
+  dcbst     r0, r3
+  sync
+  icbi      r0, r3
+  isync
+
+  # Return the address that the patch was copied to
+  mr        r3, r6
+  mtlr      r7
+  blr
@@ -1,60 +0,0 @@
-start:
-    mflr     r7
-
-    # If this patch has already been run, then the opcode that led here will
-    # not be bctrl (4E800421). In that case, do nothing.
-    lis      r3, 0x4E80
-    ori      r3, r3, 0x0421
-    lwz      r4, [r7 - 4]
-    cmp      r3, r4
-    beq      apply_patch
-    blr
-apply_patch:
-
-    bl       patch_end
-    .offsetof patch
-    .offsetof patch_end
-patch:
-    mfctr    r6
-    mr       r3, r6
-    li       r4, 0x7C00
-    .include FlushCachedCode
-    mtctr    r6
-    bctr
-patch_end:
-    mflr     r4
-
-    addi     r4, r4, 8
-    lwz      r3, [r4 - 8]
-    lwz      r5, [r4 - 4]
-    sub      r5, r5, r3
-
-    lis      r3, 0x8000
-    ori      r3, r3, 0x01BC
-    mr       r6, r3
-    # At this point:
-    # r3 = destination location (overwritten by CopyCode)
-    # r4 = patch src data (overwritten by CopyCode)
-    # r5 = patch size in bytes (overwritten by CopyCode)
-    # r6 = destination location
-    # r7 = saved LR
-    .include CopyCode
-
-setup_branch:
-    # Replace the bctrl opcode that led to this call with a bl opcode that
-    # leads to the copied patch code
-    subi     r3, r7, 4
-    sub      r4, r6, r3
-    rlwinm   r4, r4, 0, 6, 31
-    oris     r4, r4, 0x4800
-    ori      r4, r4, 0x0001
-    stw      [r3], r4
-    dcbst    r0, r3
-    sync
-    icbi     r0, r3
-    isync
-
-    # Return the address that the patch was copied to
-    mr       r3, r6
-    mtlr     r7
-    blr
@@ -1,9 +1,42 @@
-# This function implements the $nativecall chat command on GameCube clients.
+# This function implements the $nativecall chat command.

 entry_ptr:
 reloc0:
  .offsetof start

+
+
+.versions SH4
+
+start:
+  sts.l     -[r15], pr
+  mov.l     r0, [call_addr]
+  mov.l     r4, [arg0]
+  mov.l     r5, [arg1]
+  mov.l     r6, [arg2]
+  mov.l     r7, [arg3]
+  calls     [r0]
+  nop
+  lds.l     pr, [r15]+
+  rets
+  nop
+
+  .align 4
+call_addr:
+  .data     0
+arg0:
+  .data     0
+arg1:
+  .data     0
+arg2:
+  .data     0
+arg3:
+  .data     0
+
+
+
+.versions PPC
+
 start:
  mflr   r0
  stw    [r1 + 4], r0
@@ -1,30 +0,0 @@
-# This function implements the $nativecall chat command on DC clients.
-
-entry_ptr:
-reloc0:
-  .offsetof start
-
-start:
-  sts.l     -[r15], pr
-  mov.l     r0, [call_addr]
-  mov.l     r4, [arg0]
-  mov.l     r5, [arg1]
-  mov.l     r6, [arg2]
-  mov.l     r7, [arg3]
-  calls     [r0]
-  nop
-  lds.l     pr, [r15]+
-  rets
-  nop
-
-  .align 4
-call_addr:
-  .data     0
-arg0:
-  .data     0
-arg1:
-  .data     0
-arg2:
-  .data     0
-arg3:
-  .data     0
@@ -0,0 +1,22 @@
+# r3 = dest ptr
+# r4 = src ptr
+# r5 = size
+# Clobbers r0, r3, r4, r5, ctr
+
+.versions PPC
+
+copy_code:
+  addi    r5, r5, 3
+  rlwinm  r5, r5, 30, 2, 31  # r5 = number of words to copy
+  mtctr   r5
+  subi    r3, r3, 4  # r3 -= 4 (so we can use stwu)
+  subi    r4, r4, 4  # r4 -= 4 (so we can use lwzu)
+copy_word_again:
+  lwzu    r0, [r4 + 4]
+  stwu    [r3 + 4], r0
+  bdnz    copy_word_again
+
+  rlwinm  r4, r5, 2, 0, 29
+  addi    r3, r3, 4
+  sub     r3, r3, r4
+  .include FlushCachedCode
@@ -1,18 +0,0 @@
-  # r3 = dest ptr
-  # r4 = src ptr
-  # r5 = size
-  # Clobbers r0, r3, r4, r5
-  addi    r5, r5, 3
-  rlwinm  r5, r5, 30, 2, 31 # r5 = number of words to copy
-  mtctr   r5
-  subi    r3, r3, 4 # r3 = r3 - 4 (so we can use stwu)
-  subi    r4, r4, 4 # r4 = r4 - 4 (so we can use lwzu)
-copy_word_again:
-  lwzu    r0, [r4 + 4]
-  stwu    [r3 + 4], r0
-  bdnz    copy_word_again
-
-  rlwinm  r4, r5, 2, 0, 29
-  addi    r3, r3, 4
-  sub     r3, r3, r4
-  .include FlushCachedCode
@@ -1,7 +1,11 @@
-  # eax = dest ptr
-  # edx = src ptr
-  # ecx = size
-  # Clobbers eax, ecx, edx
+# eax = dest ptr
+# edx = src ptr
+# ecx = size
+# Clobbers eax, ecx, edx
+
+.versions X86
+
+copy_data:
  push    ebx
 again:
  test    ecx, ecx
@@ -1,7 +1,11 @@
-  # r3 = dest ptr
-  # r4 = src ptr
-  # r5 = size
-  # Clobbers r3, r4, r5, ctr
+# r3 = dest ptr
+# r4 = src ptr
+# r5 = size
+# Clobbers r3, r4, r5, ctr
+
+.versions PPC
+
+copy_data_words:
  addi    r5, r5, 3
  rlwinm  r5, r5, 30, 2, 31 # r5 = number of words to copy
  mtctr   r5
@@ -1,108 +1,86 @@
-# This program was an early attempt at restoring B2 patching functionality to
-# Episode 3. It is no longer used, since the quest loading method is more
-# reliable, but this file remains for documentation purposes.
+# This program was an early attempt at restoring B2 patching functionality to Episode 3. It is no longer used, since
+# the quest loading method is more reliable, but this file remains for documentation purposes.

-# There is a buffer overflow bug in PSO Episode 3 that this program uses to
-# achieve arbitrary code execution. (This bug is likely present in all versions
-# of PSO, but the code here is specific to the USA version of Episode 3.) This
-# is only necessary because the non-Japanese versions of Episode 3 lack the B2
-# command, which is used on other console PSO versions to send patches and other
-# bits of code. Here, we use a buffer overflow bug to re-implement the B2
-# command, which allows the server to treat PSO Episode 3 like any other version
-# of PSO with respect to patching or loading DOL files.
+# There is a buffer overflow bug in PSO Episode 3 that this program uses to achieve arbitrary code execution. (This bug
+# is likely present in all versions of PSO, but the code here is specific to the USA version of Episode 3.) This is
+# only necessary because the non-Japanese versions of Episode 3 lack the B2 command, which is used on other console PSO
+# versions to send patches and other bits of code. Here, we use a buffer overflow bug to re-implement the B2 command,
+# which allows the server to treat PSO Episode 3 like any other version of PSO with respect to patching or loading DOL
+# files.

-# For some background, PSO sends download quest files via the A6 and A7
-# commands. The A6 command is used to start sending a download quest file; it
-# includes the quest name, file name, and total file size. The A7 command is
-# used to send a chunk of 1KB (0x400 bytes) of data, or less if it's the final
-# chunk of the file. When the client receives an A6 command for a filename
-# ending in .bin, it allocates a buffer of (file size + 0x48) bytes. When it
-# later receives an A7 command, it copies (cmd.data_size) bytes from the command
-# to position (8 + 0x100 * flag) in the buffer, then if cmd.data_size was less
-# than 0x400, it marks the file as done and postprocesses it.
+# For some background, PSO sends download quest files via the A6 and A7 commands. The A6 command is used to start
+# sending a download quest file; it includes the quest name, file name, and total file size. The A7 command is used to
+# send a chunk of 1KB (0x400 bytes) of data, or less if it's the final chunk of the file. When the client receives an
+# A6 command for a filename ending in .bin, it allocates a buffer of (file size + 0x48) bytes. When it later receives
+# an A7 command, it copies (cmd.data_size) bytes from the command to position (8 + 0x100 * flag) in the buffer, then if
+# cmd.data_size was less than 0x400, it marks the file as done and postprocesses it.

-# However, the client neglects to check if the last chunk overflows the end of
-# the buffer before copying the chunk data. In this function, we send an A6
-# command with an overall file size of only 0x18 bytes, then we send a chunk of
-# 0x200 or so bytes (the compiled size of the code in this file), which
-# overflows past the end of the allocated buffer and overwrites part of a free
-# block after the allocated buffer. The memory allocator library keeps some of
-# its bookkeeping structures at the beginning of this free block, which we use
-# to cause the next call to malloc() to overwrite its own return address on the
-# stack. Conveniently, this call happens soon afterward, during the
+# However, the client neglects to check if the last chunk overflows the end of the buffer before copying the chunk
+# data. In this function, we send an A6 command with an overall file size of only 0x18 bytes, then we send a chunk of
+# 0x200 or so bytes (the compiled size of the code in this file), which overflows past the end of the allocated buffer
+# and overwrites part of a free block after the allocated buffer. The memory allocator library keeps some of its
+# bookkeeping structures at the beginning of this free block, which we use to cause the next call to malloc() to
+# overwrite its own return address on the stack. Conveniently, this call happens soon afterward, during the
 # postprocessing step.

-# The PSO memory allocator is a simple free-list allocator. The allocator
-# maintains two linked lists of blocks: one for allocated blocks and one for
-# free blocks. The list of free blocks is sorted in order of memory address, but
-# the list of allocated blocks is sorted in the order they were allocated. (The
-# order of the allocated block list does not matter for the allocator's
-# performance or correctness.)
+# The PSO memory allocator is a simple free-list allocator. The allocator maintains two linked lists of blocks: one for
+# allocated blocks and one for free blocks. The list of free blocks is sorted in order of memory address, but the list
+# of allocated blocks is sorted in the order they were allocated. (The order of the allocated block list does not
+# matter for the allocator's performance or correctness.)

-# Each block begins with two pointers, prev and next, which point to other
-# blocks in the allocated or free list. (As with a typical doubly-linked list,
-# the first block has prev == nullptr and the last block has next == nullptr;
-# there is no sentinel node on either end.) After these two pointers is the
-# block's size in bytes, followed by 0x14 unused bytes. The block data
-# immediately follows this 0x20-byte header structure. All block sizes are
-# rounded up to a multiple of 0x20 bytes.
+# Each block begins with two pointers, prev and next, which point to other blocks in the allocated or free list. (As
+# with a typical doubly-linked list, the first block has prev == nullptr and the last block has next == nullptr; there
+# is no sentinel node on either end.) After these two pointers is the block's size in bytes, followed by 0x14 unused
+# bytes. The block data immediately follows this 0x20-byte header structure. All block sizes are rounded up to a
+# multiple of 0x20 bytes.

-# The malloc() routine simply searches for the first free block that has enough
-# space to satisfy the request, and either splits it into an allocated and a
-# free block (if the free block's size is at least 0x40 bytes more than the
-# requested size), or converts the free block entirely into an allocated block
-# and returns it. It is the second case that we take advantage of here.
+# The malloc() routine simply searches for the first free block that has enough space to satisfy the request, and
+# either splits it into an allocated and a free block (if the free block's size is at least 0x40 bytes more than the
+# requested size), or converts the free block entirely into an allocated block and returns it. It is the second case
+# that we take advantage of here.

-# When we send our A7 command containing this program, the first 0x58 bytes of
-# it fill the quest file data buffer. The next 0x0C bytes of it overwrite the
-# header fields of the following free block (noted below in the comments), and
-# the remainder of the data goes into that block's unused header fields and the
-# block's data (which is also otherwise unused, since it is a free block). We
-# overwrite the free block's prev and next pointers with specific nonzero values
-# and overwrite the size with the exact size that the caller will request, so we
-# trigger the malloc() case that does not split the free block. When that code
-# attempts to remove the free block from its doubly-linked list, it writes
-# block->next to block->prev->next and block->prev to block->next->prev. We set
-# block->prev to the address where we want execution to jump to (the start label
-# here), and block->next to the address of malloc()'s return address on the
-# stack. This overwrites the return address with the start label's address, and
-# overwrites the word after the start label with an address within the stack. We
-# can't avoid this second write since both pointers must be non-null and the
-# values and addresses written are dependent on each other, but we can just use
-# a branch opcode to ignore the value that gets written into our code.
+# When we send our A7 command containing this program, the first 0x58 bytes of it fill the quest file data buffer. The
+# next 0x0C bytes of it overwrite the header fields of the following free block (noted below in the comments), and the
+# remainder of the data goes into that block's unused header fields and the block's data (which is also otherwise
+# unused, since it is a free block). We overwrite the free block's prev and next pointers with specific nonzero values
+# and overwrite the size with the exact size that the caller will request, so we trigger the malloc() case that does
+# not split the free block. When that code attempts to remove the free block from its doubly-linked list, it writes
+# block->next to block->prev->next and block->prev to block->next->prev. We set block->prev to the address where we
+# want execution to jump to (the start label here), and block->next to the address of malloc()'s return address on the
+# stack. This overwrites the return address with the start label's address, and overwrites the word after the start
+# label with an address within the stack. We can't avoid this second write since both pointers must be non-null and the
+# values and addresses written are dependent on each other, but we can just use a branch opcode to ignore the value
+# that gets written into our code.

-# Once we have control, we clean up the allocator state (restoring the free
-# block as it was before we overwrote its header), then copy our implementation
-# of the B2 command to an otherwise-unused area of memory and apply a few more
+# Once we have control, we clean up the allocator state (restoring the free block as it was before we overwrote its
+# header), then copy our implementation of the B2 command to an otherwise-unused area of memory and apply a few more
 # patches. See the comments within the code below for more details.

+.versions 3SE0

-
-# This entry_ptr label isn't used since this code isn't sent with the B2
-# command; it just needs to be present for newserv to compile the code properly
+# This entry_ptr label isn't used since this code isn't sent with the B2 command; it just needs to be present for
+# newserv to compile the code properly
 entry_ptr:

 start:
  b       resume1
-  # This is the value overwritten by malloc() when it attempts to remove the
-  # free block from its linked list
+  # This is the value overwritten by malloc() when it attempts to remove the free block from its linked list
  .data   0xAAAAAAAA

 resume1:
  # We can use any of the caller-save registers (r0, r3-r12) here.

  # At entry time, some registers contain useful values:
-  # r5: Address of the allocator instance ("lists"). This structure includes the
-  #     allocated and free list head pointers, one of which we have to update.
-  # r12: Address of the malloc() function that was called. Conveniently, the
-  #      address that we should return to is very near this location in memory.
+  # r5: Address of the allocator instance ("lists"). This structure includes the allocated and free list head pointers,
+  #     one of which we have to update.
+  # r12: Address of the malloc() function that was called. Conveniently, the address that we should return to is very
+  #      near this location in memory.

-  # Compute the LR we should use to return from this function, but don't put it
-  # in the LR just yet - we're still going to need the LR for other shenanigans
+  # Compute the LR we should use to return from this function, but don't put it in the LR just yet - we're still going
+  # to need the LR for other shenanigans
  subi    r11, r12, 0xB0 # 8038C1B8 - B0 = 8038C108

-  # Restore the free block whose header we had destroyed with the A7 command
-  # buffer overflow
+  # Restore the free block whose header we had destroyed with the A7 command buffer overflow
  lis     r7, 0x815F
  ori     r7, r7, 0xF440
  li      r0, 0
@@ -121,8 +99,8 @@ resume1:

  b       resume2

-  # TODO: We can probably use this space for something useful. There must be
-  # exactly 20 opcodes (0x50 bytes) between resume1 and opaque2.
+  # TODO: We can probably use this space for something useful. There must be exactly 20 opcodes (0x50 bytes) between
+  # resume1 and opaque2.
  .zero
  .zero
  .zero
@@ -130,10 +108,9 @@ resume1:
  .zero

 opaque2:
-  # This block must be exactly here (the number of opcodes above is exactly how
-  # many will fit in the original buffer), and the 3 words here must have
-  # exactly these values. This is what causes malloc to overwrite the return
-  # address on the stack to call this code in the first place.
+  # This block must be exactly here (the number of opcodes above is exactly how many will fit in the original buffer),
+  # and the 3 words here must have exactly these values. This is what causes malloc to overwrite the return address on
+  # the stack to call this code in the first place.
  .data   0x815FF3E8 # free_head->prev
  .data   0x80592AC4 # free_head->next
  .data   0x00000160 # free_head->size
@@ -141,11 +118,10 @@ opaque2:
 resume2:
  bl      get_handle_B2_ptr

-  # This is the code we're going to use for the B2 command handler, which we
-  # will copy into an unused area of memory. It's convenient to put it here and
-  # use a bl opcode to get its address, so this code can be minimally position-
-  # dependent. Note that this part of the code does not run at the time the A7
-  # command is received; it will run later if the client receives a B2 command.
+  # This is the code we're going to use for the B2 command handler, which we will copy into an unused area of memory.
+  # It's convenient to put it here and use a bl opcode to get its address, so this code can be minimally position-
+  # dependent. Note that this part of the code does not run at the time the A7 command is received; it will run later
+  # if the client receives a B2 command.
 handle_B2:
  mflr    r0
  stwu    [r1 - 0x40], r1
@@ -173,10 +149,9 @@ handle_B2:
  ori     r5, r5, 0x0C00
  stw     [r1 + 0x08], r5

-  # If there's no code section, skip it. We also write the code section size to
-  # the return value field (which will be overwritten later if the size is not
-  # zero). This is because I'm lazy and this gives the behavior we want: the
-  # code return value is always zero if the code section size is zero.
+  # If there's no code section, skip it. We also write the code section size to the return value field (which will be
+  # overwritten later if the size is not zero). This is because I'm lazy and this gives the behavior we want: the code
+  # return value is always zero if the code section size is zero.
  li      r6, 4
  lwbrx   r5, [r4 + r6] # r5 = code_size
  stw     [r1 + 0x0C], r5 # response.code_return_value = code_size
@@ -218,8 +193,7 @@ handle_B2_skip_relocations:
  bctrl   # flush_code(code_base_addr, code_section_size)

  # Call the code section and put the return value (byteswapped) on the stack
-  # Note: flush_code only uses r3, r4, and r5, so we don't need to reload r7
-  # after the above call
+  # Note: flush_code only uses r3, r4, and r5, so we don't need to reload r7 after the above call
  lwz     r8, [r7 + 0x10]
  lwzx    r8, [r8 + r6]
  mtctr   r8
@@ -284,17 +258,16 @@ copy_handle_B2_word_again:
  rlwinm  r4, r7, 2, 0, 29
  bctrl   # flush_code(copied_B2_handler, copied_B2_handler_bytes)

-  # Replace the command handler table entry for command 0E (which appears to be
-  # a legacy command and has very broken behavior) with our B2 implementation
+  # Replace the command handler table entry for command 0E (which appears to be a legacy command and has very broken
+  # behavior) with our B2 implementation
  lis     r5, 0x8044
  ori     r5, r5, 0xF684
  li      r0, 0x00B2
  stw     [r5], r0
  stw     [r5 + 0x0C], r12

-  # Patch both places in the code where command 9E is sent to make them include
-  # a sentinel value that newserv can use to determine if the client has already
-  # run the code in this file
+  # Patch both places in the code where command 9E is sent to make them include a sentinel value that newserv can use
+  # to determine if the client has already run the code in this file
  bl      get_patch_9E_1_ptr
 patch_9E_1:
  lis     r4, 0x5F5C
@@ -333,12 +306,10 @@ get_patch_9E_2_ptr:
  mtctr   r9
  bctrl   # flush_code(patch_9E_2_dest, 0x20)

-  # Finally, patch the A7 handler function (which is on the current callstack)
-  # so that it does nothing else if this function returns null, which prevents
-  # further memory corruption. This changes a beq opcode (which never triggers
-  # under normal circumstances) to skip a couple more function calls, one of
-  # which would cause memory corruption if executed because the original buffer
-  # is smaller than 0x100 bytes.
+  # Finally, patch the A7 handler function (which is on the current callstack) so that it does nothing else if this
+  # function returns null, which prevents further memory corruption. This changes a beq opcode (which never triggers
+  # under normal circumstances) to skip a couple more function calls, one of which would cause memory corruption if
+  # executed because the original buffer is smaller than 0x100 bytes.
  lis     r3, 0x8010
  ori     r3, r3, 0xFD8A
  li      r4, 0x0064
@@ -348,8 +319,7 @@ get_patch_9E_2_ptr:
  mtctr   r9
  bctrl   # flush_code(patched_opcode_address & 0xFFFFFFF0, 0x20)

-  # Return null instead of a malloc'ed block, which triggers the conditional
-  # branch we just patched above
+  # Return null instead of a malloc'ed block, which triggers the conditional branch we just patched above
  li      r3, 0
  mtlr    r11
  blr
@@ -1,20 +1,23 @@
-# This code flushes the data cache and invalidates the instruction cache for a
-# block of newly-written code in memory.
+# This code flushes the data cache and invalidates the instruction cache for a block of newly-written code in memory.
 # Arguments:
 #   r3 = address of written code
 #   r4 = number of bytes
 # Returns: nothing
 # Overwrites: r3, r4, r5
+
+.versions PPC
+
+flush_cached_code:
  lis     r5, 0xFFFF
  ori     r5, r5, 0xFFF1
  and     r5, r5, r3
  subf    r3, r5, r3
  add     r4, r4, r3
-flush_cached_code_writes__again:
+flush_cached_code_again:
  dcbst   r0, r5
  sync
  icbi    r0, r5
  addic   r5, r5, 8
  subic.  r4, r4, 8
-  bge     flush_cached_code_writes__again
+  bge     flush_cached_code_again
  isync
@@ -1,44 +0,0 @@
-# (uint16_t entity_id @ eax) -> TObjectV00b421c0* @ eax
-# Preserves all registers except eax
-get_enemy_entity:
-  push      esi
-  push      edi
-  push      edx
-  push      ecx
-  xor       edx, edx
-  xchg      edx, eax
-  cmp       edx, 0x1000
-  jl        done
-  cmp       edx, 0x4000
-  jge       done
-
-  mov       esi, [0x00AABCE8]  # bs_low = next_player_entity_index
-  mov       edi, [0x00AABCE4]
-  lea       edi, [edi + esi - 1]  # bs_high = next_player_entity_index + next_enemy_entity_index - 1
-bs_again:
-  cmp       esi, edi
-  jge       bs_done
-  lea       ecx, [esi + edi]
-  shr       ecx, 1
-  mov       eax, [ecx * 4 + 0x00AAB2A0]  # all_entities[ecx]
-  cmp       [eax + 0x1C], dx
-  jge       bs_not_less
-  lea       esi, [ecx + 1]
-  jmp       bs_again
-bs_not_less:
-  mov       edi, ecx
-  jmp       bs_again
-bs_done:
-
-  mov       eax, [esi * 4 + 0x00AAB2A0]  # all_entities[bs_low]
-  test      eax, eax
-  je        done
-  xor       ecx, ecx
-  cmp       [eax + 0x1C], dx
-  cmovne    eax, ecx
-
-done:
-  pop       ecx
-  pop       edx
-  pop       edi
-  pop       esi
@@ -1,5 +1,8 @@
 # (uint16_t entity_id @ eax) -> TObjectV00b441c0* @ eax
 # Preserves all registers except eax
+
+.versions 59NJ 59NL
+
 get_enemy_entity:
  push      esi
  push      edi
@@ -12,15 +15,15 @@ get_enemy_entity:
  cmp       edx, 0x4000
  jge       done

-  mov       esi, [0x00AAE168]  # bs_low = next_player_entity_index
-  mov       edi, [0x00AAE164]
+  mov       esi, [<VERS 0x00AABCE8 0x00AAE168>]  # bs_low = next_player_entity_index
+  mov       edi, [<VERS 0x00AABCE4 0x00AAE164>]
  lea       edi, [edi + esi - 1]  # bs_high = next_player_entity_index + next_enemy_entity_index - 1
 bs_again:
  cmp       esi, edi
  jge       bs_done
  lea       ecx, [esi + edi]
  shr       ecx, 1
-  mov       eax, [ecx * 4 + 0x00AAD720]  # all_entities[ecx]
+  mov       eax, [ecx * 4 + <VERS 0x00AAB2A0 0x00AAD720>]  # all_entities[ecx]
  cmp       [eax + 0x1C], dx
  jge       bs_not_less
  lea       esi, [ecx + 1]
@@ -30,7 +33,7 @@ bs_not_less:
  jmp       bs_again
 bs_done:

-  mov       eax, [esi * 4 + 0x00AAD720]  # all_entities[bs_low]
+  mov       eax, [esi * 4 + <VERS 0x00AAB2A0 0x00AAD720>]  # all_entities[bs_low]
  test      eax, eax
  je        done
  xor       ecx, ecx
@@ -25,6 +25,8 @@
 #   XBOX_PAGE_NOCACHE           = 0x00000200
 #   XBOX_PAGE_WRITECOMBINE      = 0x00000400

+.versions X86
+
 start:
  push   ecx
  push   edx
@@ -6,6 +6,9 @@
 #   r6 = source data size
 # Returns: number of bytes written to output buffer, or -1 on error
 # Overwrites: r3, r4, r5, r6, r7, r8, r9, r10, r11, r12
+
+.versions PPC
+
 prs_decompress__start:
  # r3 = dest ptr (used as write ptr)
  subi    r3, r3, 1
@@ -0,0 +1,49 @@
+# Returns the word at the pointer stored in address. newserv needs this to serve DOL files to GameCube clients.
+
+entry_ptr:
+reloc0:
+  .offsetof start
+
+
+
+.versions SH4
+
+start:
+  mova   r0, [address]
+  mov.l  r0, [r0]
+  rets
+  mov.l  r0, [r0]
+
+  .align 4
+address:
+  .data  0
+
+
+
+.versions PPC
+
+start:
+  mflr   r12
+  bl     read
+address:
+  .zero
+read:
+  mflr   r3
+  lwz    r3, [r3]
+  lwz    r3, [r3]
+  mtlr   r12
+  blr
+
+
+
+.versions X86
+
+start:
+  call   resume
+address:
+  .data  0
+resume:
+  pop    eax
+  mov    eax, [eax]
+  mov    eax, [eax]
+  ret
@@ -1,13 +0,0 @@
-entry_ptr:
-reloc0:
-  .offsetof start
-
-start:
-  mova   r0, [address]
-  mov.l  r0, [r0]
-  rets
-  mov.l  r0, [r0]
-
-  .align 4
-address:
-  .data  0
@@ -1,18 +0,0 @@
-# This function is required for loading DOLs. If it's not present, newserv can't
-# serve DOL files to GameCube clients.
-
-entry_ptr:
-reloc0:
-  .offsetof start
-
-start:
-  mflr   r12
-  bl     read
-address:
-  .zero
-read:
-  mflr   r3
-  lwz    r3, [r3]
-  lwz    r3, [r3]
-  mtlr   r12
-  blr
@@ -1,13 +0,0 @@
-entry_ptr:
-reloc0:
-  .offsetof start
-
-start:
-  call   resume
-address:
-  .data  0
-resume:
-  pop    eax
-  mov    eax, [eax]
-  mov    eax, [eax]
-  ret
@@ -1,3 +1,5 @@
+.versions X86
+
 entry_ptr:
 reloc0:
  .offsetof start
@@ -1,5 +1,6 @@
-# This function is required for loading DOLs. If it's not present, newserv can't
-# serve DOL files to GameCube clients.
+# This function is required for loading DOLs. If it's not present, newserv can't serve DOL files to GameCube clients.
+
+.versions PPC

 entry_ptr:
 reloc0:
@@ -16,12 +17,11 @@ dol_base_ptr:
  .zero
 get_current_addr:
  mflr    r31
-  # TODO: It'd be nice to be able to use an expression for the immediate value
-  # here - something like (dol_base_ptr - start), for example
+  # TODO: It'd be nice to be able to use an expression for the immediate value here - something like (dol_base_ptr -
+  # start), for example
  subi    r31, r31, 0x10  # r31 = base of data to copy to low memory (start label)

-  # If this code is not running from low memory (80001800-80003000), then copy
-  # it there and branch to it
+  # If this code is not running from low memory (80001800-80003000), then copy it there and branch to it
  lis     r3, 0x8000
  ori     r3, r3, 0x3000
  cmp     r31, r3
@@ -53,9 +53,8 @@ copy_code_to_low_memory__again:
 run_dol:
  lwz     r30, [r31 + 0x10]  # r30 = data base ptr

-  # Decompress the file first. If the compressed size is zero, then skip this
-  # step (the file is not compressed). The header consists of two fields:
-  # compressed size followed by decompressed size.
+  # Decompress the file first. If the compressed size is zero, then skip this step (the file is not compressed). The
+  # header consists of two fields: compressed size followed by decompressed size.
  lwz     r6, [r30]
  cmplwi  r6, 0
  beq     run_dol__not_compressed
@@ -70,9 +69,8 @@ run_dol__not_compressed:
  addi    r30, r30, 8

 run_dol__decompressed:
-  # DOL files are very simple: they have up to 7 text sections, up to 11 data
-  # sections, and a BSS section and an entrypoint. No imports or other fancy
-  # things to do - we just have to move a bunch of bytes around.
+  # DOL files are very simple: they have up to 7 text sections, up to 11 data sections, and a BSS section and an
+  # entrypoint. No imports or other fancy things to do - we just have to move a bunch of bytes around.
  mr      r29, r30  # r29 = DOL header iterator
  addi    r28, r29, 0x48  # r28 = DOL header iterator end value

@@ -87,16 +85,15 @@ run_dol__move_section:
  subi    r4, r4, 1
  add     r5, r4, r5  # r5 = source end pointer
 run_dol__move_section_data__again:
-  # TODO: We probably should implement memmove-like semantics here, in case the
-  # DOL loads at an unusually late address. This is probably very rare.
+  # TODO: We probably should implement memmove-like semantics here, in case the DOL loads at an unusually late address.
+  # This is probably very rare.
  lbzu    r0, [r4 + 1]
  stbu    [r3 + 1], r0
  cmp     r4, r5
  bne     run_dol__move_section_data__again

-  # Flush the data cache and invalidate the instruction cache after copying the
-  # section data. Technically we don't have to do this for data sections, but
-  # I'm lazy and it doesn't take too long.
+  # Flush the data cache and invalidate the instruction cache after copying the section data. Technically we don't have
+  # to do this for data sections, but I'm lazy and it doesn't take too long.
  lwz     r3, [r29 + 0x48]  # r3 = dest address of section data
  lwz     r4, [r29 + 0x90]  # r4 = size of section data
  bl      flush_cached_code_writes
@@ -1,20 +1,21 @@
-# This function returns the game version, with values more specific than can be
-# detected by the sub_version field in the various login commands (e.g. 93/9D).
+# This function returns the game version, with values more specific than can be detected by the sub_version field in
+# the various login commands (e.g. 9D/9E). We call this value specific_version in the codebase.

-# The returned value has the format SSPPRRVV, where:
-#   S = version (31 = PSOv1, 32 = PSOv2)
-#   G = game (4F = PSO)
-#   R = region (45 = E, 4A = J, 50 = P)
-#   V = minor version (31 = NTE, 32 = 11/2000, 33 = 12/2000, 24 = 01/2001,
-#       35 = 08/2001, 46 = not a prototype)
-# This results in a 4-character ASCII-printable version code which encodes all
-# of the above information. This value is called specific_version in the places
-# where it's used by the server.
+# The returned value has the format SSGGRRVV, where:
+#   S = major version (31 = PSOv1, 32 = PSOv2, 33 = PSOv3, 34 = Xbox, 35 = BB)
+#   G = game (4F (O) = non-Ep3, 53 (S) = Ep3)
+#   R = region (45 (E), 4A (J), or 50 (P))
+#   V = minor version (meaning varies by major version)
+# This results in a 4-character ASCII-printable version code which encodes all of the above information.

 entry_ptr:
 reloc0:
  .offsetof start

+
+
+.versions SH4
+
 start:
  mova    r0, [data_start]
  mov     r1, r0
@@ -59,3 +60,46 @@ data_start:
  .data   0x8C2E7CE0  # v2 EU
  .data   0x324F5046  # 2OPF
  .data   0x00000000  # end sentinel
+
+
+
+.versions PPC
+
+start:
+  lis    r3, 0x8000  # r3 = 0x80000000
+  lwz    r4, [r3]  # r4 = word at 80000000 (treated as the 4-byte game ID by the checks below)
+
+  # For Trial Editions, set the V field to 54; for other versions, set it to 0x30 | disc_version
+  rlwinm r0, r4, 8, 24, 31  # r0 = high byte of the game ID
+  cmplwi r0, 0x47  # Check if high byte of game ID is 'G'
+  beq    not_trial
+  cmplwi r0, 0x44  # Check if high byte of game ID is 'D'
+  beq    is_nte
+  li     r3, 0  # High byte is neither 'G' nor 'D': return 0
+  blr
+is_nte:
+  li     r3, 0x0054  # V = 54 ('T')
+  b      end_trial_check
+not_trial:
+  lbz    r3, [r3 + 7]  # r3 = byte at 80000007 (the disc_version mentioned above)
+  ori    r3, r3, 0x0030  # V = 0x30 | disc_version
+end_trial_check:
+  oris   r3, r3, 0x3300  # Set high byte ('3')
+  rlwimi r3, r4, 8, 8, 23  # Set middle two bytes to last two bytes of game ID
+  blr
+
+
+
+.versions X86
+
+start:
+  .include GetVersionInfoXB
+
+  test     eax, eax  # NOTE(review): GetVersionInfoXB appears to leave a struct pointer (or null) in eax - confirm
+  jz       version_not_found
+  mov      eax, [eax]  # Return the first dword of the struct that eax points to
+  ret
+
+version_not_found:
+  mov      eax, 0x344F0000  # Return "4O" with the R and V bytes left as zero
+  ret
@@ -1,39 +0,0 @@
-# This function returns the game version, with values more specific than can be
-# detected by the sub_version field in the various login commands (e.g. 9D/9E).
-
-# The returned value has the format SSGGRRVV, where:
-#   S = 33 (which represents PSO GC)
-#   G = game (4F (O) = Ep1&2, 53 (S) = Ep3)
-#   R = region (45 (E), 4A (J), or 50 (P))
-#   V = minor version | 30 (30 = 1.0, 31 = 1.1, 32 = 1.2, etc.), or 54 for NTE
-# This results in a 4-character ASCII-printable version code which encodes all
-# of the above information. This value is called specific_version in the places
-# where it's used by the server.
-
-entry_ptr:
-reloc0:
-  .offsetof start
-
-start:
-  lis    r3, 0x8000
-  lwz    r4, [r3]
-
-  # For Trial Editions, set the V field to 54; for other versions, set it to
-  # 0x30 | disc_version
-  rlwinm r0, r4, 8, 24, 31
-  cmplwi r0, 0x47  # Check if high byte of game ID is 'G'
-  beq    not_trial
-  cmplwi r0, 0x44  # Check if high byte of game ID is 'D'
-  beq    is_nte
-  li     r3, 0
-  blr
-is_nte:
-  li     r3, 0x0054
-  b      end_trial_check
-not_trial:
-  lbz    r3, [r3 + 7]
-  ori    r3, r3, 0x0030
-end_trial_check:
-  oris   r3, r3, 0x3300  # Set high byte ('3')
-  rlwimi r3, r4, 8, 8, 23  # Set middle two bytes to last two bytes of game ID
-  blr
@@ -1,26 +0,0 @@
-# This function returns the game version, with values more specific than can be
-# detected by the sub_version field in the various login commands (e.g. 9D/9E).
-
-# The returned value has the format SSSSRRVV, where:
-#   S = 344F (which represents PSO Xbox)
-#   R = region (45 (E), 4A (J), or 50 (P))
-#   V = version (42 (B) for beta, 44 (D) for disc, 55 (U) for title update)
-# This results in a 4-character ASCII-printable version code which encodes all
-# of the above information. This value is called specific_version in the places
-# where it's used by the server.
-
-entry_ptr:
-reloc0:
-  .offsetof start
-
-start:
-  .include GetVersionInfoXB
-
-  test     eax, eax
-  jz       version_not_found
-  mov      eax, [eax]
-  ret
-
-version_not_found:
-  mov      eax, 0x344F0000
-  ret
@@ -1,42 +0,0 @@
-# This file defines the following function:
-#   write_address_of_code(
-#     const void* patch_code,
-#     size_t patch_code_size,
-#     void** ptr_addr);
-# This function allocates memory for patch_code, copies patch_code to that
-# memory, then writes the address of the allocated code at the specified
-# pointer. The allocated memory is never freed.
-# This function pops its arguments off the stack before returning.
-
-write_call_to_code:
-  # [esp + 0x04] = code ptr
-  # [esp + 0x08] = code size
-  # [esp + 0x0C] = ptr addr
-
-  # Allocate memory for the copied code
-  mov       ecx, [0x00AAB404]
-  push      dword [esp + 0x08]
-  mov       eax, 0x007A8A38
-  call      eax  # malloc7
-  test      eax, eax
-  je        done
-
-  # Copy the code to the newly-allocated memory
-  # eax = dest pointer (from malloc7 call above)
-  mov       edx, [esp + 0x04]  # edx = source pointer
-  mov       ecx, [esp + 0x08]  # ecx = source size
-  push      ebx
-memcpy_again:
-  dec       ecx
-  mov       bl, [edx + ecx]  # Copy one byte from source to dest
-  mov       [eax + ecx], bl
-  test      ecx, ecx
-  jne       memcpy_again
-  pop       ebx
-
-  # Write the address
-  mov       ecx, [esp + 0x0C]
-  mov       [ecx], eax
-
-done:
-  ret       0x0C
@@ -3,10 +3,11 @@
 #     const void* patch_code,
 #     size_t patch_code_size,
 #     void** ptr_addr);
-# This function allocates memory for patch_code, copies patch_code to that
-# memory, then writes the address of the allocated code at the specified
-# pointer. The allocated memory is never freed.
-# This function pops its arguments off the stack before returning.
+# This function allocates memory for patch_code, copies patch_code to that memory, then writes the address of the
+# allocated code at the specified pointer. The allocated memory is never freed. This function pops its arguments off
+# the stack before returning.
+
+.versions 59NJ 59NL

 write_call_to_code:
  # [esp + 0x04] = code ptr
@@ -14,9 +15,9 @@ write_call_to_code:
  # [esp + 0x0C] = ptr addr

  # Allocate memory for the copied code
-  mov       ecx, [0x00AA8F84]
+  mov       ecx, [<VERS 0x00AA8F84 0x00AAB404>]
  push      dword [esp + 0x08]
-  mov       eax, 0x007A984C
+  mov       eax, <VERS 0x007A984C 0x007A8A38>
  call      eax  # malloc7
  test      eax, eax
  je        done
@@ -1,76 +0,0 @@
-# This file defines the following function:
-#   write_call_to_code(
-#     const void* patch_code,
-#     size_t patch_code_size,
-#     void* call_opcode_address,
-#     ssize_t call_opcode_bytes);
-# This function allocates memory for patch_code, copies patch_code to that
-# memory, then writes a call or jmp opcode to call_opcode_address that calls
-# the code in the allocated memory region. The allocated memory is never freed.
-# call_opcode_bytes specifies how many bytes at the callsite should be
-# overwritten. This value must be at least 5; the first 5 bytes are overwritten
-# with the call/jmp opcode itself; the rest are overwritten with nop opcodes.
-# If call_opcode_bytes is positive, a call opcode is written; if it's negative,
-# a jmp opcode is written.
-# This function pops its arguments off the stack before returning.
-
-write_call_to_code:
-  # [esp + 0x04] = code ptr
-  # [esp + 0x08] = code size
-  # [esp + 0x0C] = jump callsite
-  # [esp + 0x10] = callsite size (if zero, write the address instead of a call)
-
-  # Allocate memory for the copied code
-  mov       ecx, [0x00AA8F84]
-  push      dword [esp + 0x08]
-  mov       eax, 0x007A984C
-  call      eax  # malloc7
-  test      eax, eax
-  je        done
-
-  # Copy the code to the newly-allocated memory
-  # eax = dest pointer (from malloc7 call above)
-  mov       edx, [esp + 0x04]  # edx = source pointer
-  mov       ecx, [esp + 0x08]  # ecx = source size
-  push      ebx
-memcpy_again:
-  dec       ecx
-  mov       bl, [edx + ecx]  # Copy one byte from source to dest
-  mov       [eax + ecx], bl
-  test      ecx, ecx
-  jne       memcpy_again
-  pop       ebx
-
-  mov       edx, [esp + 0x0C]  # edx = jump callsite
-
-  # If the callsite size is zero, just write the address directly
-  cmp       dword [esp + 0x10], 0
-  jne       write_call_or_jmp
-  mov       [edx], eax
-  jmp       done
-
-  # Write the call or jmp opcode
-write_call_or_jmp:
-  lea       ecx, [eax - 5]
-  sub       ecx, edx  # ecx = (dest code addr) - (jump callsite) - 5
-  cmp       dword [esp + 0x10], 0
-  setl      al
-  or        al, 0xE8
-  mov       [edx], al  # Write E8 (call), or E9 (jmp) if size was negative
-  mov       [edx + 1], ecx  # Write delta
-
-  # Write as many nops after the call opcode as necessary
-  mov       ecx, 5
-  mov       eax, [esp + 0x10]
-  cmp       eax, 0
-  jge       write_nop_again
-  neg       eax
-write_nop_again:
-  cmp       ecx, eax
-  jge       done
-  mov       byte [edx + ecx], 0x90
-  inc       ecx
-  jmp       write_nop_again
-
-done:
-  ret       0x10
@@ -1,17 +1,16 @@
+.versions 59NJ 59NL
+
 # This file defines the following function:
 #   write_call_to_code(
 #     const void* patch_code,
 #     size_t patch_code_size,
 #     void* call_opcode_address,
 #     ssize_t call_opcode_bytes);
-# This function allocates memory for patch_code, copies patch_code to that
-# memory, then writes a call or jmp opcode to call_opcode_address that calls
-# the code in the allocated memory region. The allocated memory is never freed.
-# call_opcode_bytes specifies how many bytes at the callsite should be
-# overwritten. This value must be at least 5; the first 5 bytes are overwritten
-# with the call/jmp opcode itself; the rest are overwritten with nop opcodes.
-# If call_opcode_bytes is positive, a call opcode is written; if it's negative,
-# a jmp opcode is written.
+# This function allocates memory for patch_code, copies patch_code to that memory, then writes a call or jmp opcode to
+# call_opcode_address that calls the code in the allocated memory region. The allocated memory is never freed.
+# call_opcode_bytes specifies how many bytes at the callsite should be overwritten. This value must be at least 5; the
+# first 5 bytes are overwritten with the call/jmp opcode itself; the rest are overwritten with nop opcodes. If
+# call_opcode_bytes is positive, a call opcode is written; if it's negative, a jmp opcode is written.
 # This function pops its arguments off the stack before returning.

 write_call_to_code:
@@ -21,9 +20,9 @@ write_call_to_code:
  # [esp + 0x10] = callsite size (if zero, write the address instead of a call)

  # Allocate memory for the copied code
-  mov       ecx, [0x00AAB404]
+  mov       ecx, [<VERS 0x00AA8F84 0x00AAB404>]
  push      dword [esp + 0x08]
-  mov       eax, 0x007A8A38
+  mov       eax, <VERS 0x007A984C 0x007A8A38>
  call      eax  # malloc7
  test      eax, eax
  je        done
@@ -1,83 +0,0 @@
-# This file defines the following function:
-#   void [/std] write_call_to_code(
-#     const void* patch_code @ [esp + 0x04],
-#     size_t patch_code_size @ [esp + 0x08],
-#     size_t call_count @ [esp + 0x0C],
-#     void* call_opcode_address @ [esp + 0x10],
-#     ssize_t call_opcode_bytes @ [esp + 0x14],
-#     ...);
-# This function allocates memory for patch_code, copies patch_code to that
-# memory, then writes a call or jmp opcode to call_opcode_address that calls
-# the code in the allocated memory region. The allocated memory is never freed.
-# call_opcode_bytes specifies how many bytes at the callsite should be
-# overwritten. This value must be at least 5; the first 5 bytes are overwritten
-# with the call/jmp opcode itself; the rest are overwritten with nop opcodes.
-# This function pops its arguments off the stack before returning (including
-# all the varargs).
-
-write_call_to_code:
-  # [esp + 0x04] = code ptr
-  # [esp + 0x08] = code size
-  # [esp + 0x0C] = callsite count
-  # [esp + 0x10] = callsite address
-  # [esp + 0x14] = callsite size
-  # ... (further callsite address/size pairs)
-
-  # Allocate memory for the copied code
-  mov       ecx, [0x00AA8F84]
-  push      dword [esp + 0x08]
-  mov       eax, 0x007A984C
-  call      eax  # malloc7
-  test      eax, eax
-  je        done
-
-  # Copy the code to the newly-allocated memory
-  # eax = dest pointer (from malloc7 call above)
-  mov       edx, [esp + 0x04]  # edx = source pointer
-  mov       ecx, [esp + 0x08]  # ecx = source size
-  push      ebx
-memcpy_again:
-  dec       ecx
-  mov       bl, [edx + ecx]  # Copy one byte from source to dest
-  mov       [eax + ecx], bl
-  test      ecx, ecx
-  jne       memcpy_again
-  pop       ebx
-
-  # Write the call opcodes
-  xchg      ebx, [esp + 0x0C]  # Save ebx; get callsite count
-  mov       [esp - 0x08], esi
-  mov       [esp - 0x0C], eax
-  mov       esi, 0x10  # Stack offset of first callsite pair
-
-next_callsite:
-  mov       edx, [esp + esi]  # edx = jump callsite
-  lea       ecx, [eax - 5]
-  sub       ecx, edx  # ecx = (dest code addr) - (jump callsite) - 5
-  mov       byte [edx], 0xE8
-  mov       [edx + 1], ecx  # Write E8 (call) followed by delta
-
-  # Write as many nops after the call opcode as necessary
-  mov       ecx, 5
-  mov       eax, [esp + esi + 4]
-write_nop_again:
-  cmp       ecx, eax
-  jge       this_callsite_done
-  mov       byte [edx + ecx], 0x90
-  inc       ecx
-  jmp       write_nop_again
-
-this_callsite_done:
-  mov       eax, [esp - 0x0C]
-  add       esi, 8
-  dec       ebx
-  jnz       next_callsite
-
-  mov       ecx, esi
-  mov       ebx, [esp + 0x0C]
-  mov       esi, [esp - 0x08]
-
-done:
-  mov       eax, [esp]
-  add       esp, ecx
-  jmp       eax
@@ -1,83 +0,0 @@
-# This file defines the following function:
-#   void [/std] write_call_to_code(
-#     const void* patch_code @ [esp + 0x04],
-#     size_t patch_code_size @ [esp + 0x08],
-#     size_t call_count @ [esp + 0x0C],
-#     void* call_opcode_address @ [esp + 0x10],
-#     ssize_t call_opcode_bytes @ [esp + 0x14],
-#     ...);
-# This function allocates memory for patch_code, copies patch_code to that
-# memory, then writes a call or jmp opcode to call_opcode_address that calls
-# the code in the allocated memory region. The allocated memory is never freed.
-# call_opcode_bytes specifies how many bytes at the callsite should be
-# overwritten. This value must be at least 5; the first 5 bytes are overwritten
-# with the call/jmp opcode itself; the rest are overwritten with nop opcodes.
-# This function pops its arguments off the stack before returning (including
-# all the varargs).
-
-write_call_to_code:
-  # [esp + 0x04] = code ptr
-  # [esp + 0x08] = code size
-  # [esp + 0x0C] = callsite count
-  # [esp + 0x10] = callsite address
-  # [esp + 0x14] = callsite size
-  # ... (further callsite address/size pairs)
-
-  # Allocate memory for the copied code
-  mov       ecx, [0x00AAB404]
-  push      dword [esp + 0x08]
-  mov       eax, 0x007A8A38
-  call      eax  # malloc7
-  test      eax, eax
-  je        done
-
-  # Copy the code to the newly-allocated memory
-  # eax = dest pointer (from malloc7 call above)
-  mov       edx, [esp + 0x04]  # edx = source pointer
-  mov       ecx, [esp + 0x08]  # ecx = source size
-  push      ebx
-memcpy_again:
-  dec       ecx
-  mov       bl, [edx + ecx]  # Copy one byte from source to dest
-  mov       [eax + ecx], bl
-  test      ecx, ecx
-  jne       memcpy_again
-  pop       ebx
-
-  # Write the call opcodes
-  xchg      ebx, [esp + 0x0C]  # Save ebx; get callsite count
-  mov       [esp - 0x08], esi
-  mov       [esp - 0x0C], eax
-  mov       esi, 0x10  # Stack offset of first callsite pair
-
-next_callsite:
-  mov       edx, [esp + esi]  # edx = jump callsite
-  lea       ecx, [eax - 5]
-  sub       ecx, edx  # ecx = (dest code addr) - (jump callsite) - 5
-  mov       byte [edx], 0xE8
-  mov       [edx + 1], ecx  # Write E8 (call) followed by delta
-
-  # Write as many nops after the call opcode as necessary
-  mov       ecx, 5
-  mov       eax, [esp + esi + 4]
-write_nop_again:
-  cmp       ecx, eax
-  jge       this_callsite_done
-  mov       byte [edx + ecx], 0x90
-  inc       ecx
-  jmp       write_nop_again
-
-this_callsite_done:
-  mov       eax, [esp - 0x0C]
-  add       esi, 8
-  dec       ebx
-  jnz       next_callsite
-
-  mov       ecx, esi
-  mov       ebx, [esp + 0x0C]
-  mov       esi, [esp - 0x08]
-
-done:
-  mov       eax, [esp]
-  add       esp, ecx
-  jmp       eax
@@ -1,4 +1,91 @@
-# This function has the same signature as WriteCallToCodeMulti-59NL.
+# This file defines the following function:
+#   void [/std] write_call_to_code(
+#     const void* patch_code @ [esp + 0x04],
+#     size_t patch_code_size @ [esp + 0x08],
+#     size_t call_count @ [esp + 0x0C],
+#     void* call_opcode_address @ [esp + 0x10],
+#     ssize_t call_opcode_bytes @ [esp + 0x14],
+#     ...);
+# This function allocates memory for patch_code, copies patch_code to that memory, then writes a call opcode at each
+# call_opcode_address that calls the code in the allocated memory region. The allocated memory is never freed. Each
+# call_opcode_bytes specifies how many bytes at its callsite should be overwritten. This value must be at least 5; the
+# first 5 bytes are overwritten with the call opcode itself; the rest are overwritten with nop opcodes. This function
+# pops its arguments off the stack before returning (including all the varargs).
+
+
+
+.versions 59NJ 59NL
+
+write_call_to_code:
+  # Copies the given code into newly-allocated memory, then overwrites each callsite with a call to the copy. This
+  # function removes the return address and all of its arguments from the stack itself, then jumps back to the caller.
+  # [esp + 0x04] = code ptr
+  # [esp + 0x08] = code size (must be nonzero; the copy loop decrements the size before testing it)
+  # [esp + 0x0C] = callsite count (must be at least 1; the callsite loop decrements the count after each callsite)
+  # [esp + 0x10] = callsite address
+  # [esp + 0x14] = callsite size
+  # ... (further callsite address/size pairs)
+
+  # Allocate memory for the copied code
+  mov       ecx, [<VERS 0x00AA8F84 0x00AAB404>]
+  push      dword [esp + 0x08]
+  mov       eax, <VERS 0x007A984C 0x007A8A38>
+  call      eax  # malloc7
+
+  # Compute the number of stack bytes to release at done (return address, the three fixed arguments, and all of the
+  # callsite pairs) before checking the result. Previously, ecx was left undefined if the allocation failed, so the
+  # add esp, ecx at done corrupted the stack pointer in that case.
+  mov       ecx, [esp + 0x0C]  # ecx = callsite count
+  shl       ecx, 3
+  add       ecx, 0x10  # ecx = 0x10 + (8 * callsite count)
+  test      eax, eax
+  je        done
+
+  # Copy the code to the newly-allocated memory
+  # eax = dest pointer (from malloc7 call above)
+  mov       edx, [esp + 0x04]  # edx = source pointer
+  mov       ecx, [esp + 0x08]  # ecx = source size
+  push      ebx
+memcpy_again:
+  dec       ecx
+  mov       bl, [edx + ecx]  # Copy one byte from source to dest
+  mov       [eax + ecx], bl
+  test      ecx, ecx
+  jne       memcpy_again
+  pop       ebx
+
+  # Write the call opcodes
+  # NOTE(review): esi and eax are stashed below esp here; this assumes nothing writes below esp while this runs
+  xchg      ebx, [esp + 0x0C]  # Save ebx; get callsite count
+  mov       [esp - 0x08], esi
+  mov       [esp - 0x0C], eax
+  mov       esi, 0x10  # Stack offset of first callsite pair
+
+next_callsite:
+  mov       edx, [esp + esi]  # edx = jump callsite
+  lea       ecx, [eax - 5]
+  sub       ecx, edx  # ecx = (dest code addr) - (jump callsite) - 5
+  mov       byte [edx], 0xE8
+  mov       [edx + 1], ecx  # Write E8 (call) followed by delta
+
+  # Write as many nops after the call opcode as necessary
+  mov       ecx, 5
+  mov       eax, [esp + esi + 4]  # eax = callsite size
+write_nop_again:
+  cmp       ecx, eax
+  jge       this_callsite_done
+  mov       byte [edx + ecx], 0x90
+  inc       ecx
+  jmp       write_nop_again
+
+this_callsite_done:
+  mov       eax, [esp - 0x0C]  # eax = dest code addr again (it was used as the size above)
+  add       esi, 8
+  dec       ebx
+  jnz       next_callsite
+
+  mov       ecx, esi  # ecx = offset just past the last callsite pair = number of stack bytes to release
+  mov       ebx, [esp + 0x0C]  # Restore ebx from the slot it was swapped into above
+  mov       esi, [esp - 0x08]
+
+done:
+  # ecx = number of stack bytes to release (set on both the success and allocation-failure paths)
+  mov       eax, [esp]  # eax = return address
+  add       esp, ecx
+  jmp       eax
+
+
+
+.versions 4OJB 4OJD 4OJU 4OED 4OEU 4OPD 4OPU

 write_call_to_code:
  .include  GetVersionInfoXB
@@ -0,0 +1,151 @@
+.versions SH4
+
+  mova    r0, [first_patch_header]
+  mov     r7, r0       # r7 = read ptr
+  xor     r3, r3
+  dec     r3
+  shl     r3, 2        # r3 = 0xFFFFFFFC (mask for aligning r7)
+apply_patch:
+  add     r7, 3
+  and     r7, r3       # r7 = (r7 + 3) & (~3) (align to 4-byte boundary)
+  mov.l   r4, [r7]+    # r4 = dest addr
+  mov.l   r5, [r7]+    # r5 = size; r7 now points to this patch's data bytes
+  add     r5, r4       # r5 = dest end ptr (dest addr + size)
+  cmpeq   r4, r5       # if (size == 0) return
+  bt      done
+
+again:
+  cmpeq   r4, r5
+  bt      apply_patch  # if (r4 == r5) done with the patch; go to next header
+  mov.b   r0, [r7]+
+  mov.b   [r4], r0     # *(r4) = *(r7++);
+  bs      again        # r4++; continue
+  add     r4, 1        # presumably runs in the branch delay slot of the bs above - TODO confirm
+
+done:
+  rets
+  nop
+
+  .align 4
+first_patch_header:
+
+
+
+.versions PPC
+
+  mflr    r8  # r8 = caller's LR (restored below before returning)
+  b       get_patch_data_ptr
+get_patch_data_ptr_ret:
+  mflr    r7  # r7 = patch header
+apply_patch:
+  addi    r4, r7, 8  # r4 = start of patch data
+  lwz     r3, [r4 - 8]  # r3 = patch dest address
+  lwz     r5, [r4 - 4]  # r5 = patch data size
+  or      r0, r3, r5
+  cmplwi  r0, 0  # A header whose dest address and size are both zero ends the list
+  mtlr    r8
+  beqlr
+  add     r7, r4, r5  # r7 = next patch header
+  .include CopyCode
+  b       apply_patch  # NOTE(review): relies on CopyCode using r3/r4/r5 as dest/src/size and preserving r7/r8 - confirm
+
+get_patch_data_ptr:
+  bl      get_patch_data_ptr_ret  # Sets LR to the address after this opcode (first_patch_header)
+
+first_patch_header:
+
+
+
+.versions 4OJB 4OJD 4OJU 4OED 4OEU 4OPD 4OPU
+
+start:
+  .include GetVersionInfoXB
+  test     eax, eax
+  jnz      can_patch
+  ret                            # eax is zero here; return it without patching anything
+
+can_patch:
+  push     esi
+  push     edi
+  push     ebx
+  mov      edi, eax              # edi = ptr to version info struct
+  jmp      get_patch_data_ptr
+get_patch_data_ptr_ret:
+  pop      ebx                   # ebx = patch header
+
+apply_next_patch:
+  cmp      dword [ebx + 4], 0    # a header with size zero ends the list
+  jne      copy_code_and_apply_again
+  pop      ebx
+  pop      edi
+  pop      esi
+  mov      eax, 1                # return 1 after all patches have been applied
+  ret
+
+copy_code_and_apply_again:
+  push     dword [ebx]           # dest addr
+  mov      ecx, [edi + 0x0C]
+  call     [ecx]                 # MmQueryAddressProtect
+  mov      esi, eax              # esi = prev protection flags
+
+  push     4                     # new protection flags
+  push     dword [ebx + 4]       # size
+  push     dword [ebx]           # base address
+  mov      ecx, [edi + 0x08]
+  call     [ecx]                 # MmSetAddressProtect
+
+  xor      ecx, ecx              # ecx = offset
+  mov      edx, [ebx]            # edx = dest addr
+copy_next_byte:
+  mov      al, [ebx + ecx + 8]   # copy one byte to dest
+  mov      [edx + ecx], al
+  inc      ecx                   # offset++
+  cmp      [ebx + 4], ecx        # check if all bytes have been copied
+  jne      copy_next_byte
+
+  push     esi                   # new protection flags
+  push     dword [ebx + 4]       # size
+  push     dword [ebx]           # base address
+  lea      ebx, [ebx + ecx + 8]  # advance to next block
+  mov      ecx, [edi + 0x08]
+  call     [ecx]                 # MmSetAddressProtect
+  jmp      apply_next_patch
+
+get_patch_data_ptr:
+  call     get_patch_data_ptr_ret  # pushes the address of first_patch_header; it is popped into ebx above
+
+first_patch_header:
+
+
+
+.versions 2OJW 2OJZ 59NJ 59NL
+
+start:
+  push    ebx                   # save ebx; it is restored before returning
+  jmp     get_patch_data_ptr
+get_patch_data_ptr_ret:
+  pop     ebx                   # ebx = patch header
+
+apply_next_patch:
+  cmp     dword [ebx + 4], 0    # a header with size zero ends the list
+  jne     copy_code_and_apply_again
+  pop     ebx
+  mov     eax, 1                # return 1 after all patches have been applied
+  ret
+
+copy_code_and_apply_again:
+  xor     ecx, ecx              # ecx = offset
+  mov     edx, [ebx]            # edx = dest addr
+copy_next_byte:
+  mov     al, [ebx + ecx + 8]   # copy one byte to dest
+  mov     [edx + ecx], al
+  inc     ecx                   # offset++
+  cmp     [ebx + 4], ecx        # check if all bytes have been copied
+  jne     copy_next_byte
+
+  lea     ebx, [ebx + ecx + 8]  # advance to next block
+  jmp     apply_next_patch
+
+get_patch_data_ptr:
+  call    get_patch_data_ptr_ret  # pushes the address of first_patch_header; it is popped into ebx above
+first_patch_header:
@@ -1,29 +0,0 @@
-start:
-  push    ebx
-  jmp     get_patch_data_ptr
-get_patch_data_ptr_ret:
-  pop     ebx                   # ebx = patch header
-
-apply_next_patch:
-  cmp     dword [ebx + 4], 0
-  jne     copy_code_and_apply_again
-  pop     ebx
-  mov     eax, 1
-  ret
-
-copy_code_and_apply_again:
-  xor     ecx, ecx              # ecx = offset
-  mov     edx, [ebx]            # edx = dest addr
-copy_next_byte:
-  mov     al, [ebx + ecx + 8]   # copy one byte to dest
-  mov     [edx + ecx], al
-  inc     ecx                   # offset++
-  cmp     [ebx + 4], ecx        # check if all bytes have been copied
-  jne     copy_next_byte
-
-  lea     ebx, [ebx + ecx + 8]  # advance to next block
-  jmp     apply_next_patch
-
-get_patch_data_ptr:
-  call    get_patch_data_ptr_ret
-first_patch_header:
@@ -1,28 +0,0 @@
-  mova    r0, [first_patch_header]
-  mov     r7, r0       # r7 = read ptr
-  xor     r3, r3
-  dec     r3
-  shl     r3, 2        # r3 = 0xFFFFFFFC (mask for aligning r7)
-apply_patch:
-  add     r7, 3
-  and     r7, r3       # r7 = (r7 + 3) & (~3) (align to 4-byte boundary)
-  mov.l   r4, [r7]+    # r4 = dest addr
-  mov.l   r5, [r7]+
-  add     r5, r4       # r5 = dest end ptr (dest addr + size)
-  cmpeq   r4, r5       # if (size == 0) return
-  bt      done
-
-again:
-  cmpeq   r4, r5
-  bt      apply_patch  # if (r4 == r5) done with the patch; go to next header
-  mov.b   r0, [r7]+
-  mov.b   [r4], r0     # *(r4) = *(r7++);
-  bs      again        # r4++; continue
-  add     r4, 1
-
-done:
-  rets
-  nop
-
-  .align 4
-first_patch_header:
@@ -1,20 +0,0 @@
-  mflr    r8
-  b       get_patch_data_ptr
-get_patch_data_ptr_ret:
-  mflr    r7  # r7 = patch header
-apply_patch:
-  addi    r4, r7, 8  # r4 = start of patch data
-  lwz     r3, [r4 - 8]  # r3 = patch dest address
-  lwz     r5, [r4 - 4]  # r5 = patch data size
-  or      r0, r3, r5
-  cmplwi  r0, 0
-  mtlr    r8
-  beqlr
-  add     r7, r4, r5  # r7 = next patch header
-  .include CopyCode
-  b       apply_patch
-
-get_patch_data_ptr:
-  bl      get_patch_data_ptr_ret
-
-first_patch_header:
@@ -1,57 +0,0 @@
-start:
-  .include GetVersionInfoXB
-  test     eax, eax
-  jnz      can_patch
-  ret
-
-can_patch:
-  push     esi
-  push     edi
-  push     ebx
-  mov      edi, eax              # edi = ptr to version info struct
-  jmp      get_patch_data_ptr
-get_patch_data_ptr_ret:
-  pop      ebx                   # ebx = patch header
-
-apply_next_patch:
-  cmp      dword [ebx + 4], 0
-  jne      copy_code_and_apply_again
-  pop      ebx
-  pop      edi
-  pop      esi
-  mov      eax, 1
-  ret
-
-copy_code_and_apply_again:
-  push     dword [ebx]           # dest addr
-  mov      ecx, [edi + 0x0C]
-  call     [ecx]                 # MmQueryAddressProtect
-  mov      esi, eax              # esi = prev protection flags
-
-  push     4                     # new protection flags
-  push     dword [ebx + 4]       # size
-  push     dword [ebx]           # base address
-  mov      ecx, [edi + 0x08]
-  call     [ecx]                 # MmSetAddressProtect
-
-  xor      ecx, ecx              # ecx = offset
-  mov      edx, [ebx]            # edx = dest addr
-copy_next_byte:
-  mov      al, [ebx + ecx + 8]   # copy one byte to dest
-  mov      [edx + ecx], al
-  inc      ecx                   # offset++
-  cmp      [ebx + 4], ecx        # check if all bytes have been copied
-  jne      copy_next_byte
-
-  push     esi                   # new protection flags
-  push     dword [ebx + 4]       # size
-  push     dword [ebx]           # base address
-  lea      ebx, [ebx + ecx + 8]  # advance to next block
-  mov      ecx, [edi + 0x08]
-  call     [ecx]                 # MmSetAddressProtect
-  jmp      apply_next_patch
-
-get_patch_data_ptr:
-  call     get_patch_data_ptr_ret
-
-first_patch_header:
@@ -0,0 +1,193 @@
+# This function is required for loading DOLs. If it's not present, newserv can't serve DOL files to GameCube clients.
+
+# This is also the file I've chosen to document how to write code for newserv's functions subsystem. Client functions
+# are assembly snippets written in the native language of the client, which can be sent to the client with the B2
+# command. This is done at login time if the server administrator has enabled automatic patches in config.json or if
+# the client has enabled certain patches in the Patches menu. Client functions can also be sent at any time with the
+# $patch chat command, if they include .meta visibility (see below).
+
+# This file is a general function (it does not appear in the Patches menu). General functions are used to implement
+# various server operations; this one is used to write arbitrary data to the client's memory space. For example, to use
+# this function to write the bytes 38 00 00 05 to the address 8010521C, send_function_call could be called like this:
+#   auto fn = s->client_functions->name_to_function.at("WriteMemoryGC");
+#   unordered_map<string, uint32_t> label_writes({{"dest_addr", 0x8010521C}, {"size", 4}});
+#   string suffix("\x38\x00\x00\x05", 4);
+#   send_function_call(
+#       c,  // Client to send function call to
+#       fn,  // The function's code
+#       label_writes,  // Variables to pass in to the function's code
+#       suffix);  // Data to append after the code (not all functions use this)
+# The meanings of label_writes and suffix are described in the comments below.
+
+# The .versions directive is required for all client functions that can be called by the server or the player. This
+# directive specifies which architectures or specific versions of the game the client function is compatible with. The
+# version tokens may be specific game versions (e.g. 3OE1, 59NL) or architectures (PPC, X86, or SH4); in the latter
+# case, the source applies to all versions which use that architecture. All lines after a .versions directive apply
+# only to the specified versions; this set of "active" versions can be changed with another .versions
+# directive later in the file, thereby splitting the file into different sections that apply to different sets of
+# versions. Any lines in the file that appear before the first .versions directive apply to all versions. After a
+# .versions directive, expressions like "VERS value1 value2 ..." (but with <> instead of "") can be used to specialize
+# the patch for each version. In a VERS expression, the number of values must match the number of versions given in the
+# .versions directive, and the values must appear in the same order. This function is implemented on all versions and
+# all architectures, so we specify all architectures here. Later on, the implementations for each architecture are
+# segregated via further .versions directives.
+.versions SH4 PPC X86
+
+# This directive controls where the function appears. The values are (note that the quotes are required):
+#   visibility="hidden" (default): this function does not appear in the Patches menu and cannot be used via $patch
+#   visibility="cheat": this function doesn't appear in the Patches menu but can be used via $patch if cheat mode is on
+#   visibility="chat": this function doesn't appear in the Patches menu but can be used via $patch
+#   visibility="menu": this function appears in the Patches menu but can't be used via $patch
+#   visibility="all": this function appears in the Patches menu and can be used via $patch
+# Note that if the client has $debug enabled, then all functions can be run via $patch regardless of this setting.
+# .meta visibility="menu"
+
+# This directive specifies what the function's internal name is. This is the name that can be used in config.json to
+# require the patch for all clients, and is also the name used with the $patch command. If not specified, the
+# function's internal name is the same as its filename without the .s extension.
+# .meta key="WriteMemory"
+
+# These directives tell newserv what to show to the player in the Patches menu. Neither of them is required; if the
+# name is omitted, the filename is used instead. These have no real effect for this function (since .meta visibility is
+# not used), so this is primarily for documentation purposes.
+.meta name="Write memory"
+.meta description="Writes data to any location in memory"
+
+# When used for debugging purposes, it may be useful to see the value returned by the client function when run via the
+# $patch chat command. This directive causes the server to tell you the return value in-game after running it.
+# .meta show_return_value
+
+# The entry_ptr label is required for all functions. It should generally point to a .offsetof directive that itself
+# points to the actual entrypoint.
+entry_ptr:
+# All labels starting with reloc signify that the following PPC word (big-endian 32-bit value) is to be relocated at
+# runtime. That is, when the code runs on the client, the PPC word will contain the actual memory address relative to
+# the running code instead of the offset that it holds at assembly time. The entry_ptr label should almost always have
+# a reloc label next to it.
+reloc0:
+  .offsetof start
+
+
+
+# Everything following this directive (until the next .versions directive) applies only to PowerPC architectures. When
+# this function is compiled for other architectures, this section will be ignored.
+.versions PPC
+
+start:
+  mflr    r12           # r12 = original LR (the bl instructions below overwrite LR)
+  bl      get_block_ptr
+  mr      r6, r3        # r6 = address of dest_addr label
+
+copy_block:
+  # If size is zero, there is nothing to copy or flush, so skip straight to the return sequence. This check is
+  # required because the loop below only tests for completion after storing a byte: with size == 0 the end pointer
+  # would already be behind the write pointer, and the loop would keep overwriting memory until the pointer wrapped
+  # around the entire address space.
+  lwz     r5, [r6 + 4]  # r5 = size (bytes remaining)
+  cmplwi  r5, 0
+  beq     copy_block__done
+  lwz     r3, [r6]      # r3 = dest ptr
+  subi    r3, r3, 1     # subtract 1 so we can use stbu
+  add     r5, r5, r3    # r5 = dest end ptr (last byte to be written)
+  addi    r4, r6, 7     # r4 = src ptr (starting at -1 so we can use lbzu)
+copy_block__again:
+  lbzu    r0, [r4 + 1]  # advance src ptr, then load one byte
+  stbu    [r3 + 1], r0  # advance dest ptr, then store the byte
+  cmp     r3, r5        # done when the byte just written was the last one
+  bne     copy_block__again
+
+  # Flush the data cache and clear the instruction cache at the written region
+  lwz     r3, [r6]      # r3 = dest ptr
+  lwz     r4, [r6 + 4]  # r4 = size
+  # A .include directive essentially pastes in the code from the referenced file. Here, we use the code from the file
+  # FlushCachedCode.inc.s. When compiling includes, newserv first looks in the same directory as the function's source,
+  # then looks in system/client-functions/System.
+  .include FlushCachedCode
+
+copy_block__done:
+  # Return the address after the last byte written (which is just dest_addr if size was zero). The value returned in
+  # r3 from the function is sent back to the server in a B3 command. newserv uses the return value during DOL loading
+  # to know which section of the DOL file to send next, or to send the RunDOL function if all sections have been
+  # loaded.
+  lwz     r3, [r6]      # r3 = dest ptr
+  lwz     r4, [r6 + 4]  # r4 = size
+  add     r3, r3, r4
+  mtlr    r12           # restore the original LR saved at start
+  blr
+
+get_block_ptr__ret:
+  mflr    r3            # r3 = address following the bl in get_block_ptr (the dest_addr label)
+  mtlr    r10           # return to get_block_ptr's caller, not to the data
+  blr
+get_block_ptr:
+  # We use a trick here to get the address of the dest_addr label: since bl puts the immediately-following address into
+  # the link register, we "call" get_block_ptr__ret and get the dest_addr pointer out of the LR. We then put r10 back
+  # into the LR so get_block_ptr__ret returns to the caller.
+  mflr    r10
+  bl      get_block_ptr__ret
+
+
+
+.versions SH4
+
+start:
+  mova    r0, [dest_addr]  # r0 = address of the dest_addr label
+  mov     r4, r0           # r4 = ptr to the dest_addr and size fields
+  mov.l   r0, [r4]         # r0 = dest ptr
+  mov.l   r5, [r4 + 4]     # r5 = size (bytes remaining)
+  add     r4, 8            # r4 = src ptr (first byte after the size field)
+again:
+  test    r5, r5
+  bt      done             # leave the loop once no bytes remain (also covers size == 0)
+  mov.b   r6, [r4]         # copy one byte from src to dest
+  mov.b   [r0], r6
+  add     r4, 1            # src ptr++
+  add     r0, 1            # dest ptr++
+  bs      again
+  add     r5, -1           # only decrement of r5; presumably runs in the delay slot of bs - TODO confirm
+done:
+  rets                     # at this point r0 = address after the last byte written
+  nop
+
+  .align  4
+
+
+
+.versions X86
+
+start:
+  jmp     get_block_ptr       # the call there pushes the address of dest_addr
+get_block_ptr_ret:
+  xchg    ebx, [esp]          # ebx = address of dest_addr; the caller's ebx takes its place on the stack
+  mov     eax, [ebx]          # eax = dest ptr
+  mov     ecx, [ebx + 4]      # ecx = size (bytes remaining)
+  add     ebx, 8              # ebx = src ptr (first byte after the size field)
+  test    ecx, ecx
+  jz      done                # nothing to copy
+
+copy_byte:
+  mov     dl, [ebx]           # copy one byte from src to dest
+  mov     [eax], dl
+  inc     ebx
+  inc     eax
+  dec     ecx
+  jnz     copy_byte           # repeat until no bytes remain
+
+done:
+  pop     ebx                 # restore the caller's ebx
+  ret                         # eax = address after the last byte written
+
+get_block_ptr:
+  call    get_block_ptr_ret
+
+
+
+# This last section applies to all architectures, so we re-enable all versions again. This directive also disables the
+# use of VERS tokens.
+.all_versions
+
+# These fields are filled in right before the command is sent to the client. Specifically, the label_writes argument to
+# send_function_call is responsible for this. The label_writes argument is a map of label name to value, and
+# send_function_call simply writes the given values after the given labels. This is a way to pass arbitrary arguments
+# to a function at call time.
+dest_addr:
+  .data   0
+size:
+  .data   0
+
+# Finally, we use the suffix argument to instruct send_function_call to append the data we want to write to memory
+# immediately after the assembled code. (The data_to_write label here is for documentation purposes only; the suffix
+# argument always appends data after the end of all the assembled code.)
+data_to_write:
@@ -1,33 +0,0 @@
-.meta name="Write memory"
-.meta description="Writes data to any location in memory"
-
-entry_ptr:
-reloc0:
-  .offsetof start
-
-start:
-  mova    r0, [dest_addr]  # r0 = address of the dest_addr label
-  mov     r4, r0           # r4 = ptr to the dest_addr and size fields
-  mov.l   r0, [r4]         # r0 = dest ptr
-  mov.l   r5, [r4 + 4]     # r5 = size (bytes remaining)
-  add     r4, 8            # r4 = src ptr (data_to_write)
-again:
-  test    r5, r5
-  bt      done             # leave the loop once no bytes remain (also covers size == 0)
-  mov.b   r6, [r4]         # copy one byte from src to dest
-  mov.b   [r0], r6
-  add     r4, 1            # src ptr++
-  add     r0, 1            # dest ptr++
-  bs      again
-  add     r5, -1           # only decrement of r5; presumably runs in the delay slot of bs - TODO confirm
-done:
-  rets                     # at this point r0 = address after the last byte written
-  nop
-
-  .align  4
-dest_addr:
-  .data   0
-size:
-  .data   0
-
-data_to_write:
@@ -1,117 +0,0 @@
-# This function is required for loading DOLs. If it's not present, newserv can't serve DOL files to GameCube clients.
-
-# This is also the file I've chosen to document how to write code for newserv's functions subsystem. There are three
-# kinds of functions: includes, patches, and general functions.
-
-# - General functions are not version-specific (usually) but are architecture-specific. This file, WriteMemoryGC, is a
-#   general function for all PowerPC versions of PSO, which means all GameCube versions. General functions are named
-#   like NAME.ARCH.s, where ARCH is sh4, ppc, or x86.
-
-# - Includes are snippets of code that are intended to be used as part of other general functions and patches. Includes
-#   are named like NAME.ARCH.inc.s, where ARCH has the same meaning as above. These can be used with the .include
-#   directive; there is an example of this in the code below.
-
-# - Patches are functions that are available to run upon client request. They can be made available in the Patches menu
-#   or via the $patch command. Patches should be named like PATCHNAME.VERS.patch.s, where VERS denotes which specific
-#   game version the patch is for. These version codes are listed in README.md, and directly correspond to values
-#   returned by the VersionDetect functions, also in this directory.
-
-# For example, to use this function to write the bytes 38 00 00 05 to the address 8010521C, send_function_call could be
-# called like this:
-#   auto fn = s->function_code_index->name_to_function.at("WriteMemoryGC");
-#   unordered_map<string, uint32_t> label_writes({{"dest_addr", 0x8010521C}, {"size", 4}});
-#   string suffix("\x38\x00\x00\x05", 4);
-#   send_function_call(
-#       c,  // Client to send function call to
-#       fn,  // The function's code
-#       label_writes,  // Variables to pass in to the function's code
-#       suffix);  // Data to append after the code (not all functions use this)
-# The meanings of label_writes and suffix are described in the comments below.
-
-# The .versions directive may be used in patches (but not in includes or general functions) and enables
-# parameterization. If .version is used, then the patch may later use expressions like <VERS value1 value2 ...> to
-# generate the same patch with different values for different game versions. In each <VERS> expression, the number of
-# values must match the number of versions given in the .versions directive.
-# .versions VRS1 VRS2 VRS3 ...
-
-# These directives tell newserv what to show to the player in the Patches menu. Neither of them is required; if the
-# name is omitted, the filename is used instead.
-.meta name="Write memory"
-.meta description="Writes data to any location in memory"
-
-# To hide a patch from the Patches menu (so it can only be used with the $patch command), this directive can be used.
-# This has no effect if used in includes or general functions.
-# .meta hide_from_patches_menu
-
-# When used for debugging purposes, it may be useful to see the value returned by the client function when run via the
-# $patch chat command. This directive causes the server to tell you the return value in-game after running it.
-# .meta show_return_value
-
-# The entry_ptr label is required for all functions. It should generally point to a .offsetof directive that itself
-# points to the actual entrypoint.
-entry_ptr:
-# All labels starting with reloc signify that the following PPC word (big-endian 32-bit value) is to be relocated at
-# runtime. That is, when the code runs on the client, the PPC word will contain the actual memory address relative to
-# the running code instead of the offset that it holds at assembly time. The entry_ptr label should almost always have
-# a reloc label next to it.
-reloc0:
-  .offsetof start
-
-start:
-  mflr    r12           # r12 = original LR (the bl instructions below overwrite LR)
-  bl      get_block_ptr
-  mr      r6, r3        # r6 = address of dest_addr label
-
-copy_block:
-  lwz     r3, [r6]      # r3 = dest ptr
-  subi    r3, r3, 1     # subtract 1 so we can use stbu
-  lwz     r5, [r6 + 4]  # r5 = size (bytes remaining)
-  add     r5, r5, r3    # r5 = dest end ptr (last byte to be written)
-  addi    r4, r6, 7     # r4 = src ptr (starting at -1 so we can use lbzu)
-copy_block__again:
-  lbzu    r0, [r4 + 1]  # advance src ptr, then load one byte
-  stbu    [r3 + 1], r0  # advance dest ptr, then store the byte
-  cmp     r3, r5        # done when the byte just written was the last one
-  bne     copy_block__again  # NOTE(review): tested only after a store, so size == 0 never matches on the first pass
-
-  # Flush the data cache and clear the instruction cache at the written region
-  lwz     r3, [r6]      # r3 = dest ptr
-  lwz     r4, [r6 + 4]  # r4 = size
-  # A .include directive essentially pastes in the code from the referenced file. Here, we use the code from the file
-  # FlushCachedCode.inc.s. When compiling includes, newserv first looks in the same directory as the function's source,
-  # then looks in system/client-functions/System.
-  .include FlushCachedCode
-
-  # Return the address after the last byte written. The value returned in r3 from the function is sent back to the
-  # server in a B3 command. newserv uses the return value during DOL loading to know which section of the DOL file to
-  # send next, or to send the RunDOL function if all sections have been loaded.
-  lwz     r3, [r6]      # r3 = dest ptr
-  lwz     r4, [r6 + 4]  # r4 = size
-  add     r3, r3, r4    # r3 = dest ptr + size
-  mtlr    r12           # restore the original LR saved at start
-  blr
-
-get_block_ptr__ret:
-  mflr    r3            # r3 = address following the bl in get_block_ptr (the dest_addr label)
-  mtlr    r10           # return to get_block_ptr's caller, not to the data
-  blr
-get_block_ptr:
-  # We use a trick here to get the address of the dest_addr label: since bl puts the immediately-following address into
-  # the link register, we "call" get_block_ptr__ret and get the dest_addr pointer out of the LR. We then put r10 back
-  # into the LR so get_block_ptr__ret returns to the caller.
-  mflr    r10           # r10 = return address of get_block_ptr's caller
-  bl      get_block_ptr__ret
-
-# These fields are filled in right before the command is sent to the client. Specifically, the label_writes argument to
-# send_function_call is responsible for this. The label_writes argument is a map of label name to value, and
-# send_function_call simply writes the given values after the given labels. This is a way to pass arbitrary arguments
-# to a function at call time.
-dest_addr:
-  .zero
-size:
-  .zero
-
-# Finally, we use the suffix argument to instruct send_function_call to append the data we want to write to memory
-# immediately after the assembled code. (The data_to_write label here is for documentation purposes only; the suffix
-# argument always appends data after the end of all the assembled code.)
-data_to_write:
@@ -1,37 +0,0 @@
-.meta name="Write memory"
-.meta description="Writes data to any location in memory"
-
-entry_ptr:
-reloc0:
-  .offsetof start
-
-start:
-  jmp     get_block_ptr       # the call there pushes the address of dest_addr
-get_block_ptr_ret:
-  xchg    ebx, [esp]          # ebx = address of dest_addr; the caller's ebx takes its place on the stack
-  mov     eax, [ebx]          # eax = dest ptr
-  mov     ecx, [ebx + 4]      # ecx = size (bytes remaining)
-  add     ebx, 8              # ebx = src ptr (data_to_write)
-
-again:
-  test    ecx, ecx
-  jz      done                # leave the loop once no bytes remain (also covers size == 0)
-  mov     dl, [ebx]           # copy one byte from src to dest
-  mov     [eax], dl
-  inc     ebx
-  inc     eax
-  dec     ecx
-  jmp     again
-
-done:
-  pop     ebx                 # restore the caller's ebx
-  ret                         # eax = address after the last byte written
-
-get_block_ptr:
-  call    get_block_ptr_ret   # never returns here; the pushed return address is the dest_addr pointer
-dest_addr:
-  .data   0
-size:
-  .data   0
-
-data_to_write: