rewrite client function compiler

This commit is contained in:
Martin Michelsen
2026-05-11 07:29:25 -07:00
parent 2f2a0bcf2b
commit e78e2ba887
174 changed files with 3931 additions and 5807 deletions
@@ -1,89 +0,0 @@
start:
mflr r7
# If this patch has already been run, then the opcode that led here will
# not be bctrl (4E800421). In that case, do nothing.
lis r3, 0x4E80
ori r3, r3, 0x0421
lwz r4, [r7 - 4]
cmp r3, r4
beq apply_patch
blr
apply_patch:
bl patch_end
.offsetof patch
.offsetof patch_end
patch:
mfctr r6
mr r3, r6
li r4, 0x7C00
.include FlushCachedCode-GC
mtctr r6
bctr
patch_end:
mflr r4
addi r4, r4, 8
lwz r3, [r4 - 8]
lwz r5, [r4 - 4]
sub r5, r5, r3
# At this point:
# r4 = address of patch label
# r5 = patch size in bytes
# r7 = saved LR
# Find a spot in the interrupt handlers with enough memory for the patch
lis r3, 0x8000
ori r3, r3, 0x0200
sub r3, r3, r5
check_location:
rlwinm r0, r5, 30, 2, 31
mtctr r0 # ctr = patch size in words
subi r8, r3, 4
check_location_next_word:
lwzu r0, [r8 + 4]
cmpwi r0, 0
beq check_location_word_ok
addi r3, r3, 0x0100
rlwinm r0, r3, 0, 16, 31
cmpwi r0, 0x1800
blt check_location
# No suitable location was found - return null
li r3, 0
mtlr r7
blr
check_location_word_ok:
bdnz check_location_next_word
location_ok:
mr r6, r3
# Now:
# r3 = destination location
# r4 = patch src data
# r5 = patch size in bytes
# r6 = destination location
# r7 = saved LR
.include CopyCode-GC
setup_branch:
# Replace the bctrl opcode that led to this call with a bl opcode that
# leads to the copied patch code
subi r3, r7, 4
sub r4, r6, r3
rlwinm r4, r4, 0, 6, 31
oris r4, r4, 0x4800
ori r4, r4, 0x0001
stw [r3], r4
dcbst r0, r3
sync
icbi r0, r3
isync
# Return the address that the patch was copied to
mr r3, r6
mtlr r7
blr
@@ -1,4 +0,0 @@
entry_ptr:
.data 0x8000C274
start:
.include CacheClearFix
@@ -0,0 +1,6 @@
.versions PPC
entry_ptr:
.data 0x8000C274
start:
.include CacheClearFix
@@ -1,5 +0,0 @@
entry_ptr:
reloc0:
.offsetof start
start:
.include CacheClearFix
@@ -0,0 +1,7 @@
.versions PPC
entry_ptr:
reloc0:
.offsetof start
start:
.include CacheClearFix
@@ -0,0 +1,61 @@
.versions PPC
# One-shot installer. It expects to have been reached through a bctrl opcode. It copies the small stub between the
# patch and patch_end labels to 0x800001BC, then rewrites that bctrl into a bl to the copied stub. From then on, every
# pass through the patched call site runs the stub, which flushes 0x7C00 bytes of cached code starting at the address
# in ctr and then branches to that address.
# Returns (r3): the address the stub was copied to. On the early-exit path (patch already applied), r3 is left holding
# the bctrl opcode constant (4E800421) instead.
# Uses r0 and r3-r7 (plus whatever the included CopyCode and FlushCachedCode fragments overwrite).
start:
mflr r7  # r7 = return address; the opcode that called us is at [r7 - 4]
# If this patch has already been run, then the opcode that led here will not be bctrl (4E800421). In that case, do
# nothing.
lis r3, 0x4E80
ori r3, r3, 0x0421
lwz r4, [r7 - 4]
cmp r3, r4
beq apply_patch
blr
apply_patch:
# This bl is only used to get the address of the two words below into LR; the original LR is preserved in r7
bl patch_end
.offsetof patch
.offsetof patch_end
patch:
# This is the stub that gets copied; it does not run during installation. The bl that replaces the original bctrl
# sets LR, so the final bctr here returns to the original caller just as the bctrl would have.
mfctr r6  # r6 = address the original bctrl was going to call
mr r3, r6
li r4, 0x7C00  # Number of bytes to flush, starting at the call target
.include FlushCachedCode
mtctr r6
bctr
patch_end:
mflr r4  # r4 = address of the first .offsetof word above
addi r4, r4, 8  # r4 = address of the patch label (skip the two .offsetof words)
lwz r3, [r4 - 8]  # r3 = offset of patch
lwz r5, [r4 - 4]  # r5 = offset of patch_end
sub r5, r5, r3  # r5 = patch_end - patch = size of the stub in bytes
lis r3, 0x8000
ori r3, r3, 0x01BC
mr r6, r3  # Keep a copy of the destination, since CopyCode overwrites r3
# At this point:
# r3 = destination location (overwritten by CopyCode)
# r4 = patch src data (overwritten by CopyCode)
# r5 = patch size in bytes (overwritten by CopyCode)
# r6 = destination location
# r7 = saved LR
.include CopyCode
setup_branch:
# Replace the bctrl opcode that led to this call with a bl opcode that leads to the copied patch code
subi r3, r7, 4  # r3 = address of the bctrl opcode
sub r4, r6, r3  # r4 = displacement from the call site to the copied stub
rlwinm r4, r4, 0, 6, 31  # Keep only the low 26 bits of the displacement
oris r4, r4, 0x4800  # Branch opcode in the high bits
ori r4, r4, 0x0001  # Set the link bit (b -> bl)
stw [r3], r4
# Write the modified word back to memory and invalidate its instruction cache line so the new opcode is fetched
dcbst r0, r3
sync
icbi r0, r3
isync
# Return the address that the patch was copied to
mr r3, r6
mtlr r7
blr
@@ -1,60 +0,0 @@
start:
mflr r7
# If this patch has already been run, then the opcode that led here will
# not be bctrl (4E800421). In that case, do nothing.
lis r3, 0x4E80
ori r3, r3, 0x0421
lwz r4, [r7 - 4]
cmp r3, r4
beq apply_patch
blr
apply_patch:
bl patch_end
.offsetof patch
.offsetof patch_end
patch:
mfctr r6
mr r3, r6
li r4, 0x7C00
.include FlushCachedCode
mtctr r6
bctr
patch_end:
mflr r4
addi r4, r4, 8
lwz r3, [r4 - 8]
lwz r5, [r4 - 4]
sub r5, r5, r3
lis r3, 0x8000
ori r3, r3, 0x01BC
mr r6, r3
# At this point:
# r3 = destination location (overwritten by CopyCode)
# r4 = patch src data (overwritten by CopyCode)
# r5 = patch size in bytes (overwritten by CopyCode)
# r6 = destination location
# r7 = saved LR
.include CopyCode
setup_branch:
# Replace the bctrl opcode that led to this call with a bl opcode that
# leads to the copied patch code
subi r3, r7, 4
sub r4, r6, r3
rlwinm r4, r4, 0, 6, 31
oris r4, r4, 0x4800
ori r4, r4, 0x0001
stw [r3], r4
dcbst r0, r3
sync
icbi r0, r3
isync
# Return the address that the patch was copied to
mr r3, r6
mtlr r7
blr
@@ -1,9 +1,42 @@
# This function implements the $nativecall chat command on GameCube clients.
# This function implements the $nativecall chat command.
entry_ptr:
reloc0:
.offsetof start
.versions SH4
start:
sts.l -[r15], pr
mov.l r0, [call_addr]
mov.l r4, [arg0]
mov.l r5, [arg1]
mov.l r6, [arg2]
mov.l r7, [arg3]
calls [r0]
nop
lds.l pr, [r15]+
rets
nop
.align 4
call_addr:
.data 0
arg0:
.data 0
arg1:
.data 0
arg2:
.data 0
arg3:
.data 0
.versions PPC
start:
mflr r0
stw [r1 + 4], r0
@@ -1,30 +0,0 @@
# This function implements the $nativecall chat command on DC clients.
entry_ptr:
reloc0:
.offsetof start
start:
sts.l -[r15], pr
mov.l r0, [call_addr]
mov.l r4, [arg0]
mov.l r5, [arg1]
mov.l r6, [arg2]
mov.l r7, [arg3]
calls [r0]
nop
lds.l pr, [r15]+
rets
nop
.align 4
call_addr:
.data 0
arg0:
.data 0
arg1:
.data 0
arg2:
.data 0
arg3:
.data 0
@@ -0,0 +1,22 @@
# Copies a block of code one 32-bit word at a time, then falls through into FlushCachedCode for the copied range.
# r3 = dest ptr
# r4 = src ptr
# r5 = size in bytes (rounded up to a whole number of words)
# Clobbers r0, r3, r4, r5, ctr (plus whatever FlushCachedCode overwrites)
# Note: size must be nonzero. A word count of zero would be loaded into ctr, and bdnz decrements before testing, so
# the loop would wrap around instead of exiting immediately.
.versions PPC
copy_code:
addi r5, r5, 3
rlwinm r5, r5, 30, 2, 31 # r5 = number of words to copy
mtctr r5
subi r3, r3, 4 # r3 -= 4 (so we can use stwu)
subi r4, r4, 4 # r4 -= 4 (so we can use lwzu)
copy_word_again:
lwzu r0, [r4 + 4]
stwu [r3 + 4], r0
bdnz copy_word_again
# Set up the arguments for FlushCachedCode: r3 = original dest ptr, r4 = number of bytes copied
rlwinm r4, r5, 2, 0, 29 # r4 = word count * 4
addi r3, r3, 4 # r3 = one past the last word written
sub r3, r3, r4 # r3 = original dest ptr
.include FlushCachedCode
@@ -1,18 +0,0 @@
# r3 = dest ptr
# r4 = src ptr
# r5 = size
# Clobbers r0, r3, r4, r5
addi r5, r5, 3
rlwinm r5, r5, 30, 2, 31 # r5 = number of words to copy
mtctr r5
subi r3, r3, 4 # r3 = r3 - 4 (so we can use stwu)
subi r4, r4, 4 # r4 = r4 - 4 (so we can use lwzu)
copy_word_again:
lwzu r0, [r4 + 4]
stwu [r3 + 4], r0
bdnz copy_word_again
rlwinm r4, r5, 2, 0, 29
addi r3, r3, 4
sub r3, r3, r4
.include FlushCachedCode
@@ -1,7 +1,11 @@
# eax = dest ptr
# edx = src ptr
# ecx = size
# Clobbers eax, ecx, edx
# eax = dest ptr
# edx = src ptr
# ecx = size
# Clobbers eax, ecx, edx
.versions X86
copy_data:
push ebx
again:
test ecx, ecx
@@ -1,7 +1,11 @@
# r3 = dest ptr
# r4 = src ptr
# r5 = size
# Clobbers r3, r4, r5, ctr
# r3 = dest ptr
# r4 = src ptr
# r5 = size
# Clobbers r3, r4, r5, ctr
.versions PPC
copy_data_words:
addi r5, r5, 3
rlwinm r5, r5, 30, 2, 31 # r5 = number of words to copy
mtctr r5
@@ -1,108 +1,86 @@
# This program was an early attempt at restoring B2 patching functionality to
# Episode 3. It is no longer used, since the quest loading method is more
# reliable, but this file remains for documentation purposes.
# This program was an early attempt at restoring B2 patching functionality to Episode 3. It is no longer used, since
# the quest loading method is more reliable, but this file remains for documentation purposes.
# There is a buffer overflow bug in PSO Episode 3 that this program uses to
# achieve arbitrary code execution. (This bug is likely present in all versions
# of PSO, but the code here is specific to the USA version of Episode 3.) This
# is only necessary because the non-Japanese versions of Episode 3 lack the B2
# command, which is used on other console PSO versions to send patches and other
# bits of code. Here, we use a buffer overflow bug to re-implement the B2
# command, which allows the server to treat PSO Episode 3 like any other version
# of PSO with respect to patching or loading DOL files.
# There is a buffer overflow bug in PSO Episode 3 that this program uses to achieve arbitrary code execution. (This bug
# is likely present in all versions of PSO, but the code here is specific to the USA version of Episode 3.) This is
# only necessary because the non-Japanese versions of Episode 3 lack the B2 command, which is used on other console PSO
# versions to send patches and other bits of code. Here, we use a buffer overflow bug to re-implement the B2 command,
# which allows the server to treat PSO Episode 3 like any other version of PSO with respect to patching or loading DOL
# files.
# For some background, PSO sends download quest files via the A6 and A7
# commands. The A6 command is used to start sending a download quest file; it
# includes the quest name, file name, and total file size. The A7 command is
# used to send a chunk of 1KB (0x400 bytes) of data, or less if it's the final
# chunk of the file. When the client receives an A6 command for a filename
# ending in .bin, it allocates a buffer of (file size + 0x48) bytes. When it
# later receives an A7 command, it copies (cmd.data_size) bytes from the command
# to position (8 + 0x100 * flag) in the buffer, then if cmd.data_size was less
# than 0x400, it marks the file as done and postprocesses it.
# For some background, PSO sends download quest files via the A6 and A7 commands. The A6 command is used to start
# sending a download quest file; it includes the quest name, file name, and total file size. The A7 command is used to
# send a chunk of 1KB (0x400 bytes) of data, or less if it's the final chunk of the file. When the client receives an
# A6 command for a filename ending in .bin, it allocates a buffer of (file size + 0x48) bytes. When it later receives
# an A7 command, it copies (cmd.data_size) bytes from the command to position (8 + 0x100 * flag) in the buffer, then if
# cmd.data_size was less than 0x400, it marks the file as done and postprocesses it.
# However, the client neglects to check if the last chunk overflows the end of
# the buffer before copying the chunk data. In this function, we send an A6
# command with an overall file size of only 0x18 bytes, then we send a chunk of
# 0x200 or so bytes (the compiled size of the code in this file), which
# overflows past the end of the allocated buffer and overwrites part of a free
# block after the allocated buffer. The memory allocator library keeps some of
# its bookkeeping structures at the beginning of this free block, which we use
# to cause the next call to malloc() to overwrite its own return address on the
# stack. Conveniently, this call happens soon afterward, during the
# However, the client neglects to check if the last chunk overflows the end of the buffer before copying the chunk
# data. In this function, we send an A6 command with an overall file size of only 0x18 bytes, then we send a chunk of
# 0x200 or so bytes (the compiled size of the code in this file), which overflows past the end of the allocated buffer
# and overwrites part of a free block after the allocated buffer. The memory allocator library keeps some of its
# bookkeeping structures at the beginning of this free block, which we use to cause the next call to malloc() to
# overwrite its own return address on the stack. Conveniently, this call happens soon afterward, during the
# postprocessing step.
# The PSO memory allocator is a simple free-list allocator. The allocator
# maintains two linked lists of blocks: one for allocated blocks and one for
# free blocks. The list of free blocks is sorted in order of memory address, but
# the list of allocated blocks is sorted in the order they were allocated. (The
# order of the allocated block list does not matter for the allocator's
# performance or correctness.)
# The PSO memory allocator is a simple free-list allocator. The allocator maintains two linked lists of blocks: one for
# allocated blocks and one for free blocks. The list of free blocks is sorted in order of memory address, but the list
# of allocated blocks is sorted in the order they were allocated. (The order of the allocated block list does not
# matter for the allocator's performance or correctness.)
# Each block begins with two pointers, prev and next, which point to other
# blocks in the allocated or free list. (As with a typical doubly-linked list,
# the first block has prev == nullptr and the last block has next == nullptr;
# there is no sentinel node on either end.) After these two pointers is the
# block's size in bytes, followed by 0x14 unused bytes. The block data
# immediately follows this 0x20-byte header structure. All block sizes are
# rounded up to a multiple of 0x20 bytes.
# Each block begins with two pointers, prev and next, which point to other blocks in the allocated or free list. (As
# with a typical doubly-linked list, the first block has prev == nullptr and the last block has next == nullptr; there
# is no sentinel node on either end.) After these two pointers is the block's size in bytes, followed by 0x14 unused
# bytes. The block data immediately follows this 0x20-byte header structure. All block sizes are rounded up to a
# multiple of 0x20 bytes.
# The malloc() routine simply searches for the first free block that has enough
# space to satisfy the request, and either splits it into an allocated and a
# free block (if the free block's size is at least 0x40 bytes more than the
# requested size), or converts the free block entirely into an allocated block
# and returns it. It is the second case that we take advantage of here.
# The malloc() routine simply searches for the first free block that has enough space to satisfy the request, and
# either splits it into an allocated and a free block (if the free block's size is at least 0x40 bytes more than the
# requested size), or converts the free block entirely into an allocated block and returns it. It is the second case
# that we take advantage of here.
# When we send our A7 command containing this program, the first 0x58 bytes of
# it fill the quest file data buffer. The next 0x0C bytes of it overwrite the
# header fields of the following free block (noted below in the comments), and
# the remainder of the data goes into that block's unused header fields and the
# block's data (which is also otherwise unused, since it is a free block). We
# overwrite the free block's prev and next pointers with specific nonzero values
# and overwrite the size with the exact size that the caller will request, so we
# trigger the malloc() case that does not split the free block. When that code
# attempts to remove the free block from its doubly-linked list, it writes
# block->next to block->prev->next and block->prev to block->next->prev. We set
# block->prev to the address where we want execution to jump to (the start label
# here), and block->next to the address of malloc()'s return address on the
# stack. This overwrites the return address with the start label's address, and
# overwrites the word after the start label with an address within the stack. We
# can't avoid this second write since both pointers must be non-null and the
# values and addresses written are dependent on each other, but we can just use
# a branch opcode to ignore the value that gets written into our code.
# When we send our A7 command containing this program, the first 0x58 bytes of it fill the quest file data buffer. The
# next 0x0C bytes of it overwrite the header fields of the following free block (noted below in the comments), and the
# remainder of the data goes into that block's unused header fields and the block's data (which is also otherwise
# unused, since it is a free block). We overwrite the free block's prev and next pointers with specific nonzero values
# and overwrite the size with the exact size that the caller will request, so we trigger the malloc() case that does
# not split the free block. When that code attempts to remove the free block from its doubly-linked list, it writes
# block->next to block->prev->next and block->prev to block->next->prev. We set block->prev to the address where we
# want execution to jump to (the start label here), and block->next to the address of malloc()'s return address on the
# stack. This overwrites the return address with the start label's address, and overwrites the word after the start
# label with an address within the stack. We can't avoid this second write since both pointers must be non-null and the
# values and addresses written are dependent on each other, but we can just use a branch opcode to ignore the value
# that gets written into our code.
# Once we have control, we clean up the allocator state (restoring the free
# block as it was before we overwrote its header), then copy our implementation
# of the B2 command to an otherwise-unused area of memory and apply a few more
# Once we have control, we clean up the allocator state (restoring the free block as it was before we overwrote its
# header), then copy our implementation of the B2 command to an otherwise-unused area of memory and apply a few more
# patches. See the comments within the code below for more details.
.versions 3SE0
# This entry_ptr label isn't used since this code isn't sent with the B2
# command; it just needs to be present for newserv to compile the code properly
# This entry_ptr label isn't used since this code isn't sent with the B2 command; it just needs to be present for
# newserv to compile the code properly
entry_ptr:
start:
b resume1
# This is the value overwritten by malloc() when it attempts to remove the
# free block from its linked list
# This is the value overwritten by malloc() when it attempts to remove the free block from its linked list
.data 0xAAAAAAAA
resume1:
# We can use any of the caller-save registers (r0, r3-r12) here.
# At entry time, some registers contain useful values:
# r5: Address of the allocator instance ("lists"). This structure includes the
# allocated and free list head pointers, one of which we have to update.
# r12: Address of the malloc() function that was called. Conveniently, the
# address that we should return to is very near this location in memory.
# r5: Address of the allocator instance ("lists"). This structure includes the allocated and free list head pointers,
# one of which we have to update.
# r12: Address of the malloc() function that was called. Conveniently, the address that we should return to is very
# near this location in memory.
# Compute the LR we should use to return from this function, but don't put it
# in the LR just yet - we're still going to need the LR for other shenanigans
# Compute the LR we should use to return from this function, but don't put it in the LR just yet - we're still going
# to need the LR for other shenanigans
subi r11, r12, 0xB0 # 8038C1B8 - B0 = 8038C108
# Restore the free block whose header we had destroyed with the A7 command
# buffer overflow
# Restore the free block whose header we had destroyed with the A7 command buffer overflow
lis r7, 0x815F
ori r7, r7, 0xF440
li r0, 0
@@ -121,8 +99,8 @@ resume1:
b resume2
# TODO: We can probably use this space for something useful. There must be
# exactly 20 opcodes (0x50 bytes) between resume1 and opaque2.
# TODO: We can probably use this space for something useful. There must be exactly 20 opcodes (0x50 bytes) between
# resume1 and opaque2.
.zero
.zero
.zero
@@ -130,10 +108,9 @@ resume1:
.zero
opaque2:
# This block must be exactly here (the number of opcodes above is exactly how
# many will fit in the original buffer), and the 3 words here must have
# exactly these values. This is what causes malloc to overwrite the return
# address on the stack to call this code in the first place.
# This block must be exactly here (the number of opcodes above is exactly how many will fit in the original buffer),
# and the 3 words here must have exactly these values. This is what causes malloc to overwrite the return address on
# the stack to call this code in the first place.
.data 0x815FF3E8 # free_head->prev
.data 0x80592AC4 # free_head->next
.data 0x00000160 # free_head->size
@@ -141,11 +118,10 @@ opaque2:
resume2:
bl get_handle_B2_ptr
# This is the code we're going to use for the B2 command handler, which we
# will copy into an unused area of memory. It's convenient to put it here and
# use a bl opcode to get its address, so this code can be minimally position-
# dependent. Note that this part of the code does not run at the time the A7
# command is received; it will run later if the client receives a B2 command.
# This is the code we're going to use for the B2 command handler, which we will copy into an unused area of memory.
# It's convenient to put it here and use a bl opcode to get its address, so this code can be minimally position-
# dependent. Note that this part of the code does not run at the time the A7 command is received; it will run later
# if the client receives a B2 command.
handle_B2:
mflr r0
stwu [r1 - 0x40], r1
@@ -173,10 +149,9 @@ handle_B2:
ori r5, r5, 0x0C00
stw [r1 + 0x08], r5
# If there's no code section, skip it. We also write the code section size to
# the return value field (which will be overwritten later if the size is not
# zero). This is because I'm lazy and this gives the behavior we want: the
# code return value is always zero if the code section size is zero.
# If there's no code section, skip it. We also write the code section size to the return value field (which will be
# overwritten later if the size is not zero). This is because I'm lazy and this gives the behavior we want: the code
# return value is always zero if the code section size is zero.
li r6, 4
lwbrx r5, [r4 + r6] # r5 = code_size
stw [r1 + 0x0C], r5 # response.code_return_value = code_size
@@ -218,8 +193,7 @@ handle_B2_skip_relocations:
bctrl # flush_code(code_base_addr, code_section_size)
# Call the code section and put the return value (byteswapped) on the stack
# Note: flush_code only uses r3, r4, and r5, so we don't need to reload r7
# after the above call
# Note: flush_code only uses r3, r4, and r5, so we don't need to reload r7 after the above call
lwz r8, [r7 + 0x10]
lwzx r8, [r8 + r6]
mtctr r8
@@ -284,17 +258,16 @@ copy_handle_B2_word_again:
rlwinm r4, r7, 2, 0, 29
bctrl # flush_code(copied_B2_handler, copied_B2_handler_bytes)
# Replace the command handler table entry for command 0E (which appears to be
# a legacy command and has very broken behavior) with our B2 implementation
# Replace the command handler table entry for command 0E (which appears to be a legacy command and has very broken
# behavior) with our B2 implementation
lis r5, 0x8044
ori r5, r5, 0xF684
li r0, 0x00B2
stw [r5], r0
stw [r5 + 0x0C], r12
# Patch both places in the code where command 9E is sent to make them include
# a sentinel value that newserv can use to determine if the client has already
# run the code in this file
# Patch both places in the code where command 9E is sent to make them include a sentinel value that newserv can use
# to determine if the client has already run the code in this file
bl get_patch_9E_1_ptr
patch_9E_1:
lis r4, 0x5F5C
@@ -333,12 +306,10 @@ get_patch_9E_2_ptr:
mtctr r9
bctrl # flush_code(patch_9E_2_dest, 0x20)
# Finally, patch the A7 handler function (which is on the current callstack)
# so that it does nothing else if this function returns null, which prevents
# further memory corruption. This changes a beq opcode (which never triggers
# under normal circumstances) to skip a couple more function calls, one of
# which would cause memory corruption if executed because the original buffer
# is smaller than 0x100 bytes.
# Finally, patch the A7 handler function (which is on the current callstack) so that it does nothing else if this
# function returns null, which prevents further memory corruption. This changes a beq opcode (which never triggers
# under normal circumstances) to skip a couple more function calls, one of which would cause memory corruption if
# executed because the original buffer is smaller than 0x100 bytes.
lis r3, 0x8010
ori r3, r3, 0xFD8A
li r4, 0x0064
@@ -348,8 +319,7 @@ get_patch_9E_2_ptr:
mtctr r9
bctrl # flush_code(patched_opcode_address & 0xFFFFFFF0, 0x20)
# Return null instead of a malloc'ed block, which triggers the conditional
# branch we just patched above
# Return null instead of a malloc'ed block, which triggers the conditional branch we just patched above
li r3, 0
mtlr r11
blr
@@ -1,20 +1,23 @@
# This code flushes the data cache and invalidates the instruction cache for a
# block of newly-written code in memory.
# This code flushes the data cache and invalidates the instruction cache for a block of newly-written code in memory.
# Arguments:
# r3 = address of written code
# r4 = number of bytes
# Returns: nothing
# Overwrites: r3, r4, r5, cr0, XER[CA]
.versions PPC
flush_cached_code:
lis r5, 0xFFFF
ori r5, r5, 0xFFF1
and r5, r5, r3
subf r3, r5, r3
add r4, r4, r3
flush_cached_code_writes__again:
flush_cached_code_again:
dcbst r0, r5
sync
icbi r0, r5
addic r5, r5, 8
subic. r4, r4, 8
bge flush_cached_code_writes__again
bge flush_cached_code_again
isync
@@ -1,44 +0,0 @@
# (uint16_t entity_id @ eax) -> TObjectV00b421c0* @ eax
# Preserves all registers except eax
get_enemy_entity:
push esi
push edi
push edx
push ecx
xor edx, edx
xchg edx, eax
cmp edx, 0x1000
jl done
cmp edx, 0x4000
jge done
mov esi, [0x00AABCE8] # bs_low = next_player_entity_index
mov edi, [0x00AABCE4]
lea edi, [edi + esi - 1] # bs_high = next_player_entity_index + next_enemy_entity_index - 1
bs_again:
cmp esi, edi
jge bs_done
lea ecx, [esi + edi]
shr ecx, 1
mov eax, [ecx * 4 + 0x00AAB2A0] # all_entities[ecx]
cmp [eax + 0x1C], dx
jge bs_not_less
lea esi, [ecx + 1]
jmp bs_again
bs_not_less:
mov edi, ecx
jmp bs_again
bs_done:
mov eax, [esi * 4 + 0x00AAB2A0] # all_entities[bs_low]
test eax, eax
je done
xor ecx, ecx
cmp [eax + 0x1C], dx
cmovne eax, ecx
done:
pop ecx
pop edx
pop edi
pop esi
@@ -1,5 +1,8 @@
# (uint16_t entity_id @ eax) -> TObjectV00b441c0* @ eax
# Preserves all registers except eax
.versions 59NJ 59NL
get_enemy_entity:
push esi
push edi
@@ -12,15 +15,15 @@ get_enemy_entity:
cmp edx, 0x4000
jge done
mov esi, [0x00AAE168] # bs_low = next_player_entity_index
mov edi, [0x00AAE164]
mov esi, [<VERS 0x00AABCE8 0x00AAE168>] # bs_low = next_player_entity_index
mov edi, [<VERS 0x00AABCE4 0x00AAE164>]
lea edi, [edi + esi - 1] # bs_high = next_player_entity_index + next_enemy_entity_index - 1
bs_again:
cmp esi, edi
jge bs_done
lea ecx, [esi + edi]
shr ecx, 1
mov eax, [ecx * 4 + 0x00AAD720] # all_entities[ecx]
mov eax, [ecx * 4 + <VERS 0x00AAB2A0 0x00AAD720>] # all_entities[ecx]
cmp [eax + 0x1C], dx
jge bs_not_less
lea esi, [ecx + 1]
@@ -30,7 +33,7 @@ bs_not_less:
jmp bs_again
bs_done:
mov eax, [esi * 4 + 0x00AAD720] # all_entities[bs_low]
mov eax, [esi * 4 + <VERS 0x00AAB2A0 0x00AAD720>] # all_entities[bs_low]
test eax, eax
je done
xor ecx, ecx
@@ -25,6 +25,8 @@
# XBOX_PAGE_NOCACHE = 0x00000200
# XBOX_PAGE_WRITECOMBINE = 0x00000400
.versions X86
start:
push ecx
push edx
@@ -6,6 +6,9 @@
# r6 = source data size
# Returns: number of bytes written to output buffer, or -1 on error
# Overwrites: r3, r4, r5, r6, r7, r8, r9, r10, r11, r12
.versions PPC
prs_decompress__start:
# r3 = dest ptr (used as write ptr)
subi r3, r3, 1
@@ -0,0 +1,49 @@
# This function is required for loading DOLs. If it's not present, newserv can't serve DOL files to GameCube clients.
# Each variant below does the same thing: it treats the word stored at the address label as a pointer and returns the
# 32-bit word that it points to. The word at the address label is zero in this file; presumably the server fills it in
# before sending the function (TODO confirm against the caller).
entry_ptr:
reloc0:
.offsetof start

.versions SH4
# Returns the word in r0
start:
mova r0, [address]  # r0 = location of the address label
mov.l r0, [r0]  # r0 = pointer stored at the address label
rets
mov.l r0, [r0]  # (Delay slot of rets) r0 = word at that pointer
.align 4
address:
.data 0

.versions PPC
# Returns the word in r3; also overwrites r12
start:
mflr r12  # Save LR, since the bl below overwrites it
bl read  # Only used to get the location of the address label into LR
address:
.zero
read:
mflr r3  # r3 = location of the address label
lwz r3, [r3]  # r3 = pointer stored at the address label
lwz r3, [r3]  # r3 = word at that pointer
mtlr r12
blr

.versions X86
# Returns the word in eax
start:
call resume  # Pushes the location of the address label as the return address
address:
.data 0
resume:
pop eax  # eax = location of the address label
mov eax, [eax]  # eax = pointer stored at the address label
mov eax, [eax]  # eax = word at that pointer
ret
@@ -1,13 +0,0 @@
entry_ptr:
reloc0:
.offsetof start
start:
mova r0, [address]
mov.l r0, [r0]
rets
mov.l r0, [r0]
.align 4
address:
.data 0
@@ -1,18 +0,0 @@
# This function is required for loading DOLs. If it's not present, newserv can't
# serve DOL files to GameCube clients.
entry_ptr:
reloc0:
.offsetof start
start:
mflr r12
bl read
address:
.zero
read:
mflr r3
lwz r3, [r3]
lwz r3, [r3]
mtlr r12
blr
@@ -1,13 +0,0 @@
entry_ptr:
reloc0:
.offsetof start
start:
call resume
address:
.data 0
resume:
pop eax
mov eax, [eax]
mov eax, [eax]
ret
@@ -1,3 +1,5 @@
.versions X86
entry_ptr:
reloc0:
.offsetof start
@@ -1,5 +1,6 @@
# This function is required for loading DOLs. If it's not present, newserv can't
# serve DOL files to GameCube clients.
# This function is required for loading DOLs. If it's not present, newserv can't serve DOL files to GameCube clients.
.versions PPC
entry_ptr:
reloc0:
@@ -16,12 +17,11 @@ dol_base_ptr:
.zero
get_current_addr:
mflr r31
# TODO: It'd be nice to be able to use an expression for the immediate value
# here - something like (dol_base_ptr - start), for example
# TODO: It'd be nice to be able to use an expression for the immediate value here - something like (dol_base_ptr -
# start), for example
subi r31, r31, 0x10 # r31 = base of data to copy to low memory (start label)
# If this code is not running from low memory (80001800-80003000), then copy
# it there and branch to it
# If this code is not running from low memory (80001800-80003000), then copy it there and branch to it
lis r3, 0x8000
ori r3, r3, 0x3000
cmp r31, r3
@@ -53,9 +53,8 @@ copy_code_to_low_memory__again:
run_dol:
lwz r30, [r31 + 0x10] # r30 = data base ptr
# Decompress the file first. If the compressed size is zero, then skip this
# step (the file is not compressed). The header consists of two fields:
# compressed size followed by decompressed size.
# Decompress the file first. If the compressed size is zero, then skip this step (the file is not compressed). The
# header consists of two fields: compressed size followed by decompressed size.
lwz r6, [r30]
cmplwi r6, 0
beq run_dol__not_compressed
@@ -70,9 +69,8 @@ run_dol__not_compressed:
addi r30, r30, 8
run_dol__decompressed:
# DOL files are very simple: they have up to 7 text sections, up to 11 data
# sections, and a BSS section and an entrypoint. No imports or other fancy
# things to do - we just have to move a bunch of bytes around.
# DOL files are very simple: they have up to 7 text sections, up to 11 data sections, and a BSS section and an
# entrypoint. No imports or other fancy things to do - we just have to move a bunch of bytes around.
mr r29, r30 # r29 = DOL header iterator
addi r28, r29, 0x48 # r28 = DOL header iterator end value
@@ -87,16 +85,15 @@ run_dol__move_section:
subi r4, r4, 1
add r5, r4, r5 # r5 = source end pointer
run_dol__move_section_data__again:
# TODO: We probably should implement memmove-like semantics here, in case the
# DOL loads at an unusually late address. This is probably very rare.
# TODO: We probably should implement memmove-like semantics here, in case the DOL loads at an unusually late address.
# This is probably very rare.
lbzu r0, [r4 + 1]
stbu [r3 + 1], r0
cmp r4, r5
bne run_dol__move_section_data__again
# Flush the data cache and invalidate the instruction cache after copying the
# section data. Technically we don't have to do this for data sections, but
# I'm lazy and it doesn't take too long.
# Flush the data cache and invalidate the instruction cache after copying the section data. Technically we don't have
# to do this for data sections, but I'm lazy and it doesn't take too long.
lwz r3, [r29 + 0x48] # r3 = dest address of section data
lwz r4, [r29 + 0x90] # r4 = size of section data
bl flush_cached_code_writes
@@ -1,20 +1,21 @@
# This function returns the game version, with values more specific than can be
# detected by the sub_version field in the various login commands (e.g. 93/9D).
# This function returns the game version, with values more specific than can be detected by the sub_version field in
# the various login commands (e.g. 9D/9E). We call this value specific_version in the codebase.
# The returned value has the format SSPPRRVV, where:
# S = version (31 = PSOv1, 32 = PSOv2)
# G = game (4F = PSO)
# R = region (45 = E, 4A = J, 50 = P)
# V = minor version (31 = NTE, 32 = 11/2000, 33 = 12/2000, 24 = 01/2001,
# 35 = 08/2001, 46 = not a prototype)
# This results in a 4-character ASCII-printable version code which encodes all
# of the above information. This value is called specific_version in the places
# where it's used by the server.
# The returned value has the format SSGGRRVV, where:
# S = 31 = PSOv1, 32 = PSOv2, 33 = PSOv3, 34 = Xbox, 35 = BB
# G = game (4F (O) = non-Ep3, 53 (S) = Ep3)
# R = region (45 (E), 4A (J), or 50 (P))
# V = minor version (meaning varies by major version)
# This results in a 4-character ASCII-printable version code which encodes all of the above information.
entry_ptr:
reloc0:
.offsetof start
.versions SH4
start:
mova r0, [data_start]
mov r1, r0
@@ -59,3 +60,46 @@ data_start:
.data 0x8C2E7CE0 # v2 EU
.data 0x324F5046 # 2OPF
.data 0x00000000 # end sentinel
.versions PPC
start:
# Builds the specific_version value in r3 from the word at 0x80000000 (referred to as the game ID in the comments
# below) and the byte at 0x80000007 (the disc version). Returns 0 if the first byte of the game ID is neither 'G'
# nor 'D'. Clobbers r0 and r4.
lis r3, 0x8000
lwz r4, [r3]
# For Trial Editions, set the V field to 54; for other versions, set it to 0x30 | disc_version
# r0 = (r4 rotated left by 8) & 0xFF, i.e. the most-significant byte of the game ID
rlwinm r0, r4, 8, 24, 31
cmplwi r0, 0x47 # Check if high byte of game ID is 'G'
beq not_trial
cmplwi r0, 0x44 # Check if high byte of game ID is 'D'
beq is_nte
# Unrecognized game ID prefix; return 0
li r3, 0
blr
is_nte:
li r3, 0x0054 # V field = 'T'
b end_trial_check
not_trial:
# r3 still holds 0x80000000 here, so this reads the byte at 0x80000007
lbz r3, [r3 + 7]
ori r3, r3, 0x0030
end_trial_check:
oris r3, r3, 0x3300 # Set high byte ('3')
# Rotating r4 left by 8 moves its low two bytes into bits 8-23, which are then inserted into r3; the top and bottom
# bytes of r3 are left untouched
rlwimi r3, r4, 8, 8, 23 # Set middle two bytes to last two bytes of game ID
blr
.versions X86
start:
# Returns the specific_version value in eax. GetVersionInfoXB is an include whose code is not visible here; judging
# by the checks below, it leaves either a pointer or null in eax (NOTE(review): confirm against the include).
.include GetVersionInfoXB
test eax, eax
jz version_not_found
# The first dword at the returned pointer is used directly as the return value
mov eax, [eax]
ret
version_not_found:
# Fallback value: '4', 'O', and zero bytes in the region and minor version fields
mov eax, 0x344F0000
ret
@@ -1,39 +0,0 @@
# This function returns the game version, with values more specific than can be
# detected by the sub_version field in the various login commands (e.g. 9D/9E).
# The returned value has the format SSGGRRVV, where:
# S = 33 (which represents PSO GC)
# G = game (4F (O) = Ep1&2, 53 (S) = Ep3)
# R = region (45 (E), 4A (J), or 50 (P))
# V = minor version | 30 (30 = 1.0, 31 = 1.1, 32 = 1.2, etc.), or 54 for NTE
# This results in a 4-character ASCII-printable version code which encodes all
# of the above information. This value is called specific_version in the places
# where it's used by the server.
entry_ptr:
reloc0:
.offsetof start
start:
lis r3, 0x8000
lwz r4, [r3]
# For Trial Editions, set the V field to 54; for other versions, set it to
# 0x30 | disc_version
rlwinm r0, r4, 8, 24, 31
cmplwi r0, 0x47 # Check if high byte of game ID is 'G'
beq not_trial
cmplwi r0, 0x44 # Check if high byte of game ID is 'D'
beq is_nte
li r3, 0
blr
is_nte:
li r3, 0x0054
b end_trial_check
not_trial:
lbz r3, [r3 + 7]
ori r3, r3, 0x0030
end_trial_check:
oris r3, r3, 0x3300 # Set high byte ('3')
rlwimi r3, r4, 8, 8, 23 # Set middle two bytes to last two bytes of game ID
blr
@@ -1,26 +0,0 @@
# This function returns the game version, with values more specific than can be
# detected by the sub_version field in the various login commands (e.g. 9D/9E).
# The returned value has the format SSSSRRVV, where:
# S = 344F (which represents PSO Xbox)
# R = region (45 (E), 4A (J), or 50 (P))
# V = version (42 (B) for beta, 44 (D) for disc, 55 (U) for title update)
# This results in a 4-character ASCII-printable version code which encodes all
# of the above information. This value is called specific_version in the places
# where it's used by the server.
entry_ptr:
reloc0:
.offsetof start
start:
.include GetVersionInfoXB
test eax, eax
jz version_not_found
mov eax, [eax]
ret
version_not_found:
mov eax, 0x344F0000
ret
@@ -1,42 +0,0 @@
# This file defines the following function:
# write_address_of_code(
# const void* patch_code,
# size_t patch_code_size,
# void** ptr_addr);
# This function allocates memory for patch_code, copies patch_code to that
# memory, then writes the address of the allocated code at the specified
# pointer. The allocated memory is never freed.
# This function pops its arguments off the stack before returning.
write_call_to_code:
# [esp + 0x04] = code ptr
# [esp + 0x08] = code size
# [esp + 0x0C] = ptr addr
# Allocate memory for the copied code
mov ecx, [0x00AAB404]
push dword [esp + 0x08]
mov eax, 0x007A8A38
call eax # malloc7
test eax, eax
je done
# Copy the code to the newly-allocated memory
# eax = dest pointer (from malloc7 call above)
mov edx, [esp + 0x04] # edx = source pointer
mov ecx, [esp + 0x08] # ecx = source size
push ebx
memcpy_again:
dec ecx
mov bl, [edx + ecx] # Copy one byte from source to dest
mov [eax + ecx], bl
test ecx, ecx
jne memcpy_again
pop ebx
# Write the address
mov ecx, [esp + 0x0C]
mov [ecx], eax
done:
ret 0x0C
@@ -3,10 +3,11 @@
# const void* patch_code,
# size_t patch_code_size,
# void** ptr_addr);
# This function allocates memory for patch_code, copies patch_code to that
# memory, then writes the address of the allocated code at the specified
# pointer. The allocated memory is never freed.
# This function pops its arguments off the stack before returning.
# This function allocates memory for patch_code, copies patch_code to that memory, then writes the address of the
# allocated code at the specified pointer. The allocated memory is never freed. This function pops its arguments off
# the stack before returning.
.versions 59NJ 59NL
write_call_to_code:
# [esp + 0x04] = code ptr
@@ -14,9 +15,9 @@ write_call_to_code:
# [esp + 0x0C] = ptr addr
# Allocate memory for the copied code
mov ecx, [0x00AA8F84]
mov ecx, [<VERS 0x00AA8F84 0x00AAB404>]
push dword [esp + 0x08]
mov eax, 0x007A984C
mov eax, <VERS 0x007A984C 0x007A8A38>
call eax # malloc7
test eax, eax
je done
@@ -1,76 +0,0 @@
# This file defines the following function:
# write_call_to_code(
# const void* patch_code,
# size_t patch_code_size,
# void* call_opcode_address,
# ssize_t call_opcode_bytes);
# This function allocates memory for patch_code, copies patch_code to that
# memory, then writes a call or jmp opcode to call_opcode_address that calls
# the code in the allocated memory region. The allocated memory is never freed.
# call_opcode_bytes specifies how many bytes at the callsite should be
# overwritten. This value must be at least 5; the first 5 bytes are overwritten
# with the call/jmp opcode itself; the rest are overwritten with nop opcodes.
# If call_opcode_bytes is positive, a call opcode is written; if it's negative,
# a jmp opcode is written.
# This function pops its arguments off the stack before returning.
write_call_to_code:
# [esp + 0x04] = code ptr
# [esp + 0x08] = code size
# [esp + 0x0C] = jump callsite
# [esp + 0x10] = callsite size (if zero, write the address instead of a call)
# Allocate memory for the copied code
mov ecx, [0x00AA8F84]
push dword [esp + 0x08]
mov eax, 0x007A984C
call eax # malloc7
test eax, eax
je done
# Copy the code to the newly-allocated memory
# eax = dest pointer (from malloc7 call above)
mov edx, [esp + 0x04] # edx = source pointer
mov ecx, [esp + 0x08] # ecx = source size
push ebx
memcpy_again:
dec ecx
mov bl, [edx + ecx] # Copy one byte from source to dest
mov [eax + ecx], bl
test ecx, ecx
jne memcpy_again
pop ebx
mov edx, [esp + 0x0C] # edx = jump callsite
# If the callsite size is zero, just write the address directly
cmp dword [esp + 0x10], 0
jne write_call_or_jmp
mov [edx], eax
jmp done
# Write the call or jmp opcode
write_call_or_jmp:
lea ecx, [eax - 5]
sub ecx, edx # ecx = (dest code addr) - (jump callsite) - 5
cmp dword [esp + 0x10], 0
setl al
or al, 0xE8
mov [edx], al # Write E8 (call), or E9 (jmp) if size was negative
mov [edx + 1], ecx # Write delta
# Write as many nops after the call opcode as necessary
mov ecx, 5
mov eax, [esp + 0x10]
cmp eax, 0
jge write_nop_again
neg eax
write_nop_again:
cmp ecx, eax
jge done
mov byte [edx + ecx], 0x90
inc ecx
jmp write_nop_again
done:
ret 0x10
@@ -1,17 +1,16 @@
.versions 59NJ 59NL
# This file defines the following function:
# write_call_to_code(
# const void* patch_code,
# size_t patch_code_size,
# void* call_opcode_address,
# ssize_t call_opcode_bytes);
# This function allocates memory for patch_code, copies patch_code to that
# memory, then writes a call or jmp opcode to call_opcode_address that calls
# the code in the allocated memory region. The allocated memory is never freed.
# call_opcode_bytes specifies how many bytes at the callsite should be
# overwritten. This value must be at least 5; the first 5 bytes are overwritten
# with the call/jmp opcode itself; the rest are overwritten with nop opcodes.
# If call_opcode_bytes is positive, a call opcode is written; if it's negative,
# a jmp opcode is written.
# This function allocates memory for patch_code, copies patch_code to that memory, then writes a call or jmp opcode to
# call_opcode_address that calls the code in the allocated memory region. The allocated memory is never freed.
# call_opcode_bytes specifies how many bytes at the callsite should be overwritten. This value must be at least 5; the
# first 5 bytes are overwritten with the call/jmp opcode itself; the rest are overwritten with nop opcodes. If
# call_opcode_bytes is positive, a call opcode is written; if it's negative, a jmp opcode is written.
# This function pops its arguments off the stack before returning.
write_call_to_code:
@@ -21,9 +20,9 @@ write_call_to_code:
# [esp + 0x10] = callsite size (if zero, write the address instead of a call)
# Allocate memory for the copied code
mov ecx, [0x00AAB404]
mov ecx, [<VERS 0x00AA8F84 0x00AAB404>]
push dword [esp + 0x08]
mov eax, 0x007A8A38
mov eax, <VERS 0x007A984C 0x007A8A38>
call eax # malloc7
test eax, eax
je done
@@ -1,83 +0,0 @@
# This file defines the following function:
# void [/std] write_call_to_code(
# const void* patch_code @ [esp + 0x04],
# size_t patch_code_size @ [esp + 0x08],
# size_t call_count @ [esp + 0x0C],
# void* call_opcode_address @ [esp + 0x10],
# ssize_t call_opcode_bytes @ [esp + 0x14],
# ...);
# This function allocates memory for patch_code, copies patch_code to that
# memory, then writes a call or jmp opcode to call_opcode_address that calls
# the code in the allocated memory region. The allocated memory is never freed.
# call_opcode_bytes specifies how many bytes at the callsite should be
# overwritten. This value must be at least 5; the first 5 bytes are overwritten
# with the call/jmp opcode itself; the rest are overwritten with nop opcodes.
# This function pops its arguments off the stack before returning (including
# all the varargs).
write_call_to_code:
# [esp + 0x04] = code ptr
# [esp + 0x08] = code size
# [esp + 0x0C] = callsite count
# [esp + 0x10] = callsite address
# [esp + 0x14] = callsite size
# ... (further callsite address/size pairs)
# Allocate memory for the copied code
mov ecx, [0x00AA8F84]
push dword [esp + 0x08]
mov eax, 0x007A984C
call eax # malloc7
test eax, eax
je done
# Copy the code to the newly-allocated memory
# eax = dest pointer (from malloc7 call above)
mov edx, [esp + 0x04] # edx = source pointer
mov ecx, [esp + 0x08] # ecx = source size
push ebx
memcpy_again:
dec ecx
mov bl, [edx + ecx] # Copy one byte from source to dest
mov [eax + ecx], bl
test ecx, ecx
jne memcpy_again
pop ebx
# Write the call opcodes
xchg ebx, [esp + 0x0C] # Save ebx; get callsite count
mov [esp - 0x08], esi
mov [esp - 0x0C], eax
mov esi, 0x10 # Stack offset of first callsite pair
next_callsite:
mov edx, [esp + esi] # edx = jump callsite
lea ecx, [eax - 5]
sub ecx, edx # ecx = (dest code addr) - (jump callsite) - 5
mov byte [edx], 0xE8
mov [edx + 1], ecx # Write E8 (call) followed by delta
# Write as many nops after the call opcode as necessary
mov ecx, 5
mov eax, [esp + esi + 4]
write_nop_again:
cmp ecx, eax
jge this_callsite_done
mov byte [edx + ecx], 0x90
inc ecx
jmp write_nop_again
this_callsite_done:
mov eax, [esp - 0x0C]
add esi, 8
dec ebx
jnz next_callsite
mov ecx, esi
mov ebx, [esp + 0x0C]
mov esi, [esp - 0x08]
done:
mov eax, [esp]
add esp, ecx
jmp eax
@@ -1,83 +0,0 @@
# This file defines the following function:
# void [/std] write_call_to_code(
# const void* patch_code @ [esp + 0x04],
# size_t patch_code_size @ [esp + 0x08],
# size_t call_count @ [esp + 0x0C],
# void* call_opcode_address @ [esp + 0x10],
# ssize_t call_opcode_bytes @ [esp + 0x14],
# ...);
# This function allocates memory for patch_code, copies patch_code to that
# memory, then writes a call or jmp opcode to call_opcode_address that calls
# the code in the allocated memory region. The allocated memory is never freed.
# call_opcode_bytes specifies how many bytes at the callsite should be
# overwritten. This value must be at least 5; the first 5 bytes are overwritten
# with the call/jmp opcode itself; the rest are overwritten with nop opcodes.
# This function pops its arguments off the stack before returning (including
# all the varargs).
write_call_to_code:
# [esp + 0x04] = code ptr
# [esp + 0x08] = code size
# [esp + 0x0C] = callsite count
# [esp + 0x10] = callsite address
# [esp + 0x14] = callsite size
# ... (further callsite address/size pairs)
# Allocate memory for the copied code
mov ecx, [0x00AAB404]
push dword [esp + 0x08]
mov eax, 0x007A8A38
call eax # malloc7
test eax, eax
je done
# Copy the code to the newly-allocated memory
# eax = dest pointer (from malloc7 call above)
mov edx, [esp + 0x04] # edx = source pointer
mov ecx, [esp + 0x08] # ecx = source size
push ebx
memcpy_again:
dec ecx
mov bl, [edx + ecx] # Copy one byte from source to dest
mov [eax + ecx], bl
test ecx, ecx
jne memcpy_again
pop ebx
# Write the call opcodes
xchg ebx, [esp + 0x0C] # Save ebx; get callsite count
mov [esp - 0x08], esi
mov [esp - 0x0C], eax
mov esi, 0x10 # Stack offset of first callsite pair
next_callsite:
mov edx, [esp + esi] # edx = jump callsite
lea ecx, [eax - 5]
sub ecx, edx # ecx = (dest code addr) - (jump callsite) - 5
mov byte [edx], 0xE8
mov [edx + 1], ecx # Write E8 (call) followed by delta
# Write as many nops after the call opcode as necessary
mov ecx, 5
mov eax, [esp + esi + 4]
write_nop_again:
cmp ecx, eax
jge this_callsite_done
mov byte [edx + ecx], 0x90
inc ecx
jmp write_nop_again
this_callsite_done:
mov eax, [esp - 0x0C]
add esi, 8
dec ebx
jnz next_callsite
mov ecx, esi
mov ebx, [esp + 0x0C]
mov esi, [esp - 0x08]
done:
mov eax, [esp]
add esp, ecx
jmp eax
@@ -1,4 +1,91 @@
# This function has the same signature as WriteCallToCodeMulti-59NL.
# This file defines the following function:
# void [/std] write_call_to_code(
# const void* patch_code @ [esp + 0x04],
# size_t patch_code_size @ [esp + 0x08],
# size_t call_count @ [esp + 0x0C],
# void* call_opcode_address @ [esp + 0x10],
# ssize_t call_opcode_bytes @ [esp + 0x14],
# ...);
# This function allocates memory for patch_code, copies patch_code to that memory, then writes a call opcode at each
# call_opcode_address that calls the code in the allocated memory region. The allocated memory is never freed. Each
# call_opcode_bytes specifies how many bytes at the corresponding callsite should be overwritten. This value must be
# at least 5; the first 5 bytes are overwritten with the call opcode itself; the rest are overwritten with nop
# opcodes. This function pops its arguments off the stack before returning (including all the varargs).
.versions 59NJ 59NL
write_call_to_code:
# Stack layout on entry:
# [esp + 0x00] = return address
# [esp + 0x04] = code ptr
# [esp + 0x08] = code size (must be nonzero; the copy loop decrements before testing)
# [esp + 0x0C] = callsite count (must be nonzero; the callsite loop decrements before testing)
# [esp + 0x10] = callsite address
# [esp + 0x14] = callsite size
# ... (further callsite address/size pairs)
# On return, the return address and every argument (0x10 + 8 * callsite count bytes in total) have been removed from
# the stack, whether or not the allocation succeeded. ebx and esi are preserved; eax, ecx, and edx are clobbered.
# Allocate memory for the copied code
mov ecx, [<VERS 0x00AA8F84 0x00AAB404>]
push dword [esp + 0x08] # Argument for malloc7 = code size
mov eax, <VERS 0x007A984C 0x007A8A38>
call eax # malloc7
# The code below addresses the arguments at their original offsets, so malloc7 is expected to have popped its own
# argument (NOTE(review): presumably callee-cleanup; confirm against the game binary).
test eax, eax
jne copy_code
# Allocation failed. The done label pops ecx bytes off the stack, but ecx does not hold a byte count at this point
# (it was last loaded with the value passed to malloc7 in ecx, and may have been changed by the call), so compute
# the count here: return address + three fixed arguments + 8 bytes per callsite pair.
mov ecx, [esp + 0x0C] # ecx = callsite count
add ecx, ecx
add ecx, ecx
add ecx, ecx # ecx = callsite count * 8
add ecx, 0x10 # ecx = total bytes to pop
jmp done
copy_code:
# Copy the code to the newly-allocated memory, from the last byte down to the first
# eax = dest pointer (from malloc7 call above)
mov edx, [esp + 0x04] # edx = source pointer
mov ecx, [esp + 0x08] # ecx = source size
push ebx
memcpy_again:
dec ecx
mov bl, [edx + ecx] # Copy one byte from source to dest
mov [eax + ecx], bl
test ecx, ecx
jne memcpy_again
pop ebx
# Write the call opcodes
xchg ebx, [esp + 0x0C] # Save ebx; get callsite count
# esi and the allocated address are stashed below esp, so nothing may be pushed or called until they are reloaded
mov [esp - 0x08], esi
mov [esp - 0x0C], eax
mov esi, 0x10 # Stack offset of first callsite pair
next_callsite:
mov edx, [esp + esi] # edx = jump callsite
lea ecx, [eax - 5]
sub ecx, edx # ecx = (dest code addr) - (jump callsite) - 5
mov byte [edx], 0xE8
mov [edx + 1], ecx # Write E8 (call) followed by delta
# Write as many nops after the call opcode as necessary
mov ecx, 5 # ecx = offset of first byte after the call opcode
mov eax, [esp + esi + 4] # eax = callsite size
write_nop_again:
cmp ecx, eax
jge this_callsite_done
mov byte [edx + ecx], 0x90
inc ecx
jmp write_nop_again
this_callsite_done:
mov eax, [esp - 0x0C] # eax = allocated code address again
add esi, 8 # Advance to the next callsite pair
dec ebx
jnz next_callsite
# esi is now the offset just past the last pair, which is exactly the number of bytes to pop
mov ecx, esi
mov ebx, [esp + 0x0C] # Restore ebx (saved by the xchg above)
mov esi, [esp - 0x08]
done:
# ecx = number of bytes to pop (return address + all arguments)
mov eax, [esp] # eax = return address
add esp, ecx
jmp eax
.versions 4OJB 4OJD 4OJU 4OED 4OEU 4OPD 4OPU
write_call_to_code:
.include GetVersionInfoXB
@@ -0,0 +1,151 @@
.versions SH4
# Applies a sequence of patches that immediately follows this code. Each patch is a 4-byte destination address and a
# 4-byte size followed by that many bytes of data; each header is read from the next 4-byte-aligned address after the
# previous patch's data. A header whose size is zero ends the sequence.
mova r0, [first_patch_header]
mov r7, r0 # r7 = read ptr
# Build the alignment mask without a literal: 0 -> 0xFFFFFFFF -> shifted left by 2
xor r3, r3
dec r3
shl r3, 2 # r3 = 0xFFFFFFFC (mask for aligning r7)
apply_patch:
add r7, 3
and r7, r3 # r7 = (r7 + 3) & (~3) (align to 4-byte boundary)
mov.l r4, [r7]+ # r4 = dest addr
mov.l r5, [r7]+ # r5 = size; r7 now points to the patch data
add r5, r4 # r5 = dest end ptr (dest addr + size)
cmpeq r4, r5 # if (size == 0) return
bt done
again:
cmpeq r4, r5
bt apply_patch # if (r4 == r5) done with the patch; go to next header
mov.b r0, [r7]+
mov.b [r4], r0 # *(r4) = *(r7++);
# The add on the line after the branch appears to execute in the branch's delay slot, as the existing "r4++" comment
# implies (NOTE(review): confirm bs is the delayed-branch mnemonic in this assembler)
bs again # r4++; continue
add r4, 1
done:
rets
nop
# The patch headers are read with mov.l, so the first one is aligned to a 4-byte boundary
.align 4
first_patch_header:
.versions PPC
# Applies a sequence of patches that immediately follows this code. Each patch is a 4-byte destination address and a
# 4-byte size followed by that many bytes of data. The next header is taken to start directly after the data, with
# no alignment padding. A header whose address and size are both zero ends the sequence.
mflr r8 # r8 = our caller's return address
b get_patch_data_ptr
get_patch_data_ptr_ret:
# We get here via the bl at the end of this code, so LR holds the address of first_patch_header
mflr r7 # r7 = patch header
apply_patch:
addi r4, r7, 8 # r4 = start of patch data
lwz r3, [r4 - 8] # r3 = patch dest address
lwz r5, [r4 - 4] # r5 = patch data size
or r0, r3, r5 # r0 is zero only if both the address and the size are zero
cmplwi r0, 0
# mtlr does not change the condition register, so beqlr still tests the comparison above
mtlr r8
beqlr
add r7, r4, r5 # r7 = next patch header
# CopyCode is an include whose code is not visible here; it presumably copies r5 bytes from r4 to r3 and must leave
# r7 and r8 intact for this loop to work (NOTE(review): confirm against CopyCode)
.include CopyCode
b apply_patch
get_patch_data_ptr:
# This bl must be the last instruction before the patch data: it puts the address of first_patch_header into LR
bl get_patch_data_ptr_ret
first_patch_header:
.versions 4OJB 4OJD 4OJU 4OED 4OEU 4OPD 4OPU
start:
# Applies a sequence of patches that immediately follows this code. Each patch is a 4-byte destination address and a
# 4-byte size followed by that many bytes of data; a header whose size is zero ends the sequence. Returns 1 in eax
# after all patches are applied, or 0 if GetVersionInfoXB (an include not visible here) left null in eax.
.include GetVersionInfoXB
test eax, eax
jnz can_patch
# eax is zero here, so this returns 0
ret
can_patch:
push esi
push edi
push ebx
mov edi, eax # edi = ptr to version info struct
jmp get_patch_data_ptr
get_patch_data_ptr_ret:
# We get here via the call at the end of this code, so the "return address" on the stack is first_patch_header
pop ebx # ebx = patch header
apply_next_patch:
cmp dword [ebx + 4], 0
jne copy_code_and_apply_again
# Size is zero: this is the end of the patch sequence
pop ebx
pop edi
pop esi
mov eax, 1
ret
copy_code_and_apply_again:
# [edi + 0x0C] and [edi + 0x08] each hold a pointer to a function pointer, hence the double dereference
push dword [ebx] # dest addr
mov ecx, [edi + 0x0C]
call [ecx] # MmQueryAddressProtect
mov esi, eax # esi = prev protection flags
# NOTE(review): 4 is presumably PAGE_READWRITE; confirm against the Xbox kernel headers
push 4 # new protection flags
push dword [ebx + 4] # size
push dword [ebx] # base address
mov ecx, [edi + 0x08]
call [ecx] # MmSetAddressProtect
xor ecx, ecx # ecx = offset
mov edx, [ebx] # edx = dest addr
copy_next_byte:
# The patch data starts 8 bytes after the header
mov al, [ebx + ecx + 8] # copy one byte to dest
mov [edx + ecx], al
inc ecx # offset++
cmp [ebx + 4], ecx # check if all bytes have been copied
jne copy_next_byte
# Put back the protection flags that were in effect before the copy
push esi # new protection flags
push dword [ebx + 4] # size
push dword [ebx] # base address
# ecx equals the patch size here; ebx is advanced only after the pushes above have read the old header
lea ebx, [ebx + ecx + 8] # advance to next block
mov ecx, [edi + 0x08]
call [ecx] # MmSetAddressProtect
jmp apply_next_patch
get_patch_data_ptr:
# This call must be the last instruction before the patch data: it pushes the address of first_patch_header
call get_patch_data_ptr_ret
first_patch_header:
.versions 2OJW 2OJZ 59NJ 59NL
start:
# Applies a sequence of patches that immediately follows this code. Each patch is a 4-byte destination address and a
# 4-byte size followed by that many bytes of data; a header whose size is zero ends the sequence. Always returns 1
# in eax. ebx is preserved; ecx and edx are clobbered.
push ebx
jmp get_patch_data_ptr
get_patch_data_ptr_ret:
# We get here via the call at the end of this code, so the "return address" on the stack is first_patch_header
pop ebx # ebx = patch header
apply_next_patch:
cmp dword [ebx + 4], 0
jne copy_code_and_apply_again
# Size is zero: this is the end of the patch sequence
pop ebx
mov eax, 1
ret
copy_code_and_apply_again:
xor ecx, ecx # ecx = offset
mov edx, [ebx] # edx = dest addr
copy_next_byte:
# The patch data starts 8 bytes after the header
mov al, [ebx + ecx + 8] # copy one byte to dest
mov [edx + ecx], al
inc ecx # offset++
cmp [ebx + 4], ecx # check if all bytes have been copied
jne copy_next_byte
# ecx equals the patch size here, so this skips the header and all of the data
lea ebx, [ebx + ecx + 8] # advance to next block
jmp apply_next_patch
get_patch_data_ptr:
# This call must be the last instruction before the patch data: it pushes the address of first_patch_header
call get_patch_data_ptr_ret
first_patch_header:
@@ -1,29 +0,0 @@
start:
push ebx
jmp get_patch_data_ptr
get_patch_data_ptr_ret:
pop ebx # ebx = patch header
apply_next_patch:
cmp dword [ebx + 4], 0
jne copy_code_and_apply_again
pop ebx
mov eax, 1
ret
copy_code_and_apply_again:
xor ecx, ecx # ecx = offset
mov edx, [ebx] # edx = dest addr
copy_next_byte:
mov al, [ebx + ecx + 8] # copy one byte to dest
mov [edx + ecx], al
inc ecx # offset++
cmp [ebx + 4], ecx # check if all bytes have been copied
jne copy_next_byte
lea ebx, [ebx + ecx + 8] # advance to next block
jmp apply_next_patch
get_patch_data_ptr:
call get_patch_data_ptr_ret
first_patch_header:
@@ -1,28 +0,0 @@
mova r0, [first_patch_header]
mov r7, r0 # r7 = read ptr
xor r3, r3
dec r3
shl r3, 2 # r3 = 0xFFFFFFFC (mask for aligning r7)
apply_patch:
add r7, 3
and r7, r3 # r7 = (r7 + 3) & (~3) (align to 4-byte boundary)
mov.l r4, [r7]+ # r4 = dest addr
mov.l r5, [r7]+
add r5, r4 # r5 = dest end ptr (dest addr + size)
cmpeq r4, r5 # if (size == 0) return
bt done
again:
cmpeq r4, r5
bt apply_patch # if (r4 == r5) done with the patch; go to next header
mov.b r0, [r7]+
mov.b [r4], r0 # *(r4) = *(r7++);
bs again # r4++; continue
add r4, 1
done:
rets
nop
.align 4
first_patch_header:
@@ -1,20 +0,0 @@
mflr r8
b get_patch_data_ptr
get_patch_data_ptr_ret:
mflr r7 # r7 = patch header
apply_patch:
addi r4, r7, 8 # r4 = start of patch data
lwz r3, [r4 - 8] # r3 = patch dest address
lwz r5, [r4 - 4] # r5 = patch data size
or r0, r3, r5
cmplwi r0, 0
mtlr r8
beqlr
add r7, r4, r5 # r7 = next patch header
.include CopyCode
b apply_patch
get_patch_data_ptr:
bl get_patch_data_ptr_ret
first_patch_header:
@@ -1,57 +0,0 @@
start:
.include GetVersionInfoXB
test eax, eax
jnz can_patch
ret
can_patch:
push esi
push edi
push ebx
mov edi, eax # edi = ptr to version info struct
jmp get_patch_data_ptr
get_patch_data_ptr_ret:
pop ebx # ebx = patch header
apply_next_patch:
cmp dword [ebx + 4], 0
jne copy_code_and_apply_again
pop ebx
pop edi
pop esi
mov eax, 1
ret
copy_code_and_apply_again:
push dword [ebx] # dest addr
mov ecx, [edi + 0x0C]
call [ecx] # MmQueryAddressProtect
mov esi, eax # esi = prev protection flags
push 4 # new protection flags
push dword [ebx + 4] # size
push dword [ebx] # base address
mov ecx, [edi + 0x08]
call [ecx] # MmSetAddressProtect
xor ecx, ecx # ecx = offset
mov edx, [ebx] # edx = dest addr
copy_next_byte:
mov al, [ebx + ecx + 8] # copy one byte to dest
mov [edx + ecx], al
inc ecx # offset++
cmp [ebx + 4], ecx # check if all bytes have been copied
jne copy_next_byte
push esi # new protection flags
push dword [ebx + 4] # size
push dword [ebx] # base address
lea ebx, [ebx + ecx + 8] # advance to next block
mov ecx, [edi + 0x08]
call [ecx] # MmSetAddressProtect
jmp apply_next_patch
get_patch_data_ptr:
call get_patch_data_ptr_ret
first_patch_header:
@@ -0,0 +1,193 @@
# This function is required for loading DOLs. If it's not present, newserv can't serve DOL files to GameCube clients.
# This is also the file I've chosen to document how to write code for newserv's functions subsystem. Client functions
# are assembly snippets written in the native language of the client, which can be sent to the client with the B2
# command. This is done at login time if the server administrator has enabled automatic patches in config.json or if
# the client has enabled certain patches in the Patches menu. Client functions can also be sent at any time with the
# $patch chat command, if they include .meta visibility (see below).
# This file is a general function (it does not appear in the Patches menu). General functions are used to implement
# various server operations; this one is used to write arbitrary data to the client's memory space. For example, to use
# this function to write the bytes 38 00 00 05 to the address 8010521C, send_function_call could be called like this:
# auto fn = s->client_functions->name_to_function.at("WriteMemoryGC");
# unordered_map<string, uint32_t> label_writes({{"dest_addr", 0x8010521C}, {"size", 4}});
# string suffix("\x38\x00\x00\x05", 4);
# send_function_call(
# c, // Client to send function call to
# fn, // The function's code
# label_writes, // Variables to pass in to the function's code
# suffix); // Data to append after the code (not all functions use this)
# The meanings of label_writes and suffix are described in the comments below.
# The .versions directive is required for all client functions that can be called by the server or the player. This
# directive specifies which architectures or specific versions of the game the client function is compatible with. The
# version tokens may be specific game versions (e.g. 3OE1, 59NL) or architectures (PPC, X86, or SH4); in the latter
# case, the source applies to all versions which use that architecture. All lines after a .versions directive apply
# only to the specified versions; this set of "active" versions can be changed with another .versions
# directive later in the file, thereby splitting the file into different sections that apply to different sets of
# versions. Any lines in the file that appear before the first .versions directive apply to all versions. After a
# .versions directive, expressions like "VERS value1 value2 ..." (but with <> instead of "") can be used to specialize
# the patch for each version. In a VERS expression, the number of values must match the number of versions given in the
# .versions directive, and the values must appear in the same order. This function is implemented on all versions and
# all architectures, so we specify all architectures here. Later on, the implementations for each architecture are
# segregated via further .versions directives.
.versions SH4 PPC X86
# This directive controls where the function appears. The values are (note that the quotes are required):
# visibility="hidden" (default): this function does not appear in the Patches menu and cannot be used via $patch
# visibility="cheat": this function doesn't appear in the Patches menu but can be used via $patch if cheat mode is on
# visibility="chat": this function doesn't appear in the Patches menu but can be used via $patch
# visibility="menu": this function appears in the Patches menu but can't be used via $patch
# visibility="all": this function appears in the Patches menu and can be used via $patch
# Note that if the client has $debug enabled, then all functions can be run via $patch regardless of this setting.
# .meta visibility="menu"
# This directive specifies what the function's internal name is. This is the name that can be used in config.json to
# require the patch for all clients, and is also the name used with the $patch command. If not specified, the
# function's internal name is the same as its filename without the .s extension.
# .meta key="WriteMemory"
# These directives tell newserv what to show to the player in the Patches menu. Neither of them is required; if the
# name is omitted, the filename is used instead. These have no real effect for this function (since .meta visibility is
# not used), so this is primarily for documentation purposes.
.meta name="Write memory"
.meta description="Writes data to any location in memory"
# When used for debugging purposes, it may be useful to see the value returned by the client function when run via the
# $patch chat command. This directive causes the server to tell you the return value in-game after running it.
# .meta show_return_value
# The entry_ptr label is required for all functions. It should generally point to a .offsetof directive that itself
# points to the actual entrypoint.
entry_ptr:
# All labels starting with reloc signify that the following PPC word (big-endian 32-bit value) is to be relocated at
# runtime. That is, when the code runs on the client, the PPC word will contain the actual memory address relative to
# the running code instead of the offset that it holds at assembly time. The entry_ptr label should almost always have
# a reloc label next to it.
reloc0:
.offsetof start
# Everything following this directive (until the next .versions directive) applies only to PowerPC architectures. When
# this function is compiled for other architectures, this section will be ignored.
.versions PPC
start:
# Copies `size` bytes from data_to_write to dest_addr, flushes the caches for the written region, and returns
# dest_addr + size in r3. Uses r0, r3, r4, r5, r6, r10, and r12 directly, plus whatever FlushCachedCode uses.
mflr r12 # r12 = our caller's return address (restored before the final blr)
bl get_block_ptr
mr r6, r3 # r6 = address of dest_addr label
copy_block:
lwz r3, [r6] # r3 = dest ptr
subi r3, r3, 1 # subtract 1 so we can use stbu
lwz r5, [r6 + 4] # r5 = size (bytes remaining)
add r5, r5, r3 # r5 = dest end ptr (last byte to be written)
addi r4, r6, 7 # r4 = src ptr (starting at -1 so we can use lbzu)
# If size is zero, r3 and r5 are already equal and there is nothing to copy or flush. The loop below copies a byte
# before it compares, so without this check it would never see r3 == r5 and would not stop at the right place.
# The SH4 and X86 implementations below also check for zero before copying.
cmp r3, r5
beq copy_block__done
copy_block__again:
lbzu r0, [r4 + 1]
stbu [r3 + 1], r0
cmp r3, r5
bne copy_block__again
# Flush the data cache and clear the instruction cache at the written region
lwz r3, [r6] # r3 = dest ptr
lwz r4, [r6 + 4] # r4 = size
# A .include directive essentially pastes in the code from the referenced file. Here, we use the code from the file
# FlushCachedCode.inc.s. When compiling includes, newserv first looks in the same directory as the function's source,
# then looks in system/client-functions/System.
.include FlushCachedCode
copy_block__done:
# Return the address after the last byte written. The value returned in r3 from the function is sent back to the
# server in a B3 command. newserv uses the return value during DOL loading to know which section of the DOL file to
# send next, or to send the RunDOL function if all sections have been loaded.
lwz r3, [r6] # r3 = dest ptr
lwz r4, [r6 + 4] # r4 = size
add r3, r3, r4
mtlr r12
blr
get_block_ptr__ret:
mflr r3 # r3 = address following the bl in get_block_ptr (the dest_addr label)
mtlr r10
blr
get_block_ptr:
# We use a trick here to get the address of the dest_addr label: since bl puts the immediately-following address into
# the link register, we "call" get_block_ptr__ret and get the dest_addr pointer out of the LR. We then put r10 back
# into the LR so get_block_ptr__ret returns to the caller.
mflr r10
bl get_block_ptr__ret
.versions SH4
start:
# Copies `size` bytes from the data following the size label to the address stored at dest_addr. Does nothing if size
# is zero. When the loop finishes, r0 holds the address after the last byte written.
mova r0, [dest_addr] # r0 = address of dest_addr label
mov r4, r0
mov.l r0, [r4] # r0 = dest ptr
mov.l r5, [r4 + 4] # r5 = size (bytes remaining)
add r4, 8 # r4 = src ptr (the data follows the two header words)
again:
test r5, r5
bt done
mov.b r6, [r4]
mov.b [r0], r6 # copy one byte from src to dest
add r4, 1
add r0, 1
# The add on the line after the branch appears to execute in the branch's delay slot, so the remaining count is
# decremented once per iteration (NOTE(review): confirm bs is the delayed-branch mnemonic in this assembler)
bs again
add r5, -1
done:
rets
nop
# dest_addr is read via mova and mov.l, so keep what follows aligned to a 4-byte boundary
.align 4
.versions X86
start:
# Copies `size` bytes from the data following the size label to the address stored at dest_addr. Does nothing if size
# is zero. Returns the address after the last byte written in eax. ebx is preserved; ecx and edx are clobbered.
jmp get_block_ptr
get_block_ptr_ret:
# The call in get_block_ptr pushed the address of the dest_addr label. Swapping it with ebx loads that address and
# saves the caller's ebx in the same stack slot, to be restored by the pop at done.
xchg ebx, [esp]
mov eax, [ebx] # eax = dest ptr
mov ecx, [ebx + 4] # ecx = size (bytes remaining)
add ebx, 8 # ebx = src ptr (the data follows the two header words)
again:
test ecx, ecx
jz done
mov dl, [ebx]
mov [eax], dl # copy one byte from src to dest
inc ebx
inc eax
dec ecx
jmp again
done:
pop ebx # restore the caller's ebx; the original return address is now on top of the stack
ret
get_block_ptr:
# This call must be the last instruction before the dest_addr label: it pushes the address of that label
call get_block_ptr_ret
# This last section applies to all architectures, so we re-enable all versions again. This directive also disables the
# use of VERS tokens.
.all_versions
# These fields are filled in right before the command is sent to the client. Specifically, the label_writes argument to
# send_function_call is responsible for this. The label_writes argument is a map of label name to value, and
# send_function_call simply writes the given values after the given labels. This is a way to pass arbitrary arguments
# to a function at call time.
dest_addr:
.data 0
size:
.data 0
# Finally, we use the suffix argument to instruct send_function_call to append the data we want to write to memory
# immediately after the assembled code. (The data_to_write label here is for documentation purposes only; the suffix
# argument always appends data after the end of all the assembled code.)
data_to_write:
@@ -1,33 +0,0 @@
.meta name="Write memory"
.meta description="Writes data to any location in memory"
entry_ptr:
reloc0:
.offsetof start
start:
mova r0, [dest_addr]
mov r4, r0
mov.l r0, [r4]
mov.l r5, [r4 + 4]
add r4, 8
again:
test r5, r5
bt done
mov.b r6, [r4]
mov.b [r0], r6
add r4, 1
add r0, 1
bs again
add r5, -1
done:
rets
nop
.align 4
dest_addr:
.data 0
size:
.data 0
data_to_write:
@@ -1,117 +0,0 @@
# This function is required for loading DOLs. If it's not present, newserv can't serve DOL files to GameCube clients.
# This is also the file I've chosen to document how to write code for newserv's functions subsystem. There are three
# kinds of functions: includes, patches, and general functions.
# - General functions are not version-specific (usually) but are architecture-specific. This file, WriteMemoryGC, is a
# general function for all PowerPC versions of PSO, which means all GameCube versions. General functions are named
# like NAME.ARCH.s, where ARCH is sh4, ppc, or x86.
# - Includes are snippets of code that are intended to be used as part of other general functions and patches. Includes
# are named like NAME.ARCH.inc.s, where ARCH has the same meaning as above. These can be used with the .include
# directive; there is an example of this in the code below.
# - Patches are functions that are available to run upon client request. They can be made available in the Patches menu
# or via the $patch command. Patches should be named like PATCHNAME.VERS.patch.s, where VERS denotes which specific
# game version the patch is for. These version codes are listed in README.md, and directly correspond to values
# returned by the VersionDetect functions, also in this directory.
# For example, to use this function to write the bytes 38 00 00 05 to the address 8010521C, send_function_call could be
# called like this:
# auto fn = s->function_code_index->name_to_function.at("WriteMemoryGC");
# unordered_map<string, uint32_t> label_writes({{"dest_addr", 0x8010521C}, {"size", 4}});
# string suffix("\x38\x00\x00\x05", 4);
# send_function_call(
# c, // Client to send function call to
# fn, // The function's code
# label_writes, // Variables to pass in to the function's code
# suffix); // Data to append after the code (not all functions use this)
# The meanings of label_writes and suffix are described in the comments below.
# The .versions directive may be used in patches (but not in includes or general functions) and enables
# parameterization. If .versions is used, then the patch may later use expressions like <VERS value1 value2 ...> to
# generate the same patch with different values for different game versions. In each <VERS> expression, the number of
# values must match the number of versions given in the .versions directive.
# .versions VRS1 VRS2 VRS3 ...
# These directives tell newserv what to show to the player in the Patches menu. Neither of them is required; if the
# name is omitted, the filename is used instead.
.meta name="Write memory"
.meta description="Writes data to any location in memory"
# To hide a patch from the Patches menu (so it can only be used with the $patch command), this directive can be used.
# This has no effect if used in includes or general functions.
# .meta hide_from_patches_menu
# When used for debugging purposes, it may be useful to see the value returned by the client function when run via the
# $patch chat command. This directive causes the server to tell you the return value in-game after running it.
# .meta show_return_value
# The entry_ptr label is required for all functions. It should generally point to a .offsetof directive that itself
# points to the actual entrypoint.
entry_ptr:
# All labels starting with reloc signify that the following PPC word (big-endian 32-bit value) is to be relocated at
# runtime. That is, when the code runs on the client, the PPC word will contain the actual memory address relative to
# the running code instead of the offset that it holds at assembly time. The entry_ptr label should almost always have
# a reloc label next to it.
reloc0:
# This word holds the offset of the start label at assembly time; because of the reloc0 label above, it is relocated
# as described so that it refers to the actual entrypoint when the code runs.
.offsetof start
start:
# Entry point. Copies `size` bytes from the data appended after this code (data_to_write) to the address stored at
# dest_addr, flushes the caches for the written region, and returns dest_addr + size in r3.
# Register usage: r0 = byte being copied; r3, r4, r5 = scratch and arguments to FlushCachedCode; r6 = address of the
# dest_addr label; r10 = LR saved inside get_block_ptr; r12 = LR saved on entry. r6 and r12 are read again after the
# included FlushCachedCode code, so that code must leave them intact.
mflr r12
bl get_block_ptr
mr r6, r3 # r6 = address of dest_addr label
copy_block:
lwz r3, [r6] # r3 = dest ptr
subi r3, r3, 1 # subtract 1 so we can use stbu
lwz r5, [r6 + 4] # r5 = size (bytes remaining)
# If size is zero, there is nothing to write or flush. The loop below checks for completion only after storing a
# byte, so entering it with size == 0 would write a byte and then keep writing until the pointer wrapped around.
cmpwi r5, 0
beq copy_block__done
add r5, r5, r3 # r5 = dest end ptr (last byte to be written)
addi r4, r6, 7 # r4 = src ptr (starting at -1 so we can use lbzu)
copy_block__again:
# Copy one byte; the update forms advance r4 and r3 by 1 before each access
lbzu r0, [r4 + 1]
stbu [r3 + 1], r0
# Stop once the byte at the dest end ptr has been written
cmp r3, r5
bne copy_block__again
# Flush the data cache and clear the instruction cache at the written region
lwz r3, [r6] # r3 = dest ptr
lwz r4, [r6 + 4] # r4 = size
# A .include directive essentially pastes in the code from the referenced file. Here, we use the code from the file
# FlushCachedCode.inc.s. When compiling includes, newserv first looks in the same directory as the function's source,
# then looks in system/client-functions/System.
.include FlushCachedCode
copy_block__done:
# Return the address after the last byte written. The value returned in r3 from the function is sent back to the
# server in a B3 command. newserv uses the return value during DOL loading to know which section of the DOL file to
# send next, or to send the RunDOL function if all sections have been loaded.
lwz r3, [r6] # r3 = dest ptr
lwz r4, [r6 + 4] # r4 = size
add r3, r3, r4
mtlr r12
blr
get_block_ptr__ret:
# Here LR holds the address following the bl in get_block_ptr, which is the dest_addr label. Return it in r3, and
# restore the LR saved in r10 so that blr returns to the caller of get_block_ptr.
mflr r3
mtlr r10
blr
get_block_ptr:
# We use a trick here to get the address of the dest_addr label: since bl puts the immediately-following address into
# the link register, we "call" get_block_ptr__ret and get the dest_addr pointer out of the LR. We then put r10 back
# into the LR so get_block_ptr__ret returns to the caller.
mflr r10
bl get_block_ptr__ret
# These fields are filled in right before the command is sent to the client. Specifically, the label_writes argument to
# send_function_call is responsible for this. The label_writes argument is a map of label name to value, and
# send_function_call simply writes the given values after the given labels. This is a way to pass arbitrary arguments
# to a function at call time.
# Layout note: the code above reads the destination address with lwz from [r6] and the size from [r6 + 4], where r6 is
# the address of dest_addr, and starts reading data at r6 + 8. These two fields must therefore stay adjacent, in this
# order, and dest_addr must directly follow the bl in get_block_ptr.
# NOTE(review): each .zero presumably emits a single zeroed 32-bit word, since size is read at dest_addr + 4 - confirm.
dest_addr:
.zero
size:
.zero
# Finally, we use the suffix argument to instruct send_function_call to append the data we want to write to memory
# immediately after the assembled code. (The data_to_write label here is for documentation purposes only; the suffix
# argument always appends data after the end of all the assembled code.)
data_to_write:
@@ -1,37 +0,0 @@
.meta name="Write memory"
.meta description="Writes data to any location in memory"
# X86 implementation. Copies `size` bytes from data_to_write to the address stored at dest_addr, one byte at a time.
# A size of zero copies nothing, because the count is tested before each byte is copied.
entry_ptr:
reloc0:
.offsetof start
start:
# Jump to the call at the end of the code. That call pushes the address immediately following it, which is the
# dest_addr label.
jmp get_block_ptr
get_block_ptr_ret:
# Swap ebx with the pushed address: ebx = address of dest_addr, and the caller's ebx is now saved on the stack
xchg ebx, [esp]
# eax = destination pointer (word at dest_addr); ecx = bytes remaining (word at size)
mov eax, [ebx]
mov ecx, [ebx + 4]
# ebx = source pointer (address of data_to_write, 8 bytes after dest_addr)
add ebx, 8
again:
# Leave the loop once the remaining count reaches zero
test ecx, ecx
jz done
# Copy one byte through dl, then advance both pointers and decrement the count
mov dl, [ebx]
mov [eax], dl
inc ebx
inc eax
dec ecx
jmp again
done:
# Restore the caller's ebx (placed on the stack by the xchg above). eax points just past the last byte written.
pop ebx
ret
get_block_ptr:
# This call is never returned to; it exists only to push the address of dest_addr onto the stack, so dest_addr must
# directly follow it.
call get_block_ptr_ret
dest_addr:
.data 0
size:
.data 0
data_to_write: