add DOL file loader
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
# This code flushes the data cache and invalidates the instruction cache for a
|
||||
# block of newly-written code in memory.
|
||||
# Arguments:
|
||||
# r3 = address of written code
|
||||
# r4 = number of bytes
|
||||
# Returns: nothing
|
||||
# Overwrites: r3, r4, r5
|
||||
# NOTE(review): the mask built below is 0xFFFFFFF1, which clears bits 1-3 of the
# start address but leaves bit 0 alone, so an odd start address stays odd.
# dcbst/icbi act on the whole cache block containing the given address, so this
# looks harmless, but 0xFFFFFFF0 or 0xFFFFFFF8 may have been intended - confirm.
lis r5, 0xFFFF # r5 = 0xFFFF0000 (upper half of the rounding mask)
|
||||
ori r5, r5, 0xFFF1 # r5 = 0xFFFFFFF1
|
||||
and r5, r5, r3 # r5 = start address rounded down (first address to flush)
|
||||
subf r3, r5, r3 # r3 = original address - rounded address (bytes added by rounding)
|
||||
add r4, r4, r3 # r4 = byte count measured from the rounded-down address
|
||||
flush_cached_code_writes__again:
|
||||
dcbst r0, r5 # write the data cache block containing r5 back to memory
|
||||
sync # wait for the write-back to finish before touching the icache
|
||||
icbi r0, r5 # invalidate the instruction cache block containing r5
|
||||
addic r5, r5, 8 # advance 8 bytes (the carry bit this sets is not used)
|
||||
subic. r4, r4, 8 # 8 fewer bytes remaining; the dot form sets cr0 for the branch
|
||||
bge flush_cached_code_writes__again # loop while the remaining count is >= 0
|
||||
isync # discard any instructions fetched before the invalidation
|
||||
@@ -0,0 +1,18 @@
|
||||
# This macro clears the data and instruction caches at the beginning of each
|
||||
# function. This is necessary because apparently some versions of PSO don't do
|
||||
# this correctly by themselves.
|
||||
|
||||
# This macro expects to be run immediately at the entrypoint (usually the start
|
||||
# label) for all functions. It leaves the original return address in r12. It
|
||||
# does not write r11; r3 and r4 are used as scratch and are overwritten.
|
||||
mflr r12 # r12 = address to return to
|
||||
mfctr r3 # r3 = address of start label (this code is called via bctrl)
|
||||
addi r4, r3, 0x7C00 # r4 = end of relevant region
|
||||
InitClearCaches__next_cache_block:
|
||||
dcbst r0, r3 # write the data cache block containing r3 back to memory
|
||||
sync # wait for the write-back to finish before touching the icache
|
||||
icbi r0, r3 # invalidate the instruction cache block containing r3
|
||||
addi r3, r3, 0x20 # advance to the next block (0x20-byte steps)
|
||||
cmpl r3, r4 # unsigned compare against the end of the region
|
||||
blt InitClearCaches__next_cache_block # repeat until r3 reaches r4
|
||||
isync # discard any instructions fetched before the invalidation
|
||||
@@ -0,0 +1,21 @@
|
||||
# This function is required for loading DOLs. If it's not present, newserv can't
|
||||
# serve DOL files to GameCube clients.
|
||||
|
||||
newserv_index_E0:
|
||||
|
||||
entry_ptr:
|
||||
reloc0:
|
||||
.offsetof start
|
||||
|
||||
start:
|
||||
.include InitClearCaches
|
||||
|
||||
# The bl below is used only to obtain the address of the word at the address
# label: bl puts the address of the following word into LR. That word is a
# placeholder (presumably overwritten by the server before sending - confirm).
bl read
|
||||
address:
|
||||
.zero
|
||||
read:
|
||||
mflr r3 # r3 = pointer to the word at the address label
|
||||
lwz r3, [r3] # r3 = value stored at the address label (a memory address)
|
||||
lwz r3, [r3] # r3 = word read from that memory address (left in r3 as the result)
|
||||
mtlr r12 # restore the return address saved in r12 by InitClearCaches
|
||||
blr
|
||||
@@ -0,0 +1,130 @@
|
||||
# This function is required for loading DOLs. If it's not present, newserv can't
|
||||
# serve DOL files to GameCube clients.
|
||||
|
||||
newserv_index_E2:
|
||||
|
||||
entry_ptr:
|
||||
reloc0:
|
||||
.offsetof start
|
||||
|
||||
start:
|
||||
.include InitClearCaches
|
||||
|
||||
disable_interrupts:
|
||||
mfmsr r3 # r3 = machine state register
|
||||
rlwinm r3, r3, 0, 17, 15 # keep every bit except bit 16 (mask wraps from bit 17 to bit 15)
|
||||
mtmsr r3 # write the MSR back with bit 16 (external interrupt enable) cleared
|
||||
|
||||
bl get_current_addr # LR = address of dol_base_ptr (the word right after this bl)
|
||||
dol_base_ptr:
|
||||
.zero
|
||||
get_current_addr:
|
||||
mflr r31 # r31 = address of dol_base_ptr
|
||||
# TODO: It'd be nice to be able to use an expression for the immediate value
|
||||
# here - something like (dol_base_ptr - start), for example
|
||||
# 0x38 is the byte distance from start to dol_base_ptr: 14 instructions of 4
# bytes each (10 from InitClearCaches, 3 in disable_interrupts, and the bl).
# It must be updated by hand if any of that code changes length.
subi r31, r31, 0x38 # r31 = base of data to copy to low memory (start label)
|
||||
|
||||
# If this code is not running from low memory (80001800-80003000), then copy
|
||||
# it there and branch to it
|
||||
# Note: only the upper bound is tested here (r31 < 0x80003000); the lower bound
# of the range is not checked.
lis r3, 0x8000
|
||||
ori r3, r3, 0x3000 # r3 = 0x80003000
|
||||
cmp r31, r3
|
||||
blt run_dol # already below 0x80003000: skip the copy
|
||||
|
||||
copy_code_to_low_memory:
|
||||
bl get_end_ptr # r3 = address just past get_end_ptr's final bl (overwrites r0, LR, CTR)
|
||||
sub r30, r3, r31 # r30 = size of code to copy (for cache flushing later)
|
||||
subi r5, r3, 4 # r5 = address of the last word to copy
|
||||
subi r4, r31, 4 # r4 = source pointer, pre-decremented for lwzu
|
||||
lis r3, 0x8000
|
||||
ori r3, r3, 0x17FC # r3 = dest pointer (0x80001800), pre-decremented for stwu
|
||||
copy_code_to_low_memory__again:
|
||||
lwzu r0, [r4 + 4] # advance r4 by 4, then r0 = word at r4
|
||||
stwu [r3 + 4], r0 # advance r3 by 4, then store r0 at r3
|
||||
cmp r4, r5
|
||||
bne copy_code_to_low_memory__again # loop until the last word has been copied
|
||||
|
||||
# Flush the data cache and clear the instruction cache before running the
|
||||
# moved code
|
||||
lis r3, 0x8000
|
||||
ori r3, r3, 0x1800 # r3 = 0x80001800, start of the relocated code
|
||||
mr r4, r30 # r4 = number of bytes that were copied
|
||||
mtlr r3 # the blr at the end of flush_cached_code_writes will jump to 0x80001800
|
||||
# NOTE(review): CTR still holds the return address that get_end_ptr stored in
# it, not the relocated start label, so the InitClearCaches include at the
# relocated start will read that stale value via mfctr. The flush below already
# covers the relocated code; confirm this is acceptable.
b flush_cached_code_writes
|
||||
|
||||
|
||||
|
||||
run_dol:
|
||||
lwz r30, [r31 + 0x38] # r30 = DOL base ptr
|
||||
|
||||
# DOL files are very simple: they have up to 7 text sections, up to 11 data
|
||||
# sections, and a BSS section and an entrypoint. No imports or other fancy
|
||||
# things to do - we just have to move a bunch of bytes around.
|
||||
mr r29, r30 # r29 = DOL header iterator
|
||||
addi r28, r29, 0x48 # r28 = DOL header iterator end value
|
||||
|
||||
run_dol__move_section:
|
||||
lwz r4, [r29] # r4 = file offset of section data
|
||||
add r4, r4, r30 # r4 = address of section data
|
||||
lwz r3, [r29 + 0x48] # r3 = dest address of section data
|
||||
lwz r5, [r29 + 0x90] # r5 = number of bytes to move
|
||||
cmplwi r5, 0 # If size is 0, skip the section entirely
|
||||
beq skip_section
|
||||
subi r3, r3, 1 # pre-decrement dest so stbu's +1 lands on the first byte
|
||||
subi r4, r4, 1 # pre-decrement source so lbzu's +1 lands on the first byte
|
||||
add r5, r4, r5 # r5 = address of the last source byte to copy
|
||||
run_dol__move_section_data__again:
|
||||
# TODO: We probably should implement memmove-like semantics here, in case the
|
||||
# DOL loads at an unusually late address. This is probably very rare.
|
||||
lbzu r0, [r4 + 1] # advance r4 by 1, then r0 = byte at r4
|
||||
stbu [r3 + 1], r0 # advance r3 by 1, then store the byte at r3
|
||||
cmp r4, r5
|
||||
bne run_dol__move_section_data__again # loop until the last source byte is copied
|
||||
|
||||
# Flush the data cache and invalidate the instruction cache after copying the
|
||||
# section data. Technically we don't have to do this for data sections, but
|
||||
# I'm lazy and it doesn't take too long.
|
||||
lwz r3, [r29 + 0x48] # r3 = dest address of section data
|
||||
lwz r4, [r29 + 0x90] # r4 = size of section data
|
||||
bl flush_cached_code_writes # overwrites r3, r4, r5 and LR
|
||||
|
||||
skip_section:
|
||||
# Move to the next section
|
||||
addi r29, r29, 4 # each header table entry is one 4-byte word
|
||||
cmp r29, r28
|
||||
bne run_dol__move_section # 0x48 / 4 = 18 entries in total
|
||||
|
||||
# Zero the DOL's BSS region (address at header + 0xD8, size at header + 0xDC).
# r3 is pre-decremented BEFORE the end pointer is computed, so that r4 is the
# address of the last BSS byte. The loop exits right after the store whose
# updated address equals r4, so exactly `size` bytes are written. (Computing
# r4 = address + size first made the loop write one extra byte past the end.)
# NOTE(review): this runs after all sections have been copied; if a DOL's BSS
# range overlaps one of its loaded sections, that section's data would be
# zeroed here - confirm against the DOL files being served.
run_dol__zero_bss:
|
||||
lwz r3, [r30 + 0xD8] # r3 = BSS address
|
||||
lwz r4, [r30 + 0xDC] # r4 = BSS size
|
||||
cmplwi r4, 0
|
||||
beq run_dol__skip_zero_bss
|
||||
subi r3, r3, 1 # pre-decrement so stbu's +1 lands on the first BSS byte
|
||||
add r4, r3, r4 # r4 = address of the last BSS byte
|
||||
li r0, 0
|
||||
run_dol__zero_bss__again:
|
||||
stbu [r3 + 1], r0 # advance r3 by 1, then store a zero byte at r3
|
||||
cmp r3, r4
|
||||
bne run_dol__zero_bss__again # stop once the last BSS byte has been written
|
||||
run_dol__skip_zero_bss:
|
||||
|
||||
run_dol__go_to_entrypoint:
|
||||
lwz r0, [r30 + 0xE0] # r0 = entrypoint
|
||||
mtctr r0
|
||||
bctr # jump to the entrypoint; LR is not set, so control does not return here
|
||||
|
||||
|
||||
|
||||
# Callable wrapper around the FlushCachedCode include; returns to the address
# in LR.
flush_cached_code_writes:
|
||||
.include FlushCachedCode
|
||||
blr
|
||||
|
||||
|
||||
|
||||
# get_end_ptr puts the address of the word following its final bl into r3.
# It saves the caller's return address in CTR, uses bl to capture the address
# after itself in LR, and return_end_ptr then moves that into r3 and returns to
# the original caller through CTR. Overwrites r0, r3, LR and CTR.
return_end_ptr:
|
||||
mflr r3 # r3 = address following the bl at the end of get_end_ptr
|
||||
bctr # return to get_end_ptr's caller
|
||||
get_end_ptr:
|
||||
mflr r0 # r0 = caller's return address
|
||||
mtctr r0 # keep it in CTR, since the bl below overwrites LR
|
||||
bl return_end_ptr
|
||||
+67
-50
@@ -1,11 +1,17 @@
|
||||
# This example shows how to use newserv's send_function_call function for PSO
|
||||
# GameCube clients. This code writes a variable-length block of data to a
|
||||
# specified address in the client's memory.
|
||||
# This function is required for loading DOLs. If it's not present, newserv can't
|
||||
# serve DOL files to GameCube clients.
|
||||
|
||||
# For example, to write the bytes 38 00 00 05 to the address 8010521C,
|
||||
# send_function_call could be called like this:
|
||||
# This is also the file I've chosen to document how to write code for newserv's
|
||||
# functions subsystem. The code implemented in this file writes a
|
||||
# variable-length block of data to a specified address in the client's memory.
|
||||
# Note that WriteMemory is a general function that uses many of the subsystem's
|
||||
# features. If you're writing a patch (not a general function), you cannot use
|
||||
# the suffix or label_offsets features that are described here.
|
||||
|
||||
# For example, to use this function to write the bytes 38 00 00 05 to the
|
||||
# address 8010521C, send_function_call could be called like this:
|
||||
# auto fn = s->function_code_index->name_to_function.at("WriteMemory");
|
||||
# unordered_map<string, uint32_t label_writes(
|
||||
# unordered_map<string, uint32_t> label_writes(
|
||||
# {{"dest_addr", 0x8010521C}, {"size", 4}});
|
||||
# string suffix("\x38\x00\x00\x05", 4);
|
||||
# send_function_call(
|
||||
@@ -15,77 +21,88 @@
|
||||
# suffix); // Data to append after the code (not all functions use this)
|
||||
# The meanings of label_writes and suffix are described in the comments below.
|
||||
|
||||
# A label newserv_id_XX tells newserv what value to use in the flag field when
|
||||
# sending the B2 command. This is needed if the server needs to do something
|
||||
# when the B3 response is received.
|
||||
newserv_id_C0:
|
||||
# A label newserv_index_XX tells newserv what value to use in the flag field
|
||||
# when sending the B2 command. This is needed if the server needs to do
|
||||
# something when the B3 response is received. For GameCube functions, if
|
||||
# specified, the index must be in the range 01-FF. The DOL loading
|
||||
# functionality, which this function is a part of, uses indexes E0, E1, and E2,
|
||||
# but this function can also be used for other purposes.
|
||||
newserv_index_E1:
|
||||
|
||||
# The entry_ptr label is required. It should point to a .offsetof directive that
|
||||
# itself points to the actual entrypoint.
|
||||
# The entry_ptr label is required for all functions. It should point to a
|
||||
# .offsetof directive that itself points to the actual entrypoint.
|
||||
entry_ptr:
|
||||
# All labels starting with reloc signify that the following PPC word
|
||||
# (be_uint32_t) is to be relocated at runtime. That is, when the code is run,
|
||||
# the PPC word will contain the actual memory address relative to the running
|
||||
# code instead of the offset that it holds at assembly time. The entry_ptr label
|
||||
# should almost always have a reloc label next to it.
|
||||
# All labels starting with reloc signify that the following PPC word (big-endian
|
||||
# 32-bit value) is to be relocated at runtime. That is, when the code runs on
|
||||
# the client, the PPC word will contain the actual memory address relative to
|
||||
# the running code instead of the offset that it holds at assembly time. The
|
||||
# entry_ptr label should almost always have a reloc label next to it.
|
||||
reloc0:
|
||||
.offsetof start
|
||||
|
||||
start:
|
||||
# A .include directive essentially pastes in the code from the referenced
|
||||
# file. Here, we use the code from the file InitClearCaches.inc.s.
|
||||
# PSO GC doesn't properly clear the data and instruction caches when it
|
||||
# executes functions, so we use this include in all functions to do so. Since
|
||||
# all functions do this, this makes it safe to use more than one function in
|
||||
# each client's session.
|
||||
.include InitClearCaches
|
||||
|
||||
bl get_block_ptr
|
||||
mr r6, r3 # r6 = address of dest_addr label
|
||||
|
||||
copy_block:
|
||||
# r8 = address to return to (LR, from start label)
|
||||
mflr r6 # r6 = address of dest_addr label
|
||||
mtlr r8
|
||||
lwz r3, [r6] # r3 = dest ptr
|
||||
subi r3, r3, 1 # subtract 1 so we can use stbu
|
||||
lwz r5, [r6 + 4] # r5 = size (bytes remaining)
|
||||
add r5, r5, r3 # r5 = dest end ptr
|
||||
add r5, r5, r3 # r5 = dest end ptr (last byte to be written)
|
||||
addi r4, r6, 7 # r4 = src ptr (starting at -1 so we can use lbzu)
|
||||
|
||||
copy_block__again:
|
||||
lbzu r0, [r4 + 1]
|
||||
stbu [r3 + 1], r0
|
||||
cmp r3, r5
|
||||
bne copy_block__again
|
||||
|
||||
# Flush the data cache and clear the instruction cache at the written region
|
||||
lwz r3, [r6] # r3 = dest ptr
|
||||
lwz r4, [r6 + 4] # r4 = size
|
||||
.include FlushCachedCode
|
||||
|
||||
# Flush the data cache and clear the instruction cache at the written region
|
||||
lis r5, 0xFFFF
|
||||
ori r5, r5, 0xFFF1
|
||||
and r5, r5, r3
|
||||
subf r3, r5, r3
|
||||
add r4, r4, r3
|
||||
flush_cached_code_writes__again:
|
||||
dcbst r0, r5
|
||||
sync
|
||||
icbi r0, r5
|
||||
addic r5, r5, 8
|
||||
subic. r4, r4, 8
|
||||
bge flush_cached_code_writes__again
|
||||
isync
|
||||
|
||||
# Return 0 (this value appears in the B3 command)
|
||||
li r3, 0
|
||||
# Return the address after the last byte written. The value returned in r3
|
||||
# from the function is sent back to the server in a B3 command. newserv uses
|
||||
# the return value during DOL loading to know which section of the DOL file to
|
||||
# send next, or to send the RunDOL function if all sections have been loaded.
|
||||
lwz r3, [r6] # r3 = dest ptr
|
||||
lwz r4, [r6 + 4] # r4 = size
|
||||
add r3, r3, r4
|
||||
mtlr r12
|
||||
blr
|
||||
|
||||
start:
|
||||
get_block_ptr__ret:
|
||||
mflr r3
|
||||
mtlr r10
|
||||
blr
|
||||
get_block_ptr:
|
||||
# We use a trick here to get the address of the dest_addr label: since bl puts
|
||||
# the immediately-following address into the link register, we "call"
|
||||
# copy_block and get the dest_addr pointer out of the LR. We then put r8 back
|
||||
# into the LR so copy_block can return normally.
|
||||
mflr r8
|
||||
bl copy_block
|
||||
# get_block_ptr__ret and get the dest_addr pointer out of the LR. We then put
|
||||
# r10 back into the LR so get_block_ptr__ret returns to the caller.
|
||||
mflr r10
|
||||
bl get_block_ptr__ret
|
||||
|
||||
# These fields are filled in when the B2 command is generated. Specifically, the
|
||||
# label_writes argument to send_function_call is responsible for this.
|
||||
# These fields are filled in right before the command is sent to the client.
|
||||
# Specifically, the label_writes argument to send_function_call is responsible
|
||||
# for this. The label_writes argument is a map of label name to value, and
|
||||
# send_function_call simply writes the given values after the given labels. This
|
||||
# is a way to pass arbitrary arguments to a function at call time.
|
||||
dest_addr:
|
||||
.zero
|
||||
size:
|
||||
.zero
|
||||
|
||||
# The data to be written is appended here at B2 construction time via the suffix
|
||||
# argument to send_function_call. (This label is for documentation purposes
|
||||
# only; the suffix argument always appends data after the end of all the
|
||||
# assembled code.)
|
||||
# Finally, we use the suffix argument to instruct send_function_call to append
|
||||
# the data we want to write to memory immediately after the assembled code.
|
||||
# (The data_to_write label here is for documentation purposes only; the suffix
|
||||
# argument always appends data after the end of all the assembled code.)
|
||||
data_to_write:
|
||||
|
||||
Reference in New Issue
Block a user