add DOL file loader

This commit is contained in:
Martin Michelsen
2022-06-01 13:00:43 -07:00
parent 40aa08bd4f
commit 562bc4a40c
20 changed files with 948 additions and 264 deletions
+20
View File
@@ -0,0 +1,20 @@
# This code flushes the data cache and invalidates the instruction cache for a
# block of newly-written code in memory.
# Arguments:
# r3 = address of written code
# r4 = number of bytes
# Returns: nothing
# Overwrites: r3, r4, r5 (subic. also updates cr0, and addic/subic. update the
# carry bit in XER)
# This code does not return by itself; execution falls through past the final
# isync, so the including file supplies a blr if it needs one.
# NOTE(review): the mask 0xFFFFFFF1 keeps bit 0 of the address, so r5 is not
# necessarily aligned; 0xFFFFFFF0 may have been intended - confirm. The loop
# advances 8 bytes per iteration, whereas InitClearCaches advances 0x20 bytes
# per iteration.
lis r5, 0xFFFF
ori r5, r5, 0xFFF1 # r5 = 0xFFFFFFF1
and r5, r5, r3 # r5 = r3 with bits 1-3 cleared (first address to flush)
subf r3, r5, r3 # r3 = r3 - r5 = number of bytes the start was moved back
add r4, r4, r3 # r4 = number of bytes to cover, counted from r5
flush_cached_code_writes__again:
dcbst r0, r5 # write the data cache block containing r5 back to memory
sync
icbi r0, r5 # invalidate the instruction cache block containing r5
addic r5, r5, 8 # advance to the next 8 bytes
subic. r4, r4, 8 # 8 fewer bytes remaining; also sets cr0 for the bge below
bge flush_cached_code_writes__again # repeat while the remaining count is >= 0
isync
+18
View File
@@ -0,0 +1,18 @@
# This macro clears the data and instruction caches at the beginning of each
# function. This is necessary because apparently some versions of PSO don't do
# this correctly by themselves.
# This macro expects to be run immediately at the entrypoint (usually the start
# label) for all functions. It leaves the original return address in r12.
# Overwrites: r3, r4, r12, and the condition flags written by cmpl.
# NOTE(review): this comment previously said the address of the start label is
# returned in r11, but no instruction here writes r11. The start address is
# only in r3 until the loop begins advancing it; on exit r3 is at or past r4.
# Files that include this macro may depend on its exact length (it is 10
# instructions long), so do not add or remove instructions without checking
# them.
mflr r12 # r12 = address to return to
mfctr r3 # r3 = address of start label (this code is called via bctrl)
addi r4, r3, 0x7C00 # r4 = end of relevant region
InitClearCaches__next_cache_block:
dcbst r0, r3 # write the data cache block containing r3 back to memory
sync
icbi r0, r3 # invalidate the instruction cache block containing r3
addi r3, r3, 0x20 # advance by 0x20 bytes
cmpl r3, r4 # unsigned comparison against the end of the region
blt InitClearCaches__next_cache_block
isync
+21
View File
@@ -0,0 +1,21 @@
# This function is required for loading DOLs. If it's not present, newserv can't
# serve DOL files to GameCube clients.
# What the code does: it loads the 32-bit value stored at the address label,
# treats that value as a pointer, and returns the 32-bit word it points to in
# r3.
# Presumably the word at the address label is filled in by the server before
# the function is sent (it assembles as .zero) - confirm against the caller.
newserv_index_E0:
entry_ptr:
reloc0:
.offsetof start
start:
.include InitClearCaches
bl read # sets LR to the address of the address label below
address:
.zero
read:
mflr r3 # r3 = address of the address label
lwz r3, [r3] # r3 = value stored at the address label
lwz r3, [r3] # r3 = word stored at that address (the return value)
mtlr r12 # r12 = original return address, saved by InitClearCaches
blr
+130
View File
@@ -0,0 +1,130 @@
# This function is required for loading DOLs. If it's not present, newserv can't
# serve DOL files to GameCube clients.
# This first part runs the usual cache-clearing preamble, disables external
# interrupts, and works out where the code is currently located. If it is
# already below 80003000 it branches to run_dol; otherwise it falls through to
# copy_code_to_low_memory.
# Register usage from here on:
# r31 = address of the start label
newserv_index_E2:
entry_ptr:
reloc0:
.offsetof start
start:
.include InitClearCaches
disable_interrupts:
mfmsr r3
rlwinm r3, r3, 0, 17, 15 # keep every MSR bit except bit 16 (MSR[EE])
mtmsr r3
bl get_current_addr # sets LR to the address of dol_base_ptr
dol_base_ptr:
# Presumably filled in with the DOL's base address before this function is
# sent to the client (run_dol reads it as the DOL base ptr) - confirm against
# the caller.
.zero
get_current_addr:
mflr r31 # r31 = address of dol_base_ptr
# TODO: It'd be nice to be able to use an expression for the immediate value
# here - something like (dol_base_ptr - start), for example
# 0x38 = the 10 instructions in InitClearCaches plus the 4 instructions from
# mfmsr through bl above. This must be updated if either of those changes.
subi r31, r31, 0x38 # r31 = base of data to copy to low memory (start label)
# If this code is not running from low memory (80001800-80003000), then copy
# it there and branch to it
lis r3, 0x8000
ori r3, r3, 0x3000
# Addresses are unsigned, so use an unsigned comparison here (the same cmpl
# form that InitClearCaches uses). This is the same size as the signed cmp it
# replaces, so the 0x38 offset above is unaffected.
cmpl r31, r3
blt run_dol
# Copies this function's code, from the start label through its last
# instruction, to 80001800 one word at a time, then flushes the caches for the
# copied region and jumps to the copy.
# Expects r31 = address of the start label.
copy_code_to_low_memory:
bl get_end_ptr # r3 = address immediately after the last instruction below
sub r30, r3, r31 # r30 = size of code to copy (for cache flushing later)
subi r5, r3, 4 # r5 = address of the last word to copy
subi r4, r31, 4 # r4 = src ptr (starting at -4 so we can use lwzu)
lis r3, 0x8000
ori r3, r3, 0x17FC # r3 = dest ptr (80001800 - 4 so we can use stwu)
copy_code_to_low_memory__again:
lwzu r0, [r4 + 4]
stwu [r3 + 4], r0
cmp r4, r5 # stop once the last word has been copied
bne copy_code_to_low_memory__again
# Flush the data cache and clear the instruction cache before running the
# moved code
lis r3, 0x8000
ori r3, r3, 0x1800 # r3 = address of the copied code (its start label)
mr r4, r30 # r4 = number of bytes copied
# flush_cached_code_writes ends with blr, so putting the address of the copied
# code in LR and branching (not calling) makes the flush routine "return"
# directly into the copy at 80001800.
mtlr r3
b flush_cached_code_writes
# Copies each section described by the DOL header to its destination address.
# Expects r31 = address of the start label.
# Register usage:
# r30 = DOL base ptr
# r29 = pointer to the current section's file offset field in the header
# r28 = value of r29 at which the loop stops (DOL base + 0x48)
run_dol:
lwz r30, [r31 + 0x38] # r30 = DOL base ptr
# DOL files are very simple: they have up to 7 text sections, up to 11 data
# sections, and a BSS section and an entrypoint. No imports or other fancy
# things to do - we just have to move a bunch of bytes around.
# As read by the code below, the header has three parallel tables of 32-bit
# fields, one field per section: file offsets at +0x00, destination addresses
# at +0x48, and sizes at +0x90.
mr r29, r30 # r29 = DOL header iterator
addi r28, r29, 0x48 # r28 = DOL header iterator end value
run_dol__move_section:
lwz r4, [r29] # r4 = file offset of section data
add r4, r4, r30 # r4 = address of section data
lwz r3, [r29 + 0x48] # r3 = dest address of section data
lwz r5, [r29 + 0x90] # r5 = number of bytes to move
cmplwi r5, 0 # If size is 0, skip the section entirely
beq skip_section
subi r3, r3, 1 # subtract 1 so we can use stbu
subi r4, r4, 1 # subtract 1 so we can use lbzu
add r5, r4, r5 # r5 = source end pointer (address of the last byte to copy)
run_dol__move_section_data__again:
# TODO: We probably should implement memmove-like semantics here, in case the
# DOL loads at an unusually late address. This is probably very rare.
lbzu r0, [r4 + 1]
stbu [r3 + 1], r0
cmp r4, r5 # stop once the last source byte has been copied
bne run_dol__move_section_data__again
# Flush the data cache and invalidate the instruction cache after copying the
# section data. Technically we don't have to do this for data sections, but
# I'm lazy and it doesn't take too long.
# r3 was advanced by the copy loop above, and r5 gets overwritten by the flush
# code, so the arguments are reloaded from the header.
lwz r3, [r29 + 0x48] # r3 = dest address of section data
lwz r4, [r29 + 0x90] # r4 = size of section data
bl flush_cached_code_writes
skip_section:
# Move to the next section
addi r29, r29, 4
cmp r29, r28 # done when all 0x48 bytes of offset fields have been visited
bne run_dol__move_section
# Zeroes the DOL's BSS region (address at header +0xD8, size at +0xDC), then
# jumps to the DOL's entrypoint (header +0xE0). This code does not return.
# Expects r30 = DOL base ptr.
# NOTE(review): the BSS region is zeroed after the sections have been copied.
# If a DOL's BSS range overlaps one of its loaded sections, that section's
# data would be cleared here - confirm whether that can happen for the DOLs
# this is used with.
run_dol__zero_bss:
lwz r3, [r30 + 0xD8] # r3 = BSS address
lwz r4, [r30 + 0xDC] # r4 = BSS size
cmplwi r4, 0 # If size is 0, there's nothing to clear
beq run_dol__skip_zero_bss
# The decrement must come before the add so that r4 is the address of the last
# BSS byte. The loop below exits when r3 (the address just written) equals r4;
# if r4 were instead the address after the last byte, the loop would also zero
# one byte beyond the end of the BSS region.
subi r3, r3, 1 # subtract 1 so we can use stbu
add r4, r3, r4 # r4 = address of last BSS byte
li r0, 0
run_dol__zero_bss__again:
stbu [r3 + 1], r0
cmp r3, r4
bne run_dol__zero_bss__again
run_dol__skip_zero_bss:
run_dol__go_to_entrypoint:
lwz r0, [r30 + 0xE0] # r0 = entrypoint
mtctr r0
bctr
# Callable wrapper around the FlushCachedCode include.
# Arguments: r3 = address of written code, r4 = number of bytes
# Overwrites: r3, r4, r5
# Returns to the address in LR. copy_code_to_low_memory uses this to jump to
# the relocated code by setting LR itself and branching here.
flush_cached_code_writes:
.include FlushCachedCode
blr
# get_end_ptr returns, in r3, the address immediately following the final bl
# instruction below. Nothing follows that bl in this file, so that address is
# the end of the assembled code. Call get_end_ptr with bl.
# Overwrites: r0, r3, LR, CTR
return_end_ptr:
mflr r3 # r3 = address after the bl in get_end_ptr
bctr # return to get_end_ptr's caller (address saved in CTR)
get_end_ptr:
mflr r0
mtctr r0 # CTR = address to return to (the bl below overwrites LR)
bl return_end_ptr # sets LR to the address after this instruction
+67 -50
View File
@@ -1,11 +1,17 @@
# This example shows how to use newserv's send_function_call function for PSO
# GameCube clients. This code writes a variable-length block of data to a
# specified address in the client's memory.
# This function is required for loading DOLs. If it's not present, newserv can't
# serve DOL files to GameCube clients.
# For example, to write the bytes 38 00 00 05 to the address 8010521C,
# send_function_call could be called like this:
# This is also the file I've chosen to document how to write code for newserv's
# functions subsystem. The code implemented in this file writes a
# variable-length block of data to a specified address in the client's memory.
# Note that WriteMemory is a general function that uses many of the subsystem's
# features. If you're writing a patch (not a general function), you cannot use
# the suffix or label_offsets features that are described here.
# For example, to use this function to write the bytes 38 00 00 05 to the
# address 8010521C, send_function_call could be called like this:
# auto fn = s->function_code_index->name_to_function.at("WriteMemory");
# unordered_map<string, uint32_t label_writes(
# unordered_map<string, uint32_t> label_writes(
# {{"dest_addr", 0x8010521C}, {"size", 4}});
# string suffix("\x38\x00\x00\x05", 4);
# send_function_call(
@@ -15,77 +21,88 @@
# suffix); // Data to append after the code (not all functions use this)
# The meanings of label_writes and suffix are described in the comments below.
# A label newserv_id_XX tells newserv what value to use in the flag field when
# sending the B2 command. This is needed if the server needs to do something
# when the B3 response is received.
newserv_id_C0:
# A label newserv_index_XX tells newserv what value to use in the flag field
# when sending the B2 command. This is needed if the server needs to do
# something when the B3 response is received. For GameCube functions, if
# specified, the index must be in the range 01-FF. The DOL loading
# functionality, which this function is a part of, uses indexes E0, E1, and E2,
# but this function can also be used for other purposes.
newserv_index_E1:
# The entry_ptr label is required. It should point to a .offsetof directive that
# itself points to the actual entrypoint.
# The entry_ptr label is required for all functions. It should point to a
# .offsetof directive that itself points to the actual entrypoint.
entry_ptr:
# All labels starting with reloc signify that the following PPC word
# (be_uint32_t) is to be relocated at runtime. That is, when the code is run,
# the PPC word will contain the actual memory address relative to the running
# code instead of the offset that it holds at assembly time. The entry_ptr label
# should almost always have a reloc label next to it.
# All labels starting with reloc signify that the following PPC word (big-endian
# 32-bit value) is to be relocated at runtime. That is, when the code runs on
# the client, the PPC word will contain the actual memory address relative to
# the running code instead of the offset that it holds at assembly time. The
# entry_ptr label should almost always have a reloc label next to it.
reloc0:
.offsetof start
start:
# A .include directive essentially pastes in the code from the referenced
# file. Here, we use the code from the file InitClearCaches.inc.s.
# PSO GC doesn't properly clear the data and instruction caches when it
# executes functions, so we use this include in all functions to do so. Since
# all functions do this, this makes it safe to use more than one function in
# each client's session.
.include InitClearCaches
bl get_block_ptr
mr r6, r3 # r6 = address of dest_addr label
copy_block:
# r8 = address to return to (LR, from start label)
mflr r6 # r6 = address of dest_addr label
mtlr r8
lwz r3, [r6] # r3 = dest ptr
subi r3, r3, 1 # subtract 1 so we can use stbu
lwz r5, [r6 + 4] # r5 = size (bytes remaining)
add r5, r5, r3 # r5 = dest end ptr
add r5, r5, r3 # r5 = dest end ptr (last byte to be written)
addi r4, r6, 7 # r4 = src ptr (starting at -1 so we can use lbzu)
copy_block__again:
lbzu r0, [r4 + 1]
stbu [r3 + 1], r0
cmp r3, r5
bne copy_block__again
# Flush the data cache and clear the instruction cache at the written region
lwz r3, [r6] # r3 = dest ptr
lwz r4, [r6 + 4] # r4 = size
.include FlushCachedCode
# Flush the data cache and clear the instruction cache at the written region
lis r5, 0xFFFF
ori r5, r5, 0xFFF1
and r5, r5, r3
subf r3, r5, r3
add r4, r4, r3
flush_cached_code_writes__again:
dcbst r0, r5
sync
icbi r0, r5
addic r5, r5, 8
subic. r4, r4, 8
bge flush_cached_code_writes__again
isync
# Return 0 (this value appears in the B3 command)
li r3, 0
# Return the address after the last byte written. The value returned in r3
# from the function is sent back to the server in a B3 command. newserv uses
# the return value during DOL loading to know which section of the DOL file to
# send next, or to send the RunDOL function if all sections have been loaded.
lwz r3, [r6] # r3 = dest ptr
lwz r4, [r6 + 4] # r4 = size
add r3, r3, r4
mtlr r12
blr
start:
get_block_ptr__ret:
mflr r3
mtlr r10
blr
get_block_ptr:
# We use a trick here to get the address of the dest_addr label: since bl puts
# the immediately-following address into the link register, we "call"
# copy_block and get the dest_addr pointer out of the LR. We then put r8 back
# into the LR so copy_block can return normally.
mflr r8
bl copy_block
# get_block_ptr__ret and get the dest_addr pointer out of the LR. We then put
# r10 back into the LR so get_block_ptr__ret returns to the caller.
mflr r10
bl get_block_ptr__ret
# These fields are filled in when the B2 command is generated. Specifically, the
# label_writes argument to send_function_call is responsible for this.
# These fields are filled in right before the command is sent to the client.
# Specifically, the label_writes argument to send_function_call is responsible
# for this. The label_writes argument is a map of label name to value, and
# send_function_call simply writes the given values after the given labels. This
# is a way to pass arbitrary arguments to a function at call time.
dest_addr:
.zero
size:
.zero
# The data to be written is appended here at B2 construction time via the suffix
# argument to send_function_call. (This label is for documentation purposes
# only; the suffix argument always appends data after the end of all the
# assembled code.)
# Finally, we use the suffix argument to instruct send_function_call to append
# the data we want to write to memory immediately after the assembled code.
# (The data_to_write label here is for documentation purposes only; the suffix
# argument always appends data after the end of all the assembled code.)
data_to_write: