mirror of
https://github.com/JHUAPL/CodeCut.git
synced 2026-01-09 13:28:06 -05:00
1356 lines
66 KiB
Python
Executable File
1356 lines
66 KiB
Python
Executable File
## Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC
|
|
## (JHU/APL). All Rights Reserved.
|
|
#
|
|
## This material may be only be used, modified, or reproduced by or for
|
|
## the U.S. Government pursuant to the license rights granted under the
|
|
## clauses at DFARS 252.227-7013/7014 or FAR 52.227-14. For any other
|
|
## permission, please contact the Office of Technology Transfer at
|
|
## JHU/APL.
|
|
#
|
|
## NO WARRANTY, NO LIABILITY. THIS MATERIAL IS PROVIDED "AS IS." JHU/APL
|
|
## MAKES NO REPRESENTATION OR WARRANTY WITH RESPECT TO THE PERFORMANCE OF
|
|
## THE MATERIALS, INCLUDING THEIR SAFETY, EFFECTIVENESS, OR COMMERCIAL
|
|
## VIABILITY, AND DISCLAIMS ALL WARRANTIES IN THE MATERIAL, WHETHER
|
|
## EXPRESS OR IMPLIED, INCLUDING (BUT NOT LIMITED TO) ANY AND ALL IMPLIED
|
|
## WARRANTIES OF PERFORMANCE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
|
## PURPOSE, AND NON-INFRINGEMENT OF INTELLECTUAL PROPERTY OR OTHER THIRD
|
|
## PARTY RIGHTS. ANY USER OF THE MATERIAL ASSUMES THE ENTIRE RISK AND
|
|
## LIABILITY FOR USING THE MATERIAL. IN NO EVENT SHALL JHU/APL BE LIABLE
|
|
## TO ANY USER OF THE MATERIAL FOR ANY ACTUAL, INDIRECT, CONSEQUENTIAL,
|
|
## SPECIAL OR OTHER DAMAGES ARISING FROM THE USE OF, OR INABILITY TO USE,
|
|
## THE MATERIAL, INCLUDING, BUT NOT LIMITED TO, ANY DAMAGES FOR LOST
|
|
## PROFITS.
|
|
## HAVE A NICE DAY.
|
|
##
|
|
## This script takes the boundaries found by deepcut and outputs an object file for the module selected by the user
|
|
##
|
|
##@author
|
|
##@keybinding
|
|
##@menupath
|
|
##@toolbar
|
|
#@author
|
|
#@category MINDSIGHT
|
|
#@keybinding
|
|
#@menupath
|
|
#@toolbar
|
|
|
|
import sys, copy, os
|
|
import ghidra.app.script.GhidraScript
|
|
from ghidra.program.database.module import *
|
|
from ghidra.program.flatapi import *
|
|
from ghidra.app.services import *
|
|
from ghidra.program.model.mem import *
|
|
from ghidra.program.model.lang import *
|
|
from ghidra.program.model.pcode import *
|
|
from ghidra.program.model.util import *
|
|
from ghidra.program.model.reloc import *
|
|
from ghidra.program.model.data import *
|
|
from ghidra.program.model.block import *
|
|
from ghidra.program.model.symbol import *
|
|
from ghidra.program.model.scalar import *
|
|
from ghidra.program.model.listing import *
|
|
from ghidra.program.model.address import *
|
|
from ghidra.program.util import ProgramLocation
|
|
from ghidra.program.database.mem.FileBytes import *
|
|
from ghidra.app.util import *
|
|
import ghidra.app.util.bin.ByteProvider
|
|
import ghidra.util.exception.CancelledException
|
|
import ghidra.util.task.TaskMonitor
|
|
|
|
|
|
# Make the bundled pyelftools importable: it lives in a "pyelftools-0.28"
# directory next to this script.
script_path = parseFile(getSourceFile().getCanonicalPath()).getPath()

pyelftools_path = os.path.join(os.path.dirname(script_path), "pyelftools-0.28")
# Do not add a duplicate entry when the directory is already registered
# (for instance when the script is run again in the same interpreter).
if pyelftools_path not in sys.path:
    sys.path.append(pyelftools_path)
|
|
|
|
# Pyelf imports
|
|
from elftools.elf.elffile import ELFFile
|
|
from elftools.elf.enums import *
|
|
|
|
# Switch for the verbose println() output of the section-header code
DEBUG = True

# Symbol table; None until it is populated
symtab = None
# String table. Both strtab and shstrtab begin with \x00.
strtab = list()
# Section header string table
shstrtab = '\x00'

# Every section that will be exported
sections = list()
NUM_SECTIONS = 0  # Total number of sections

# Number of relocations, and the size in bytes of one relocation entry
NUM_RELOCS = 0
RELOC_SIZE = 8

# Nested dictionary with details about the functions in the module: the name
# of every function each one calls and the address the call was made from
relocations = dict()

# All symbols of the module:
#   'LOCAL'  -> list of local symbols
#   'GLOBAL' -> list of global symbols
symbols = {'LOCAL': [], 'GLOBAL': []}
data_symbols = list()

# Each of these takes up an entry in the symbol table
section_symbols = ['null', '.text', '.rodata', '.data', '.bss']

required_sections = ['.text', '.rel.text', '.data', '.bss', '.symtab', '.strtab', '.shstrtab']

# Total number of symbols; starts out as just the section symbols
total_symbols = len(section_symbols)

# The number of entries in the rodata section
rodata_entries = 0

bss_fragment = data_fragment = rodata_fragment = None

# Symbol Table Entry
# typedef struct {
#     Elf32_Word    st_name;   (4 bytes)
#     Elf32_Addr    st_value;  (4 bytes)
#     Elf32_Word    st_size;   (4 bytes)
#     unsigned char st_info;   (1 byte)
#     unsigned char st_other;  (1 byte)
#     Elf32_Half    st_shndx;  (2 bytes)
# } Elf32_Sym;                 (16 bytes total)
SYM_SIZE = 16  # Size of symbols in symtab section

# Elf Header
# typedef struct {
#     unsigned char e_ident[EI_NIDENT]; (16 bytes)
#     Elf32_Half    e_type;             (2 bytes)
#     Elf32_Half    e_machine;          (2 bytes)
#     Elf32_Word    e_version;          (4 bytes)
#     Elf32_Addr    e_entry;            (4 bytes)
#     Elf32_Off     e_phoff;            (4 bytes)
#     Elf32_Off     e_shoff;            (4 bytes)
#     Elf32_Word    e_flags;            (4 bytes)
#     Elf32_Half    e_ehsize;           (2 bytes)
#     Elf32_Half    e_phentsize;        (2 bytes)
#     Elf32_Half    e_phnum;            (2 bytes)
#     Elf32_Half    e_shentsize;        (2 bytes)
#     Elf32_Half    e_shnum;            (2 bytes)
#     Elf32_Half    e_shstrndx;         (2 bytes)
# } Elf32_Ehdr;                         (52 bytes total)
ELFHDRSZ = 52  # size of the ELF header

# Section Header
# typedef struct {
#     Elf32_Word sh_name;      (4 bytes)
#     Elf32_Word sh_type;      (4 bytes)
#     Elf32_Word sh_flags;     (4 bytes)
#     Elf32_Addr sh_addr;      (4 bytes)
#     Elf32_Off  sh_offset;    (4 bytes)
#     Elf32_Word sh_size;      (4 bytes)
#     Elf32_Word sh_link;      (4 bytes)
#     Elf32_Word sh_info;      (4 bytes)
#     Elf32_Word sh_addralign; (4 bytes)
#     Elf32_Word sh_entsize;   (4 bytes)
# } Elf32_Shdr;                (40 bytes total)
SHSIZE = 40  # size of section headers
OFFSET = 0   # Offset into the file

moduleName = ''
elffile = None
minModuleAddress = maxModuleAddress = None
moduleBytes, rodata_bytes = bytearray(), bytearray()
mod = None
alignment = 4

# These sections are specific to an executable binary. Relocatable object
# files don't need these. Entries are matched as substrings of section names.
exclude_sections = [
    '.init', '.fini', '.eh_frame', '.stab', '.debug', '.noinit',
    '.exidx', '.got', '__', '.rel.ro', 'plt', 'dyn',
]
|
|
|
|
|
|
def SelectModule(module=""):
    """
    Choose the module (namespace) to export and record its address range.

    With an empty *module* the user is asked to pick one of the namespaces
    found in the current program, ignoring "Global" and "<EXTERNAL>".
    Otherwise the namespace called *module* is looked up under the global
    namespace.

    On success the globals moduleName, mod, minModuleAddress and
    maxModuleAddress are set. maxModuleAddress is advanced one address at a
    time until the address right after it is the entry point of a function.

    @param module: Name of the namespace to export, or "" to prompt the user
    @return: True if a usable module was selected; False otherwise (a popup
             explains the reason)
    """
    global moduleName, minModuleAddress, maxModuleAddress, prog, mod, ns

    # Check if there is a program open
    if currentProgram is None:
        popup("There is no open program")
        return False

    prog = currentProgram
    st = prog.getSymbolTable()

    if module == "":
        # Map namespace name -> namespace for every candidate module
        Modulelist = {}
        SymbolIterator = st.getAllSymbols(True)
        while SymbolIterator.hasNext():
            ns = SymbolIterator.next().getParentNamespace()
            ns_name = ns.getName()
            # Anything that isn't global or external was likely created by deepcut
            if ns_name == u"Global" or ns_name == u"<EXTERNAL>":
                continue
            # The old guard compared the namespace object with the name keys
            # and was therefore always true; assigning directly keeps the
            # behaviour it actually had (the last namespace seen wins).
            Modulelist[ns_name] = ns

        if not Modulelist:
            popup("There doesn't seem to be any modules created by DeepCut (Namespaces is empty?).\nPlease run the DeepCut Analyzer before running this script.\nFor additional fidelity run GuessModuleNames from the CodeCut GUI.")
            return False

        # Ask user for module to export as object file (sorted for a stable order)
        Modulechoice = askChoice("Select a Module", getCategory(), sorted(Modulelist), None)
        print(Modulechoice + " was chosen!\n")
        moduleName = Modulechoice
        mod = Modulelist[Modulechoice]
    else:
        moduleName = module
        mod = st.getNamespace(module, prog.getGlobalNamespace())
        if not mod:
            # mod is None here, so report the requested name instead of
            # calling a method on it
            popup("Something went wrong. %s doesn't seem to be a valid module..." % module)
            return False
        println("Module: %s" % mod.getName())

    # getBody() returns an AddressSetView, which provides the min and max
    # address of the namespace
    asv = mod.getBody()
    minModuleAddress = asv.getMinAddress()
    maxModuleAddress = asv.getMaxAddress()

    if minModuleAddress is None or maxModuleAddress is None or minModuleAddress == maxModuleAddress:
        popup("The module you selected is invalid. This could be the min and/or max Address do not exist or the min and max address of the module are equal to each other. Please select another module")
        return False

    # The reported max address may stop short of the module's last byte.
    # Grow the boundary until the next address starts a function so every
    # byte of the module is accounted for.
    # NOTE(review): there is no upper bound here; if no function follows the
    # module this loop only ends when add() fails -- confirm that is acceptable.
    while not getFunctionAt(maxModuleAddress.add(1)):
        maxModuleAddress = maxModuleAddress.add(1)

    println("Min module address: %s" % (str(minModuleAddress)))
    println("Max module address; %s" % (str(maxModuleAddress)))
    return True
|
|
|
|
|
|
def get_symbols():
# Purpose: fill the global `symbols` dict ('LOCAL' and 'GLOBAL' lists) with the
# symbols the selected module uses, count the .rodata data items it references
# in `rodata_entries`, and finally raise maxModuleAddress if update_functions()
# reports a larger bound.
# NOTE(review): this listing has lost its indentation, so the exact nesting of
# the conditionals below cannot be confirmed from the text -- check it against
# the real file before changing any logic.
|
|
global minModuleAddress, maxModuleAddress, mod, symbols, bss_fragment, data_fragment, rodata_fragment, rodata_entries
|
|
# Get the symbol table
|
|
st = prog.getSymbolTable()
|
|
listing = prog.getListing()
|
|
# Look up the .bss/.data/.rodata fragments of the "Program Tree" view.
# NOTE(review): nothing here checks the results before they are used below --
# confirm getFragment() cannot come back empty for the binaries of interest.
bss_fragment = listing.getFragment("Program Tree", ".bss")
|
|
data_fragment = listing.getFragment("Program Tree", ".data")
|
|
rodata_fragment = listing.getFragment("Program Tree", ".rodata")
|
|


# Defined data, iterated forward starting at the first address of .rodata
definedData = listing.getDefinedData(rodata_fragment.getMinAddress(), True)
|
|




# Grab all symbols
|
|
SymbolIterator = st.getAllSymbols(True)
|
|
# This loop grabs all symbols referenced by the module, both internal symbols and external symbols
|
|
# External symbols are symbols that are referenced by an internal function but the address is
|
|
# outside the boundary of the module. For example, if the min and max address is 800000-900000,
|
|
# at some point a function within the module referenced a symbol whose address is greater than 900000
|
|
# or less than 800000. Internal symbols are those whose address fall within the boundary of the
|
|
# min and max address.
|
|
while SymbolIterator.hasNext():
|
|
sym = SymbolIterator.next()
|
|


# TODO: Make this more elegant
# Skips every symbol whose name contains "_init", "_fini" or "_start"
|
|
if "_init" in sym.getName() or "_fini" in sym.getName() or "_start" in sym.getName():
|
|
continue
|
|
# We only want ones that were imported (symbols in the .bss_fragment section seem to land in this category)
# The test also accepts symbols whose source displays as 'User Defined'
|
|
if (sym.getSource().getDisplayString() == 'Imported') or (sym.getSource().getDisplayString() == 'User Defined'):
|
|
# Check if symbol has references
|
|
if sym.hasMultipleReferences() or sym.hasReferences():
|
|
symRef = sym.getReferences()
|
|
for ref in symRef:
|
|
# Are those references in the module?
# (isInternalSymbol judges a reference by the address it comes from)
|
|
if isInternalSymbol(ref):
|
|
# Is this symbol in the data or bss section? If yes it's a local symbol
|
|
if (bss_fragment.contains(sym.getAddress()) or data_fragment.contains(sym.getAddress())) and \
|
|
sym not in symbols['LOCAL']:
|
|
println("Adding symbol: %s Address: %s Reference: %s" % (sym.getName(), sym.getAddress(), ref.getFromAddress()))
|
|
symbols['LOCAL'].append(sym)
|
|
# Function symbols are recorded as global, once each
|
|
if sym.getSymbolType().toString() == 'Function' and \
|
|
sym not in symbols['GLOBAL']:
|
|
symbols['GLOBAL'].append(sym)
|
|
# The symbol is in the bss or data section and didn't have any references but its address is within the module boundaries
|
|
if (bss_fragment.contains(sym.getAddress()) or data_fragment.contains(sym.getAddress())) and \
|
|
sym not in symbols['LOCAL'] and \
|
|
(isInternalSymbol(sym)):
|
|
println("Adding symbol: %s Address: %s" % (sym.getName(), sym.getAddress()))
|
|
symbols['LOCAL'].append(sym)
|
|
# The symbol didn't have any references
# Any symbol located inside the module that is not yet global becomes global
|
|
if sym not in symbols['GLOBAL']:
|
|
if isInternalSymbol(sym):
|
|
symbols['GLOBAL'].append(sym)
|
|
else:
|
|
continue
|
|


# Defined strings need to be relocated but they aren't necessarily symbols
|
|
# We add them to our symbol dictionary here but we will need to subtract the number of symbols we add
|
|
# here from the total number of symbols. Additionally, when writing the symtab section
|
|
# we'll need to ensure we don't write these symbols.
|
|
for dat in definedData:
|
|
refs = dat.getReferenceIteratorTo()
|
|
if rodata_fragment.contains(dat.getAddress()):
|
|
for ref in refs:
|
|
# One reference from inside the module is enough: record the data item
# once, count it, and stop looking at its remaining references
|
|
if isInternalSymbol(ref):
|
|
symbols['LOCAL'].append(dat)
|
|
rodata_entries += 1
|
|
break
|
|
else:
|
|
continue
|
|


print("Global and local symbols: ", symbols)
|
|
maxModAddress = update_functions()
|
|
# Updating maxModuleAddress just in case it changed in update_functions
|
|
if maxModAddress.subtract(maxModuleAddress) > 0:
|
|
maxModuleAddress = maxModAddress
|
|
println("Minimum Module Address: %s" % minModuleAddress)
|
|
println("Maximum Module Address: %s" % maxModuleAddress)
|
|
|
|
|
|
def isInternalSymbol(symbol):
    """
    Tell whether a symbol, or a reference to one, lies inside the current module.

    A reference is judged by the address it originates from; anything else is
    judged by its own address. "Inside" means
    minModuleAddress <= address <= maxModuleAddress, both ends included.
    @param symbol: The symbol (or reference) to classify as internal or external
    @return: True if it is internal; False otherwise
    """
    # References are recognised by "reference" appearing in their type's name
    looks_like_reference = "reference" in str(type(symbol)).lower()
    if looks_like_reference:
        address = symbol.getFromAddress()
    else:
        address = symbol.getAddress()

    # Below the lower bound: external, no need to look at the upper bound
    if address.compareTo(minModuleAddress) < 0:
        return False
    return address.compareTo(maxModuleAddress) <= 0
|
|
|
|
def update_functions():
# Purpose: walk the symbols in symbols['GLOBAL'] that are Function symbols
# located inside the module, stretch each function up to the address just
# before the next function entry point, and return the highest address seen
# (or the unchanged maxModuleAddress when nothing exceeded it).
# Side effect: func.setBody() is called for functions whose end moved.
# NOTE(review): this listing has lost its indentation; in particular it is not
# certain which `if` the final `else` ("This function is fine") belongs to, nor
# whether the module-boundary check sits inside the "need to update" branch.
|
|
# This is the current upward bounds of the module
|
|
# It's possible this will not change so we save it here
|
|
newMaxAddress = maxModuleAddress
|
|
for sym in symbols['GLOBAL']:
|
|
if isInternalSymbol(sym):
|
|
if sym.getSymbolType().toString() == 'Function':
|
|
println("Symbol name: %s\n" % sym.getName())
|
|
func = getFunctionAt(sym.getAddress())
|
|
funcBody = func.getBody()
|
|
maxAddr = funcBody.getMaxAddress()
|
|
println("Max address: %s" % (str(maxAddr)))
|
|


newMax = maxAddr
|
|
# Currently, Ghidra does not include all bytes for functions that include data elements
|
|
# We're accounting for this by checking if the next byte is a new function
|
|
# If it is a new function then Ghidra already has the proper function size. If not,
|
|
# then Ghidra will misrepresent the size of the function leading to incorrect processing
|
|
# later on.
# NOTE(review): the loop has no upper bound -- it relies on a function entry
# point existing somewhere after this function.
|
|
while not getFunctionAt(newMax.add(1)):
|
|
maxAddr = maxAddr.add(1)
|
|
newMax = maxAddr
|
|
println("newMax: %s" % (str(newMax)))
|
|


# The previous loop resulted in a new maximum address for the current function
|
|
# Let's update the function to include the new bytes
|
|
if newMax.subtract(funcBody.getMaxAddress()) > 0:
|
|
println("Need to update the address set for this function\n")
|
|
# Build an address set using the entry point for the function
|
|
# and the new max address
|
|
newAddrSet = AddressSet(sym.getAddress(), newMax)
|
|
# Modify the body to include the new address set
|
|
func.setBody(newAddrSet)
|
|


# println("newMax - newMaxAddress: %d" % newMax.subtract(newMaxAddress))
|
|
# This check probably isn't completely necessary. Before entering this function we already
|
|
# modified the boundary for the module. However, there is still a chance that
|
|
# there are some new bytes not accounted for. We check if the new address is greater
|
|
# than the address we already have for the module boundary. If it is greater then we
|
|
# update the maximum address, otherwise we continue on
|
|
if newMax.subtract(newMaxAddress) > 0:
|
|
println("Updating module boundary!")
|
|
newMaxAddress = newMax
|
|
else:
|
|
continue
|
|


else:
|
|
println("This function is fine")
|
|
return newMaxAddress
|
|
|
|
def to_bytes(n, length, endianess='little'):
    """
    Convert a non-negative integer into a byte string of a fixed width.

    Python 2.7 does not have a to_bytes method for integers, so the value is
    rendered as zero-padded hexadecimal and converted from hex to raw bytes
    (approach taken from https://stackoverflow.com/a/20793663).
    binascii.unhexlify is used for the conversion because, unlike
    str.decode('hex'), it is available on both Python 2 and Python 3.
    @param n: The number to convert to bytes
    @param length: How many bytes (not bits) should be used to represent the integer
    @param endianess: 'big' for most significant byte first; any other value
                      produces little endian
    @return: The encoded bytes. A value that needs more than `length` bytes is
             not truncated, so callers must pass values that fit.
    """
    import binascii

    s = binascii.unhexlify('{:0{}x}'.format(n, length * 2))
    # The hex text is most-significant-byte first; reverse it for little endian
    return s if endianess == 'big' else s[::-1]
|
|
|
|
def write_elf_header(obj_file):
    """
    Write the ELF header of the new relocatable object file.

    Most fields are copied from the executable being analyzed (the global
    `elffile`) or only slightly adjusted; fields that make no sense for an
    object file are zeroed. Fields are written in Elf32_Ehdr order.
    @param obj_file: The new object file being created (opened for writing).
    @var ENUM: The ENUM_X_YYYY tables are enumerations from the pyelftools library.
    NOTE(review): every multi-byte field is written little endian while
    e_ident is copied unchanged -- confirm the input is always little endian.
    """
    def emit(value, width):
        # Serialise one header field as `width` little-endian bytes
        obj_file.write(to_bytes(value, width, 'little'))

    println("Writing the ELF Header!")

    # e_ident: copied unchanged from the original executable
    obj_file.write(elffile.e_ident_raw)
    source_header = elffile.header

    # e_type: the output is a relocatable file, not an executable
    emit(ENUM_E_TYPE['ET_REL'], 2)
    # e_machine: same machine as the original (symbolic name -> number)
    emit(ENUM_E_MACHINE[source_header['e_machine']], 2)
    # e_version
    emit(ENUM_E_VERSION['EV_CURRENT'], 4)
    # e_entry: an object file has no entry point
    emit(0, 4)
    # e_phoff: an object file has no program headers
    emit(0, 4)

    # e_shoff: the section header table is placed right after the last
    # section written, which is where the global OFFSET points
    section_table_offset = OFFSET
    println("Section Header Offset: %d" % section_table_offset)
    emit(section_table_offset, 4)

    # e_flags and e_ehsize are carried over from the original header
    emit(source_header['e_flags'], 4)
    emit(source_header['e_ehsize'], 2)
    # e_phentsize and e_phnum: no program headers in a relocatable object file
    emit(0, 2)
    emit(0, 2)
    # e_shentsize is carried over from the original header
    emit(source_header['e_shentsize'], 2)

    # e_shnum
    section_count = NUM_SECTIONS
    println("Number of section: %d" % NUM_SECTIONS)
    emit(section_count, 2)

    # e_shstrndx: position of .shstrtab in the list of exported sections
    shstrtab_positions = [pos for pos, sec in enumerate(sections) if sec.name == '.shstrtab']
    emit(shstrtab_positions[0], 2)
|
|
|
|
|
|
def write_section_headers(obj_file, section):
    """
    Write one section header to the newly created object file.

    The ten Elf32_Shdr fields are written in structure order, each as four
    little-endian bytes. sh_type is stored in the header as a symbolic name
    and is translated to its number through ENUM_SH_TYPE_ARM first.
    @param obj_file: A file object representing the new object file being created
    @param section: A section of the ELF file, possibly with a modified header
    """
    println("Writing section header: %s" % section.name)

    field_order = (
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset',
        'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize',
    )
    for field in field_order:
        value = section.header[field]
        if field == 'sh_type':
            value = ENUM_SH_TYPE_ARM[value]
        obj_file.write(to_bytes(value, 4, 'little'))
|
|
|
|
def get_section_size(section):
    """
    Compute the size of the .data or .bss section of the exported module.

    The size is the sum of the lengths of the data items found at the address
    of every local symbol that lies in the section's fragment. For .data each
    length is first rounded up to a multiple of `alignment`; .bss lengths are
    summed as they are. Any other section name (including .rodata, whose size
    is not computed here) yields 0.
    @param section: Name of the section whose size is wanted
    @return: The size of the section, or zero if it has no size
    """
    if section == '.bss':
        fragment, pad_to_alignment = bss_fragment, False
    elif section == '.data':
        fragment, pad_to_alignment = data_fragment, True
    else:
        return 0

    section_size = 0
    for sym in symbols['LOCAL']:
        if not fragment.contains(sym.getAddress()):
            continue
        if pad_to_alignment:
            println("Name: %s" % sym)
        length = getDataAt(sym.getAddress()).getLength()
        remainder = length % alignment
        if pad_to_alignment and remainder != 0:
            # Round up to the next multiple of the alignment
            length += alignment - remainder
        section_size += length

    # Yields "bss_section_size: N" or "data_section_size: N"
    println("%s_section_size: %d" % (section[1:], section_size))
    return section_size
|
|
|
|
def modify_section_headers():
    """
    Adjust the header of every entry in `sections` for the unlinked object
    file and lay the sections out back to back behind the ELF header.

    The global OFFSET starts at ELFHDRSZ and grows by the sh_size of each
    section in list order. .bss is the exception: it takes no space in the
    file, so it does not advance OFFSET. When the function returns, OFFSET is
    the file offset right after the last section.

    How the fields are filled in:

    sh_name
        Offset of the section's NUL-terminated name inside shstrtab.
    sh_addr
        Always 0, since this is a relocatable object file.
    sh_offset
        The value of OFFSET when the section is reached (0 for the NULL
        section, whose name is empty).
    sh_size
        .text        len(moduleBytes)
        .rel.text    NUM_RELOCS * RELOC_SIZE
        .symtab      total_symbols * SYM_SIZE
        .strtab      len(strtab)
        .shstrtab    len(shstrtab)
        .rodata      len(rodata_bytes)
        .data, .bss  get_section_size(name)
        others       left as read from the executable
    sh_link
        .rel.text: index of .symtab in `sections` (relocations need the
        symbol table). .symtab: index of .strtab in `sections` (symbols store
        name offsets, not names).
    sh_info
        .rel.text: index of .text in `sections`. .symtab: index of the first
        non-local symbol, computed as
        len(section_symbols) + len(symbols['LOCAL']) - rodata_entries
        because the .rodata entries kept in symbols['LOCAL'] are not counted.
    sh_addralign
        4 for .text, .rel.text, .symtab, .rodata and .data; 1 for .strtab,
        .shstrtab and .bss; untouched for every other section.
    """
    global OFFSET

    def index_of(wanted):
        # Position of the section called `wanted` in `sections` (IndexError
        # when absent). Living in its own function keeps the comprehension
        # variable from overwriting the loop index below on Python 2.
        return [pos for pos, sec in enumerate(sections) if sec.name == wanted][0]

    def name_offset(wanted):
        # Include the terminating NUL in the search so a name can only match
        # a complete entry (or the tail of one, e.g. ".text" inside
        # ".rel.text"), never the beginning of a longer section name.
        return shstrtab.find(wanted + '\x00')

    # Section data is written right after the ELF header
    OFFSET = ELFHDRSZ
    if DEBUG:
        println("----------------------------- Modifying section headers --------------------------------")

    for i, section in enumerate(sections):
        name = section.name
        header = section.header
        if DEBUG:
            println("Number of section: %d" % len(sections))
            println("Section name: %s OFFSET: %d" % (name, OFFSET))
            println("Index: %d section name: %s" % (i, name))

        # Fields treated the same way for every section
        header['sh_name'] = name_offset(name)
        header['sh_addr'] = 0
        header['sh_offset'] = 0 if name == '' else OFFSET

        label = name        # how the section is called in the debug summary
        extra = ''          # section specific tail of the debug summary
        takes_file_space = True

        if name == '.text':
            header['sh_size'] = len(moduleBytes)
            header['sh_addralign'] = 4

        elif name == '.rel.text':
            header['sh_type'] = 'SHT_REL'
            header['sh_flags'] = 0x40
            header['sh_size'] = NUM_RELOCS * RELOC_SIZE
            header['sh_entsize'] = 0x8
            header['sh_link'] = index_of('.symtab')
            header['sh_info'] = index_of('.text')
            header['sh_addralign'] = 4
            extra = " Num Relocaions: %d" % NUM_RELOCS

        elif name == '.symtab':
            header['sh_size'] = total_symbols * SYM_SIZE
            # First non-local symbol: the section symbols and the real local
            # symbols come first; rodata entries are not part of the count
            header['sh_info'] = len(section_symbols) + (len(symbols['LOCAL']) - rodata_entries)
            header['sh_link'] = index_of('.strtab')
            header['sh_addralign'] = 4
            extra = " Total Symbols: %d" % total_symbols

        elif name == '.strtab':
            header['sh_size'] = len(strtab)
            header['sh_addralign'] = 1

        elif name == '.shstrtab':
            header['sh_size'] = len(shstrtab)
            header['sh_addralign'] = 1
            label = "section " + name
            if DEBUG:
                println("shstrtab: %s length: %d" % (shstrtab, len(shstrtab)))

        elif name == '.rodata':
            header['sh_size'] = len(rodata_bytes)
            header['sh_addralign'] = 4
            if DEBUG:
                println("rodata: %s length: %d" % (rodata_bytes, len(rodata_bytes)))

        elif name == '.data':
            header['sh_size'] = get_section_size(name)
            header['sh_addralign'] = 4

        elif name == '.bss':
            header['sh_size'] = get_section_size(name)
            header['sh_addralign'] = 1
            # The bss section doesn't take up any space in the ELF file, so
            # OFFSET must not be advanced by its size
            takes_file_space = False

        else:
            # Copied as is: only the name, address and offset set above change
            if DEBUG:
                println("Section name in modifying headers: %s" % name)
                println("Section size: %d" % header['sh_size'])

        if takes_file_space:
            OFFSET += header['sh_size']  # Where the next section will start

        if DEBUG:
            println("%s size: %d Section Offset: %d%s" % (label, header['sh_size'], header['sh_offset'], extra))
            println("OFFSET: %d" % OFFSET)

    if DEBUG:
        println("----------------------------- Done modifying section headers --------------------------------")
|
|
|
|
|
|
"""
|
|
This function performs the first pass to gather the sections that will be modified
|
|
and exported. Loop through each section and append it to a list of sections if
|
|
the section name is not equal to:
|
|
.init*
|
|
.fini*
|
|
.eh_frame*
|
|
.stab*
|
|
.debug*
|
|
.noinit*
|
|
These sections aren't necessary for an object file so they're ignored
|
|
This function also grabs the symbol table, the section header string table,
|
|
and the string table
|
|
Takes no parameters: the parsed ELFFile object and the collected sections are stored in the globals elffile and sections
|
|
"""
|
|
def first_pass():
    """
    Gather the sections that will later be modified and exported.

    Opens the executable behind the current program (falling back to asking
    the user for a file when getProgramFile() yields nothing usable), parses
    it with ELFFile and keeps every section whose name does not contain one
    of the entries of exclude_sections. The name of each kept section (except
    the unnamed NULL section) is appended, NUL-terminated, to shstrtab.

    Every entry of required_sections that does not occur in shstrtab is then
    created from a deep copy of the .text section and inserted right after
    .text.

    Takes no parameters and returns nothing. Results are published through
    the globals sections, NUM_SECTIONS, shstrtab and elffile.
    Raises IndexError if a required section is missing and there is no .text
    section to copy.
    """
    global sections, NUM_SECTIONS, shstrtab, elffile

    # Open the program file.
    # In Python getProgramFile() returns a java.io.File object,
    # so we need to add getPath() to get the full path including the filename.
    try:
        f = open(getProgramFile().getPath(), 'rb')
    except AttributeError:
        selectedFile = askFile("Select Executable File", "Open")
        println("Path: %s" % selectedFile)
        f = open(selectedFile.getPath(), 'rb')

    # Create ELF object.
    # NOTE(review): f is intentionally not closed here; write_section() still
    # asks elffile for section data later on, which presumably reads from
    # this stream - confirm before adding a close().
    elffile = ELFFile(f)

    # Get all sections in ELF file excluding those listed in exclude_sections
    for sec in elffile.iter_sections():
        if any(ele in sec.name for ele in exclude_sections):
            continue
        # Build section header string table section excluding the NULL Section
        if sec.name != '':
            shstrtab += sec.name + '\x00'
        sections.append(sec)

    # Some required sections might not be in the binary. Let's check for those and add them
    for missing_section in required_sections:
        # Substring test: a name also counts as present when it is the tail of
        # another entry (e.g. '.text' inside '.rel.text').
        if missing_section in shstrtab:
            continue
        println("We're missing a section %s! Adding it in" % missing_section)
        println("s: %s" % missing_section)
        if missing_section == '.rel.text':
            # '.rel.text' ends with '.text', so a single string table entry
            # can serve both names.
            shstrtab = shstrtab.replace('.text', '.rel.text')
        else:
            # Adding missing section to the section header string table
            shstrtab += missing_section + '\x00'
        # Copy the .text section as a template for the missing section and
        # insert the copy directly behind .text.
        text_index = [i for i, v in enumerate(sections) if v.name == '.text'][0]
        sec = copy.deepcopy(sections[text_index])
        sec.name = missing_section
        sections.insert(text_index + 1, sec)

    # Synthesised sections were already inserted above, so the list length is final.
    NUM_SECTIONS = len(sections)
    if DEBUG:
        println("Number of sections: %d" % NUM_SECTIONS)
        print("SHSTRTAB: ", shstrtab)
|
|
|
|
|
|
"""
|
|
This function writes the data of a section to the file.
|
|
@param obj_file: A file descriptor representing the object file being created
|
|
moduleBytes is not a parameter: the bytes of the module are read from the moduleBytes global, which is filled in by the get_module_bytes function
|
|
@param section: The section being written
|
|
"""
|
|
def _write_symtab_entry(obj_file, st_name, st_value, st_size, st_info, st_other, st_shndx):
    """
    Write one symbol table entry to obj_file.

    The fields are written in the order name, value, size, info, other,
    shndx as little-endian integers of 4, 4, 4, 1, 1 and 2 bytes.
    """
    obj_file.write(to_bytes(st_name, 4, 'little'))
    obj_file.write(to_bytes(st_value, 4, 'little'))
    obj_file.write(to_bytes(st_size, 4, 'little'))
    obj_file.write(to_bytes(st_info, 1, 'little'))
    # st_other currently holds 0 and has no defined meaning
    obj_file.write(to_bytes(st_other, 1, 'little'))
    # st_shndx: which section is this symbol defined in?
    obj_file.write(to_bytes(st_shndx, 2, 'little'))


def _st_info(bind, info_type):
    """Pack a symbol binding and a symbol type into the st_info byte."""
    # The calculation for st_info was taken from https://refspecs.linuxfoundation.org/elf/elf.pdf
    return ((bind << 4) + (info_type & 0xf))


def _section_index(name):
    """
    Return the position of the first section called name in the global
    sections list. Raises IndexError when there is no such section.
    """
    return [i for i, v in enumerate(sections) if v.name == name][0]


def write_section(obj_file, section):
    """
    Write the contents of one section to the object file.

    What is written depends on section.name:
      .text      the module bytes (moduleBytes global)
      .rel.text  one offset/info pair (4 bytes each) per entry of relocations
      .symtab    one entry per name in section_symbols followed by one entry
                 per symbol in the symbols dictionary
      .strtab    the strtab global
      .shstrtab  the shstrtab global
      .rodata    the rodata_bytes global
      .data      the bytes of every LOCAL symbol inside data_fragment, each
                 rounded up to a multiple of alignment
      .bss       nothing
      other      the section's original data taken from elffile

    The function attribute write_section.offset is used as a running offset
    for the value of global symbols defined inside the module and must be
    initialised by the caller.

    @param obj_file: A file object, opened for binary writing, representing
                     the object file being created
    @param section:  The section being written (only its name is used)
    """
    if DEBUG:
        println("Writing section: %s!" % section.name)

    if section.name == '.text':
        obj_file.write(moduleBytes)

    elif section.name == '.rel.text':
        for rel in relocations.values():
            # The offset is stored as a hex string. hex(0).lstrip("0x") is the
            # empty string, so treat '' as zero instead of crashing in int().
            obj_file.write(to_bytes(int(rel['offset'] or '0', 16), 4, 'little'))
            obj_file.write(to_bytes(rel['info'], 4, 'little'))

    elif section.name == '.symtab':
        # null, .text, .data, .rodata, and .bss should all have entries in the symbol table
        st_name = 0
        st_value = 0
        st_size = 0
        st_info = 0
        st_other = 0
        st_shndx = 0
        for l in section_symbols:
            if l == 'null':
                # The NULL symbol is written with whatever the fields hold at
                # this point (all zero when 'null' comes first).
                if DEBUG:
                    println("Writing the null section symbol")
            else:
                if DEBUG:
                    println("Writing the %s section symbol" % l)
                st_info = _st_info(ENUM_ST_INFO_BIND['STB_LOCAL'],
                                   ENUM_ST_INFO_TYPE['STT_SECTION'])
                st_shndx = _section_index(l)
            _write_symtab_entry(obj_file, st_name, st_value, st_size,
                                st_info, st_other, st_shndx)

        for s in symbols:
            if s == 'GLOBAL':
                for sym in symbols[s]:
                    if sym.name == '':
                        continue
                    st_name = strtab.find(sym.name + '\x00')

                    if isInternalSymbol(sym):
                        # NOTE(review): the value is a running sum of the sizes
                        # of the internal functions seen so far, which assumes
                        # they are laid out back to back in symbol order -
                        # confirm against the real offsets in the module.
                        st_value = write_section.offset
                        st_info = _st_info(ENUM_ST_INFO_BIND['STB_GLOBAL'],
                                           ENUM_ST_INFO_TYPE['STT_FUNC'])
                        func = getFunctionAt(sym.getAddress())
                        body = func.getBody()
                        st_size = len(getBytes(body.getMinAddress(), body.getMaxAddress().subtract(body.getMinAddress()) + 1))
                        st_shndx = _section_index('.text')
                    else:
                        # Symbol outside of the module: undefined, no size, no value
                        st_info = _st_info(ENUM_ST_INFO_BIND['STB_GLOBAL'],
                                           ENUM_ST_INFO_TYPE['STT_NOTYPE'])
                        st_shndx = 0
                        st_size = 0
                        st_value = 0

                    if DEBUG:
                        println("Global symbol name: %s value: %d" % (sym.name, st_value))
                    _write_symtab_entry(obj_file, st_name, st_value, st_size,
                                        st_info, st_other, st_shndx)
                    write_section.offset += st_size
            else:
                data_st_value = 0
                bss_st_value = 0
                for sym in symbols[s]:
                    try:
                        if sym.name == '':
                            continue
                        if DEBUG:
                            println("Local Symbol name: %s" % sym.name)
                            println("Local Symbol Address: %s" % str(sym.getAddress()))
                        in_bss = bss_fragment.contains(sym.getAddress())
                        st_info = _st_info(ENUM_ST_INFO_BIND['STB_LOCAL'],
                                           ENUM_ST_INFO_TYPE['STT_OBJECT'])
                        # Everything that is not in .bss is attributed to .data
                        st_shndx = _section_index('.bss' if in_bss else '.data')
                        st_name = strtab.find(sym.name + '\x00')
                        if DEBUG:
                            println("st_name: %d" % st_name)
                            println("st_shndx: %s" % st_shndx)
                        dat = getDataAt(sym.getAddress())
                        st_size = dat.getLength()
                    except AttributeError:
                        # Entries without the attributes used above (or without
                        # data at their address) get no symbol table entry.
                        if DEBUG:
                            println("This is not a symbol")
                        continue

                    # The value is the running offset inside the symbol's section
                    if in_bss:
                        local_value = bss_st_value
                        bss_st_value += st_size
                    else:
                        local_value = data_st_value
                        data_st_value += st_size
                    _write_symtab_entry(obj_file, st_name, local_value, st_size,
                                        st_info, st_other, st_shndx)

    elif section.name == '.strtab':
        obj_file.write(strtab)
        if DEBUG:
            println("Wrote strtab section %d bytes" % len(strtab))

    elif section.name == '.shstrtab':
        obj_file.write(shstrtab)
        if DEBUG:
            println("Wrote shstrtab section %d bytes" % len(shstrtab))

    elif section.name == '.rodata':
        if DEBUG:
            println("Writing %d bytes" % len(rodata_bytes))
        obj_file.write(rodata_bytes)

    elif section.name == '.data':
        for sym in symbols['LOCAL']:
            if not data_fragment.contains(sym.getAddress()):
                continue
            dat = getDataAt(sym.getAddress())
            # Round the length up to the next multiple of alignment
            remainder = dat.getLength() % alignment
            if remainder != 0:
                size = dat.getLength() + (alignment - remainder)
            else:
                size = dat.getLength()
            if DEBUG:
                println("Symbol: %s" % sym.getName())
                println("Symbol Address: %s" % sym.getAddress())
                println("Data Address: %s" % dat.getAddress())
                println("Writing %d bytes" % size)
                println("Data Type: %s" % dat.getDataType().getDisplayName())
                println("TESTING: %s" % dat.getValue())
                println("Data Bytes: %s" % getBytes(dat.getAddress(), dat.getLength()))
            obj_file.write(getBytes(dat.getAddress(), size))

    elif section.name == '.bss':
        # The .bss section doesn't actually take up any bytes in the object file so no need to write anything :)
        pass

    else:
        # Any other section is copied unchanged from the original ELF file
        data = elffile.get_section_by_name(section.name).data()
        obj_file.write(data)
        if DEBUG:
            println("Wrote %d amount" % len(data))
|
|
|
|
"""
|
|
This function is a wrapper that calls the other functions necessary for unlinking the object file.
|
|
The function creates the name of the resulting unlinked object file and opens the current program executable
|
|
so pyelftools can process it. Pyelftools is used to grab the sections in the executable so they can
|
|
be modified by other functions. The actual data is modified and written by other helper functions. The object
|
|
is created by first gathering all sections we'll want to modify and export, writing the ELF header, writing the
|
|
individual sections, and finally, writing the section headers. This is a bit different than how a compiler would
|
|
structure an object file, but the order does not matter so long as the linker knows where to find everything. In
|
|
other words, we have to make sure the offsets in the ELF and section headers are accurate. Because of this constraint,
|
|
it was easiest to create the file in this manner because each section being written can be used by a later portion
|
|
being written.
|
|
Takes no parameters: the bytes of the module are read from the moduleBytes global when the .text section is written
|
|
"""
|
|
def unlink_object_file():
    """
    Create the unlinked object file for the selected module.

    The file is written as <output_arg>/<moduleName>_unlinked.o in this
    order: the section headers are adjusted in memory
    (modify_section_headers), the ELF header is written, then the contents
    of every entry of sections, and finally the section header of every
    entry of sections.

    Takes no parameters and returns nothing. The file is closed even when one
    of the writers raises.
    """
    # The resulting object file
    # TODO: Update to use Ghidra API to get filename
    fname = output_arg + '/' + moduleName + '_unlinked.o'
    println("File name: %s" % fname)

    # 'with' guarantees the handle is released if any step below fails
    with open(fname, 'wb') as obj_file:
        # Modify the section headers
        modify_section_headers()
        # The section headers have been modified, let's write the ELF headers to a file
        write_elf_header(obj_file)
        # Let's now write the sections
        for sec in sections:
            write_section(obj_file, sec)
        # Finally let's write the section headers
        for sec in sections:
            write_section_headers(obj_file, sec)
    # The unlinked object file is now complete and closed
|
|
|
|
"""
|
|
This function updates the module bytes at the given offset with the specific bytes
|
|
|
|
"""
|
|
def update_module_bytes(offset, newBytes):
    """
    Overwrite the module bytes in place, starting at offset, with newBytes.

    Exactly len(newBytes) bytes are replaced, so the length of the
    moduleBytes global stays the same as long as the patch lies inside the
    buffer (a fixed 4-byte slice would shrink or grow the buffer for
    patches of any other length).

    @param offset:   Index into moduleBytes where the patch starts
    @param newBytes: The replacement bytes
    """
    global moduleBytes
    moduleBytes[offset:offset + len(newBytes)] = newBytes
|
|
|
|
"""
|
|
This function grabs the text section for the module by getting the bytes between boundary1 and boundary2
|
|
"""
|
|
def get_module_bytes():
    """
    Read the bytes of the selected module into the moduleBytes global.

    The bytes from minModuleAddress up to and including maxModuleAddress are
    fetched with getBytes. When the memory cannot be read the problem is
    reported and the stack trace printed; moduleBytes is left untouched in
    that case.
    """
    global moduleBytes

    try:
        # Added one at the end because a byte was being cut off if I just subtracted maxModuleAddress from minModuleAddress
        # Would like to try and find a more elegant way but this works for now
        moduleBytes = getBytes(minModuleAddress, (maxModuleAddress.subtract(minModuleAddress) + 1))
    except MemoryAccessException as e:
        # The exception has to be bound to a name, otherwise the
        # printStackTrace() call below fails with a NameError.
        println("Problem getting bytes within module... ")
        e.printStackTrace()
|
|
|
|
"""
|
|
This function places all of the relocations in a relocation dictionary. Additionally, it modifies
|
|
the original bytes such that they are consistent with what a normal compiler would do. For example,
|
|
for external function calls, the compiler places the following bytes at the address where the
|
|
external function was called: FE FF FF EB (the little-endian bytes of the word 0xEBFFFFFE), where EB is the call instruction and the other three bytes
|
|
are dummy values. The string table consists of all the symbol names (i.e. function/variable names).
|
|
This "table" is constructed here and later used for writing the strtab section.
|
|
"""
|
|
def _r_info(sym_index, reloc_type):
    """
    Build the info word of a relocation entry: the symbol table index in the
    upper bits and the relocation type in the low byte.
    """
    return (sym_index << 8) + (reloc_type & 0xff)


def get_relocations():
    """
    Collect the relocations of the module and patch the module bytes.

    Walks every entry of the symbols dictionary:

    * Entries outside of rodata_fragment have their name appended to strtab.
      Every reference to them that originates inside the module becomes an
      entry of the relocations dictionary (R_ARM_CALL for function symbols,
      R_ARM_ABS32 for DATA references to other symbols) and the 4 bytes at
      the referencing offset are overwritten with a placeholder call or with
      a running offset into .bss/.data.
    * Entries inside rodata_fragment get an R_ARM_ABS32 relocation for every
      DATA reference from inside the module; their bytes are appended to
      rodata_bytes and the referencing bytes are overwritten with the offset
      of the appended data.

    Finally strtab is turned from a list of names into the NUL-separated
    string that is written as the .strtab section.

    Takes no parameters and returns nothing. Updates the globals relocations,
    NUM_RELOCS, strtab, total_symbols and rodata_bytes, and patches
    moduleBytes through update_module_bytes.
    """
    global relocations, NUM_RELOCS, sections, strtab, moduleBytes, total_symbols, rodata_bytes
    bss_offset = 0
    data_offset = 0
    rodata_offset = 0

    # Placeholder written over every relocated call: the little-endian bytes
    # of 0xEBFFFFFE (a branch-and-link whose embedded offset is -2 words).
    # The relocation entries written to .rel.text carry no addend of their
    # own, so the value left in the instruction matters.
    bl_placeholder = b'\xfe\xff\xff\xeb'

    boundary1 = minModuleAddress

    # Bound once up front: the rodata branch below needs data_reloc even when
    # no other symbol has been processed before it.
    func_call = ENUM_RELOC_TYPE_ARM['R_ARM_CALL']
    data_reloc = ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']

    # Hard coded symbol table indices used for data relocations.
    # NOTE(review): presumably the positions of the .rodata, .data and .bss
    # section symbols in section_symbols - confirm. Would like to get away
    # from hard coding this.
    rodata_sym_index = 2
    data_sym_index = 3
    bss_sym_index = 4

    # This attempts to gather most relocations.
    # Iterate through the dictionary of symbols
    for _, lst in symbols.items():
        # Iterate through each symbol
        for sym in lst:
            if DEBUG:
                println("-----------------------------------")
            if not rodata_fragment.contains(sym.getAddress()):
                strtab.append(sym.getName())
                if DEBUG:
                    println("Not a rodata symbol Symbol Name: %s" % sym.getName())
                is_function = sym.getSymbolType().toString() == 'Function'

                # Does this symbol have any references
                if sym.hasReferences() or sym.hasMultipleReferences():
                    symRef = sym.getReferences()
                    if DEBUG:
                        println("Symbol has references...")
                    # Iterate through each symbols references
                    for ref in symRef:
                        if ref.getFromAddress().toString() == 'Entry Point':
                            continue
                        # Is the reference within the boundary? If so, a relocation should occur
                        if not isInternalSymbol(ref):
                            continue
                        if DEBUG:
                            println("Reference is within module boundary")
                        # Here, the offset is the offset the relocation should occur
                        # That is, the address where the symbol is referenced minus
                        # the lower bound of the module
                        offset = ref.getFromAddress().subtract(boundary1)

                        # key for relocation will be relocation<NUM>
                        key = "relocation" + str(NUM_RELOCS)

                        # The info field in the relocation table is composed of the index into
                        # the symbol table followed by the type of relocation (i.e. R_ARM_CALL).
                        # Every relocation of one symbol yields the same value, so it is
                        # computed directly instead of being looked up in earlier entries.
                        if is_function:
                            if DEBUG:
                                println("Symbol is a function")
                            in_bss = False
                            info = _r_info(total_symbols, func_call)
                        else:
                            # The symbol is not a function
                            if DEBUG:
                                println("Symbol is not a function")
                            if ref.getReferenceType().toString() != 'DATA':
                                continue
                            if DEBUG:
                                dat = getDataAt(sym.getAddress())
                                println("Symbol: %s Address: %s" % (sym.getName(), sym.getAddress()))
                                println("Value: %s Length: %s" % (dat.getValue(), dat.getLength()))
                            in_bss = bss_fragment.contains(sym.getAddress())
                            info = _r_info(bss_sym_index if in_bss else data_sym_index, data_reloc)
                            if DEBUG:
                                println("Info bss or data: %x" % info)

                        NUM_RELOCS += 1
                        # '%x' keeps an offset of zero as '0' (hex(0).lstrip("0x") would be '')
                        relocations[key] = {'name': sym.getName(), 'offset': '%x' % offset, 'addr': ref.getFromAddress(), 'info': info}

                        # Modify the bytes at the offset with the bytes \xfe\xff\xff\xeb
                        if is_function:
                            update_module_bytes(offset, bl_placeholder)
                        elif in_bss:
                            # NOTE(review): the offset advances by 4 per relocation,
                            # not per symbol - confirm this is intended.
                            update_module_bytes(offset, to_bytes(bss_offset, 4, 'little'))
                            bss_offset += 4
                        else:
                            update_module_bytes(offset, to_bytes(data_offset, 4, 'little'))
                            data_offset += 4
                # One symbol table slot per non-rodata symbol
                total_symbols += 1
            # This is the case where we have to perform a relocation on the rodata
            # section. These aren't symbols like the other elements in the symbols
            # dictionary so we have to treat them a little differently
            else:
                if DEBUG:
                    println("This is in the rodata section")
                refs = sym.getReferenceIteratorTo()
                for ref in refs:
                    if not isInternalSymbol(ref):
                        continue
                    if ref.getReferenceType().toString() != 'DATA':
                        continue
                    # This is a string that's too small to be recognized by Ghidra and the
                    # analyst didn't manually type it as a string
                    short_str = 'undefined' in sym.getDataType().getDisplayName()
                    if short_str:
                        # Collect the bytes up to (not including) the first NUL.
                        # NOTE(review): unlike the typed-string path below, no
                        # terminator or padding is appended here - confirm.
                        str_array = bytearray()
                        str_array.append(getByte(sym.getAddress()))
                        n = sym.getAddress().next()
                        while getByte(n) != 0:
                            str_array.append(getByte(n))
                            n = n.next()
                        if DEBUG:
                            println("str_array: %s" % str_array)
                        rodata_bytes += str_array

                    offset = ref.getFromAddress().subtract(boundary1)

                    # The original bytes written here is the offset into the rodata section
                    # The offset is updated later on
                    update_module_bytes(offset, to_bytes(rodata_offset, 4, 'little'))
                    info = _r_info(rodata_sym_index, data_reloc)

                    if short_str:
                        key = "relocation" + str(NUM_RELOCS)
                        NUM_RELOCS += 1
                        relocations[key] = {'name': str_array, 'offset': '%x' % offset, 'addr': ref.getFromAddress(), 'info': info}
                        rodata_offset += len(str_array)
                        continue

                    value = sym.getValue()
                    if isinstance(value, unicode) or isinstance(value, str):
                        key = "relocation" + str(NUM_RELOCS)
                        NUM_RELOCS += 1
                        relocations[key] = {'name': value, 'offset': '%x' % offset, 'addr': ref.getFromAddress(), 'info': info}
                        # One extra byte so the NUL at the end of the string is included
                        newString = getBytes(sym.getAddress(), len(value) + 1)
                        if DEBUG:
                            println("newString Data: %s" % newString)
                            println("newString length BEFORE: %d" % len(newString))

                        if len(newString) % alignment != 0:
                            if DEBUG:
                                println("This string is not 4 byte aligned. Need to add some bytes")
                            # Pad with zero bytes up to the next multiple of alignment
                            diff = len(newString) % alignment
                            padding = alignment - diff
                            pad = to_bytes(0, padding, 'little')
                            if DEBUG:
                                println("Length of pad: %d" % len(pad))
                                println("Pad bytes: %s" % pad)
                                println("Type of element: %s" % type(newString[0]))
                                println("element: %s" % newString[0])
                            for s in pad:
                                newString.append(ord(s))

                        if DEBUG:
                            println("newString length AFTER: %d" % len(newString))
                            println("newString: %s" % newString)
                        rodata_bytes += (newString)
                        if DEBUG:
                            println("rodata_bytes: %s" % rodata_bytes)
                        rodata_offset += len(newString)
                    else:
                        # Not a string value: no relocation entry is recorded
                        if DEBUG:
                            println("Address: %s" % sym.getAddress())
                            println("Value: %s" % sym.getValue())
            if DEBUG:
                println("-----------------------------------")
    if DEBUG:
        println("----------------------------------------------------------------------------------------------------------------------")
        print(relocations)
        println("----------------------------------------------------------------------------------------------------------------------")

    # Turn the list of names into the final string table: a leading NUL
    # followed by every name, each terminated by a NUL
    strtab = '\x00' + '\x00'.join(strtab) + '\x00'
    if DEBUG:
        print("Strtab: " , strtab)
|
|
|
|
|
|
# get script args
# args[0] (optional): the module argument handed to SelectModule, "" when absent
# args[1] (optional): the output directory; when absent it is derived from the
#                     path of the script's source file
args = getScriptArgs()
module_arg = args[0] if len(args) > 0 else ""
output_arg = args[1] if len(args) > 1 else os.path.dirname(parseFile(getSourceFile().getCanonicalPath()).getPath())

print("ELF Output: %s %s" % (module_arg, output_arg))

# executable_arg = args[2] if len(args) > 2 else
# Running offset used by write_section for the value of global symbols that
# are defined inside the module; it has to exist before the first call
write_section.offset = 0
if SelectModule(module_arg):

    # Perform first pass of ELF file to gather all the sections we'll modify and export.
    first_pass()
    # The remaining steps depend on each other and run in this order:
    # symbols -> module bytes -> relocations (patches the module bytes) ->
    # writing the object file
    get_symbols()
    get_module_bytes()
    get_relocations()
    unlink_object_file()
|