Files
CodeCut/codecut-gui/ghidra_scripts/OutputObjFile.py
2025-09-18 16:00:39 -04:00

1359 lines
66 KiB
Python
Executable File

#@category CodeCut
#@runtime PyGhidra
#
## Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC
## (JHU/APL). All Rights Reserved.
#
## This material may be only be used, modified, or reproduced by or for
## the U.S. Government pursuant to the license rights granted under the
## clauses at DFARS 252.227-7013/7014 or FAR 52.227-14. For any other
## permission, please contact the Office of Technology Transfer at
## JHU/APL.
#
## NO WARRANTY, NO LIABILITY. THIS MATERIAL IS PROVIDED "AS IS." JHU/APL
## MAKES NO REPRESENTATION OR WARRANTY WITH RESPECT TO THE PERFORMANCE OF
## THE MATERIALS, INCLUDING THEIR SAFETY, EFFECTIVENESS, OR COMMERCIAL
## VIABILITY, AND DISCLAIMS ALL WARRANTIES IN THE MATERIAL, WHETHER
## EXPRESS OR IMPLIED, INCLUDING (BUT NOT LIMITED TO) ANY AND ALL IMPLIED
## WARRANTIES OF PERFORMANCE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
## PURPOSE, AND NON-INFRINGEMENT OF INTELLECTUAL PROPERTY OR OTHER THIRD
## PARTY RIGHTS. ANY USER OF THE MATERIAL ASSUMES THE ENTIRE RISK AND
## LIABILITY FOR USING THE MATERIAL. IN NO EVENT SHALL JHU/APL BE LIABLE
## TO ANY USER OF THE MATERIAL FOR ANY ACTUAL, INDIRECT, CONSEQUENTIAL,
## SPECIAL OR OTHER DAMAGES ARISING FROM THE USE OF, OR INABILITY TO USE,
## THE MATERIAL, INCLUDING, BUT NOT LIMITED TO, ANY DAMAGES FOR LOST
## PROFITS.
## HAVE A NICE DAY.
##
## This script takes the boundaries found by deepcut and outputs an object file for the module selected by the user
##
##@author
##@keybinding
##@menupath
##@toolbar
#@author
#@category MINDSIGHT
#@keybinding
#@menupath
#@toolbar
import sys, copy, os
import ghidra.app.script.GhidraScript
from ghidra.program.database.module import *
from ghidra.program.flatapi import *
from ghidra.app.services import *
from ghidra.program.model.mem import *
from ghidra.program.model.lang import *
from ghidra.program.model.pcode import *
from ghidra.program.model.util import *
from ghidra.program.model.reloc import *
from ghidra.program.model.data import *
from ghidra.program.model.block import *
from ghidra.program.model.symbol import *
from ghidra.program.model.scalar import *
from ghidra.program.model.listing import *
from ghidra.program.model.address import *
from ghidra.program.util import ProgramLocation
from ghidra.program.database.mem.FileBytes import *
from ghidra.app.util import *
import ghidra.app.util.bin.ByteProvider
import ghidra.util.exception.CancelledException
import ghidra.util.task.TaskMonitor
# Need this line so we can use pyelftools
script_path = parseFile(getSourceFile().getCanonicalPath()).getPath()
pyelftools_path = os.path.join(os.path.dirname(script_path), "pyelftools-0.28")
sys.path.append(pyelftools_path)
# Pyelf imports
from elftools.elf.elffile import ELFFile
from elftools.elf.enums import *
# Verbose progress output is emitted while this flag is set
DEBUG = True

# ---- Tables that are filled in while the object file is assembled ----
# Will hold the symbol table
symtab = None
# Will hold the string table
# (both strtab and shstrtab begin with \x00)
strtab = []
# Will hold the section header string table
shstrtab = '\x00'
# Will hold all sections
sections = []
# Total number of sections
NUM_SECTIONS = 0
# Number of relocations
NUM_RELOCS = 0
# Size of relocation fields
RELOC_SIZE = 8
# Nested dictionary with details about the functions in the module: the name of
# every function each one called and the address at which the call was made
relocations = {}
# Dictionary of all symbols in the module
# mapping is LOCAL: [list of local symbols], GLOBAL: [list of global symbols]
symbols = {'LOCAL': [], 'GLOBAL': []}
data_symbols = []
# These will take up an entry in the symbol table
section_symbols = ['null', '.text', '.rodata', '.data', '.bss']
required_sections = ['.text', '.rel.text', '.data', '.bss', '.symtab', '.strtab', '.shstrtab']
# Total number of symbols (starts out as just the section symbols)
total_symbols = len(section_symbols)
# The number of entries in the rodata section
rodata_entries = 0
# Program-tree fragments, looked up later
bss_fragment = data_fragment = rodata_fragment = None

# ---- Fixed ELF32 record sizes ----
# Symbol Table Entry
#   typedef struct {
#       Elf32_Word    st_name;   (4 bytes)
#       Elf32_Addr    st_value;  (4 bytes)
#       Elf32_Word    st_size;   (4 bytes)
#       unsigned char st_info;   (1 byte)
#       unsigned char st_other;  (1 byte)
#       Elf32_Half    st_shndx;  (2 bytes)
#   } Elf32_Sym;                 (16 bytes total)
SYM_SIZE = 16  # Size of symbols in symtab section

# Elf Header
#   typedef struct {
#       unsigned char e_ident[EI_NIDENT]; (16 bytes)
#       Elf32_Half    e_type;             (2 bytes)
#       Elf32_Half    e_machine;          (2 bytes)
#       Elf32_Word    e_version;          (4 bytes)
#       Elf32_Addr    e_entry;            (4 bytes)
#       Elf32_Off     e_phoff;            (4 bytes)
#       Elf32_Off     e_shoff;            (4 bytes)
#       Elf32_Word    e_flags;            (4 bytes)
#       Elf32_Half    e_ehsize;           (2 bytes)
#       Elf32_Half    e_phentsize;        (2 bytes)
#       Elf32_Half    e_phnum;            (2 bytes)
#       Elf32_Half    e_shentsize;        (2 bytes)
#       Elf32_Half    e_shnum;            (2 bytes)
#       Elf32_Half    e_shstrndx;         (2 bytes)
#   } Elf32_Ehdr;                         (52 bytes total)
ELFHDRSZ = 52  # size of the ELF header

# Section Header
#   typedef struct {
#       Elf32_Word sh_name;      (4 bytes)
#       Elf32_Word sh_type;      (4 bytes)
#       Elf32_Word sh_flags;     (4 bytes)
#       Elf32_Addr sh_addr;      (4 bytes)
#       Elf32_Off  sh_offset;    (4 bytes)
#       Elf32_Word sh_size;      (4 bytes)
#       Elf32_Word sh_link;      (4 bytes)
#       Elf32_Word sh_info;      (4 bytes)
#       Elf32_Word sh_addralign; (4 bytes)
#       Elf32_Word sh_entsize;   (4 bytes)
#   } Elf32_Shdr;                (40 bytes total)
SHSIZE = 40  # size of section headers

# ---- Per-run state ----
OFFSET = 0  # Offset into the file
moduleName = ''
elffile = None
minModuleAddress = None
maxModuleAddress = None
moduleBytes = bytearray()
rodata_bytes = bytearray()
mod = None
alignment = 4
# These sections are specific to an executable binary. Relocatable object files don't need these
exclude_sections = [
    '.init', '.fini', '.eh_frame', '.stab', '.debug', '.noinit', '.exidx', '.got',
    '__', '.rel.ro', 'plt', 'dyn',
]
def SelectModule(module=""):
    """
    Pick the module (namespace) to export and record its address range.

    On success the globals moduleName, mod, prog, minModuleAddress and
    maxModuleAddress are set. The maximum address is then pushed forward,
    one address at a time, until the address after it is a function start.

    @param module: Name of the namespace to export. When empty, the user is
                   asked to choose among every namespace that owns a symbol
                   and is not "Global" or "<EXTERNAL>".
    @return: True when a usable module was selected; False (after a popup)
             when no program is open, no candidate namespace exists, the
             named namespace cannot be found, or its address range is empty
             or a single address.
    """
    global moduleName, minModuleAddress, maxModuleAddress, prog, mod, ns
    # Check if there is a program open
    if currentProgram is None:
        popup("There is no open program")
        return False
    prog = currentProgram
    st = prog.getSymbolTable()
    if module == "":
        # Collect every namespace that owns at least one symbol, keyed by name.
        # These are likely namespaces that were created by deepcut.
        Modulelist = {}
        SymbolIterator = st.getAllSymbols(True)
        while SymbolIterator.hasNext():
            ns = SymbolIterator.next().getParentNamespace()
            ns_name = ns.getName()
            if ns_name == u"Global" or ns_name == u"<EXTERNAL>":
                continue
            # The original guarded this with `ns not in Modulelist`, which compared
            # a namespace object against name keys and was therefore always true;
            # assigning unconditionally keeps the same (last one wins) result.
            Modulelist[ns_name] = ns
        # There weren't any namespaces found
        if not Modulelist:
            popup("There doesn't seem to be any modules created by DeepCut (Namespaces is empty?).\nPlease run the DeepCut Analyzer before running this script.\nFor additional fidelity run GuessModuleNames from the CodeCut GUI.")
            return False
        # Ask user for module to export as object file.
        # askChoice takes a list of choices, so materialise the dict's key view.
        Modulechoice = askChoice("Select a Module", getCategory(), list(Modulelist.keys()), None)
        print(Modulechoice + " was chosen!\n")
        moduleName = Modulechoice
        mod = Modulelist[Modulechoice]
    else:
        moduleName = module
        mod = st.getNamespace(module, prog.getGlobalNamespace())
        if mod is None:
            # `mod` is None here, so report the name that was asked for instead
            # of dereferencing it.
            popup("Something went wrong. %s doesn't seem to be a valid module..." % module)
            return False
        println("Module: %s" % mod.getName())
    # getBody() returns an AddressSetView which can be used to get the min and
    # max address of the namespace
    asv = mod.getBody()
    minModuleAddress = asv.getMinAddress()
    maxModuleAddress = asv.getMaxAddress()
    # This module is not valid
    if minModuleAddress is None or maxModuleAddress is None or (minModuleAddress == maxModuleAddress):
        popup("The module you selected is invalid. This could be the min and/or max Address do not exist or the min and max address of the module are equal to each other. Please select another module")
        return False
    # It's possible that the max module address does not include the
    # entirety of the module. Adjust the max boundary so every byte up to
    # the next function start is accounted for.
    # NOTE(review): nothing bounds this walk other than finding a function
    # start - confirm what should happen when the module is the last code
    # in the program.
    while not getFunctionAt(maxModuleAddress.add(1)):
        maxModuleAddress = maxModuleAddress.add(1)
    println("Min module address: %s" % (str(minModuleAddress)))
    println("Max module address; %s" % (str(maxModuleAddress)))
    return True
# Fill the global `symbols` dict (keys 'LOCAL' and 'GLOBAL') for the selected module.
#
# Steps visible below:
#   1. Look up the ".bss", ".data" and ".rodata" fragments of the "Program Tree" and
#      store them in the bss_fragment / data_fragment / rodata_fragment globals.
#   2. Walk every symbol in the program's symbol table, skipping names containing
#      "_init", "_fini" or "_start", and append symbols to symbols['LOCAL'] or
#      symbols['GLOBAL'] according to the checks in the loop.
#   3. Walk the defined data starting at the .rodata fragment's minimum address; each
#      item inside .rodata that has a reference from inside the module is appended to
#      symbols['LOCAL'] and counted in rodata_entries.
#   4. Call update_functions() and raise maxModuleAddress if it returned a higher address.
#
# NOTE(review): the indentation of this file was lost, so the nesting of the checks in
# the symbol loop (in particular which `if` the `else: continue` belongs to) cannot be
# read off the text - confirm against the original script before changing the logic.
# NOTE(review): the fragment lookups are used without a None check; presumably all three
# fragments are expected to exist - confirm.
def get_symbols():
global minModuleAddress, maxModuleAddress, mod, symbols, bss_fragment, data_fragment, rodata_fragment, rodata_entries
# Get the symbol table
st = prog.getSymbolTable()
listing = prog.getListing()
bss_fragment = listing.getFragment("Program Tree", ".bss")
data_fragment = listing.getFragment("Program Tree", ".data")
rodata_fragment = listing.getFragment("Program Tree", ".rodata")
# Defined data, iterated forward from the first address of .rodata
definedData = listing.getDefinedData(rodata_fragment.getMinAddress(), True)
# Grab all symbols
SymbolIterator = st.getAllSymbols(True)
# This loop grabs all symbols referenced by the module, both internal symbols and external symbols
# External symbols are symbols that are referenced by an internal function but the address is
# outside the boundary of the module. For example, if the min and max address is 800000-900000,
# at some point a function within the module referenced a symbol whose address is greater than 900000
# or less than 800000. Internal symbols are those whose address fall within the boundary of the
# min and max address.
while SymbolIterator.hasNext():
sym = SymbolIterator.next()
# TODO: Make this more elegant
# Skip any symbol whose name contains _init, _fini or _start
if "_init" in sym.getName() or "_fini" in sym.getName() or "_start" in sym.getName():
continue
# We only want ones that were imported (symbols in the .bss_fragment section seem to land in this category)
# or that were defined by the user
if (sym.getSource().getDisplayString() == 'Imported') or (sym.getSource().getDisplayString() == 'User Defined'):
# Check if symbol has references
if sym.hasMultipleReferences() or sym.hasReferences():
symRef = sym.getReferences()
for ref in symRef:
# Are those references in the module?
# (isInternalSymbol checks the address the reference comes from)
if isInternalSymbol(ref):
# Is this symbol in the data or bss section? If yes it's a local symbol
if (bss_fragment.contains(sym.getAddress()) or data_fragment.contains(sym.getAddress())) and \
sym not in symbols['LOCAL']:
println("Adding symbol: %s Address: %s Reference: %s" % (sym.getName(), sym.getAddress(), ref.getFromAddress()))
symbols['LOCAL'].append(sym)
# Function symbols referenced from inside the module are recorded as global
if sym.getSymbolType().toString() == 'Function' and \
sym not in symbols['GLOBAL']:
symbols['GLOBAL'].append(sym)
# The symbol is in the bss or data section and didn't have any references but its address is within the module boundaries
if (bss_fragment.contains(sym.getAddress()) or data_fragment.contains(sym.getAddress())) and \
sym not in symbols['LOCAL'] and \
(isInternalSymbol(sym)):
println("Adding symbol: %s Address: %s" % (sym.getName(), sym.getAddress()))
symbols['LOCAL'].append(sym)
# The symbol didn't have any references
# Any symbol not yet recorded as global whose own address lies inside the module is added here
if sym not in symbols['GLOBAL']:
if isInternalSymbol(sym):
symbols['GLOBAL'].append(sym)
else:
continue
# Defined strings need to be relocated but they aren't necessarily symbols
# We add them to our symbol dictionary here but we will need to subtract the number of symbols we add
# here from the total number of symbols. Additionally, when writing the symtab section
# we'll need to ensure we don't write these symbols.
for dat in definedData:
refs = dat.getReferenceIteratorTo()
if rodata_fragment.contains(dat.getAddress()):
for ref in refs:
# One reference from inside the module is enough; record the data item once and stop
if isInternalSymbol(ref):
symbols['LOCAL'].append(dat)
rodata_entries += 1
break
else:
continue
print("Global and local symbols: ", symbols)
# update_functions() returns the (possibly larger) upper bound of the module
maxModAddress = update_functions()
# Updating maxModuleAddress just in case it changed in update_functions
if maxModAddress.subtract(maxModuleAddress) > 0:
maxModuleAddress = maxModAddress
println("Minimum Module Address: %s" % minModuleAddress)
println("Maximum Module Address: %s" % maxModuleAddress)
def isInternalSymbol(symbol):
    """
    Tell whether a symbol (or a reference) lies inside the current module.

    An object whose type name contains "reference" is judged by the address
    the reference comes from; anything else is judged by its own address.
    Both module bounds (minModuleAddress, maxModuleAddress) are inclusive.

    @param symbol: The symbol or reference we want to classify
    @return: True if it is internal to the module; False otherwise
    """
    is_reference = "reference" in str(type(symbol)).lower()
    location = symbol.getFromAddress() if is_reference else symbol.getAddress()
    return (location.compareTo(minModuleAddress) >= 0 and
            location.compareTo(maxModuleAddress) <= 0)
def update_functions():
    """
    Stretch every internal function so it reaches the next function start.

    For each 'Function' symbol in symbols['GLOBAL'] that lies inside the
    module, the end of the function body is advanced one address at a time
    until the following address is a function start. When that moved the
    end, the function body is replaced by the range entry point..new end,
    and the module's upper bound is raised if the new end lies beyond it.

    @return: The upper bound of the module - maxModuleAddress, or the
             highest new function end found beyond it
    """
    # Starts as the current upper bound; it may never change
    module_end = maxModuleAddress
    for sym in symbols['GLOBAL']:
        wanted = isInternalSymbol(sym) and sym.getSymbolType().toString() == 'Function'
        if not wanted:
            continue
        println("Symbol name: %s\n" % sym.getName())
        func = getFunctionAt(sym.getAddress())
        recorded_end = func.getBody().getMaxAddress()
        println("Max address: %s" % (str(recorded_end)))
        # Ghidra does not include all bytes for functions that contain data
        # elements, so keep walking until the next byte starts a new function.
        func_end = recorded_end
        while not getFunctionAt(func_end.add(1)):
            func_end = func_end.add(1)
        println("newMax: %s" % (str(func_end)))
        if func_end.subtract(recorded_end) <= 0:
            # Ghidra already had the right size
            println("This function is fine")
            continue
        println("Need to update the address set for this function\n")
        # Rebuild the body from the entry point up to the new end
        func.setBody(AddressSet(sym.getAddress(), func_end))
        # The module boundary was already adjusted before this function ran,
        # but a stretched function may still reach past it.
        if func_end.subtract(module_end) > 0:
            println("Updating module boundary!")
            module_end = func_end
    return module_end
def to_bytes(n, length, endianess='little'):
    """
    Serialize a non-negative integer into exactly ``length`` bytes.

    The previous implementation went through ``str.decode('hex')``, which only
    exists on Python 2; under the Python 3 runtime this script declares it
    raised AttributeError. ``int.to_bytes`` does the same job natively.

    @param n: The number to convert to bytes
    @param length: How many BYTES (not bits) should be used to represent the integer
    @param endianess: 'big' for big endian; any other value means little endian
    @return: A bytes object of exactly ``length`` bytes
    @raise OverflowError: if n is negative or does not fit in ``length`` bytes
    """
    byteorder = 'big' if endianess == 'big' else 'little'
    # int() normalises integer-like values (e.g. Java integers) to a Python int
    return int(n).to_bytes(length, byteorder)
def write_elf_header(obj_file):
    """
    Write the ELF header of the new object file.

    Fields are written in Elf32_Ehdr order, every multi-byte field little
    endian. e_ident, e_machine, e_flags, e_ehsize and e_shentsize are taken
    from the global ``elffile`` (the ELF being analyzed); the remaining
    fields are set for a relocatable file as commented below.

    @param obj_file: The new object file being created (opened for binary writing).
    """
    def emit(value, width):
        # One little-endian header field of `width` bytes
        obj_file.write(to_bytes(value, width, 'little'))

    println("Writing the ELF Header!")
    source_header = elffile.header
    # e_ident: copied verbatim from the original ELF executable
    obj_file.write(elffile.e_ident_raw)
    # e_type: relocatable. The ENUM_* tables come from the pyelftools library.
    emit(ENUM_E_TYPE['ET_REL'], 2)
    # e_machine
    emit(ENUM_E_MACHINE[source_header['e_machine']], 2)
    # e_version
    emit(ENUM_E_VERSION['EV_CURRENT'], 4)
    # e_entry: zero since it's an object file
    emit(0, 4)
    # e_phoff: zero since an object file does not have program headers
    emit(0, 4)
    # e_shoff: the section header table is placed immediately after the end of
    # the last section written, which is where the running OFFSET points
    println("Section Header Offset: %d" % OFFSET)
    emit(OFFSET, 4)
    # e_flags
    emit(source_header['e_flags'], 4)
    # e_ehsize: the header size should always be the same
    emit(source_header['e_ehsize'], 2)
    # e_phentsize / e_phnum: no program headers in a relocatable object file
    emit(0, 2)
    emit(0, 2)
    # e_shentsize
    emit(source_header['e_shentsize'], 2)
    # e_shnum
    println("Number of section: %d" % NUM_SECTIONS)
    emit(NUM_SECTIONS, 2)
    # e_shstrndx: position of .shstrtab in the list of sections
    shstrtab_positions = [pos for pos, sec in enumerate(sections) if sec.name == '.shstrtab']
    emit(shstrtab_positions[0], 2)
def write_section_headers(obj_file, section):
    """
    Write one section header to the newly created object file.

    The ten Elf32_Shdr fields are written in order, four little-endian bytes
    each. sh_type is stored in the header as a name and is translated to its
    number through ENUM_SH_TYPE_ARM; every other field is written as is.

    @param obj_file: A file object representing the new object file being created
    @param section: A section of the ELF file (its header may have been modified)
    """
    println("Writing section header: %s" % section.name)
    fields = section.header
    field_order = (
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset',
        'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize',
    )
    for field in field_order:
        value = fields[field]
        if field == 'sh_type':
            value = ENUM_SH_TYPE_ARM[value]
        obj_file.write(to_bytes(value, 4, 'little'))
def get_section_size(section):
    """
    Compute the size of the .data or .bss section of the object file.

    The size is the sum of the lengths of the data found at the address of
    every symbol in symbols['LOCAL'] that lies in the section's fragment. For
    .data each length is first rounded up to a multiple of ``alignment``;
    for .bss the lengths are summed as they are.

    @param section: The name of the section we want to calculate the size of
    @return: The size of the section; zero for any name other than
             '.bss' or '.data'
    """
    if section == '.bss':
        bss_total = 0
        for sym in symbols['LOCAL']:
            where = sym.getAddress()
            if bss_fragment.contains(where):
                bss_total += getDataAt(where).getLength()
        println("bss_section_size: %d" % bss_total)
        return bss_total
    if section == '.data':
        data_total = 0
        for sym in symbols['LOCAL']:
            where = sym.getAddress()
            if not data_fragment.contains(where):
                continue
            println("Name: %s" % sym)
            length = getDataAt(where).getLength()
            leftover = length % alignment
            if leftover != 0:
                # Pad the item up to the next alignment boundary
                length += alignment - leftover
            data_total += length
        println("data_section_size: %d" % data_total)
        return data_total
    return 0
"""
This function performs any necessary modifications to the section headers. Only
the .text, .data, .rodata, .bss, .strtab, .shstrtab, and .symtab require specific
modifications. This is because these sections are likely to change from the executable.
For example, the .text section is going to be MUCH smaller in the unlinked object file compared
to the full executable file because there's far less code. This is similar for the other sections.
The other section headers, only the sh_offset and sh_name field will be changed since we copy the
data from those sections directly to our new unlinked object file.
Here is a description of the fields and how they will be modified and how they are created:
sh_name
This field is an index into the section header string table (shstrtab)
section['sh_name'] = shstrtab.find(section.name)
sh_size
This is the size of the section. For the .text section it's simply the size of the moduleBytes variable.
For .data and .bss the size is a sum of the size of all variables in the respective section. For example,
if the .data section has two integers and a char, the size of this section would be 9 (sizeof(int) + sizeof(int) + sizeof(char))
The .rodata section is simply the length of all strings in the section. For example, if the .rodata section contained only one
string ("I am in the .rodata section"), the size would be len("I am in the .rodata section")
Currently, the size for these sections are set to zero, but an update will come to provide accurate sizes if necessary. The
.rel.text section's size is (number of relocations * size of each relocation). In our case the size of each relocation is 8 bytes.
Similarly, .symtab section's size is the (number of symbols * symbol size), where the size of each symbol is 16 bytes.
sh_offset
This is the offset of where the section data is in the file. For example, if the data for .rel.text section begins 128 bytes into the
file, this field will be hex(128)
sh_info
The sh_info field for .rel.text holds the index of the .text section. For the .symtab section, this field holds the symbol index
of the first non-local symbol. It is computed as the number of section symbols plus the number of local symbols, not counting
the .rodata entries that were added to the list of local symbols.
For the .rel.text section, I update this field using the following statement:
[j for j,v in enumerate(sections) if v.name == '.text'][0]
The sections variable is a list that contains pyelftools section objects. This would grab the index where the '.text' section
resides.
sh_link
This field is only relevant for the .rel.text section and the .symtab section. For the .rel.text section, this field represents
the index of the symbol table section. Given that the .rel.text section handles relocations, it makes sense why this section
would need a link to the symbol table. For the .symtab section, this field represents the index for the string table. Again,
because symbol's in the .symtab section don't contain names but indices into the .strtab section, it makes sense why this section
needs a link to the string table section.
This field is updated using a similar statement to the sh_info field.
For .rel.text we use:
[j for j,v in enumerate(sections) if v.name == '.symtab'][0] + 1
We add one here because we're assuming the executable does not have a .rel.text section and it needs to be added. Because we're
adding a section, the index of the .symtab section will be incremented by one.
For .symtab we use
[j for j,v in enumerate(sections) if v.name == '.strtab'][0] + 1
sh_addr
This is the address at which the section's first byte should reside in memory. Since this is a relocatable object file, this
field can safely be hardcoded to zero.
"""
def modify_section_headers():
    """
    Lay out the sections of the object file and patch their headers.

    Walks ``sections`` in order, starting the running file OFFSET right after
    the ELF header. Every header gets its sh_name (offset of the name in
    shstrtab), sh_offset (current OFFSET) and sh_addr (always zero); the
    sections this script generates also get their sh_size and sh_addralign,
    and .rel.text / .symtab get their type, flags, link and info fields.
    OFFSET then advances by the section's size, except for .bss, which takes
    no space in the file. When the loop ends OFFSET is where the section
    header table will be written.

    Names are looked up in shstrtab together with their terminating NUL, so a
    name can never resolve to the start of a longer name (for example '.data'
    inside '.data1'). '.text' still resolves to the tail of '.rel.text'.
    """
    global OFFSET, section_symbols, rodata_entries

    def name_offset(name):
        # Offset of the NUL-terminated name inside the section header string table
        return shstrtab.find(name + '\x00')

    def index_of(name):
        # Position of the first section called `name` in the list of sections
        return [pos for pos, sec in enumerate(sections) if sec.name == name][0]

    # Generated sections whose header only needs a size and an alignment:
    # name -> (callable returning sh_size, sh_addralign)
    sized_sections = {
        '.text': (lambda: len(moduleBytes), 4),
        '.strtab': (lambda: len(strtab), 1),
        '.shstrtab': (lambda: len(shstrtab), 1),
        # Size is the length of everything collected for .rodata
        '.rodata': (lambda: len(rodata_bytes), 4),
        # Size is the sum of the sizes of all data elements in the section
        '.data': (lambda: get_section_size('.data'), 4),
        '.bss': (lambda: get_section_size('.bss'), 1),
    }

    # The section data starts right after the ELF header
    OFFSET = ELFHDRSZ
    if DEBUG:
        println("----------------------------- Modifying section headers --------------------------------")
    for i, section in enumerate(sections):
        name = section.name
        header = section.header
        if DEBUG:
            println("Number of section: %d" % len(sections))
            println("Section name: %s OFFSET: %d" % (name, OFFSET))
            println("Index: %d section name: %s" % (i, name))

        if name in sized_sections:
            size_of, addralign = sized_sections[name]
            header['sh_name'] = name_offset(name)
            header['sh_size'] = size_of()
            header['sh_offset'] = OFFSET
            header['sh_addralign'] = addralign
        elif name == '.rel.text':
            header['sh_name'] = name_offset(name)
            header['sh_offset'] = OFFSET
            header['sh_type'] = 'SHT_REL'
            header['sh_flags'] = 0x40
            header['sh_size'] = NUM_RELOCS * RELOC_SIZE
            header['sh_entsize'] = RELOC_SIZE
            # Relocations link to the symbol table and apply to .text
            header['sh_link'] = index_of('.symtab')
            header['sh_info'] = index_of('.text')
            header['sh_addralign'] = 4
        elif name == '.symtab':
            header['sh_name'] = name_offset(name)
            header['sh_size'] = total_symbols * SYM_SIZE
            # sh_info is the index of the first non-local symbol: the section
            # symbols (which are local) plus the local symbols found earlier,
            # minus the rodata entries that were added to the local list.
            header['sh_info'] = len(section_symbols) + (len(symbols['LOCAL']) - rodata_entries)
            header['sh_link'] = index_of('.strtab')
            header['sh_offset'] = OFFSET
            header['sh_addralign'] = 4
        else:
            # Any other section keeps its size and other fields; only its
            # name and position change.
            if DEBUG:
                println("Section name in modifying headers: %s" % name)
                println("Section size: %d" % header['sh_size'])
            header['sh_name'] = name_offset(name)
            header['sh_offset'] = 0 if name == '' else OFFSET
        # All section headers will have addr set to zero
        header['sh_addr'] = 0

        # The bss section doesn't actually take up any space in the ELF file,
        # so it does not move the offset of the next section.
        if name != '.bss':
            OFFSET += header['sh_size']

        if DEBUG:
            if name == '.rel.text':
                println("%s size: %s Section Offset: %d Num Relocaions: %d" % (name, header['sh_size'], header['sh_offset'], NUM_RELOCS))
            elif name == '.symtab':
                println("%s size: %d Section Offset: %d Total Symbols: %d" % (name, header['sh_size'], header['sh_offset'], total_symbols))
            elif name == '.shstrtab':
                println("shstrtab: %s length: %d" % (shstrtab, len(shstrtab)))
                println("section %s size: %d Section Offset: %d" % (name, header['sh_size'], header['sh_offset']))
            else:
                if name == '.rodata':
                    println("rodata: %s length: %d" % (rodata_bytes, len(rodata_bytes)))
                println("%s size: %d Section Offset: %d" % (name, header['sh_size'], header['sh_offset']))
            println("OFFSET: %d" % OFFSET)
    if DEBUG:
        println("----------------------------- Done modifying section headers --------------------------------")
def first_pass():
    """
    First pass: gather the sections that will be modified and exported.

    Opens the program's executable (asking the user for the file when the
    script cannot obtain it), parses it with pyelftools into the global
    ``elffile``, and appends every section to ``sections`` unless its name
    contains one of the ``exclude_sections`` patterns (.init, .fini,
    .eh_frame, .stab, .debug, .noinit, ...), since those aren't necessary for
    an object file. The names of the kept sections are accumulated in the
    section header string table ``shstrtab``.

    Every name in ``required_sections`` that is still absent from shstrtab is
    then added: its name goes into shstrtab and a copy of the .text section,
    renamed, is inserted right after .text. Finally NUM_SECTIONS is set to
    the number of sections collected.
    """
    global sections, NUM_SECTIONS, shstrtab, elffile
    # getProgramFile() returns a java.io.File object, so getPath() is needed to
    # get the full path including the filename; str() turns the Java string
    # into a Python string that open() accepts.
    try:
        f = open(str(getProgramFile().getPath()), 'rb')
    except AttributeError:
        selectedFile = askFile("Select Executable File", "Open")
        println("Path: %s" % selectedFile)
        f = open(str(selectedFile.getPath()), 'rb')
    # The file is intentionally left open: the ELFFile object and its sections
    # keep a reference to this stream.
    elffile = ELFFile(f)

    # Keep every section whose name matches none of the exclusion patterns
    for sec in elffile.iter_sections():
        if any(pattern in sec.name for pattern in exclude_sections):
            continue
        # Build section header string table section excluding the NULL Section
        if sec.name != '':
            shstrtab += sec.name + '\x00'
        sections.append(sec)

    # Some required sections might not be in the binary. Let's check for those and add them
    for missing_section in required_sections:
        if missing_section in shstrtab:
            continue
        println("We're missing a section %s! Adding it in" % missing_section)
        println("s: %s" % missing_section)
        if missing_section == '.rel.text':
            # '.rel.text' also contains '.text', so one string serves both names
            shstrtab = shstrtab.replace('.text', '.rel.text')
        else:
            # Adding missing section to the section header string table
            shstrtab += missing_section + '\x00'
        # Use the .text section as the template for the new section. A full
        # deepcopy of the section would also try to copy the open file stream
        # it references, which Python 3 cannot do; only the header has to be
        # independent, so copy the section shallowly and its header deeply.
        text_index = [i for i, v in enumerate(sections) if v.name == '.text'][0]
        template = sections[text_index]
        sec = copy.copy(template)
        sec.header = copy.deepcopy(template.header)
        sec.name = missing_section
        # Insert this section into the list of sections, right after .text
        sections.insert(text_index + 1, sec)

    NUM_SECTIONS = len(sections)
    if DEBUG:
        println("Number of sections: %d" % NUM_SECTIONS)
        print("SHSTRTAB: ", shstrtab)
"""
This function writes the data of a section to the file.
@param obj_file: A file descriptor representing the object file being created
Note: the bytes of the module (moduleBytes) are not a parameter. write_section reads
them from the global that get_module_bytes sets
@param section: The section being written
"""
def _section_index(name):
    """Return the position in the global sections list of the first section called name.

    Raises IndexError when no section has that name.
    """
    for index, candidate in enumerate(sections):
        if candidate.name == name:
            return index
    raise IndexError("no section named %s" % name)


def _pack_st_info(bind, info_type):
    """Combine a symbol binding and a symbol type into the st_info value."""
    # The calculation for st_info was taken from https://refspecs.linuxfoundation.org/elf/elf.pdf
    return (bind << 4) + (info_type & 0xf)


def _write_symtab_entry(obj_file, st_name, st_value, st_size, st_info, st_other, st_shndx):
    """Write one symbol table record.

    st_name, st_value and st_size are written as 4-byte fields, st_info and st_other
    as 1-byte fields and st_shndx as a 2-byte field, in that order.
    """
    obj_file.write(to_bytes(st_name, 4, 'little'))
    obj_file.write(to_bytes(st_value, 4, 'little'))
    obj_file.write(to_bytes(st_size, 4, 'little'))
    obj_file.write(to_bytes(st_info, 1, 'little'))
    # st_other currently holds 0 and has no defined meaning
    obj_file.write(to_bytes(st_other, 1, 'little'))
    # st_shndx: which section is this symbol defined in?
    obj_file.write(to_bytes(st_shndx, 2, 'little'))


def _as_file_bytes(table):
    """Return a string table in a form that a file opened in binary mode accepts.

    Bytes-like values pass through untouched. Text is encoded as latin-1 so that
    every character becomes exactly one byte, which keeps the offsets computed with
    strtab.find() valid in the written section.
    """
    if isinstance(table, (bytes, bytearray)):
        return table
    return table.encode('latin-1', 'replace')


def _write_section_symbols(obj_file):
    """Write the symbol table entries for the names in the global section_symbols."""
    # st_info and st_shndx deliberately persist between iterations: the 'null'
    # entry is written with whatever they currently hold (zero when it comes first).
    st_info = 0
    st_shndx = 0
    for l in section_symbols:
        if l == 'null':
            if DEBUG:
                println("Writing the null section symbol")
        else:
            if DEBUG:
                println("Writing the %s section symbol" % l)
            st_info = _pack_st_info(ENUM_ST_INFO_BIND['STB_LOCAL'],
                                    ENUM_ST_INFO_TYPE['STT_SECTION'])
            st_shndx = _section_index(l)
        _write_symtab_entry(obj_file, 0, 0, 0, st_info, 0, st_shndx)


def _write_global_symbols(obj_file, global_syms):
    """Write one symbol table entry for every named symbol in global_syms."""
    bind = ENUM_ST_INFO_BIND['STB_GLOBAL']
    for sym in global_syms:
        if sym.name == '':
            continue
        st_name = strtab.find(sym.name + '\x00')
        if isInternalSymbol(sym):
            # Symbols inside the module are emitted as functions living in .text.
            # st_value is a running offset: the sum of the sizes of the internal
            # functions written so far.
            # NOTE(review): this assumes the functions are contiguous and visited in
            # address order -- confirm against how symbols['GLOBAL'] is built.
            st_value = write_section.offset
            st_info = _pack_st_info(bind, ENUM_ST_INFO_TYPE['STT_FUNC'])
            body = getFunctionAt(sym.getAddress()).getBody()
            st_size = len(getBytes(body.getMinAddress(),
                                   body.getMaxAddress().subtract(body.getMinAddress()) + 1))
            st_shndx = _section_index('.text')
        else:
            # Everything else is left undefined (section index 0, no type, no size)
            st_info = _pack_st_info(bind, ENUM_ST_INFO_TYPE['STT_NOTYPE'])
            st_shndx = 0
            st_size = 0
            st_value = 0
        if DEBUG:
            println("Global symbol name: %s value: %d" % (sym.name, st_value))
        _write_symtab_entry(obj_file, st_name, st_value, st_size, st_info, 0, st_shndx)
        write_section.offset += st_size


def _write_local_symbols(obj_file, local_syms):
    """Write one STB_LOCAL / STT_OBJECT entry for every usable item in local_syms.

    Items that raise AttributeError while being inspected are skipped.
    """
    # Running offsets of the next symbol inside .data and .bss respectively
    data_st_value = 0
    bss_st_value = 0
    bind = ENUM_ST_INFO_BIND['STB_LOCAL']
    for sym in local_syms:
        try:
            if sym.name == '':
                continue
            if DEBUG:
                println("Local Symbol name: %s" % sym.name)
                println("Local Symbol Address: %s" % str(sym.getAddress()))
            in_bss = bss_fragment.contains(sym.getAddress())
            st_info = _pack_st_info(bind, ENUM_ST_INFO_TYPE['STT_OBJECT'])
            st_shndx = _section_index('.bss' if in_bss else '.data')
            st_name = strtab.find(sym.name + '\x00')
            if DEBUG:
                println("st_name: %d" % st_name)
                println("st_shndx: %s" % st_shndx)
            st_size = getDataAt(sym.getAddress()).getLength()
        except AttributeError:
            if DEBUG:
                println("This is not a symbol")
            continue
        if in_bss:
            st_value = bss_st_value
            bss_st_value += st_size
        else:
            st_value = data_st_value
            data_st_value += st_size
        _write_symtab_entry(obj_file, st_name, st_value, st_size, st_info, 0, st_shndx)


def write_section(obj_file, section):
    """Write the contents of one section to the object file.

    What is written depends on section.name:
      .text               the global moduleBytes
      .rel.text           an offset word and an info word (4 bytes each) for every
                          entry of the global relocations dictionary
      .symtab             the section symbols followed by entries for the symbols
                          in the global symbols dictionary
      .strtab/.shstrtab   the corresponding global string table
      .rodata             the global rodata_bytes
      .data               the bytes of every symbols['LOCAL'] item that lies inside
                          data_fragment, each rounded up to a multiple of alignment
      .bss                nothing
      anything else       the data of the same-named section of elffile, unchanged

    @param obj_file: A file object, opened for binary writing, for the object file
                     being created
    @param section: The section being written; only its name attribute is used
    """
    if DEBUG:
        println("Writing section: %s!" % section.name)
    if section.name == '.text':
        obj_file.write(moduleBytes)
    elif section.name == '.rel.text':
        for rel in relocations:
            entry = relocations[rel]
            # The offset is stored as bare hex digits. An empty string is what
            # hex(0).lstrip("0x") yields, so treat it as offset 0 rather than
            # letting int('', 16) raise ValueError.
            obj_file.write(to_bytes(int(entry['offset'] or '0', 16), 4, 'little'))
            obj_file.write(to_bytes(entry['info'], 4, 'little'))
    elif section.name == '.symtab':
        # null, .text, .data, .rodata, and .bss should all have entries in the symbol table
        _write_section_symbols(obj_file)
        for s in symbols:
            if s == 'GLOBAL':
                _write_global_symbols(obj_file, symbols[s])
            else:
                _write_local_symbols(obj_file, symbols[s])
    elif section.name == '.strtab':
        # The table is assembled as text; a binary-mode file only takes bytes
        obj_file.write(_as_file_bytes(strtab))
        if DEBUG:
            println("Wrote strtab section %d bytes" % len(strtab))
    elif section.name == '.shstrtab':
        obj_file.write(_as_file_bytes(shstrtab))
        if DEBUG:
            println("Wrote shstrtab section %d bytes" % len(shstrtab))
    elif section.name == '.rodata':
        if DEBUG:
            println("Writing %d bytes" % len(rodata_bytes))
        obj_file.write(rodata_bytes)
    elif section.name == '.data':
        for sym in symbols['LOCAL']:
            if not data_fragment.contains(sym.getAddress()):
                continue
            dat = getDataAt(sym.getAddress())
            # Round the item's length up to the next multiple of alignment
            remainder = dat.getLength() % alignment
            if remainder != 0:
                size = dat.getLength() + (alignment - remainder)
            else:
                size = dat.getLength()
            if DEBUG:
                println("Symbol: %s" % sym.getName())
                println("Symbol Address: %s" % sym.getAddress())
                println("Data Address: %s" % dat.getAddress())
                println("Writing %d bytes" % size)
                println("Data Type: %s" % dat.getDataType().getDisplayName())
                println("TESTING: %s" % dat.getValue())
                println("Data Bytes: %s" % getBytes(dat.getAddress(), dat.getLength()))
            # NOTE(review): when size exceeds dat.getLength() the extra bytes are
            # whatever follows the item in program memory, not zero padding, and the
            # .symtab entries advance by the unpadded length -- confirm this is intended.
            obj_file.write(getBytes(dat.getAddress(), size))
    elif section.name == '.bss':
        # The .bss section doesn't actually take up any bytes in the object file so no need to write anything :)
        pass
    else:
        payload = elffile.get_section_by_name(section.name).data()
        obj_file.write(payload)
        if DEBUG:
            println("Wrote %d amount" % len(payload))
"""
This function is a wrapper that calls the other functions necessary for unlinking the object file.
The function creates the name of the resulting unlinked object file and opens the current program executable
so pyelftools can process it. Pyelftools is used to grab the sections in the executable so they can
be modified by other functions. The actual data is modified and written by other helper functions. The object
is created by first gathering all sections we'll want to modify and export, writing the ELF header, writing the
individual sections, and finally, writing the section headers. This is a bit different than how a compiler would
structure an object file, but the order does not matter so long as the linker knows where to find everything. In
other words, we have to make sure the offsets in the ELF and section headers are accurate. Because of this constraint,
it was easiest to create the file in this manner because each section being written can be used by a later portion
being written.
This function takes no parameters. The bytes of the module selected by the user are taken from the global moduleBytes when the .text section is written.
"""
def unlink_object_file():
    """Create <output_arg>/<moduleName>_unlinked.o and write the unlinked object into it.

    The file is produced in this order: the section headers are modified, the ELF
    header is written, every entry of the global sections list is written with
    write_section, and finally a section header is written for every entry.

    Takes no parameters; the output directory, module name and section list are read
    from the globals output_arg, moduleName and sections.
    """
    # The resulting object file
    # TODO: Update to use Ghidra API to get filename
    fname = output_arg + '/' + moduleName + '_unlinked.o'
    println("File name: %s" % fname)
    # The with block guarantees the file is closed even when one of the writers
    # below raises, so a failed run never leaks the handle.
    with open(fname, 'wb') as obj_file:
        # Modify the section headers
        modify_section_headers()
        # The section headers have been modified, let's write the ELF headers to a file
        write_elf_header(obj_file)
        # Let's now write the sections
        for sec in sections:
            write_section(obj_file, sec)
        # Finally let's write the section headers
        for sec in sections:
            write_section_headers(obj_file, sec)
    # The unlinked object file is now complete and closed
"""
This function updates the module bytes at the given offset with the specific bytes
"""
def update_module_bytes(offset, newBytes):
    """Overwrite part of the global moduleBytes buffer in place.

    @param offset: Index into moduleBytes at which the first byte of newBytes is stored
    @param newBytes: The replacement bytes. The callers visible in this file pass
                     four bytes (one 32-bit slot)
    """
    global moduleBytes
    # Take the width of the target slice from the replacement itself instead of a
    # hard-coded 4. With a fixed width, a replacement of any other length would make
    # a resizable buffer grow or shrink and shift every byte after it.
    moduleBytes[offset:offset + len(newBytes)] = newBytes
"""
This function grabs the text section for the module by getting the bytes from minModuleAddress through maxModuleAddress (inclusive) and stores them in the global moduleBytes
"""
def get_module_bytes():
    """Read the selected module's bytes from program memory into the global moduleBytes.

    The range read starts at minModuleAddress and is (maxModuleAddress -
    minModuleAddress) + 1 bytes long. If the read raises MemoryAccessException the
    problem is reported and moduleBytes is not assigned by this call.
    """
    global moduleBytes
    # One is added because just subtracting the two addresses was found to cut off
    # the final byte, i.e. maxModuleAddress itself belongs to the module.
    span = maxModuleAddress.subtract(minModuleAddress) + 1
    try:
        moduleBytes = getBytes(minModuleAddress, span)
    except MemoryAccessException as e:
        # The exception has to be bound to a name here; without "as e" the stack
        # trace call below raised NameError and hid the real memory error.
        println("Problem getting bytes within module... ")
        e.printStackTrace()
"""
This function places all of the relocations in a relocation dictionary. Additionally, it modifies
the original bytes such that they are consistent with what a normal compiler would do. For example,
for external function calls, the compiler places the following bytes at the address where the
external function was called: FF FF FE EB, where EB is the call instruction and the other three bytes
are dummy values. The string table consists of all the symbol names (i.e. function/variable names).
This "table" is constructed here and later used for writing the strtab section.
"""
def _elf32_r_info(sym_index, reloc_type):
    """Pack a symbol table index and a relocation type into a relocation info word.

    The relocation type occupies the low byte and the symbol index the bits above
    it, e.g. index 4 with type 2 gives 0x402.
    """
    return (sym_index << 8) | (reloc_type & 0xff)


def _reloc_offset_hex(offset):
    """Return offset as bare hexadecimal digits, e.g. 0 -> '0' and 28 -> '1c'.

    str.lstrip("0x") must not be used for this: it strips every leading '0' and 'x'
    character, so hex(0) would be reduced to an empty string.
    """
    return '%x' % int(offset)


def get_relocations():
    """Collect the relocations for the selected module and patch the module bytes.

    For every item in the global symbols dictionary that is referenced from inside
    the module, an entry {'name', 'offset', 'addr', 'info'} is stored in the global
    relocations dictionary under the key "relocation<N>", and the four bytes at the
    referencing offset are overwritten through update_module_bytes.

    Besides relocations this updates the globals NUM_RELOCS, total_symbols and
    rodata_bytes, and turns the global strtab from a list of symbol names into the
    NUL-separated string that is later written as the .strtab section.
    """
    global relocations, NUM_RELOCS, sections, strtab, moduleBytes, total_symbols, rodata_bytes
    bss_offset = 0
    data_offset = 0
    rodata_offset = 0
    # Placeholder written over the four bytes of a call to a function symbol
    newBytes = b'\xff\xff\xfe\xeb'
    boundary1 = minModuleAddress
    # Looked up once, before the loops: the rodata branch needs data_reloc even when
    # no non-rodata symbol has been processed yet.
    func_call = ENUM_RELOC_TYPE_ARM['R_ARM_CALL']
    data_reloc = ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']
    # Python 3 has no unicode builtin; fall back to str alone there
    try:
        text_types = (unicode, str)
    except NameError:
        text_types = (str,)
    # This attempts to gather most relocations.
    # Iterate through the dictionary of symbols
    for lst in symbols.values():
        # Iterate through each symbol
        for sym in lst:
            if DEBUG:
                println("-----------------------------------")
            if not rodata_fragment.contains(sym.getAddress()):
                strtab.append(sym.getName())
                if DEBUG:
                    println("Not a rodata symbol Symbol Name: %s" % sym.getName())
                symbol_type = sym.getSymbolType().toString()
                # Does this symbol have any references
                if sym.hasReferences() or sym.hasMultipleReferences():
                    symRef = sym.getReferences()
                    if DEBUG:
                        println("Symbol has references...")
                    # Iterate through each symbols references
                    for ref in symRef:
                        if ref.getFromAddress().toString() == 'Entry Point':
                            continue
                        # Is the reference within the boundary? If so, a relocation should occur
                        if not isInternalSymbol(ref):
                            continue
                        if DEBUG:
                            println("Reference is within module boundary")
                        # Here, the offset is the offset the relocation should occur
                        # That is, the address where the symbol is referenced minus
                        # the lower bound of the module
                        offset = ref.getFromAddress().subtract(boundary1)
                        # key for relocation will be relocation<NUM>
                        key = "relocation" + str(NUM_RELOCS)
                        # The info field in the relocation table is composed of the index into
                        # the symbol table followed by the type of relocation (i.e. R_ARM_CALL)
                        # TODO: modify this so that it checks what type of symbol this is
                        # The type of symbol will change the info field since this will be a
                        # different type of relocation
                        if symbol_type == 'Function':
                            if DEBUG:
                                println("Symbol is a function")
                            NUM_RELOCS += 1
                            fresh_info = _elf32_r_info(total_symbols, func_call)
                        else:
                            # The symbol is not a function
                            if DEBUG:
                                println("Symbol is not a function")
                            if ref.getReferenceType().toString() != 'DATA':
                                continue
                            NUM_RELOCS += 1
                            dat = getDataAt(sym.getAddress())
                            if DEBUG:
                                println("Symbol: %s Address: %s" % (sym.getName(), sym.getAddress()))
                                println("Value: %s Length: %s" % (dat.getValue(), dat.getLength()))
                            # 4 and 3 are hard-coded symbol table indices
                            # NOTE(review): presumably those of the .bss and .data
                            # section symbols -- confirm against section_symbols.
                            if bss_fragment.contains(sym.getAddress()):
                                fresh_info = _elf32_r_info(4, data_reloc)
                            else:
                                fresh_info = _elf32_r_info(3, data_reloc)
                            if DEBUG:
                                println("Info bss or data: %s" % ('%x' % fresh_info))
                        # If the symbol already exists set the info field to match the other
                        # relocation entry for the same symbol
                        # NOTE(review): as written, only the last existing entry decides the
                        # outcome, because every non-matching entry resets info to the fresh
                        # value. Kept as is; confirm whether the first match should win.
                        info = fresh_info
                        for d in relocations.values():
                            if d['name'] == sym.getName():
                                info = d['info']
                            else:
                                info = fresh_info
                        relocations[key] = {'name': sym.getName(), 'offset': _reloc_offset_hex(offset), 'addr': ref.getFromAddress(), 'info': info}
                        # Modify the bytes at the offset
                        if symbol_type == 'Function':
                            update_module_bytes(offset, newBytes)
                        else:
                            if bss_fragment.contains(sym.getAddress()):
                                update_module_bytes(offset, to_bytes(bss_offset, 4, 'little'))
                                bss_offset += 4
                            else:
                                update_module_bytes(offset, to_bytes(data_offset, 4, 'little'))
                                data_offset += 4
                total_symbols += 1
            # This is the case where we have to perform a relocation on the rodata section.
            # These aren't symbols like the other elements in the symbols dictionary so we
            # have to treat them a little differently
            else:
                if DEBUG:
                    println("This is in the rodata section")
                refs = sym.getReferenceIteratorTo()
                for ref in refs:
                    if not isInternalSymbol(ref):
                        continue
                    if ref.getReferenceType().toString() != 'DATA':
                        continue
                    # This is a string that's too small to be recognized by Ghidra and the
                    # analyst didn't manually type it as a string
                    short_str = False
                    if 'undefined' in sym.getDataType().getDisplayName():
                        short_str = True
                        # Collect the bytes from the item's address up to, but not
                        # including, the first zero byte
                        str_array = bytearray()
                        str_array.append(getByte(sym.getAddress()))
                        n = sym.getAddress().next()
                        while getByte(n) != 0:
                            str_array.append(getByte(n))
                            n = n.next()
                        if DEBUG:
                            println("str_array: %s" % str_array)
                        rodata_bytes += str_array
                    offset = ref.getFromAddress().subtract(boundary1)
                    # The original bytes written here is the offset into the rodata section
                    # The offset is updated later on
                    update_module_bytes(offset, to_bytes(rodata_offset, 4, 'little'))
                    # Would like to get away from hard coding this symbol table index
                    info = _elf32_r_info(2, data_reloc)
                    if short_str:
                        key = "relocation" + str(NUM_RELOCS)
                        NUM_RELOCS += 1
                        relocations[key] = {'name': str_array, 'offset': _reloc_offset_hex(offset), 'addr': ref.getFromAddress(), 'info': info}
                        rodata_offset += len(str_array)
                    elif isinstance(sym.getValue(), text_types):
                        key = "relocation" + str(NUM_RELOCS)
                        NUM_RELOCS += 1
                        relocations[key] = {'name': sym.getValue(), 'offset': _reloc_offset_hex(offset), 'addr': ref.getFromAddress(), 'info': info}
                        # One is added to the length to include the null byte at the end of
                        # the string
                        newString = getBytes(sym.getAddress(), len(sym.getValue()) + 1)
                        if DEBUG:
                            println("newString Data: %s" % newString)
                            println("newString length BEFORE: %d" % len(newString))
                        if len(newString) % alignment != 0:
                            if DEBUG:
                                println("This string is not 4 byte aligned. Need to add some bytes")
                            diff = len(newString) % alignment
                            padding = alignment - diff
                            pad = to_bytes(0, padding, 'little')
                            if DEBUG:
                                println("Length of pad: %d" % len(pad))
                                println("Pad bytes: %s" % pad)
                                println("Type of element: %s" % type(newString[0]))
                                println("element: %s" % newString[0])
                            # Iterating bytes yields ints on Python 3 and one-character
                            # strings on Python 2; accept both
                            for s in pad:
                                newString.append(s if isinstance(s, int) else ord(s))
                        if DEBUG:
                            println("newString length AFTER: %d" % len(newString))
                            println("newString: %s" % newString)
                        rodata_bytes += (newString)
                        if DEBUG:
                            println("rodata_bytes: %s" % rodata_bytes)
                        rodata_offset += len(newString)
                    else:
                        if DEBUG:
                            println("Address: %s" % sym.getAddress())
                            println("Value: %s" % sym.getValue())
            if DEBUG:
                println("-----------------------------------")
    if DEBUG:
        println("----------------------------------------------------------------------------------------------------------------------")
        print(relocations)
        println("----------------------------------------------------------------------------------------------------------------------")
    # From here on strtab is the finished table: a leading NUL, then every collected
    # name followed by a NUL
    strtab = '\x00' + '\x00'.join(strtab) + '\x00'
    if DEBUG:
        print("Strtab: " , strtab)
# get script args
args = getScriptArgs()

# First script argument: the value handed to SelectModule (empty when absent)
if len(args) > 0:
    module_arg = args[0]
else:
    module_arg = ""

# Second script argument: the directory the object file is written to. When it is
# absent, the directory containing this script's source file is used instead.
if len(args) > 1:
    output_arg = args[1]
else:
    output_arg = os.path.dirname(parseFile(getSourceFile().getCanonicalPath()).getPath())

print("ELF Output: %s %s" % (module_arg, output_arg))
# A third argument (an executable path) is not consumed yet.

# Running offset that write_section hands out as st_value for the module's
# internal global symbols; it has to start at zero.
write_section.offset = 0

if SelectModule(module_arg):
    # Perform first pass of ELF file to gather all the sections we'll modify and export.
    first_pass()
    # The remaining stages run in this fixed order
    get_symbols()
    get_module_bytes()
    get_relocations()
    unlink_object_file()