From 8880a668cf4d4e188b7f8ea6eba50bad4aca852e Mon Sep 17 00:00:00 2001 From: Maddie Stone Date: Sun, 18 Jun 2017 11:08:28 -0400 Subject: [PATCH] Initial Upload --- data_offset_calc.py | 121 +++++++++++++++++++++++++++++++++ define_code_functions.py | 92 +++++++++++++++++++++++++ define_data_as_types.py | 63 +++++++++++++++++ find_mem_accesses.py | 88 ++++++++++++++++++++++++ identify_port_use_locations.py | 73 ++++++++++++++++++++ label_funcs_with_no_xrefs.py | 78 +++++++++++++++++++++ make_strings.py | 67 ++++++++++++++++++ 7 files changed, 582 insertions(+) create mode 100644 data_offset_calc.py create mode 100644 define_code_functions.py create mode 100644 define_data_as_types.py create mode 100644 find_mem_accesses.py create mode 100644 identify_port_use_locations.py create mode 100644 label_funcs_with_no_xrefs.py create mode 100644 make_strings.py diff --git a/data_offset_calc.py b/data_offset_calc.py new file mode 100644 index 0000000..fd5b2c5 --- /dev/null +++ b/data_offset_calc.py @@ -0,0 +1,121 @@ +############################################################################################## +# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. + +############################################################################################## +# data_offset_calc.py +# Resolves the references to indirect offsets of a variable, register, or memory location +# whose value is known. Changes the display of the operand in the instruction (OpAlt function), +# creates a data cross references (add_dref), and creates a comment of the resolved address +# (MakeComment). User nees to define the following: +# offset_var_string: The string representation of the variable, register, or memory +# location to be replaced by the resolved value +# offset_var_value: The value of the variable defined in offset_var_string +# reg_ex_indirect: A regular expression of how indirect offset accesses to the variable +# reg_ex_immediate: A regular expression of how the immediate offset value is represented +# new_opnd_display: A string representation of how the calculated and resolved +# value should be displayed as the operand in the instruction +# +# Inputs: start_addr: Start address for segment to define as data +# end_addr: End address for segment to define as data +# +############################################################################################## +import re + + +################### USER DEFINED VALUES ################### +# String of the variable/register/location used as the indirect variable +offset_var_str = "fp" + +# The defined offset_var_str's value +offset_var_value = 0x808000 + +# Regular expression for out offset_var_str is referenced indirectly in the IDA Disassembly +# @(-0x(1-8 hex chars), fp ) +reg_ex_indirect = re.compile(r"@\(-?0x[0-9A-Fa-f]{1,8}, "+ offset_var_str +"\)") + +# Regular expression for how immediate values are shown in the indirect reference +# For this example, it's 0x1044, but some architectures would show that as 1044h +regex_immediate = re.compile(r"0x[0-9A-Fa-f]{1,8}") + +# String expression for how the newly calculated instruction should be displayed within the instruction +new_opnd_display = '@[0x%x]' + +# OPTIONAL ---- EXAMPLE FOR ADDING OTHER INSTRUCTIONS TO THE PROCESSING +# If you'd like to add other instructions to be processed for resolving indirect offset accesses, +# update the regular expression here and use it as shown in the "else" block below +reg_ex_add3 = re.compile(r"add3 \w\w, fp, #-?0x[0-9A-Fa-f]{1,8}") +############################################################# + +start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be analyzed.") +end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be analyzed.") + +if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr): + print "[data_offset_calc.py] STARTING. Looking for indirect accesses across 0x%x to 0x%x" % (start_addr, end_addr) + curr_addr = start_addr; + while curr_addr < end_addr: + operand = GetOpnd(curr_addr, 1) # Operand = 2nd Operand in the Instruction at curr_addr + if reg_ex_indirect.match(operand): + print ('[data_offset_calc.py] 0x%x Operand: ' % curr_addr) + operand + + # This checks if there are any immediate values also in the 2nd operand with the variable. For example, mov R3, @(0x10, fp) + offset = re.findall(regex_immediate, operand) + if (offset): + print "[data_offset_calc.py] 0x%x Offset: 0x%x" % (curr_addr, int(offset[0],16)) + + # Check if Immediate Operand is Neg or Pos + if '-' in operand : + new_opnd = offset_var_value - int(offset[0], 16) + else: + new_opnd = offset_var_value + int(offset[0], 16) + + print ("[data_offset_calc.py] 0x%x: Offset + " + offset_var_str + " = 0x%0x") % (curr_addr, new_opnd) + OpAlt(curr_addr, 1, new_opnd_display % new_opnd) # Changes Display of Instruction + result = add_dref(curr_addr, new_opnd, dr_T) # Create Data Ref -- Using dref_T because not checking if read or write + print ("[data_offset_calc.py] Creating dref from 0x%x to 0x%x: " % (curr_addr, new_opnd)) + str(result) + # Using dr_O (O as in Offset, not 0) because we are not check if this a "write" or "read" + else: + print "[data_offset_calc.py] 0x%x: No immediate offset identified." % curr_addr + + ##################################################################################### + # This block is optional but shows how to add additional regular expressions for other instructions + # you'd like to match besides the general indirect offset acceses. For M32R we are also matching + # the add3 instruction that take the form "add3 Reg, fp, 0xNUM" + + else: + instruct = GetDisasm(curr_addr) + if reg_ex_add3.match(instruct): + print ('[data_offset_calc.py] 0x%08x Instruct: ' % curr_addr) + instruct + immed_opnd = GetOpnd(curr_addr, 2) # Getting the 3rd Operand Based on the reg_ex_add3 defined above + offset = re.findall(regex_immediate, immed_opnd); + if offset: + if '-' in immed_opnd: + new_opnd = offset_var_value - int(offset[0], 16) + else: + new_opnd = offset_var_value + int(offset[0], 16) + print '[data_offset_calc.py] 0x%x: Offset + fp = 0x%08x' % (curr_addr, new_opnd) + MakeComm(curr_addr, '0x%08x' % new_opnd) # Add comment with new operand instead of overwriting instruction as done above + result = add_dref(curr_addr, new_opnd, dr_T) # Creates Data XREF from Instruct to Calculated Val + print ("[data_offset_calc.py] Creating dref from 0x%x to 0x%x: " % (curr_addr, new_opnd)) + str(result) + else: + print "[data_offset_calc.py] 0x%x: No immediate offset identified." % curr_addr + ######################################################################################## + prev = curr_addr + curr_addr = NextHead(curr_addr, 0xFFFFF) + if (curr_addr == BADADDR): + print "[data_offset_calc.py] EXITING." + break +else: + print "[data_offset_calc.py] QUITTING. Invalid values entered for starting and ending addresses." diff --git a/define_code_functions.py b/define_code_functions.py new file mode 100644 index 0000000..6d3ccee --- /dev/null +++ b/define_code_functions.py @@ -0,0 +1,92 @@ +############################################################################################## +# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. + +############################################################################################## +# define_code_functions.py +# Attempts to define the bytes in the user-entered address range as code and then as functions +# based on the user-define smart_prolog and smart_epilog regular expressions for that architecture. +# +# Inputs: start_addr: Start address for segment to define as data +# end_addr: End address for segment to define as data +# data_type: Type of data to set segment to (dependent on architecture) +# +############################################################################################## +import re + +################### USER DEFINED VALUES ################### +# Enter a regular expression for how this architecture usually begins and ends functions. +# If the architecture does not dictate how to start or end a function use r".*" to allow +# for any instruction + +# 8051 Architecture Prologue and Epilogue +smart_prolog = re.compile(r".*") +smart_epilog = re.compile(r"reti{0,1}") + +# PIC18 Architecture Prologue and Epilogue +#smart_prolog = re.compile(r".*") +#smart_epilog = re.compile(r"return 0") + +# Mitsubishi M32R Architecutre Prologue and Epilogue +#smart_prolog = re.compile(r"push +lr") +#smart_epilog = re.compile(r"jmp +lr.*") + +# Texas Instruments TMS320C28x +#smart_prolog = re.compile(r".*") +#smart_epilog = re.compile(r"lretr") + +# AVR +#smart_prolog = re.compile(r"push +r") +#smart_epilog = re.compile(r"reti{0,1}") +############################################################ + +start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be defined.") +end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be defined.") + +if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR)): + do_make_unk = AskYN(0, "Do you want to make all of the code block UNKNOWN first?") + if (do_make_unk == 1): + curr_addr = start_addr + while (curr_addr < end_addr): + MakeUnkn(curr_addr,idc.DOUNK_SIMPLE) + curr_addr += 1 + if (do_make_unk != -1): + curr_addr = start_addr + print "[make_code_functions.py] Running script to define code and functions on 0x%x to 0x%x" % (start_addr, end_addr) + while (curr_addr < end_addr): + next_unexplored = FindUnexplored(curr_addr, idaapi.BIN_SEARCH_FORWARD) + MakeCode(next_unexplored) # We don't care whether it succeeds or fails so not storing retval + curr_addr = next_unexplored + + # Finished attempting to make all unexplored bytes into code + # Now, attempt to create functions of all code not currently in a function + print "[make_code_functions.py] Completed attempting to define bytes as code. Now trying to define functions." + curr_addr = start_addr + while (curr_addr != BADADDR and curr_addr < end_addr): + if (isCode(GetFlags(curr_addr)) and GetFunctionAttr(curr_addr, FUNCATTR_START) == BADADDR): + #print "Function Stuffs 0x%0x" % curr_addr + if(smart_prolog.match(GetDisasm(curr_addr)) or smart_epilog.match(GetDisasm(PrevHead(curr_addr)))): + #print "Smart Prolog match" + if (MakeFunction(curr_addr) != 0): + # MakeFunction(curr_addr) was successful so set curr_addr to next addr after the new function + curr_addr = GetFunctionAttr(curr_addr, FUNCATTR_END) # Returns first address AFTER the end of the function + continue + curr_addr = NextHead(curr_addr) +else: + print "[make_code_functions.py] Quitting. Entered address values are not valid." + + + + diff --git a/define_data_as_types.py b/define_data_as_types.py new file mode 100644 index 0000000..db9b56b --- /dev/null +++ b/define_data_as_types.py @@ -0,0 +1,63 @@ +############################################################################################## +# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. + +############################################################################################## +# define_data_as_types.py +# Defines a segment of addresses as the user-specified data type (byte, word, or double word). +# The byte length for each of these types is architecture dependent, but generally: +# 1 byte = Byte +# 2 bytes = Word +# 4 bytes = Double Word +# This script with undefine all bytes in the range first which means if you previously had +# code or strings defined in the area, they will be overwritten as data. +# +# Inputs: start_addr: Start address for segment to define as data +# end_addr: End address for segment to define as data +# data_type: Type of data to set segment to (dependent on architecture) +# +############################################################################################## + +def define_as_data_by_size_for_block(start_addr, end_addr, data_size): + curr_addr = start_addr; + while (curr_addr < end_addr): + if (data_size == 1): + MakeByte(curr_addr) + elif (data_size == 2): + MakeWord(curr_addr) + elif (data_size == 4): + MakeDword(curr_addr) + else: + Warning("Invalid data_size. Breaking.") + break; + curr_addr += data_size + +start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be defined.") +end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be defined.") + +if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr): + data_size = AskLong(1, "Enter the size of each data item to be defined in the address block.\nExample: '1' for byte, '2' for word, '4' for dword\nNote the exact implementation will be dependent on architecture.") + if (data_size == 1 or data_size == 2 or data_size == 4): + print ("[define_data_as_types.py] STARTING. start_addr: 0x%X, end_addr: 0x%X, data_size: %d" % (start_addr, end_addr, data_size)) + MakeUnknown(start_addr, (end_addr - start_addr), DOUNK_SIMPLE) + print "[define_data_as_types.py] Undefined all data between 0x%X and 0x%0X" % (start_addr, end_addr) + print "[define_data_as_types.py] Defining all data as size " + str(data_size) + define_as_data_by_size_for_block(start_addr, end_addr, data_size) + print "[define_data_as_types.py] FINISHED." + else: + Warning("[define_data_as_types.py] You entered a size of %d bytes. Please enter 1 (byte), 2 (short/word), 4(long, dword)"); + +else: + print "[define_data_as_types.py] ERROR. Please enter valid address values." \ No newline at end of file diff --git a/find_mem_accesses.py b/find_mem_accesses.py new file mode 100644 index 0000000..ed4c82a --- /dev/null +++ b/find_mem_accesses.py @@ -0,0 +1,88 @@ +############################################################################################## +# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. + +############################################################################################## +# find_mem_accesses.py +# +# Identifies the memory accesses used in the code. When a memory access is identified based +# on the user contributed regular expression, this script completes three different actions +# to help with the static analysis: +# 1. A cross reference is created between the instruction and the memory address. This +# will fail if the address doesn't currently exist because the segment was not created. +# 2. The value at the memory address is retrieved and added as a comment to the +# referencing instruction. +# 3. A dictionary of all of the memory addresses accessed and the referencing instructions' +# addresses are printed and saved to a file. +# ** NOTE:If you are using a Harvard architecture, ensure you can distinguish between memory +# spaces or comment out the cross-reference and value parts of this script. +# +# Inputs: start_addr: Start address for segment to define as data +# end_addr: End address for segment to define as data +# file_name: File to write the accesses to +# +############################################################################################## +import re + +################### USER DEFINED VALUES ################### +# Enter a regular expression for the memory access instructions you'd like to identify. +# Also enter the index of the operand in the instruction so that it can be retrieved via +# the GetOperandValue() function. +# +# 8051 (movx DPTR, #addr) +regex_mem_instruct = re.compile(r"mov +DPTR, #") +operand_index = 1 +############################################################ + + +start_addr = AskAddr(MinEA(), "Please enter the starting address for the code to be analyzed.") +end_addr = AskAddr(MaxEA(), "Please enter the ending address for the code to be analyzed.") + +default_fn = "memory_use_locations.txt" +filename = AskFile(1, default_fn, "Please choose the location to save the memory accesses file.") + +accesses_dict = {} + +if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr): + curr_addr = start_addr + while (curr_addr < end_addr): + if (regex_mem_instruct.match(GetDisasm(curr_addr))): + #mem_addr = regex_mem_addr.match(GetDisasm(curr_addr)) + mem_addr = GetOperandValue(curr_addr, operand_index) + print "[find_mem_accesses.py] Instruction Address: 0x%x Operand Address: 0x%0x" % (curr_addr, mem_addr) + # Create Cross-Reference to Address + result = add_dref(curr_addr, mem_addr, dr_T) + if (not result): + print "[find_mem_accesses.py] Could NOT create data cross-references." + else: + # Try to Get Value at Memory Address and Record at Reference + # Defaulting to WORD (2 bytes) can change or add other intelligence here + value = Word(mem_addr) + MakeComm(curr_addr, "@[0x%x] = 0x%x" % (mem_addr, value)) + if (mem_addr in accesses_dict): + accesses_dict[mem_addr].append(curr_addr) + else: + accesses_dict[mem_addr] = [curr_addr, ] + curr_addr = NextHead(curr_addr) + print "[find_mem_accesses.py] Finished searching range. Writing to file." + with open(filename, "w") as out_file: + for key in sorted(accesses_dict.keys()): + out_file.write("0x%0x: \n" % key) + for ref in accesses_dict[key]: + out_file.write("\t0x%0x \n" % ref) +else: + print "[find_mem_accesses.py] ERROR. Please enter valid addresses." + + \ No newline at end of file diff --git a/identify_port_use_locations.py b/identify_port_use_locations.py new file mode 100644 index 0000000..57be172 --- /dev/null +++ b/identify_port_use_locations.py @@ -0,0 +1,73 @@ +############################################################################################## +# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. + +############################################################################################## +# identify_ port_use_locations.py +# Identifies all code using the CPU's ports and records the address and instruction +# in the identified file. +# +# User-Defined Input: +# ** Before use, edit the regex_pinref regular expression to match how the ports are displayed +# in instructions for your architecture. +# +############################################################################################## + + +################### USER DEFINED VALUES ################### +# PIC18F8722 +regex_pinref = re.compile(r" PORT[A-H]") + +# 87C52 (8051) - Ports referenced as FSR_80 (P0)...FSR_B0 (P3) +#regex_pinref = re.compile(r" FSR_[8-9A-Ba-b]0.?[0-7]?") + +# C515 (8051) - Ports referened as P1 or P1_8 +#regex_pinref = re.compile(r" P\d+\_?\d+") + +# M32R +#regex_pinref = re.compile(r" +############################################################ + + + +start_addr = AskAddr(MinEA(), "Please enter the starting address for the code to be analyzed.") +end_addr = AskAddr(MaxEA(), "Please enter the ending address for the code to be analyzed.") + +default_fn = "port_use_locations.txt" +filename = AskFile(1, default_fn, "Please choose the location to save the port use locations file.") + +change_func_nm = AskYN(0, "Would you like to append a prefix to the names of funcs using ports?") + +curr_addr = start_addr +func_name_out = False +with open(filename, "w") as out_file: + while curr_addr < end_addr: + if (isCode(GetFlags(curr_addr))): + instruct = GetDisasm(curr_addr); + #print ("0x%08x: " % curr_addr) + instruct + if regex_pinref.search(instruct): + out_file.write(("0x%08x: " % curr_addr) + instruct); + print ("0x%08x: " % curr_addr) + instruct + if (change_func_nm == 1): + func_start_addr = GetFunctionAttr(curr_addr, FUNCATTR_START) + if (func_start_addr != BADADDR): + curr_name = GetFunctionName(curr_addr) + if (curr_name != "" and not curr_name.startswith("pin")): + port_nums = regex_pinref.findall(instruct) + name = "pin" + port_nums[0] + "Used_" + curr_name + MakeName(func_start_addr, name) + curr_addr = NextHead(curr_addr) + + \ No newline at end of file diff --git a/label_funcs_with_no_xrefs.py b/label_funcs_with_no_xrefs.py new file mode 100644 index 0000000..04d4958 --- /dev/null +++ b/label_funcs_with_no_xrefs.py @@ -0,0 +1,78 @@ +############################################################################################## +# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. + +############################################################################################## +# label_funcs_with_no_xrefs.py +# This script checks each defined function in the address range entered for cross-references. +# If there are no cross-references to the function, the prefix "noXrefs_" is added to the +# function's name. It then iterates through all functions in the code range again to identify +# all functions who's only code references are functions that have no cross-references. This +# is to detected functions called only by other functions who have no code references. +# This script helps to detect "dead code" that is never called. +# +# Inputs: start_addr: Start address for segment to define as data +# end_addr: End address for segment to define as data +# +############################################################################################## + +################### USER DEFINED VALUES ################### +# None needed. +########################################################### + +def addPrefixToFunctionName(prefix, functionAddr): + name = GetFunctionName(curr_addr) + if (name and not name.startswith(prefix)): + name = prefix + name + print ("[label_funcs_with_no_xrefs.py] Function 0x%x Name: " % curr_addr) + name + MakeName(curr_addr, name) + +start_addr = AskAddr(MinEA(), "Please enter the starting address for the functions to be examined.") +end_addr = AskAddr(MaxEA(), "Please enter the ending address for the functions to be examined.") + +if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr): + print "[label_funcs_with_no_xrefs.py] Running on addresses 0x%x to 0x%x" % (start_addr, end_addr) + + # If start_addr is in a function, get the starting address of that function. Else, returns -1. + curr_addr = GetFunctionAttr(start_addr, FUNCATTR_START) # Get the function head for the "start" addr + if (curr_addr == BADADDR): + # start_addr is not currently in a function so select the beginning of the next function + curr_addr = NextFunction(start_addr) + + # Using this to continually iterate through all functions until no new functions + # having no code reference paths are found. + new_noXrefs_found = False + while (curr_addr != BADADDR and curr_addr < end_addr): + if (not GetFunctionName(curr_addr).startwith("noXrefs_")): + xrefs = XrefsTo(curr_addr) + has_valid_xref = False; + for x in xrefs: + if (not GetFunctionName(x.frm).startswith("noXrefs_")): + # Function has a valid cross-reference and is not "dead code" + has_valid_xref = True; + break; + if (has_valid_xref == False): + # No valid xrefs were found to this function + new_noXrefs_found = True + addPrefixToFunctionName("noXrefs_", curr_addr) + curr_addr = NextFunction(curr_addr) + if ((curr_addr == BADADDR or curr_addr >= end_addr) and new_noXrefs_found): + print "[label_funcs_with_no_xrefs.py] Iterating through range again because new functions with no Xrefs found." + curr_addr = start_addr + new_noXrefs_found = False + + print "[label_funcs_with_no_xrefs.py] FINISHED." +else: + print "[label_funcs_with_no_xrefs.py] QUITTING. Invalid address(es) entered." diff --git a/make_strings.py b/make_strings.py new file mode 100644 index 0000000..97f1094 --- /dev/null +++ b/make_strings.py @@ -0,0 +1,67 @@ +############################################################################################## +# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC +# All rights reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. + +############################################################################################## +# make_strings.py +# Searches the user entered address range for a series of ASCII bytes to define as strings. +# If the continuous series of ASCII bytes has a length greater or equal to minimum_length and +# ends with a character in string_end, the scripts undefines the bytes in the series +# and attempts to define it as a string. +# +# Input: start_addr: Start address for range to search for strings +# end_addr: End address for range to search for strings +# +############################################################################################## + +################### USER DEFINED VALUES ################### +min_length = 5 # Minimum number of characters needed to define a string +string_end = [0x00] # Possible "ending characters" for strings. A string will not be + # defined if it does not end with one of these characters +########################################################### + +start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be analyzed.") +end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be analyzed.") + +if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr): + string_start = start_addr + print "[make_strings.py] STARTING. Attempting to make strings with a minimum length of %d on data in range 0x%x to 0x%x" % (min_length, start_addr, end_addr) + num_strings = 0; + while string_start < end_addr: + num_chars = 0 + curr_addr = string_start + while curr_addr < end_addr: + byte = Byte(curr_addr) + if ((byte < 0x7F and byte > 0x1F) or byte in (0x9, 0xD, 0xA)): # Determine if a byte is a "character" based on this ASCII range + num_chars += 1 + curr_addr += 1 + else: + if ((byte in string_end) and (num_chars >= min_length)): + MakeUnknown(string_start, curr_addr - string_start, DOUNK_SIMPLE) + if (MakeStr(string_start, curr_addr) == 1): + print "[make_strings.py] String created at 0x%x to 0x%x" % (string_start, curr_addr) + num_strings += 1 + string_start = curr_addr + break + else: + print "[make_strings.py] String create FAILED at 0x%x to 0x%x" % (string_start, curr_addr) + break + else: + # String does not end with one of the defined "ending characters", does not meet the minimum string length, or is not an ASCII character + break + string_start += 1 + print "[make_strings.py] FINISHED. Created %d strings in range 0x%x to 0x%x" % (num_strings, start_addr, end_addr) +else: + print "[make_strings.py] QUITTING. Entered address values not valid." \ No newline at end of file