mirror of
https://github.com/JHUAPL/CodeCut.git
synced 2026-01-09 21:37:57 -05:00
Sometimes the executable is in a non-writable directory, but the IDB is always in a writable one.
366 lines
12 KiB
Python
366 lines
12 KiB
Python
##############################################################################################
|
|
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
|
|
# All rights reserved.
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
|
# software and associated documentation files (the "Software"), to deal in the Software
|
|
# without restriction, including without limitation the rights to use, copy, modify,
|
|
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
|
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
|
# OR OTHER DEALINGS IN THE SOFTWARE.
|
|
#
|
|
# HAVE A NICE DAY.
|
|
|
|
# basicutils - a version-agnostic API for IDA Pro with some (slightly) higher level functionality
|
|
# This is the 7.x version - see basicutils_6x for the 6.x version
|
|
import os
|
|
|
|
import ida_bytes
|
|
import ida_funcs
|
|
import ida_nalt
|
|
import ida_ua
|
|
import ida_name
|
|
import idc
|
|
import struct
|
|
import idautils
|
|
import ida_idaapi
|
|
import ida_segment
|
|
import re
|
|
|
|
# Module-level alias for ida_idaapi.BADADDR; the helpers below compare
# against it to detect "no segment" / "no more functions".
BADADDR = ida_idaapi.BADADDR
|
|
|
|
def SegByName(n):
    """Return the ``(start_ea, end_ea)`` pair of the segment named *n*.

    Both elements are ``BADADDR`` when the lookup yields nothing usable.
    """
    seg = ida_segment.get_segm_by_name(n)
    if not seg or seg.start_ea == ida_idaapi.BADADDR:
        return (ida_idaapi.BADADDR, ida_idaapi.BADADDR)
    return (seg.start_ea, seg.end_ea)
|
|
|
|
def GetFunctionName(x):
    """Return the function name IDA reports for address *x*."""
    name = idc.get_func_name(x)
    return name
|
|
|
|
def GetInputFile():
    """Return the root filename of the input file, as reported by idc."""
    root_filename = idc.get_root_filename()
    return root_filename
|
|
|
|
def GetIdbFile():
    """Return the path of the current IDB, as reported by idc."""
    idb_path = idc.get_idb_path()
    return idb_path
|
|
|
|
def GetRootName():
    """Return the input file's base name placed in the IDB's directory.

    The IDB directory is used rather than the executable's own directory
    because the executable may live somewhere non-writable.
    """
    idb_dir = os.path.dirname(GetIdbFile())
    input_base = os.path.basename(GetInputFile())
    return os.path.join(idb_dir, input_base)
|
|
|
|
def NextFunction(x):
    """Return the result of ``idc.get_next_func`` for address *x*."""
    following = idc.get_next_func(x)
    return following
|
|
|
|
def PrevFunction(x):
    """Return the result of ``idc.get_prev_func`` for address *x*."""
    preceding = idc.get_prev_func(x)
    return preceding
|
|
|
|
# Not referenced by any function visible in this file.
MAX_OPCODE_LEN = 15


def PrevInstr(ea):
    """Decode the instruction preceding *ea* and return its address.

    The return value of ``decode_prev_insn`` is not checked; the ``ea``
    field of the populated ``insn_t`` is returned as-is.
    """
    # TODO this will return an inst_t type. Need to figure out how to populate it/make workflow happy
    insn = ida_ua.insn_t()
    ida_ua.decode_prev_insn(insn, ea)
    return insn.ea
|
|
|
|
def CodeRefsTo(target):
    """Return ``idautils.CodeRefsTo`` for *target*, passing 0 as the flow argument."""
    refs = idautils.CodeRefsTo(target, 0)
    return refs
|
|
|
|
def ForEveryUniqXrefTo(target, fun):
    """Call ``fun(xref)`` for code xrefs to *target*, skipping repeats.

    An xref is skipped when its containing function start equals that of
    the most recently reported xref, so only consecutive xrefs from the
    same function are collapsed.
    """
    last_start = 0
    for ref in idautils.CodeRefsTo(target, 0):
        if idc.get_func_attr(ref, idc.FUNCATTR_START) == last_start:
            continue
        fun(ref)
        last_start = idc.get_func_attr(ref, idc.FUNCATTR_START)
|
|
|
|
def ForEveryXrefTo(target, fun):
    """Call ``fun(xref)`` for every code xref to *target*."""
    for ref in idautils.CodeRefsTo(target, 0):
        fun(ref)
|
|
|
|
def ForEveryUniqXrefToD(target, fun):
    """Call ``fun(xref, target)`` for code xrefs to *target*, skipping repeats.

    An xref is skipped when its containing function start equals that of
    the most recently reported xref, so only consecutive xrefs from the
    same function are collapsed.
    """
    last_start = 0
    for ref in idautils.CodeRefsTo(target, 0):
        if idc.get_func_attr(ref, idc.FUNCATTR_START) == last_start:
            continue
        fun(ref, target)
        last_start = idc.get_func_attr(ref, idc.FUNCATTR_START)
|
|
|
|
def ForEveryXrefToD(target, fun):
    """Call ``fun(xref, target)`` for every code xref to *target*."""
    for ref in idautils.CodeRefsTo(target, 0):
        fun(ref, target)
|
|
|
|
def ForEveryFuncInDb(fun):
    """Call ``fun(ea)`` for each function found by walking NextFunction from 0.

    The walk stops when NextFunction returns BADADDR.
    """
    cur = NextFunction(0)
    while cur != ida_idaapi.BADADDR:
        fun(cur)
        cur = NextFunction(cur)
|
|
|
|
def ForEveryFuncInSeg(seg, fun):
    """Call ``fun(ea)`` for addresses walked through the segment named *seg*.

    The walk starts at the segment's start address (which is passed to
    *fun* even if it is not itself a function start) and advances with
    NextFunction until the segment end is reached.  If the segment cannot
    be found, the walk instead starts at the first function after address 0
    and continues until NextFunction returns BADADDR.

    Each visited address is also printed before *fun* is invoked.
    """
    # Honour the caller's segment name instead of a hard-coded ".text".
    start, end = SegByName(seg)
    if start == BADADDR:
        start = NextFunction(0)
        end = BADADDR
    f = start
    while f < end:
        # Parenthesised form is valid on both Python 2 and Python 3.
        print(f)
        fun(f)
        f = NextFunction(f)
|
|
|
|
|
|
def NFuncUp(fun, n):
    """Step back up to *n* functions from *fun* using PrevFunction.

    Returns the address reached; stepping stops early once BADADDR is hit,
    in which case BADADDR is returned.
    """
    cur = fun
    steps = 0
    while cur != ida_idaapi.BADADDR and steps < n:
        cur = PrevFunction(cur)
        steps += 1
    return cur
|
|
|
|
def NFuncDown(fun, n):
    """Step forward up to *n* functions from *fun* using NextFunction.

    Returns the address reached; stepping stops early once BADADDR is hit,
    in which case BADADDR is returned.
    """
    cur = fun
    steps = 0
    while cur != ida_idaapi.BADADDR and steps < n:
        cur = NextFunction(cur)
        steps += 1
    return cur
|
|
|
|
def FuncMidPt(fun):
    """Return the address halfway between the start and end of function *fun*.

    Start and end come from the FUNCATTR_START / FUNCATTR_END attributes.
    """
    fstart = idc.get_func_attr(fun, idc.FUNCATTR_START)
    fend = idc.get_func_attr(fun, idc.FUNCATTR_END)
    # Floor division keeps the result an integer address on Python 3 as well
    # as Python 2 (where "/" on ints already floored).
    return fstart + (fend - fstart) // 2
|
|
|
|
|
|
def FuncXrefsFrom(fun):
    """Return the set of function start addresses referenced from *fun*.

    Every item of *fun* is scanned for outgoing code references; a target
    is kept only when it is exactly the start of its containing function.
    """
    return {
        ref
        for item in idautils.FuncItems(fun)
        for ref in idautils.CodeRefsFrom(item, 0)
        if ref == idc.get_func_attr(ref, idc.FUNCATTR_START)
    }
|
|
|
|
def XrefFromRange(fun):
    """Return ``(lowest, highest)`` of the function starts referenced by *fun*.

    Returns ``(0, 0)`` when FuncXrefsFrom finds no targets.
    """
    targets = FuncXrefsFrom(fun)
    if not targets:
        return (0, 0)
    return (min(targets), max(targets))
|
|
|
|
def ProgramAddrRange():
    """Return the distance between the first and last function addresses.

    Computed as (last function before BADADDR) - (first function after 0).
    """
    # The idc variants return addresses; the ida_funcs functions of the same
    # name return func_t objects, which cannot be subtracted.
    last_func = idc.get_prev_func(ida_idaapi.BADADDR)
    first_func = idc.get_next_func(0)
    return last_func - first_func
|
|
|
|
def MemCopy(dest, src, length):
    """Patch *length* bytes at *dest* with the bytes read from *src*.

    Bytes are copied one at a time in ascending address order using
    ``ida_bytes.get_byte`` / ``ida_bytes.patch_byte``.
    """
    # range() exists on both Python 2 and 3; xrange() is Python 2 only.
    for offset in range(length):
        ida_bytes.patch_byte(dest + offset, ida_bytes.get_byte(src + offset))
|
|
|
|
def PrefixRange(start, end, prefix):
    """Prepend *prefix* to every ``sub_``-named function in ``[start, end)``.

    Walks from *start* with NextFunction while the address is below *end*;
    functions whose name does not start with ``sub_`` are left untouched.
    Each rename is announced on stdout.
    """
    x = start
    while x < end:
        n = idc.get_func_name(x)
        if n.startswith("sub_"):
            nn = prefix + n
            # Parenthesised form is valid on both Python 2 and Python 3.
            print("Renaming %s to %s\n" % (n, nn))
            ida_name.set_name(x, nn)
        x = NextFunction(x)
|
|
|
|
|
|
def snakeToCamelCase(s):
    """Convert a snake_case string to camelCase.

    Leading underscores are stripped first.  Each remaining underscore is
    dropped and the character immediately following it is upper-cased.  A
    trailing underscore (with nothing after it) is simply dropped instead
    of raising IndexError.
    """
    stripped = s.lstrip("_")
    out = []
    i = 0
    n = len(stripped)
    while i < n:
        ch = stripped[i]
        if ch == "_":
            # Consume the underscore and the character after it, if any.
            if i + 1 < n:
                out.append(stripped[i + 1].upper())
            i += 2
        else:
            out.append(ch)
            i += 1
    return "".join(out)
|
|
|
|
def isSnakeCase(s):
    """Return True if *s* is two or more alphanumeric runs joined by single underscores.

    The whole string must match: no leading, trailing or doubled
    underscores.  Letter case is not restricted.
    """
    # Raw string so that \Z reaches the regex engine without relying on
    # Python passing through an unrecognised string escape.
    return re.match(r"[a-zA-Z0-9]+(_[a-zA-Z0-9]+)+\Z", s) is not None
|
|
|
|
#Todo - right now this is going to miss something like FooBARFunction
|
|
def isCamelCase(s):
    """Return True if *s* is two or more capitalised words run together.

    Each word is one uppercase letter followed by at least one lowercase
    letter or digit, so names containing runs of capitals (for example
    ``FooBARFunction``) are not recognised.
    """
    # Raw string so that \Z reaches the regex engine without relying on
    # Python passing through an unrecognised string escape.
    return re.match(r"([A-Z][a-z0-9]+)([A-Z][a-z0-9]+)+\Z", s) is not None
|
|
|
|
#Todo - weed out if it's all uppercase or all uppercase and _, etc.
|
|
def isUCSnakeCase(s):
    """Return True if *s* is snake case made only of uppercase letters and digits.

    Requires at least one underscore, with non-empty runs of ``A-Z0-9`` on
    both sides of every underscore.
    """
    # Raw string so that \Z reaches the regex engine without relying on
    # Python passing through an unrecognised string escape.
    return re.match(r"[A-Z0-9]+(_[A-Z0-9]+)+\Z", s) is not None
|
|
|
|
def isPlausibleFunction(s):
    """Return True if *s* looks like a function name.

    Snake-case names are accepted unless they are all-uppercase snake case;
    anything else is accepted only if it is camel case.
    """
    if isSnakeCase(s):
        return not isUCSnakeCase(s)
    return bool(isCamelCase(s))
|
|
|
|
def PrependStrToFuncName(f, s):
    """Rename function *f* to its current name prefixed with *s*."""
    ida_name.set_name(f, s + idc.get_func_name(f))
|
|
|
|
#The "canonical" name format (for now) is <module name>_<func name>_<address>
|
|
#where <module_name> and <func_name> are in camel case.
|
|
#This is not ideal for a number of reasons but this is a workaround for now
|
|
|
|
#Return just the "function name" part of the canonical name
|
|
def GetCanonicalName(f):
    """Return the middle field of *f*'s name when it has exactly three ``_``-separated fields.

    Returns None for any other number of fields.
    """
    pieces = idc.get_func_name(f).split("_")
    return pieces[1] if len(pieces) == 3 else None
|
|
|
|
#Put function in canonical format, given the function name and module name
|
|
def NameCanonical(f, mod_name, func_name):
    """Rename *f* to ``<mod_name>_<func_name>_<address>``.

    The address is formatted as at least eight lowercase hex digits.  The
    rename is announced on stdout and applied with ``ida_name.force_name``.
    """
    n = "%s_%s_%08x" % (mod_name, func_name, f)
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("Renaming %s to %s\n" % (idc.get_func_name(f), n))
    ida_name.force_name(f, n)
|
|
|
|
#Put function in canonical format when it doesn't have a name, but you know the module name
|
|
def RenameFuncWithAddr(f, s):
    """Give *f* a canonical name in module *s*, using "unk" as the function part."""
    NameCanonical(f, s, "unk")
|
|
|
|
#Use this if you have pre-existing named functions in the DB that are in non-canonical format
|
|
def RenameRangeWithAddr(start, end, s):
    """Put every function in ``[start, end]`` into canonical form under module *s*.

    ``sub_``-named functions get the placeholder function name (via
    RenameFuncWithAddr); all others keep their current name as the function
    part.
    """
    x = start
    # Stop at BADADDR too: NextFunction returns it past the last function,
    # and with end == BADADDR the "<=" test alone would never become false.
    while x <= end and x != BADADDR:
        n = idc.get_func_name(x)
        if n.startswith("sub_"):
            RenameFuncWithAddr(x, s)
        else:
            NameCanonical(x, s, n)
        x = NextFunction(x)
|
|
|
|
#Rename a function in canonical format without changing the module name
|
|
def CanonicalFuncRename(f, name):
    """Rename *f* to ``<module>_<name>_<address>``, keeping its module part.

    The module part is whatever precedes the first underscore in the
    current name.  The rename is announced on stdout.
    """
    n = idc.get_func_name(f)
    parts = n.split("_")
    new_name = "%s_%s_%08x" % (parts[0], name, f)
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("Renaming %s to %s\n" % (n, new_name))
    ida_name.set_name(f, new_name)
|
|
|
|
#Rename the module name without changing the function name
|
|
def RenameFuncWithNewMod(f, mod):
    """Rename *f* to ``<mod>_<func>_<address>``, keeping its function part.

    The function part is the second ``_``-separated field of the current
    name.  The rename is announced on stdout.

    Raises:
        IndexError: if the current name contains no underscore.
    """
    n = idc.get_func_name(f)
    parts = n.split("_")
    new_name = "%s_%s_%08x" % (mod, parts[1], f)
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("Renaming %s to %s\n" % (n, new_name))
    ida_name.set_name(f, new_name)
|
|
|
|
#Rename a module (all functions that start with <mod>_)
|
|
def RenameMod(orig, new):
    """Move every function named ``<orig>_...`` into module *new*.

    Walks all functions starting from the first one after address 0 and
    hands each match to RenameFuncWithNewMod.
    """
    ea = idc.get_next_func(0)
    while ea != BADADDR:
        if idc.get_func_name(ea).startswith(orig + "_"):
            RenameFuncWithNewMod(ea, new)
        ea = NextFunction(ea)
|
|
|
|
#Just rename the module over a given range (can be used to split a module and give part a new name)
|
|
def RenameModRange(start, end, new):
    """Apply RenameFuncWithNewMod with module *new* to every function in ``[start, end]``."""
    x = start
    # Stop at BADADDR too: NextFunction returns it past the last function,
    # and with end == BADADDR the "<=" test alone would never become false.
    while x <= end and x != BADADDR:
        RenameFuncWithNewMod(x, new)
        x = NextFunction(x)
|
|
|
|
#Given a range of functions, some of which may have names and module names
|
|
# and a module name, put names in canonical format
|
|
def CanonicalizeRange(start, end, mod):
    """Put the names of all functions in ``[start, end]`` into canonical form.

    A function whose name already starts with ``<mod>_`` is assumed to be
    canonical and left alone.  ``sub_`` names get the placeholder function
    name; names containing an underscore are converted with
    snakeToCamelCase first; all other names are used unchanged.
    """
    x = start
    # Stop at BADADDR too: NextFunction returns it past the last function,
    # and with end == BADADDR the "<=" test alone would never become false.
    while x <= end and x != BADADDR:
        n = idc.get_func_name(x)
        # if it already starts with mod name, assume it's canonical
        if not n.startswith(mod + "_"):
            if n.startswith("sub_"):
                RenameFuncWithAddr(x, mod)
            else:
                if "_" in n:
                    n = snakeToCamelCase(n)
                NameCanonical(x, mod, n)
        x = NextFunction(x)
|
|
|
|
#Returns a string that is the concatenation of all of the string references from a function, separated by <sep>
|
|
#Iterates through every item in function and looks for data references that are strings
|
|
def CompileTextFromFunction(f, sep):
    """Return all string literals referenced from function *f*, joined into one string.

    Every item of the function is scanned for data references; a reference
    is kept when ``ida_nalt.get_str_type`` reports 0 or 3 for its target.
    Each kept string is emitted as ``" <sep> <text>"``.
    """
    # NOTE(review): the 0 / 3 type values look like they were carried over
    # from the 6.x API - confirm they are right for 7.x.
    pieces = []
    for item in idautils.FuncItems(f):
        for d in idautils.DataRefsFrom(item):
            t = ida_nalt.get_str_type(d)
            if t != 0 and t != 3:
                continue
            # Use this module's helper; idc has no GetStrLitContents.
            text = GetStrLitContents(d)
            if text is None:
                # Presumably the underlying read failed - skip rather than
                # crash on concatenation.
                continue
            if not isinstance(text, str):
                # bytes on Python 3 builds of IDAPython.
                text = text.decode("utf-8", "replace")
            pieces.append(" " + sep + " " + text)
    return "".join(pieces)
|
|
|
|
#Returns a string which is the concatenation all of the string references
|
|
# for an address range in the program, separated by <sep>
|
|
#Similar to above, but iterates over the whole set of functions in the given range
|
|
def CompileTextFromRange(start, end, sep):
    """Return all string literals referenced from functions in ``[start, end]``.

    Walks functions from *start* with NextFunction.  Every item of each
    function is scanned for data references; a reference is kept when
    ``ida_nalt.get_str_type`` reports 0 or 3 for its target.  Each kept
    string is emitted as ``" <sep> <text>"``.
    """
    # NOTE(review): the 0 / 3 type values look like they were carried over
    # from the 6.x API - confirm they are right for 7.x.
    pieces = []
    x = start
    # Stop at BADADDR too: NextFunction returns it past the last function,
    # and with end == BADADDR the "<=" test alone would never become false.
    while x <= end and x != BADADDR:
        for item in idautils.FuncItems(x):
            for d in idautils.DataRefsFrom(item):
                t = ida_nalt.get_str_type(d)
                if t != 0 and t != 3:
                    continue
                text = GetStrLitContents(d)
                if text is None:
                    # Presumably the underlying read failed - skip rather
                    # than crash on concatenation.
                    continue
                if not isinstance(text, str):
                    # bytes on Python 3 builds of IDAPython.
                    text = text.decode("utf-8", "replace")
                pieces.append(" " + sep + " " + text)
        x = NextFunction(x)
    return "".join(pieces)
|
|
|
|
#Returns a string which is a concatenation of all the function names in the given range
|
|
# separated by <sep>
|
|
def CompileFuncNamesFromRangeAsText(start, end, sep):
    """Return the names of the functions in ``[start, end]`` as one string.

    Names starting with ``sub_`` are skipped.  Each kept name is emitted as
    ``" <sep> <name>"``.
    """
    pieces = []
    x = start
    # Stop at BADADDR too: NextFunction returns it past the last function,
    # and with end == BADADDR the "<=" test alone would never become false.
    while x <= end and x != BADADDR:
        n = idc.get_func_name(x)
        if not n.startswith("sub_"):
            pieces.append(" " + sep + " " + n)
        x = NextFunction(x)
    return "".join(pieces)
|
|
|
|
#helper function which checks for both ASCII and Unicode strings at the given ea
|
|
def GetStrLitContents(ea):
    """Return the string literal at *ea*, trying STRTYPE_C_16 and then STRTYPE_C.

    The first type for which ``get_max_strlit_length`` reports a positive
    length is used to read the contents.  Returns ``""`` when neither type
    yields a positive length.
    """
    for str_type in (ida_nalt.STRTYPE_C_16, ida_nalt.STRTYPE_C):
        length = ida_bytes.get_max_strlit_length(ea, str_type)
        if length > 0:
            return ida_bytes.get_strlit_contents(ea, length, str_type)
    return ""