From a82081611f92e1db6b66eca0ede51d5e4002b62d Mon Sep 17 00:00:00 2001 From: evm Date: Wed, 30 Oct 2019 14:44:19 -0400 Subject: [PATCH] Make LFA modules contiguous, source file names in .csv output for easier comparison between LFA and MC --- README | 11 ++ basicutils_6x.py | 316 ----------------------------------------------- cc_base.py | 15 ++- lfa.py | 15 +-- maxcut.py | 6 +- 5 files changed, 30 insertions(+), 333 deletions(-) delete mode 100644 basicutils_6x.py diff --git a/README b/README index 6242267..cee63da 100644 --- a/README +++ b/README @@ -86,6 +86,10 @@ A couple areas for research: - The portion of code that tries to name object files based on common strings is completely researchy and open ended. Lots of things to play with there. +**** MaxCut Parameters & Interpolation **** + + - The only real parameter for MaxCut is a THRESHOLD variable that corresponds to the module size at which the algorithm will stop subdividing modules. A threshold of 4K (0x1000) seems to produce modules similar in size to those produced by LFA. A threshold of 8K (0x2000) seems to be a good upper bound. A good area of research would be replacing this static cutoff with a dynamic stopping criterion, for example deciding to stop subdividing based on a connectedness measurement or something along those lines. 
+ **** Output Files **** CodeCut produces 7 files: @@ -122,6 +126,13 @@ You can use sfdp to render the graph into a PNG file with a command line like: sfdp -x -Goverlap=scale -Tpng -Goutputorder=edgesfirst -Nstyle=filled -Nfillcolor=white _lfa_mod_graph.gv > .png +A really nice hierarchical graph can be obtained by adding: +ranksep=0 +nodesep=0 +to the .gv file and running: + +dot -x -Goverlap=scale -Tpng -Goutputorder=edgesfirst -Nstyle=filled -Nfillcolor=white .gv > .png + **** "Canonical" Names **** NOTE on IDA and Canonical Names: AFAICT IDA doesn't really have a concept of source file / object files in diff --git a/basicutils_6x.py b/basicutils_6x.py deleted file mode 100644 index 3070bc4..0000000 --- a/basicutils_6x.py +++ /dev/null @@ -1,316 +0,0 @@ -############################################################################################## -# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC -# All rights reserved. -# Permission is hereby granted, free of charge, to any person obtaining a copy of this -# software and associated documentation files (the "Software"), to deal in the Software -# without restriction, including without limitation the rights to use, copy, modify, -# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -# OR OTHER DEALINGS IN THE SOFTWARE. -# -# HAVE A NICE DAY. 
- -# basicutils - a version-agnostic API for IDA Pro with some (slightly) higher level functionality -# This is the 6.x version - see basicutils_7x for the 7.x version - -import idc -import struct -import idautils -import re - -BADADDR = idc.BADADDR - -def SegByName(n): - start = idc.SegByBase(idc.SegByName(n)) - if (start != idc.BADADDR): - end = idc.SegEnd(start) - else: - start = idc.BADADDR - end = idc.BADADDR - return (start,end) - -def GetFunctionName(x): - return idc.GetFunctionName(x) - -def GetInputFile(): - return idc.GetInputFile() - -def NextFunction(x): - return idc.NextFunction(x) - -def PrevInstr(ea): - return idc.PrevHead(ea, ea-15) - -def ForEveryUniqXrefTo( target, fun ): - a = 0 - for xref in idautils.CodeRefsTo(target,0): - if idc.GetFunctionAttr(xref,idc.FUNCATTR_START) != a : - fun(xref) - a = idc.GetFunctionAttr(xref, idc.FUNCATTR_START); - -def ForEveryXrefTo( target, fun ): - for xref in idautils.CodeRefsTo(target,0): - fun(xref) - -def ForEveryUniqXrefToD( target, fun ): - a = 0 - for xref in idautils.CodeRefsTo(target,0): - if idc.GetFunctionAttr(xref,idc.FUNCATTR_START) != a : - fun(xref, target) - a = idc.GetFunctionAttr(xref, idc.FUNCATTR_START); - -def ForEveryXrefToD( target, fun ): - for xref in idautils.CodeRefsTo(target,0): - fun(xref, target) - -def ForEveryFuncInDb( fun ): - f = idc.NextFunction(0) - while (f != idc.BADADDR): - """print "ev: %#x" % f""" - fun(f) - f=idc.NextFunction(f) - -def NFuncUp( fun, n ) : - i=0 - f=fun - while ((i__
-#where and are in camel case. -#This is not ideal for a number of reasons but this is a workaround for now - -#Return just the "function name" part of the canonical name -def GetCanonicalName(f): - n = idc.GetFunctionName(f) - parts = n.split("_") - if len(parts) == 3: - return parts[1] - else: - return None - -#Put function in canonical format, given the function name and module name -def NameCanonical(f,mod_name,func_name): - n = "%s_%s_%08x" % (mod_name,func_name,f) - print "Renaming %s to %s\n" % (idc.GetFunctionName(f),n) - idc.MakeName(f,n) - -#Put function in canonical format when it doesn't have a name, but you know the module name -def RenameFuncWithAddr(f,s): - func_name = "unk" - NameCanonical(f,s,func_name) - -#Use this if you have pre-existing named functions in the DB that are in non-canonical format -def RenameRangeWithAddr(start,end,s): - x = start - while (x<=end): - n = idc.GetFunctionName(x) - if (n.startswith("sub_")): - RenameFuncWithAddr(x,s) - else: - NameCanonical(x,s,n) - x = idc.NextFunction(x) - -#Rename a function in canonical format without changing the module name -def CanonicalFuncRename(f,name): - n = idc.GetFunctionName(f) - parts = n.split("_") - new_name = "%s_%s_%08x" % (parts[0],name,f) - print "Renaming %s to %s\n" % (n, new_name) - idc.MakeName(f,new_name) - -#Rename the module name without changing the function name -def RenameFuncWithNewMod(f,mod): - n = idc.GetFunctionName(f) - parts = n.split("_") - new_name = "%s_%s_%08x" % (mod,parts[1],f) - print "Renaming %s to %s\n" % (n, new_name) - idc.MakeName(f,new_name) - -#Rename a module (all functions that start with _) -def RenameMod(orig, new): - i = idc.NextFunction(0) - while (i != idc.BADADDR): - n = idc.GetFunctionName(i) - if n.startswith(orig+"_"): - RenameFuncWithNewMod(i,new) - i = idc.NextFunction(i) - -#Just rename the module over a given range (can be used to split a module and give part a new name) -def RenameModRange(start, end, new): - x = start - while 
(x<=end): - n = idc.GetFunctionName(x) - RenameFuncWithNewMod(x,new) - x = idc.NextFunction(x) - -#Given a range of functions, some of which may have names and module names -# and a module name, put names in canonical format -def CanonicalizeRange(start,end,mod): - x = start - while (x<=end): - n = idc.GetFunctionName(x) - #if it already starts with mod name, assume it's canonical - if (not n.startswith(mod+"_")): - if (n.startswith("sub_")): - RenameFuncWithAddr(x,mod) - #this should be contains "_" - elif ("_" in n): - n = snakeToCamelCase(n) - NameCanonical(x,mod,n) - else: - NameCanonical(x,mod,n) - x = idc.NextFunction(x) - -#Returns a string that is the concatenation of all of the string references from a function, separated by -#Iterates through every item in function and looks for data references that are strings -def CompileTextFromFunction(f,sep): - s="" - faddr = list(idautils.FuncItems(f)) - for c in range(len(faddr)): - for d in idautils.DataRefsFrom(faddr[c]): - if idc.GetStringType(d) == 0 and idc.GetString(d): - s += " "+ sep + " " + idc.GetString(d) - return s - -#Returns a string which is the concatenation all of the string references -# for an address range in the program, separated by -#Similar to above, but iterates over the whole set of functions in the given range -def CompileTextFromRange(start,end,sep): - x = start - s = "" - while (x<=end): - #print "Function %x" % x - faddr = list(idautils.FuncItems(x)) - for c in range(len(faddr)): - for d in idautils.DataRefsFrom(faddr[c]): - #print "Found ref at %x" % faddr[c] - if idc.GetStringType(d) == 0 and idc.GetString(d): - s += " "+ sep + " " + idc.GetString(d) - elif idc.GetStringType(d) == 3 and idc.GetString(d, -1, idc.ASCSTR_UNICODE): - s += " " + sep + " " + idc.GetString(d,-1,idc.ASCSTR_UNICODE) - x = idc.NextFunction(x) - return s - -#Returns a string which is a concatenation of all the function names in the given range -# separated by -def CompileFuncNamesFromRangeAsText(start,end,sep): 
- x = start - s = "" - while (x<=end): - n = idc.GetFunctionName(x) - if (not n.startswith("sub_")): - s += " " + sep + " " + n - x = idc.NextFunction(x) - return s - - \ No newline at end of file diff --git a/cc_base.py b/cc_base.py index 6c32a6c..5b23110 100644 --- a/cc_base.py +++ b/cc_base.py @@ -17,6 +17,7 @@ # HAVE A NICE DAY. import basicutils_7x as basicutils +import modnaming ## CodeCut Basics ## A couple of functions for working with function and module lists and outputting results @@ -114,7 +115,8 @@ def gen_map_file(module_list, suffix): while (c0)): - print "get_last_3: %d,%d" % (c,i) + #print "get_last_3: %d,%d" % (c,i) if (g_function_list[i].lfa_skip == 0): p.append(g_function_list[i]) c+=1 @@ -225,7 +225,7 @@ def get_lfa_start(): c=0; i=0; while (c < 4): - print "get_lfa_start: %d,%d" % (c,i) + #print "get_lfa_start: %d,%d" % (c,i) if (g_function_list[i].lfa_skip==0): c+=1 i+=1 @@ -245,16 +245,11 @@ def edge_detect(): c=get_lfa_start() #do edge detection while (c THRESHOLD) and (nodes > 1): cut_address = make_cut(start, end,graph)