mirror of
https://github.com/JHUAPL/CodeCut.git
synced 2026-01-09 13:28:06 -05:00
344 lines
10 KiB
Python
Executable File
344 lines
10 KiB
Python
Executable File
#!/usr/bin/python
|
|
|
|
##############################################################################################
|
|
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
|
|
# All rights reserved.
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
|
# software and associated documentation files (the "Software"), to deal in the Software
|
|
# without restriction, including without limitation the rights to use, copy, modify,
|
|
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
|
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
|
# OR OTHER DEALINGS IN THE SOFTWARE.
|
|
#
|
|
# HAVE A NICE DAY.
|
|
|
|
import sys
|
|
|
|
#Syntax: map_read.py <ground truth .map file> <LFA produced .map file>
|
|
#Reads the two map files and outputs a score
|
|
#Score is % overlap, % underlap, and % gap (the sum of which should be 100%)
|
|
|
|
#Raw list of modules
|
|
g_mod_list1 = []
|
|
g_mod_list2 = []
|
|
#"Reconciled" module list - after modules have been combined to represent best alignment
|
|
g_rec_list1 = []
|
|
g_rec_list2 = []
|
|
|
|
#name
|
|
#offset - starting address of the module
|
|
#mlen - length of the module
|
|
#reach - end address of the module (offset + mlen)
|
|
#gap - when collapsing two modules,
|
|
|
|
class bin_mod:
|
|
def __init__(self, n, o, ml):
|
|
self.name = n
|
|
self.offset = o
|
|
self.mlen = ml
|
|
self.reach = o+ml
|
|
self.gap = 0
|
|
|
|
#map_parse(function, mlist):
|
|
#Parse a gcc/ld formatted .map file
|
|
# (mlist == 1): ground truth map, saved to g_mod_list1
|
|
# (mlist == 2): LFA map, saved to g_mod_list2
|
|
def map_parse(f,mlist):
|
|
global g_mod_list1
|
|
global g_mod_list2
|
|
|
|
line = f.readline()
|
|
prev_name = ""
|
|
while (line != ""):
|
|
#print "line %s" % line
|
|
if (not line.startswith(" .text") or (len(line) < 17)):
|
|
line = f.readline()
|
|
continue
|
|
#line wrap case
|
|
if not ((line[16] == '0') and (line[17] == 'x')):
|
|
seg = line.strip()
|
|
line = f.readline()
|
|
else:
|
|
seg = line[0:15].strip()
|
|
offset = int(line[16:34],16)
|
|
mlen = int(line[35:45].strip(),16)
|
|
name = line[46:].strip()
|
|
|
|
#print "%s\n%s\n%s\n%s\n"% (line[0:15],line[16:33],line[34:45],line[46:])
|
|
|
|
#print "Seg: %s Offset: %x Len: %x Name: %s" % (seg,offset,mlen,name)
|
|
|
|
if (offset == 0) or (mlen == 0):
|
|
line = f.readline()
|
|
continue
|
|
|
|
#print "Seg: %s Offset: %x Len: %x Name: %s" % (seg,offset,mlen,name)
|
|
if (name == prev_name):
|
|
#print "Combining"
|
|
if (mlist == 1):
|
|
new_reach = offset+mlen
|
|
begin = g_mod_list1[-1].offset
|
|
new_len = new_reach-begin
|
|
g_mod_list1[-1].mlen = new_len
|
|
g_mod_list1[-1].reach = new_reach
|
|
else:
|
|
new_reach = offset+mlen
|
|
begin = g_mod_list2[-1].offset
|
|
new_len = new_reach-begin
|
|
g_mod_list2[-1].mlen = new_len
|
|
g_mod_list2[-1].reach = new_reach
|
|
#print "Seg: %s Offset: %x Len: %x Name: %s" % (seg,begin,new_len,name)
|
|
else:
|
|
bm = bin_mod(name,offset,mlen)
|
|
if (mlist == 1):
|
|
g_mod_list1.append(bm)
|
|
else:
|
|
g_mod_list2.append(bm)
|
|
|
|
#read next line
|
|
line = f.readline()
|
|
prev_name = name
|
|
|
|
#map_print():
|
|
#Print both ground truth and LFA map output
|
|
def map_print(n):
|
|
if (n==1):
|
|
print "Map 1 (ground truth):"
|
|
mod_list = g_mod_list1
|
|
else:
|
|
print "Map 2:"
|
|
mod_list = g_mod_list2
|
|
print "# of modules: %d" % len(mod_list)
|
|
for x in xrange(len(mod_list)):
|
|
print "Name: %s Offset: %x Len: %x" % (mod_list[x].name,mod_list[x].offset,mod_list[x].mlen)
|
|
|
|
|
|
#score_underlap(module1,module2):
|
|
#opposite of overlap - actually "disjoint areas" might be more accurate
|
|
#For the purposes of scoring this is the area of m1 that m2 doesn't cover
|
|
#to ensure that the underlap does not get counted twice
|
|
def score_underlap(m1,m2):
|
|
#Assume that the m1s are contiguous (from .map files)
|
|
#Only measure the portion of this m1 that the m2 doesn't cover
|
|
#This ensures that disjoint areas don't get counted twice
|
|
m2_upper = max(m1.offset,m2.offset)
|
|
m2_lower = min(m1.reach, m2.reach)
|
|
ul = abs (m1.offset - m2_upper)
|
|
ul += abs (m1.reach - m2_lower)
|
|
return ul
|
|
|
|
#mod_underlap(m1,m2):
|
|
#Like score underlap but this is a simpler calculation for use with module list reconciliation
|
|
def mod_underlap(m1,m2):
|
|
ul = abs (m1.offset - m2.offset)
|
|
ul += abs (m1.reach - m2.reach)
|
|
return ul
|
|
|
|
|
|
#mod_collapse(module1,module2):
|
|
#Return a module object that is the combination of the two modules
|
|
#Does not update either of the global module lists
|
|
def mod_collapse(m1,m2):
|
|
nname = m1.name + "_and_" + m2.name
|
|
noffset = min(m1.offset,m2.offset)
|
|
nr = max(m1.reach,m2.reach)
|
|
nlen = nr - noffset
|
|
|
|
cm = bin_mod(nname, noffset, nlen)
|
|
|
|
cm.gap = m1.gap
|
|
cm.gap += m2.gap
|
|
#will work regardless of module order,
|
|
#the correct one will be positive, the wrong one negative
|
|
cm.gap += max(m2.offset - m1.reach, m1.offset - m2.reach)
|
|
|
|
return cm
|
|
|
|
#mod_print(m):
|
|
#Print a single module
|
|
def mod_print(m):
|
|
#print "%s: %08x - %08x" % (m.name,m.offset,m.reach),
|
|
print "%08x - %08x" % (m.offset,m.reach),
|
|
if (m.gap != 0):
|
|
print " gap: %x" % m.gap,
|
|
|
|
#rec_list_print():
|
|
#Print side by side the reconciled module lists
|
|
def rec_list_print():
|
|
i1 = len(g_rec_list1)
|
|
i2 = len(g_rec_list2)
|
|
if (i1 != i2):
|
|
print "Error: List lengths don't match, not fully reconciled (%d and %d)." % (i1,i2)
|
|
return
|
|
for i in xrange(i1):
|
|
mod_print(g_rec_list1[i])
|
|
mod_print(g_rec_list2[i])
|
|
print "u: %x" % (score_underlap(g_rec_list1[i],g_rec_list2[i]))
|
|
|
|
#final_score():
|
|
#Determine the scores by iterating through the reconciled module lists
|
|
#and tallying underlap areas and gap areas
|
|
def final_score():
|
|
start = min(g_rec_list1[0].offset,g_rec_list2[0].offset)
|
|
end = max(g_rec_list1[-1].reach,g_rec_list2[-1].reach)
|
|
i1 = len(g_rec_list1)
|
|
i2 = len(g_rec_list2)
|
|
if (i1 != i2):
|
|
print "Error: List lengths don't match, not fully reconciled (%d and %d)." % (i1,i2)
|
|
return
|
|
s=0
|
|
g=0
|
|
for i in xrange(0,i1):
|
|
s+=score_underlap(g_rec_list1[i],g_rec_list2[i])
|
|
#only count gaps from the "compare" map file (the one we generate with LFA)
|
|
g+=g_rec_list2[i].gap
|
|
#Area of overlap - total area - (underlaps + gaps)
|
|
good_area = (end-start) - (s+g)
|
|
print "Length: 0x%x Good: 0x%x (%2f) Underlap: 0x%x (%2f) Gaps: 0x%x (%2f)" % (end-start,good_area, good_area*100.0/(end-start),s,s*100.0/(end-start),g,g*100.0/(end-start))
|
|
return (s+g)/1.0/(end-start)
|
|
|
|
#map_reconcile():
|
|
#Attempt to combine modules in either list to make the maps more similar
|
|
#When combining modules, keep track of gaps between the modules so we can account for that in the overall score
|
|
#This might seem like cheating, but here's why it's not:
|
|
# - we want to give the algorithm credit if it finds a couple of clusters of functionality within a .o file
|
|
# (i.e. it says one .o file is really 2 or 3 .o files)
|
|
# - we want to give the algorithm credit if it says nearby .o files are so inter-related that they are essentially one
|
|
# (i.e. it says that 2 or 3 adjacent .o files are really one .o file
|
|
#
|
|
#I'm definitely open to suggestions on better ways to do this
|
|
def map_reconcile():
|
|
i1 = 0
|
|
i2 = 0
|
|
|
|
while (i1 < len(g_mod_list1)) and (i2 < len(g_mod_list2)):
|
|
m1 = g_mod_list1[i1]
|
|
m2 = g_mod_list2[i2]
|
|
|
|
#"reach" - aka the end of the current modules under consideration
|
|
m1r = m1.reach
|
|
m2r = m2.reach
|
|
|
|
#current underlap
|
|
po = mod_underlap(m1,m2)
|
|
pc = 0x10000000000
|
|
|
|
print " m1 (%d): " % i1,
|
|
mod_print(m1)
|
|
print " m2 (%d): " % i2,
|
|
mod_print(m2)
|
|
print " underlap: %x" % (po)
|
|
|
|
d=0
|
|
#module 1 is longer than module 2, so attempt to collapse modules in list 2 to optimize
|
|
if (m1r > m2r):
|
|
nm2 = g_mod_list2[i2]
|
|
#add/collapse m2 modules, but check to see if makes it better
|
|
while (d == 0) and (i2+1 < len(g_mod_list2)):
|
|
pnm2 = nm2
|
|
nm2 = mod_collapse(nm2,g_mod_list2[i2+1])
|
|
pc = mod_underlap(m1, nm2)
|
|
print "nm2 (%d): (%x)" % (i2+1,pc),
|
|
mod_print(nm2)
|
|
print ""
|
|
if (pc < po):
|
|
po = pc
|
|
i2+=1
|
|
else:
|
|
d=1
|
|
print "Collapsed m2 (%d): " % i2,
|
|
mod_print(pnm2)
|
|
print ""
|
|
|
|
#add final collapsed modules to reconciled list
|
|
g_rec_list1.append(m1)
|
|
g_rec_list2.append(pnm2)
|
|
#module 2 is longer than module 1, so attempt to collapse modules in list 1 to optimize
|
|
else:
|
|
nm1 = g_mod_list1[i1]
|
|
while (d==0) and (i1+1 < len(g_mod_list1)):
|
|
pnm1 = nm1
|
|
nm1 = mod_collapse(nm1,g_mod_list1[i1+1])
|
|
pc = mod_underlap(nm1, m2)
|
|
print "nm1 (%d): (%x)" % (i1 + 1, pc),
|
|
mod_print(nm1)
|
|
print ""
|
|
if (pc < po):
|
|
po = pc
|
|
i1 += 1
|
|
else:
|
|
d=1
|
|
print "Collapsed m1 (%d): " % i1,
|
|
mod_print(pnm1)
|
|
print ""
|
|
g_rec_list1.append(pnm1)
|
|
g_rec_list2.append(m2)
|
|
|
|
i1+=1
|
|
i2+=1
|
|
|
|
print ""
|
|
|
|
#end case
|
|
#if we've got one module left on either side,
|
|
#collapse all the other modules on the other side to match
|
|
if (i1 == len(g_mod_list1)-1):
|
|
m1 = g_mod_list1[i1]
|
|
print "end m1 (%d):" % (i1),
|
|
mod_print(m1)
|
|
print ""
|
|
nm2 = g_mod_list2[i2]
|
|
i2 += 1
|
|
while (i2 < len(g_mod_list2)):
|
|
nm2 = mod_collapse(nm2,g_mod_list2[i2])
|
|
print "end nm2 (%d):" % (i2),
|
|
mod_print(nm2)
|
|
print ""
|
|
i2 += 1
|
|
g_rec_list1.append(m1)
|
|
g_rec_list2.append(nm2)
|
|
if (i2 == len(g_mod_list2)-1):
|
|
m2 = g_mod_list2[i2]
|
|
print "end m2 (%d):" % (i2),
|
|
mod_print(m2)
|
|
print ""
|
|
nm1 = g_mod_list1[i1]
|
|
i1 += 1
|
|
while (i1 < len(g_mod_list1)):
|
|
nm1 = mod_collapse(nm1,g_mod_list1[i1])
|
|
print "end nm1 (%d):" % (i1),
|
|
mod_print(nm1)
|
|
print ""
|
|
i1 += 1
|
|
g_rec_list1.append(nm1)
|
|
g_rec_list2.append(m2)
|
|
|
|
|
|
|
|
#"ground truth" map file
|
|
f = open(sys.argv[1], 'r')
|
|
map_parse(f,1)
|
|
#map file to compare
|
|
f2 = open(sys.argv[2], 'r')
|
|
map_parse(f2,2)
|
|
|
|
map_print(1)
|
|
map_print(2)
|
|
|
|
#"Reconcile" maps to make them more similar - see comment above for why we do this
|
|
map_reconcile()
|
|
|
|
#Print reconciled map
|
|
rec_list_print()
|
|
|
|
#Print score
|
|
print "Score: %f" % (final_score())
|
|
f.close()
|
|
f2.close()
|