mirror of
https://github.com/JHUAPL/CodeCut.git
synced 2026-01-09 14:58:02 -05:00
@@ -1,848 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
#
|
||||
# IDAPython script to show many features extracted from debugging strings. It's
|
||||
# also able to rename functions based on the guessed function name & rename
|
||||
# functions based on the source code file they belong to.
|
||||
#
|
||||
# Copyright (c) 2018-2019, Joxean Koret
|
||||
# Licensed under the GNU Affero General Public License v3.
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from collections import Counter
|
||||
|
||||
import idaapi
|
||||
|
||||
from idc import *
|
||||
from idaapi import *
|
||||
from idautils import *
|
||||
|
||||
from PyQt5 import QtCore, QtGui, QtWidgets
|
||||
|
||||
try:
|
||||
import nltk
|
||||
from nltk.tokenize import word_tokenize
|
||||
from nltk.tag import pos_tag
|
||||
|
||||
has_nltk = True
|
||||
except ImportError:
|
||||
has_nltk = False
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Name used in the titles of every chooser/viewer this script opens.
PROGRAM_NAME = "IMS"

#-------------------------------------------------------------------------------
# Matches strings that look like references to source code files with a known
# extension (C/C++/Obj-C/Rust/Go/OCaml), optionally followed by ':' or a space
# as typically found in assertion/log messages.
SOURCE_FILES_REGEXP = r"([a-z_\/\\][a-z0-9_/\\:\-\.@]+\.(c|cc|cxx|c\+\+|cpp|h|hpp|m|rs|go|ml))($|:| )"

# Map of human-readable language name -> file extensions belonging to it.
# Used to build per-language statistics from the source paths found.
LANGS = {}
LANGS["C/C++"] = ["c", "cc", "cxx", "cpp", "h", "hpp"]
LANGS["C"] = ["c"]
LANGS["C++"] = ["cc", "cxx", "cpp", "hpp", "c++"]
LANGS["Obj-C"] = ["m"]
LANGS["Rust"] = ["rs"]
LANGS["Golang"] = ["go"]
LANGS["OCaml"] = ["ml"]

#-------------------------------------------------------------------------------
# Regular expressions used to extract function-name-like and class-name-like
# tokens (e.g. "ns::Class::method") from the binary's strings.
FUNCTION_NAMES_REGEXP = r"([a-z_][a-z0-9_]+((::)+[a-z_][a-z0-9_]+)*)"
CLASS_NAMES_REGEXP = r"([a-z_][a-z0-9_]+(::(<[a-z0-9_]+>|~{0,1}[a-z0-9_]+))+)\({0,1}"
# Common English/programming words that match FUNCTION_NAMES_REGEXP but are
# almost never real function names; used to discard false positives.
NOT_FUNCTION_NAMES = ["copyright", "char", "bool", "int", "unsigned", "long",
  "double", "float", "signed", "license", "version", "cannot", "error",
  "invalid", "null", "warning", "general", "argument", "written", "report",
  "failed", "assert", "object", "integer", "unknown", "localhost", "native",
  "memory", "system", "write", "read", "open", "close", "help", "exit", "test",
  "return", "libs", "home", "ambiguous", "internal", "request", "inserting",
  "deleting", "removing", "updating", "adding", "assertion", "flags",
  "overflow", "enabled", "disabled", "enable", "disable", "virtual", "client",
  "server", "switch", "while", "offset", "abort", "panic", "static", "updated",
  "pointer", "reason", "month", "year", "week", "hour", "minute", "second",
  'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
  'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
  'september', 'october', 'november', 'december', "arguments", "corrupt",
  "corrupted", "default", "success", "expecting", "missing", "phrase",
  "unrecognized", "undefined",
  ]

#-------------------------------------------------------------------------------
# word (lower-cased) -> set of part-of-speech tags seen for it; filled by
# nltk_preprocess() when NLTK is available.
FOUND_TOKENS = {}
# Part-of-speech tags (nouns, adjectives, verbs) considered plausible for a
# function name candidate.
TOKEN_TYPES = ["NN", "NNS", "NNP", "JJ", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
|
||||
def nltk_preprocess(strings):
  """Part-of-speech tag every function-name-looking token found in *strings*.

  Fills the module-level FOUND_TOKENS map (word -> set of POS tags) that is
  later used to filter function name candidates. Does nothing when NLTK is
  not installed (has_nltk is False).
  """
  if not has_nltk:
    return

  text = "\n".join(map(str, strings))
  # findall returns tuples (one entry per regex group); the full match is
  # always the first group.
  words = [match[0] for match in re.findall(FUNCTION_NAMES_REGEXP, text)]
  for word, tag in nltk.pos_tag(words):
    # A word may appear with several different tags; keep all of them.
    # (Was a bare `except:` dict-initialization idiom; setdefault is the
    # idiomatic, exception-free equivalent.)
    FOUND_TOKENS.setdefault(word.lower(), set()).add(tag)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def get_strings(strtypes = None):
  """Return IDA's Strings object configured for the given string types.

  strtypes: iterable of string type ids (defaults to [0, 1], i.e. C and
  Unicode strings). The original signature used a mutable default list;
  a None sentinel is backward compatible and avoids the shared-default
  pitfall.
  """
  if strtypes is None:
    strtypes = [0, 1]
  strings = Strings()
  strings.setup(strtypes = strtypes)
  return strings
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def get_lang(full_path):
  """Guess the programming language from *full_path*'s extension.

  Returns the matching key of LANGS, or None when the extension is unknown.
  """
  ext = os.path.splitext(full_path.lower())[1].strip(".")
  for lang, extensions in LANGS.items():
    if ext in extensions:
      return lang
  return None
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def add_source_file_to(d, src_langs, refs, full_path, s):
  """Record string *s* under source file *full_path* in dictionary *d*.

  Appends one [ref_ea, function name, string text] entry per reference in
  *refs*, and bumps the per-language counter when the file's extension maps
  to a known language. Returns the updated (d, src_langs) pair.
  """
  entries = d.setdefault(full_path, [])

  lang = get_lang(full_path)
  if lang is not None:
    src_langs[lang] += 1

  text = str(s)
  for ref in refs:
    entries.append([ref, get_func_name(ref), text])

  return d, src_langs
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def get_source_strings(min_len = 4, strtypes = [0, 1]):
  """Find all source code file references in the binary.

  Two sources are combined: strings that match SOURCE_FILES_REGEXP, and the
  source-file debug information loaded in the IDB (get_sourcefile).

  Returns (d, strings) where d maps each source file path to a list of
  [ea, function name, string] references and strings is the IDA Strings
  object (returned so callers can reuse it without re-scanning).
  """
  strings = get_strings(strtypes)

  # Search string references to source files
  src_langs = Counter()  # language name -> number of source files seen
  total_files = 0
  d = {}
  for s in strings:
    if s and s.length > min_len:
      ret = re.findall(SOURCE_FILES_REGEXP, str(s), re.IGNORECASE)
      if ret and len(ret) > 0:
        # Only keep strings actually referenced from somewhere.
        refs = list(DataRefsTo(s.ea))
        if len(refs) > 0:
          total_files += 1
          # First group of the first match is the full file path.
          full_path = ret[0][0]
          d, src_langs = add_source_file_to(d, src_langs, refs, full_path, s)

  # Use the loaded debugging information (if any) to find source files
  for f in list(Functions()):
    done = False
    func = idaapi.get_func(f)
    if func is not None:
      cfg = idaapi.FlowChart(func)
      for block in cfg:
        # NOTE(review): 'done' is initialized but never set to True, so this
        # early-exit is currently dead — presumably meant to stop after the
        # first head with source info; confirm intent.
        if done:
          break

        for head in list(Heads(block.start_ea, block.end_ea)):
          full_path = get_sourcefile(head)
          if full_path is not None:
            total_files += 1
            d, src_langs = add_source_file_to(d, src_langs, [head], full_path, "Symbol: %s" % full_path)

  # Feed the POS tagger with every string found; used later to filter
  # function name candidates.
  nltk_preprocess(strings)

  # Print a summary of the programming languages detected.
  if len(d) > 0 and total_files > 0:
    print("Programming languages found:\n")
    for key in src_langs:
      print("  %s %f%%" % (key.ljust(10), src_langs[key] * 100. / total_files))
    print("\n")

  return d, strings
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def handler(item, column_no):
  """Double-click callback for the source tree: jump to the item's address."""
  address = item.ea
  if is_mapped(address):
    jumpto(address)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CBaseTreeViewer(PluginForm):
  """PluginForm showing a tree of source files, with one child node per
  (function, string) reference to each file."""

  def populate_tree(self, d):
    # Clear previous items
    self.tree.clear()

    # Build the tree: one top-level item per source file path, one child
    # per [ea, function name, string] entry referencing it.
    for key in d:
      src_file_item = QtWidgets.QTreeWidgetItem(self.tree)
      src_file_item.setText(0, key)
      # File-level nodes have no address to jump to.
      src_file_item.ea = BADADDR

      for ea, name, str_data in d[key]:
        item = QtWidgets.QTreeWidgetItem(src_file_item)
        item.setText(0, "%s [0x%08x] %s" % (name, ea, str_data))
        item.ea = ea

    self.tree.itemDoubleClicked.connect(handler)

  def OnCreate(self, form):
    """Called by IDA when the form is created: build the Qt widgets."""
    # Get parent widget
    self.parent = idaapi.PluginForm.FormToPyQtWidget(form)

    # Create tree control
    self.tree = QtWidgets.QTreeWidget()
    self.tree.setHeaderLabels(("Names",))
    self.tree.setColumnWidth(0, 100)

    # Scan the binary now if Show() was not given pre-computed data.
    if self.d is None:
      self.d, self.s = get_source_strings()
    d = self.d

    # Create layout
    layout = QtWidgets.QVBoxLayout()
    layout.addWidget(self.tree)
    self.populate_tree(d)

    # Populate PluginForm
    self.parent.setLayout(layout)

  def Show(self, title, d = None):
    """Show the form; *d* is an optional pre-computed source files dict."""
    self.d = d
    return PluginForm.Show(self, title, options = PluginForm.WOPN_PERSIST)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def basename(path):
  """Return the file name after the last '/' or '\\' separator.

  Unlike os.path.basename, this handles both Windows and Unix separators
  regardless of the host platform. Returns *path* unchanged when it
  contains no separator, and "" when it ends with one.
  """
  cut = max(path.rfind("/"), path.rfind("\\")) + 1
  return path[cut:]
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class command_handler_t(ida_kernwin.action_handler_t):
  """Generic popup-menu action handler that forwards activation to the
  owning object's OnCommand() method."""

  def __init__(self, obj, cmd_id, num_args = 1):
    ida_kernwin.action_handler_t.__init__(self)
    self.obj = obj
    self.cmd_id = cmd_id
    # Number of arguments obj.OnCommand expects (choosers take two).
    self.num_args = num_args

  def activate(self, ctx):
    """Dispatch to the owner with the signature it expects."""
    if self.num_args != 1:
      return self.obj.OnCommand(self.obj, self.cmd_id)
    return self.obj.OnCommand(self.cmd_id)

  def update(self, ctx):
    # The action is always available.
    return idaapi.AST_ENABLE_ALWAYS
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CIDAMagicStringsChooser(Choose):
  """Choose subclass supporting dynamically attached popup menu commands."""

  def __init__(self, title, columns, options):
    Choose.__init__(self, title, columns, options)
    # Registered commands: [cmd_id, action name, menu label, shortcut].
    self.actions = []

  def AddCommand(self, menu_name, shortcut=None):
    """Register a popup command; returns its numeric command id."""
    action_name = "IDAMagicStrings:%s" % menu_name.replace(" ", "")
    cmd_id = len(self.actions)
    self.actions.append([cmd_id, action_name, menu_name, shortcut])
    return cmd_id

  def OnPopup(self, form, popup_handle):
    """Attach every registered command to the chooser's context menu."""
    for num, action_name, menu_name, shortcut in self.actions:
      cmd_handler = command_handler_t(self, num, 2)
      desc = ida_kernwin.action_desc_t(action_name, menu_name, cmd_handler, shortcut)
      ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CSourceFilesChooser(CIDAMagicStringsChooser):
  """Chooser listing every source file referenced in the binary, one row per
  (file, referencing function, string) triple, with popup commands to rename
  functions after the source file they appear to belong to."""

  def __init__(self, title):
    columns = [ ["Line", 4], ["Full path", 20], ["Filename", 15], ["EA", 16], ["Function Name", 18], ["String data", 40], ]
    CIDAMagicStringsChooser.__init__(self, title, columns, Choose.CH_MULTI)
    self.n = 0
    self.icon = -1
    self.selcount = 0
    self.modal = False
    self.items = []           # rows: [line, full path, filename, "0x..EA", func name, string]
    self.selected_items = []  # indexes of the currently selected rows

    # Scan the binary for source file references right away.
    d, s = get_source_strings()
    keys = list(d.keys())
    keys.sort()

    i = 0
    for key in keys:
      for ea, name, str_data in d[key]:
        line = ["%03d" % i, key, basename(key), "0x%08x" % ea, name, str_data]
        self.items.append(line)
        i += 1

    self.d = d
    self.s = s

  def show(self):
    """Display the chooser and register the popup renaming commands.

    Returns the source files dict on success, False if the chooser could
    not be shown.
    """
    ret = self.Show(False)
    if ret < 0:
      return False

    self.cmd_all = self.AddCommand("Rename all to filename_EA")
    self.cmd_all_sub = self.AddCommand("Rename all sub_* to filename_EA")
    self.cmd_selected = self.AddCommand("Rename selected to filename_EA")
    self.cmd_selected_sub = self.AddCommand("Rename selected sub_* to filename_EA")
    return self.d

  def OnCommand(self, n, cmd_id):
    # Additional right-click menu command handlers: build the list of row
    # indexes to rename, then rename them.
    if cmd_id == self.cmd_all:
      l = list(range(len(self.items)))
    elif cmd_id == self.cmd_all_sub:
      l = []
      for i, item in enumerate(self.items):
        if item[4] is not None and item[4].startswith("sub_"):
          l.append(i)
    elif cmd_id == self.cmd_selected:
      l = list(self.selected_items)
    elif cmd_id == self.cmd_selected_sub:
      l = []
      for i, item in enumerate(self.items):
        # NOTE(review): unlike the cmd_all_sub branch, item[4] is not checked
        # against None here — confirm the function name can never be None.
        if item[4].startswith("sub_"):
          if i in self.selected_items:
            l.append(i)

    # NOTE(review): if cmd_id matches none of the branches above, 'l' is
    # unbound and the next line raises UnboundLocalError.
    self.rename_items(l)

  def rename_items(self, items):
    """Rename the function at each given row to <source filename>_<EA>."""
    for i in items:
      item = self.items[i]
      ea = int(item[3], 16)
      # Drop the extension: "foo.c" -> "foo".
      candidate, _ = os.path.splitext(item[2])
      name = "%s_%08x" % (candidate, ea)
      func = idaapi.get_func(ea)
      if func is not None:
        # Rename the whole function, not the referencing instruction.
        ea = func.start_ea
        set_name(ea, name, SN_CHECK)
      else:
        line = "WARNING: Cannot rename 0x%08x to %s because there is no function associated."
        print(line % (ea, name))

  def OnGetLine(self, n):
    return self.items[n]

  def OnGetSize(self):
    n = len(self.items)
    return n

  def OnDeleteLine(self, n):
    del self.items[n]
    return n

  def OnRefresh(self, n):
    return n

  def OnSelectLine(self, n):
    # Jump to the address of the first selected row.
    self.selcount += 1
    row = self.items[n[0]]
    ea = int(row[3], 16)
    if is_mapped(ea):
      jumpto(ea)

  def OnSelectionChange(self, sel_list):
    self.selected_items = sel_list
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CCandidateFunctionNames(CIDAMagicStringsChooser):
  """Chooser listing candidate real names for functions, guessed from the
  strings each function references, with popup commands to apply renames."""

  def __init__(self, title, l):
    # l: list of [ea, current function name, candidate name, strings].
    columns = [ ["Line", 4], ["EA", 16], ["Function Name", 25], ["Candidate", 25], ["FP?", 2], ["Strings", 50], ]
    CIDAMagicStringsChooser.__init__(self, title, columns, Choose.CH_MULTI)
    self.n = 0
    self.icon = -1
    self.selcount = 0
    self.modal = False
    self.items = []
    self.selected_items = []

    i = 0
    for item in l:
      bin_func = item[1]
      candidate = item[2]
      # "FP?" column: "1" when the candidate looks like a false positive.
      seems_false = str(int(self.looks_false(bin_func, candidate)))
      line = ["%03d" % i, "0x%08x" % item[0], item[1], item[2], seems_false, ", ".join(item[3]) ]
      self.items.append(line)
      i += 1

    # Show likely-correct candidates (FP? == "0") first.
    self.items = sorted(self.items, key=lambda x: x[4])

  def show(self):
    """Display the chooser and register the popup renaming commands."""
    ret = self.Show(False)
    if ret < 0:
      return False

    self.cmd_rename_all = self.AddCommand("Rename all functions")
    self.cmd_rename_sub = self.AddCommand("Rename all sub_* functions")
    self.cmd_rename_selected = self.AddCommand("Rename selected function(s)")
    self.cmd_rename_sub_sel = self.AddCommand("Rename selected sub_* function(s)")

  def OnCommand(self, n, cmd_id):
    # Additional right-click menu command handlers: build the list of row
    # indexes to rename, then rename them.
    if cmd_id == self.cmd_rename_all:
      l = list(range(len(self.items)))
    elif cmd_id == self.cmd_rename_selected:
      l = list(self.selected_items)
    elif cmd_id == self.cmd_rename_sub:
      l = []
      for i, item in enumerate(self.items):
        if item[2].startswith("sub_"):
          l.append(i)
    elif cmd_id == self.cmd_rename_sub_sel:
      l = []
      for i, item in enumerate(self.items):
        if item[2].startswith("sub_"):
          if i in self.selected_items:
            l.append(i)
    else:
      raise Exception("Unknown menu command!")

    self.rename_items(l)

  def rename_items(self, items):
    """Rename every function in the given row indexes to its candidate."""
    for i in items:
      item = self.items[i]
      ea = int(item[1], 16)
      candidate = item[3]
      set_name(ea, candidate, SN_CHECK)

  def OnGetLine(self, n):
    return self.items[n]

  def OnGetSize(self):
    n = len(self.items)
    return n

  def OnDeleteLine(self, n):
    del self.items[n]
    return n

  def OnRefresh(self, n):
    return n

  def OnSelectLine(self, n):
    # Jump to the address of the first selected row.
    self.selcount += 1
    row = self.items[n[0]]
    ea = int(row[1], 16)
    if is_mapped(ea):
      jumpto(ea)

  def OnSelectionChange(self, sel_list):
    self.selected_items = sel_list

  def looks_false(self, bin_func, candidate):
    """Heuristic: a candidate looks like a false positive when the function
    already has a real (non sub_*) name that shares no substring with it."""
    bin_func = bin_func.lower()
    candidate = candidate.lower()
    if not bin_func.startswith("sub_"):
      if bin_func.find(candidate) == -1 and candidate.find(bin_func) == -1:
        return True
    return False

  def OnGetLineAttr(self, n):
    # Highlight probable false positives with a colored background.
    item = self.items[n]
    bin_func = item[2]
    candidate = item[3]
    if self.looks_false(bin_func, candidate):
      return [0x026AFD, 0]
    return [0xFFFFFF, 0]
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CClassXRefsChooser(idaapi.Choose):
  """Minimal chooser listing (address, string) cross-reference pairs."""

  def __init__(self, title, items):
    columns = [ ["Address", 8], ["String", 80] ]
    idaapi.Choose.__init__(self, title, columns)
    self.items = items

  def OnGetLine(self, n):
    return self.items[n]

  def OnGetSize(self):
    return len(self.items)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def get_string(ea):
  """Read the string literal at *ea*, preferring the longer of the C and
  Unicode representations.

  Returns a str, or "" when there is no string at *ea*.
  """
  tmp = idc.get_strlit_contents(ea, strtype=0)
  if tmp is None or len(tmp) == 1:
    # Fall back to a Unicode read when the C read failed or looks like the
    # first character of a wide string.
    unicode_tmp = idc.get_strlit_contents(ea, strtype=1)
    # BUG FIX: the original compared len(unicode_tmp) > len(tmp) even when
    # tmp was None, raising TypeError; treat a missing C string as length 0.
    if unicode_tmp is not None and len(unicode_tmp) > (len(tmp) if tmp is not None else 0):
      tmp = unicode_tmp

  if tmp is None:
    tmp = ""
  elif not isinstance(tmp, str):
    # get_strlit_contents returns bytes in IDA 7.x Python 3.
    tmp = tmp.decode("utf-8")
  return tmp
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def classes_handler(item, column_no):
  """Double-click callback for the classes tree: jump only on leaf nodes."""
  if item.childCount() != 0:
    return
  ea = item.ea
  if is_mapped(ea):
    jumpto(ea)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CClassesTreeViewer(PluginForm):
  """PluginForm showing the discovered class hierarchy as a tree, one node
  per namespace/class path component."""

  def populate_tree(self):
    # Clear previous items
    self.tree.clear()
    self.nodes = {}  # full qualified name -> QTreeWidgetItem

    # Sort by the top-most component so parent nodes get created first.
    self.classes = sorted(self.classes, key=lambda x: x[1][0])
    for ea, tokens in self.classes:
      for i, node_name in enumerate(tokens):
        # Qualified name of this node: every token up to and including it.
        # NOTE(review): tokens.index() returns the FIRST occurrence, so a
        # repeated component (e.g. ns::ns::f) computes a wrong prefix —
        # the loop index i looks like what was intended.
        full_name = "::".join(tokens[:tokens.index(node_name)+1])
        if full_name not in self.nodes:
          if full_name.find("::") == -1:
            # Top-level component: attach directly to the tree widget.
            parent = self.tree
          else:
            parent_name = "::".join(tokens[:tokens.index(node_name)])
            try:
              parent = self.nodes[parent_name]
            except:
              # NOTE(review): on a missing parent, 'parent' keeps its value
              # from a previous iteration (or is unbound on the first one),
              # so the node below may land under the wrong parent.
              print("Error adding node?", self.nodes, parent_name, str(sys.exc_info()[1]))

          node = QtWidgets.QTreeWidgetItem(parent)
          node.setText(0, full_name)
          node.ea = ea
          self.nodes[full_name] = node

    self.tree.itemDoubleClicked.connect(classes_handler)

  def OnCreate(self, form):
    """Called by IDA when the form is created: build the Qt widgets."""
    # Get parent widget
    self.parent = idaapi.PluginForm.FormToPyQtWidget(form)

    # Create tree control
    self.tree = QtWidgets.QTreeWidget()
    self.tree.setHeaderLabels(("Classes",))
    self.tree.setColumnWidth(0, 100)

    # Create layout
    layout = QtWidgets.QVBoxLayout()
    layout.addWidget(self.tree)
    self.populate_tree()

    # Populate PluginForm
    self.parent.setLayout(layout)

  def Show(self, title, classes):
    """Show the form; *classes* is a list of [ea, name tokens] entries."""
    self.classes = classes
    return PluginForm.Show(self, title, options = PluginForm.WOPN_PERSIST)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CClassesGraph(idaapi.GraphViewer):
  """GraphViewer showing the class hierarchy. Its constructor also extends
  *final_list* in place with rename candidates for functions that are the
  sole referencer of a class-name string."""

  def __init__(self, title, classes, final_list):
    idaapi.GraphViewer.__init__(self, title)
    self.selected = None
    self.classes = classes        # list of [ea, name tokens]
    self.final_list = final_list  # extended in place below
    self.nodes = {}               # full qualified name -> node id
    self.nodes_ea = {}            # node id -> set of string EAs
    self.graph = {}               # node id -> list of child node ids

    self.last_cmd = 0

    # For every class string referenced from exactly one function, propose
    # renaming that function to the class name.
    dones = set()
    for ea, tokens in self.classes:
      refs = DataRefsTo(ea)
      refs_funcs = set()
      for ref in refs:
        func = idaapi.get_func(ref)
        if func is not None:
          refs_funcs.add(func.start_ea)

      if len(refs_funcs) == 1:
        func_ea = list(refs_funcs)[0]
        # Propose at most one rename per function.
        if func_ea in dones:
          continue
        dones.add(func_ea)

        func_name = get_func_name(func_ea)
        tmp = demangle_name(func_name, INF_SHORT_DN)
        if tmp is not None:
          func_name = tmp

        element = [func_ea, func_name, "::".join(tokens), [get_string(ea)]]
        self.final_list.append(element)

  def OnRefresh(self):
    """(Re)build the graph: one node per namespace/class path component,
    edges from each component to the next."""
    self.Clear()
    self.graph = {}
    for ea, tokens in self.classes:
      for node_name in tokens:
        # NOTE(review): tokens.index() finds the first occurrence only, so
        # a repeated component in a qualified name computes a wrong prefix.
        full_name = "::".join(tokens[:tokens.index(node_name)+1])
        if full_name not in self.nodes:
          node_id = self.AddNode(node_name)
          self.nodes[full_name] = node_id
          self.graph[node_id] = []
        else:
          node_id = self.nodes[full_name]

        # Remember every string EA that mentions this node.
        try:
          self.nodes_ea[node_id].add(ea)
        except KeyError:
          self.nodes_ea[node_id] = set([ea])

        # Link this node to its parent (the preceding path components).
        parent_name = "::".join(tokens[:tokens.index(node_name)])
        if parent_name != "" and parent_name in self.nodes:
          parent_id = self.nodes[parent_name]
          self.AddEdge(parent_id, node_id)
          self.graph[parent_id].append(node_id)

    return True

  def OnGetText(self, node_id):
    return str(self[node_id])

  def OnDblClick(self, node_id):
    """Jump to the node's single EA, or open a chooser when the node is
    referenced from several places."""
    eas = self.nodes_ea[node_id]
    if len(eas) == 1:
      jumpto(list(eas)[0])
    else:
      items = []
      for ea in eas:
        func = idaapi.get_func(ea)
        if func is None:
          s = get_strlit_contents(ea)
          # NOTE(review): s is decoded before the None check below, so a
          # missing string literal raises AttributeError here.
          s = s.decode("utf-8")
          if s is not None and s.find(str(self[node_id])) == -1:
            # The C read doesn't contain the node name: try Unicode.
            s = get_strlit_contents(ea, strtype=1)
          else:
            s = GetDisasm(ea)
        else:
          s = get_func_name(func.start_ea)

        items.append(["0x%08x" % ea, repr(s)])

      chooser = CClassXRefsChooser("XRefs to %s" % str(self[node_id]), items)
      idx = chooser.Show(1)
      if idx > -1:
        jumpto(list(eas)[idx])

  def OnCommand(self, cmd_id):
    """Handle the popup commands: export the graph to Graphviz or GML."""
    if self.cmd_dot == cmd_id:
      fname = ask_file(1, "*.dot", "Dot file name")
      if fname:
        f = open(fname, "w")
        buf = 'digraph G {\n graph [overlap=scale]; node [fontname=Courier]; \n\n'
        for n in self.graph:
          name = str(self[n])
          buf += ' a%s [shape=box, label = "%s", color="blue"]\n' % (n, name)
        buf += '\n'

        # Emit each edge only once.
        dones = set()
        for node_id in self.graph:
          for child_id in self.graph[node_id]:
            s = str([node_id, child_id])
            if s in dones:
              continue
            dones.add(s)
            buf += " a%s -> a%s [style = bold]\n" % (node_id, child_id)

        buf += '\n'
        buf += '}'
        f.write(buf)
        f.close()
    elif self.cmd_gml == cmd_id:
      fname = ask_file(1, "*.gml", "GML file name")
      if fname:
        f = open(fname, "w")
        buf = 'graph [ \n'
        for n in self.graph:
          name = str(self[n])
          buf += 'node [ id %s \n label "%s"\n fill "blue" \n type "oval"\n LabelGraphics [ type "text" ] ] \n' % (n, name)
        buf += '\n'

        # Emit each edge only once.
        dones = set()
        for node_id in self.graph:
          for child_id in self.graph[node_id]:
            s = str([node_id, child_id])
            if s in dones:
              continue
            dones.add(s)
            buf += " edge [ source %s \n target %s ]\n" % (node_id, child_id)

        buf += '\n'
        buf += ']'
        f.write(buf)
        f.close()

  def OnPopup(self, form, popup_handle):
    # Attach the export commands to the context menu every time it opens.
    self.cmd_dot = 0
    cmd_handler = command_handler_t(self, self.cmd_dot)
    desc = ida_kernwin.action_desc_t("IDAMagicStrings:GraphvizExport", "Export to Graphviz",
                                     cmd_handler, "F2")
    ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)

    self.cmd_gml = 1
    cmd_handler = command_handler_t(self, self.cmd_gml)
    desc = ida_kernwin.action_desc_t("IDAMagicStrings:GmlExport","Export to GML",
                                     cmd_handler, "F3")
    ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)

  def OnClick(self, item):
    self.selected = item
    return True

  def Show(self):
    if not idaapi.GraphViewer.Show(self):
      return False
    return True
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def show_tree(d = None):
  """Open the source code tree viewer, optionally with pre-computed data."""
  viewer = CBaseTreeViewer()
  viewer.Show(PROGRAM_NAME + ": Source code tree", d)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def seems_function_name(candidate):
  """Heuristic filter: True when *candidate* plausibly names a function.

  Rejects short tokens, known non-function words, and all-uppercase tokens
  (likely constants or macros).
  """
  return (len(candidate) >= 6
          and candidate.lower() not in NOT_FUNCTION_NAMES
          and candidate.upper() != candidate)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CFakeString:
  """Minimal stand-in for an IDA Strings item: an address plus its text,
  so demangled symbol names can flow through the same string pipeline."""

  def __init__(self, ea, s):
    self.ea = ea
    self.s = s

  def __str__(self):
    return str(self.s)

  def __repr__(self):
    return str(self)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def find_function_names(strings_list):
  """Mine function and class name candidates from the binary's strings.

  Combines *strings_list* with demangled C++ symbol names already present
  in the IDB, then matches every item against the class and function name
  regular expressions.

  Returns (func_names, raw_func_strings, rarity, class_objects):
    func_names       -- function EA -> set of candidate names
    raw_func_strings -- function EA -> set of strings that produced them
    rarity           -- candidate name -> set of function EAs using it
    class_objects    -- list of [string EA, qualified-name tokens]
  """
  rarity = {}
  func_names = {}
  raw_func_strings = {}
  class_objects = []

  # Seed the list with C++-looking (i.e. "::"-qualified) names of existing
  # functions, wrapped so they look like string items.
  class_tmp_names = []
  for ea, name in Names():
    func = idaapi.get_func(ea)
    if func is None:
      continue

    true_name = name
    if name.find("::") == -1:
      name = demangle_name(name, INF_SHORT_DN)
      if name is not None and name != "" and name.find("::") > -1:
        true_name = name

    if true_name.find("::") > -1:
      s = CFakeString(ea, true_name)
      class_tmp_names.append(s)

  class_tmp_names.extend(strings_list)
  for s in class_tmp_names:
    # Find class members
    class_ret = re.findall(CLASS_NAMES_REGEXP, str(s), re.IGNORECASE)
    if len(class_ret) > 0:
      for element in class_ret:
        candidate = element[0]
        if candidate.find("::") > 0:
          tokens = candidate.split("::")
          # NOTE(review): class_objects holds [ea, tokens] pairs, so this
          # membership test against bare token lists is always True and
          # duplicates are not actually filtered — confirm intent.
          if tokens not in class_objects:
            class_objects.append([s.ea, tokens])

    # Find just function names
    ret = re.findall(FUNCTION_NAMES_REGEXP, str(s), re.IGNORECASE)
    if len(ret) > 0:
      candidate = ret[0][0]
      if seems_function_name(candidate):
        ea = s.ea
        refs = DataRefsTo(ea)
        found = False
        for ref in refs:
          func = idaapi.get_func(ref)
          if func is not None:
            found = True
            key = func.start_ea

            # With NLTK available, only keep candidates tagged as a noun,
            # adjective or verb (see TOKEN_TYPES).
            if has_nltk:
              if candidate not in FOUND_TOKENS:
                continue

              found = False
              for tkn_type in TOKEN_TYPES:
                if tkn_type in FOUND_TOKENS[candidate]:
                  found = True
                  break

              if not found:
                continue

            # candidate -> set of functions it appears in (its "rarity").
            try:
              rarity[candidate].add(key)
            except KeyError:
              rarity[candidate] = set([key])

            # function -> set of candidate names found for it.
            try:
              func_names[key].add(candidate)
            except KeyError:
              func_names[key] = set([candidate])

            # function -> raw strings the candidates came from.
            try:
              raw_func_strings[key].add(str(s))
            except:
              raw_func_strings[key] = set([str(s)])

  return func_names, raw_func_strings, rarity, class_objects
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def show_function_names(strings_list):
  """Guess function names from *strings_list* and show the result viewers:
  the classes graph/tree (when C++ class names were found) and the
  candidate function names chooser."""
  l = find_function_names(strings_list)
  func_names, raw_func_strings, rarity, classes = l

  # Keep only functions with exactly one candidate, where that candidate
  # appears in exactly one function binary-wide (rarity == 1).
  final_list = []
  for key in func_names:
    candidates = set()
    for candidate in func_names[key]:
      if len(rarity[candidate]) == 1:
        candidates.add(candidate)

    if len(candidates) == 1:
      raw_strings = list(raw_func_strings[key])
      raw_strings = list(map(repr, raw_strings))

      func_name = get_func_name(key)
      tmp = demangle_name(func_name, INF_SHORT_DN)
      if tmp is not None:
        func_name = tmp
      final_list.append([key, func_name, list(candidates)[0], raw_strings])

  if len(classes) > 0:
    class_graph = CClassesGraph(PROGRAM_NAME + ": Classes Hierarchy", classes, final_list)
    class_graph.Show()

    class_tree = CClassesTreeViewer()
    class_tree.Show(PROGRAM_NAME + ": Classes Tree", classes)

    # CClassesGraph.__init__ extends the list with class-based candidates.
    final_list = class_graph.final_list

  if len(final_list) > 0:
    cfn = CCandidateFunctionNames(PROGRAM_NAME + ": Candidate Function Names", final_list)
    cfn.show()
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def main():
  """Entry point: show the source files chooser, the source tree and the
  candidate function names derived from the binary's strings."""
  chooser = CSourceFilesChooser(PROGRAM_NAME + ": Source code files")
  if len(chooser.items) > 0:
    chooser.show()

  d = chooser.d
  if len(d) > 0:
    show_tree(d)

  show_function_names(chooser.s)

if __name__ == "__main__":
  main()
|
||||
@@ -1,153 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
#################################################################
|
||||
#### CodeCut - Detecting Object File Boundaries in IDA Pro ####
|
||||
#################################################################
|
||||
|
||||
**** Terminology ****
|
||||
|
||||
I tend to use the term "module" for a set of related functions within a binary
|
||||
that came from a single object file. So you will see the terms "module" and
|
||||
"object file" used interchangeabley in the CC source and documentation.
|
||||
|
||||
**** Dependencies ****
|
||||
|
||||
CodeCut relies on:
|
||||
Natural Language Toolkit (NLTK) - https://www.nltk.org
|
||||
Snap.py - https://snap.stanford.edu/snappy/
|
||||
|
||||
**** Source Files ****
|
||||
|
||||
cc_main.py - Main entry point - simply load this up with the
|
||||
"File -> Script file..." option in IDA.
|
||||
|
||||
lfa.py - Analysis engine for LFA.
|
||||
|
||||
mc.py - Analysis engine for MaxCut.
|
||||
|
||||
basicutils_7x.py - Provides an API to IDA - maybe one day we'll get this
|
||||
ported to Ghidra!
|
||||
|
||||
map_read.py - For research purposes - compares a ground truth .map
|
||||
file (from ld) to a .map file from CC and produces
|
||||
a score. See RECON slides or the code itself for more
|
||||
info. You need to add the option -Map=<target>.map to
|
||||
the linker options in a Makefile to get a .map file.
|
||||
|
||||
The syntax to map_read is:
|
||||
python map_read.py <ground truth file> <CC map file>
|
||||
|
||||
**** MaxCut Parameters ****
|
||||
|
||||
- Right now there is only one parameter for MaxCut, a value for the maximum
|
||||
module size (currently set to 16K).
|
||||
|
||||
|
||||
**** LFA Parameters & Interpolation ****
|
||||
|
||||
A couple areas for research:
|
||||
|
||||
- The idea behind LFA is that we throw out "external" calls - we can't
|
||||
determine this exactly in a binary so we throw out calls that are above a
|
||||
certain threshold. This is set to 4K in the code but it could be tweaked.
|
||||
|
||||
- There is a threshold set for edge detection - plus a little bit of extra
|
||||
logic (value has to be positive and 2 of last 3 values were negative). You
|
||||
can either vary this threshold or write your own edge_detect() function.
|
||||
|
||||
- Currently "calls to" affinity and "calls from" affinity are treated as
|
||||
separate scores. If one of these scores is zero an interpolation from
|
||||
the previous score is used - just a simple linear equation assuming
|
||||
decreasing scores. This could be improved a number of ways but could
|
||||
be replaced with an actual interpolation between scores.
|
||||
|
||||
- If both "calls to" affinity and "calls from" affinity for a function are 0
|
||||
the function is skipped and is essentially treated like it's not there.
|
||||
This happens for functions with no references or where all references are
|
||||
above the "external" threshold. This means there can be gaps between the
|
||||
modules in the output list.
|
||||
|
||||
- The portion of code that tries to name object files based on common strings
|
||||
is completely researchy and open ended. Lots of things to play with there.
|
||||
|
||||
**** MaxCut Parameters & Interpolation ****
|
||||
|
||||
- The only real parameter for MaxCut is a THRESHOLD variable that corresponds to the size at which the algorithm will stop subdividing modules. A threshold of 4K (0x1000) seems to provide similar sized modules to LFA. A threshold of 8K (0x2000) seems to be a good upper bound. A good area of research would be making this not a static cutoff but maybe deciding to stop subdividing based on a connectedness measurement or something along those lines.
|
||||
|
||||
**** Output Files ****
|
||||
|
||||
CodeCut produces 7 files:
|
||||
|
||||
<target>_cc_results.csv - Raw score output from LFA and MaxCut, including where
|
||||
edges are detected. Graphs can fairly easily be
|
||||
generated in your favorite spreadsheet program.
|
||||
|
||||
<target>_{lfa,mc}_labels.py - Script that can be used to label your DB with CC's
|
||||
output. After determining module boundaries, CC
|
||||
attempts to guess the name (fun!) by looking at
|
||||
common strings used by the module, for both the
|
||||
LFA and MaxCut module lists. You can use this
|
||||
script as a scratchpad to name unnamed modules as you
|
||||
determine what they are, or you can also use other
|
||||
functions in basicutils to change module names later.
|
||||
|
||||
<target>_{lfa,mc}_map.map - A .map file similar to the output from the ld. This is
|
||||
for the purposes of comparing to a ground truth .map
|
||||
file to test CC when you have source code.
|
||||
|
||||
<target>_{lfa,mc}_mod_graph.gv - a Graphviz graph file of the module relationships
|
||||
This is a directed graph where a -> b indicates
|
||||
that a function in module a calls a function in
|
||||
module b. This may take a long time to render if
|
||||
you have a large binary (more than a couple
|
||||
hundred modules detected). For smaller binaries
|
||||
this can pretty clearly communicate the software
|
||||
architecture immediately. For larger binaries
|
||||
this will show you graphically the most heavily
|
||||
used modules in the binary.
|
||||
|
||||
You can use sfdp to render the graph into a PNG file with a command line like:
|
||||
|
||||
sfdp -x -Goverlap=scale -Tpng -Goutputorder=edgesfirst -Nstyle=filled -Nfillcolor=white <target>_lfa_mod_graph.gv > <target>.png
|
||||
|
||||
A really nice hierarchical graph can be obtained by adding:
|
||||
ranksep=0
|
||||
nodesep=0
|
||||
to the .gv file and running:
|
||||
|
||||
dot -x -Goverlap=scale -Tpng -Goutputorder=edgesfirst -Nstyle=filled -Nfillcolor=white <target>.gv > <target>.png
|
||||
|
||||
**** "Canonical" Names ****
|
||||
NOTE on IDA and Canonical Names:
|
||||
AFAICT IDA doesn't really have a concept of source file / object files in
|
||||
the database (it does with source-level debugging but that's it I think).
|
||||
In my ideal world, I'd write a nice GUI plugin to manage the object file
|
||||
names and regions, and then you'd be able to select how to display object/
|
||||
function names in the disassembly. For now though I have to save both the
|
||||
object name and function name in the filename.
|
||||
|
||||
For now, my hacky workaround is to name modules and functions in camel case
|
||||
(e.g. ReadNetworkString, or HtmlParsingEngine), and then combine them together
|
||||
in a nasty snake case "canonical" format, that looks like:
|
||||
|
||||
<ObjectName>_<FunctionName>_<Address>
|
||||
|
||||
That way I can parse out function and object names to be able to rename
|
||||
objects. I am open to suggestions on better ways to do this.
|
||||
|
||||
@@ -1,366 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
# basicutils - a version-agnostic API for IDA Pro with some (slightly) higher level functionality
|
||||
# This is the 7.x version - see basicutils_6x for the 6.x version
|
||||
import os
|
||||
|
||||
import ida_bytes
|
||||
import ida_funcs
|
||||
import ida_nalt
|
||||
import ida_ua
|
||||
import ida_name
|
||||
import idc
|
||||
import struct
|
||||
import idautils
|
||||
import ida_idaapi
|
||||
import ida_segment
|
||||
import re
|
||||
|
||||
BADADDR = ida_idaapi.BADADDR
|
||||
|
||||
def SegByName(n):
    """Return (start_ea, end_ea) for the segment named `n`.

    Returns (BADADDR, BADADDR) when the segment is missing or invalid.
    """
    seg = ida_segment.get_segm_by_name(n)
    if seg is None or seg.start_ea == ida_idaapi.BADADDR:
        return (ida_idaapi.BADADDR, ida_idaapi.BADADDR)
    return (seg.start_ea, seg.end_ea)
|
||||
|
||||
def GetFunctionName(x):
    """Return the name of the function containing address `x`."""
    return idc.get_func_name(x)
|
||||
|
||||
def GetInputFile():
    """Return the base filename of the binary loaded into the database."""
    return idc.get_root_filename()
|
||||
|
||||
def GetIdbFile():
    """Return the full path of the current IDA database (.idb/.i64)."""
    return idc.get_idb_path()
|
||||
|
||||
def GetRootName():
    """Return the input-file basename joined onto the IDB's directory.

    Used as the prefix for all CodeCut output files.
    """
    idb_dir = os.path.dirname(GetIdbFile())
    return os.path.join(idb_dir, os.path.basename(GetInputFile()))
|
||||
|
||||
def NextFunction(x):
    """Return the start of the next function after address `x`."""
    return idc.get_next_func(x)
|
||||
|
||||
def PrevFunction(x):
    """Return the start of the previous function before address `x`."""
    return idc.get_prev_func(x)
|
||||
|
||||
MAX_OPCODE_LEN = 15

def PrevInstr(ea):
    """Return the address of the instruction preceding `ea`.

    TODO: decode_prev_insn fills an insn_t; only the address is used here.
    """
    insn = ida_ua.insn_t()
    ida_ua.decode_prev_insn(insn, ea)
    return insn.ea
|
||||
|
||||
def CodeRefsTo(target):
    """Yield code references to `target` (no flow refs)."""
    return idautils.CodeRefsTo(target, 0)
|
||||
|
||||
def ForEveryUniqXrefTo(target, fun):
    """Call fun(xref) for code refs to `target`, skipping consecutive refs
    that come from the same calling function."""
    prev_func = 0
    for xref in idautils.CodeRefsTo(target, 0):
        caller = idc.get_func_attr(xref, idc.FUNCATTR_START)
        if caller != prev_func:
            fun(xref)
        prev_func = caller
|
||||
|
||||
def ForEveryXrefTo(target, fun):
    """Call fun(xref) for every code reference to `target`."""
    for ref in idautils.CodeRefsTo(target, 0):
        fun(ref)
|
||||
|
||||
def ForEveryUniqXrefToD(target, fun):
    """Like ForEveryUniqXrefTo, but fun also receives the target address:
    fun(xref, target)."""
    prev_func = 0
    for xref in idautils.CodeRefsTo(target, 0):
        caller = idc.get_func_attr(xref, idc.FUNCATTR_START)
        if caller != prev_func:
            fun(xref, target)
        prev_func = caller
|
||||
|
||||
def ForEveryXrefToD(target, fun):
    """Call fun(xref, target) for every code reference to `target`."""
    for ref in idautils.CodeRefsTo(target, 0):
        fun(ref, target)
|
||||
|
||||
def ForEveryFuncInDb(fun):
    """Apply `fun` to the start address of every function in the database."""
    ea = NextFunction(0)
    while ea != ida_idaapi.BADADDR:
        fun(ea)
        ea = NextFunction(ea)
|
||||
|
||||
def ForEveryFuncInSeg(seg, fun):
    """Apply `fun` to every function in segment `seg`.

    Falls back to iterating from the first function in the database when the
    segment cannot be found (end stays at BADADDR, so the loop runs until
    NextFunction returns BADADDR).

    Bug fixes: the `seg` argument was previously ignored (".text" was
    hard-coded), and a leftover debug print(f) spammed the console.
    """
    start, end = SegByName(seg)
    if start == BADADDR:
        start = NextFunction(0)
        end = BADADDR
    f = start
    while f < end:
        fun(f)
        f = NextFunction(f)
|
||||
|
||||
|
||||
def NFuncUp(fun, n):
    """Return the function start `n` functions before `fun`.

    Stops early (returning BADADDR) if the walk runs off the start of the DB.
    """
    cur = fun
    for _ in range(n):
        if cur == ida_idaapi.BADADDR:
            break
        cur = PrevFunction(cur)
    return cur
|
||||
|
||||
def NFuncDown(fun, n):
    """Return the function start `n` functions after `fun`.

    Stops early (returning BADADDR) if the walk runs off the end of the DB.
    """
    cur = fun
    for _ in range(n):
        if cur == ida_idaapi.BADADDR:
            break
        cur = NextFunction(cur)
    return cur
|
||||
|
||||
def FuncMidPt(fun):
    """Return the midpoint address of the function containing `fun`.

    Bug fix: use floor division — true division returns a float in Python 3,
    which is not a valid address.
    """
    fstart = idc.get_func_attr(fun, idc.FUNCATTR_START)
    fend = idc.get_func_attr(fun, idc.FUNCATTR_END)
    return fstart + ((fend - fstart) // 2)
|
||||
|
||||
|
||||
def FuncXrefsFrom(fun):
    """Return the set of function start addresses called from `fun`.

    Walks every instruction of the function and keeps only outgoing code
    references that land exactly on a function start (i.e. real calls,
    not intra-function jumps).
    """
    targets = set()
    for item in idautils.FuncItems(fun):
        for ref in idautils.CodeRefsFrom(item, 0):
            if ref == idc.get_func_attr(ref, idc.FUNCATTR_START):
                targets.add(ref)
    return targets
|
||||
|
||||
def XrefFromRange(fun):
    """Return (lowest, highest) call-target address of `fun`, or (0, 0)
    when the function makes no calls."""
    refs = FuncXrefsFrom(fun)
    return (min(refs), max(refs)) if refs else (0, 0)
|
||||
|
||||
def ProgramAddrRange():
    """Return the span from the first to the last function in the database."""
    last = ida_funcs.get_prev_func(ida_idaapi.BADADDR)
    first = ida_funcs.get_next_func(0)
    return last - first
|
||||
|
||||
def MemCopy(dest, src, length):
    """Copy `length` bytes inside the IDB from `src` to `dest` by patching."""
    for off in range(length):
        ida_bytes.patch_byte(dest + off, ida_bytes.get_byte(src + off))
|
||||
|
||||
def PrefixRange(start, end, prefix):
    """Prepend `prefix` to the name of every unnamed (sub_*) function
    in the half-open range [start, end)."""
    ea = start
    while ea < end:
        name = idc.get_func_name(ea)
        if name.startswith("sub_"):
            new_name = prefix + name
            print("Renaming %s to %s\n" % (name, new_name))
            ida_name.set_name(ea, new_name)
        ea = NextFunction(ea)
|
||||
|
||||
|
||||
def snakeToCamelCase(s):
    """Convert a snake_case identifier to camelCase.

    Leading underscores are stripped; each remaining underscore is removed
    and the character after it is upper-cased (e.g. "foo_bar" -> "fooBar").

    Bug fixes: a trailing underscore previously raised IndexError (it is now
    simply dropped), and a dead counter variable was removed.
    """
    stripped = s.lstrip("_")
    pieces = []
    i = 0
    while i < len(stripped):
        if stripped[i] == "_":
            # Guard: a trailing '_' has no following character to capitalize.
            if i + 1 < len(stripped):
                pieces.append(stripped[i + 1].upper())
            i += 2
        else:
            pieces.append(stripped[i])
            i += 1
    return "".join(pieces)
|
||||
|
||||
def isSnakeCase(s):
    """Return True if `s` is snake_case: two or more alphanumeric chunks
    joined by single underscores (e.g. "read_network_string").

    Bug fix: the pattern is now a raw string — "\\Z" in a plain string is an
    invalid escape sequence (DeprecationWarning, error in future Pythons).
    """
    return bool(re.match(r"[a-zA-Z0-9]+(_[a-zA-Z0-9]+)+\Z", s))
|
||||
|
||||
# Todo - right now this is going to miss something like FooBARFunction
def isCamelCase(s):
    """Return True if `s` is CamelCase: two or more chunks, each an
    uppercase letter followed by lowercase/digits (e.g. "FooBar").

    Bug fix: raw-string regex — "\\Z" in a plain string is an invalid escape.
    """
    return bool(re.match(r"([A-Z][a-z0-9]+)([A-Z][a-z0-9]+)+\Z", s))
|
||||
|
||||
# Todo - weed out if it's all uppercase or all uppercase and _, etc.
def isUCSnakeCase(s):
    """Return True if `s` is UPPER_SNAKE_CASE (constant-style): two or more
    uppercase/digit chunks joined by underscores (e.g. "MAX_LEN").

    Bug fix: raw-string regex — "\\Z" in a plain string is an invalid escape.
    """
    return bool(re.match(r"[A-Z0-9]+(_[A-Z0-9]+)+\Z", s))
|
||||
|
||||
def isPlausibleFunction(s):
    """Heuristic: does `s` look like a real function name?

    snake_case qualifies unless it is ALL_CAPS (likely a constant);
    CamelCase qualifies; anything else does not.
    """
    if isSnakeCase(s):
        return not isUCSnakeCase(s)
    return isCamelCase(s)
|
||||
|
||||
def PrependStrToFuncName(f, s):
    """Prepend string `s` to the name of the function at address `f`."""
    ida_name.set_name(f, s + idc.get_func_name(f))
|
||||
|
||||
# The "canonical" name format (for now) is <module name>_<func name>_<address>
# where <module_name> and <func_name> are in camel case.
# This is not ideal for a number of reasons but this is a workaround for now

def GetCanonicalName(f):
    """Return just the <FunctionName> part of a canonical name, or None
    if the name at `f` does not split into exactly three '_' parts."""
    parts = idc.get_func_name(f).split("_")
    return parts[1] if len(parts) == 3 else None
|
||||
|
||||
# Put function in canonical format, given the function name and module name
def NameCanonical(f, mod_name, func_name):
    """Rename the function at `f` to <mod_name>_<func_name>_<address>."""
    canonical = "%s_%s_%08x" % (mod_name, func_name, f)
    print("Renaming %s to %s\n" % (idc.get_func_name(f), canonical))
    ida_name.force_name(f, canonical)
|
||||
|
||||
# Put function in canonical format when it doesn't have a name, but you know the module name
def RenameFuncWithAddr(f, s):
    """Canonical-rename an unnamed function, using "unk" as the func part."""
    NameCanonical(f, s, "unk")
|
||||
|
||||
# Use this if you have pre-existing named functions in the DB that are in non-canonical format
def RenameRangeWithAddr(start, end, s):
    """Canonical-rename every function in [start, end] under module name `s`.

    Unnamed (sub_*) functions get the "unk" placeholder; named ones keep
    their existing name as the function part.
    """
    ea = start
    while ea <= end:
        name = idc.get_func_name(ea)
        if name.startswith("sub_"):
            RenameFuncWithAddr(ea, s)
        else:
            NameCanonical(ea, s, name)
        ea = NextFunction(ea)
|
||||
|
||||
# Rename a function in canonical format without changing the module name
def CanonicalFuncRename(f, name):
    """Replace the function part of `f`'s canonical name with `name`."""
    old = idc.get_func_name(f)
    mod = old.split("_")[0]
    new_name = "%s_%s_%08x" % (mod, name, f)
    print("Renaming %s to %s\n" % (old, new_name))
    ida_name.set_name(f, new_name)
|
||||
|
||||
# Rename the module name without changing the function name
def RenameFuncWithNewMod(f, mod):
    """Replace the module part of `f`'s canonical name with `mod`."""
    old = idc.get_func_name(f)
    func_part = old.split("_")[1]
    new_name = "%s_%s_%08x" % (mod, func_part, f)
    print("Renaming %s to %s\n" % (old, new_name))
    ida_name.set_name(f, new_name)
|
||||
|
||||
# Rename a module (all functions that start with <mod>_)
def RenameMod(orig, new):
    """Rename every function whose canonical name begins with <orig>_ so
    that its module part becomes `new`."""
    ea = idc.get_next_func(0)
    while ea != BADADDR:
        if idc.get_func_name(ea).startswith(orig + "_"):
            RenameFuncWithNewMod(ea, new)
        ea = NextFunction(ea)
|
||||
|
||||
# Just rename the module over a given range (can be used to split a module and give part a new name)
def RenameModRange(start, end, new):
    """Set the module part of every function in [start, end] to `new`.

    Fix: removed an unused local (`n` was fetched and never read).
    """
    ea = start
    while ea <= end:
        RenameFuncWithNewMod(ea, new)
        ea = NextFunction(ea)
|
||||
|
||||
# Given a range of functions, some of which may have names and module names,
# and a module name, put names in canonical format
def CanonicalizeRange(start, end, mod):
    """Put every function name in [start, end] into canonical
    <mod>_<Func>_<addr> form.

    Names already prefixed with <mod>_ are assumed canonical and skipped;
    sub_* names get the "unk" placeholder; snake_case names are converted
    to camelCase first.
    """
    ea = start
    while ea <= end:
        name = idc.get_func_name(ea)
        if not name.startswith(mod + "_"):
            if name.startswith("sub_"):
                RenameFuncWithAddr(ea, mod)
            elif "_" in name:
                NameCanonical(ea, mod, snakeToCamelCase(name))
            else:
                NameCanonical(ea, mod, name)
        ea = NextFunction(ea)
|
||||
|
||||
# Returns a string that is the concatenation of all of the string references
# from a function, separated by <sep>.
# Iterates through every item in the function and looks for data references
# that are strings.
def CompileTextFromFunction(f, sep):
    """Concatenate all string literals referenced by function `f`.

    Bug fix: this called idc.GetStrLitContents, which no longer exists in
    the IDA 7 idc module, and concatenated bytes onto a str. It now uses
    the module-level GetStrLitContents helper and decodes, consistent with
    CompileTextFromRange.
    """
    s = ""
    for item in idautils.FuncItems(f):
        for ref in idautils.DataRefsFrom(item):
            t = ida_nalt.get_str_type(ref)
            # 0 = C string, 3 = C 16-bit string
            if (t == 0) or (t == 3):
                s += " " + sep + " " + GetStrLitContents(ref).decode("utf-8")
    return s
|
||||
|
||||
# Returns a string which is the concatenation of all of the string references
# for an address range in the program, separated by <sep>.
# Similar to CompileTextFromFunction, but iterates over the whole set of
# functions in the given range.
def CompileTextFromRange(start, end, sep):
    """Concatenate all string literals referenced by functions in
    [start, end], separated by `sep`."""
    out = ""
    ea = start
    while ea <= end:
        for item in idautils.FuncItems(ea):
            for ref in idautils.DataRefsFrom(item):
                stype = ida_nalt.get_str_type(ref)
                # 0 = C string, 3 = C 16-bit string
                if stype in (0, 3):
                    out += " " + sep + " " + GetStrLitContents(ref).decode("utf-8")
        ea = NextFunction(ea)
    return out
|
||||
|
||||
# Returns a string which is a concatenation of all the function names in the
# given range, separated by <sep>
def CompileFuncNamesFromRangeAsText(start, end, sep):
    """Concatenate the names of all named (non-sub_*) functions in
    [start, end], separated by `sep`."""
    out = ""
    ea = start
    while ea <= end:
        name = idc.get_func_name(ea)
        if not name.startswith("sub_"):
            out += " " + sep + " " + name
        ea = NextFunction(ea)
    return out
|
||||
|
||||
# helper function which checks for both ASCII and Unicode strings at the given ea
def GetStrLitContents(ea):
    """Return the bytes of the string literal at `ea`, trying UTF-16 first,
    then 8-bit C strings.

    Bug fix: the failure path returned the str "" while the success paths
    return bytes — callers that .decode() the result crashed on misses.
    It now returns b"" so the type is consistent.
    """
    potential_len = ida_bytes.get_max_strlit_length(ea, ida_nalt.STRTYPE_C_16)
    if potential_len > 0:
        # A non-zero UTF-16 length means this is likely our string.
        return ida_bytes.get_strlit_contents(ea, potential_len, ida_nalt.STRTYPE_C_16)
    # If we didn't get a good length out of C_16, try 8-bit strings.
    potential_len = ida_bytes.get_max_strlit_length(ea, ida_nalt.STRTYPE_C)
    if potential_len > 0:
        return ida_bytes.get_strlit_contents(ea, potential_len, ida_nalt.STRTYPE_C)
    # Not a string literal at all; keep the return type bytes.
    return b""
|
||||
@@ -1,161 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
import basicutils_7x as basicutils
|
||||
import json
|
||||
import os
|
||||
import modnaming
|
||||
|
||||
## Utilities
|
||||
|
||||
# escape_for_graphviz()
# Return the string escaped for usage in a GraphViz file
def escape_for_graphviz(string):
    """Quote and escape `string` for use as a GraphViz identifier.

    JSON string encoding produces a double-quoted, backslash-escaped form
    that GraphViz accepts.
    """
    encoded = json.dumps(string)
    return encoded
|
||||
|
||||
## CodeCut Basics
|
||||
## A couple of functions for working with function and module lists and outputting results
|
||||
|
||||
# locate_module()
# Return the module information for a given function
# This assumes that the module list is in order, but not necessarily contiguous
def locate_module(module_list, f):
    """Return the module (with .start/.end/.name) containing address `f`,
    or None when `f` falls in a gap between modules or past the last one.

    Bug fix: the original returned the loop variable `m` instead of the
    computed result — gap functions got the *next* module, addresses past
    the end got the *last* module, and an empty list raised
    UnboundLocalError.
    """
    for m in module_list:
        if f < m.start:
            # f falls in the crack before this module (it wasn't cool
            # enough to get a score) — no containing module.
            return None
        if f <= m.end:
            return m
    return None
|
||||
|
||||
|
||||
# gen_mod_graph()
# Output a module-to-module call graph in GraphViz format
def gen_mod_graph(module_list, suffix):
    """Write <root>_<suffix>_mod_graph.gv, a GraphViz digraph where an edge
    m1 -> m2 means a function in module m1 calls a function in module m2.

    For each module, every function's outgoing call targets are looked up in
    the module list; targets with no assigned module are skipped.

    Fixes: the output file is now managed by a context manager (closed even
    on exceptions), and the variable no longer shadows the `file` builtin.
    """
    edges = set()
    for m in module_list:
        f = m.start
        while f <= m.end:
            for xref in basicutils.FuncXrefsFrom(f):
                target = locate_module(module_list, xref)
                if target:
                    edges.add((m.name, target.name))
            f = basicutils.NextFunction(f)

    root_name = basicutils.GetRootName()
    with open(root_name + "_" + suffix + "_mod_graph.gv", "w") as out:
        out.write("digraph g {\n")
        for (node1, node2) in edges:
            out.write("%s -> %s\n" % (escape_for_graphviz(node1),
                                      escape_for_graphviz(node2)))
        out.write("}\n")
|
||||
|
||||
# gen_rename_script()
# Output the module list with names as a Python script
# This script can then be run on the database if in the same directory as the basicutils libraries
# Look at basicutils.RenameRangeWithAddr to see the "canonical" name format -
#  you can also tweak that function to use a different naming convention
def gen_rename_script(module_list, suffix):
    """Write <root>_<suffix>_labels.py, a script that canonically renames
    each detected module's address range when run inside IDA.

    Fixes: the output file is now managed by a context manager (closed even
    on exceptions), and the variable no longer shadows the `file` builtin.
    The generated script text is unchanged.
    """
    root_name = basicutils.GetRootName()
    with open(root_name + "_" + suffix + "_labels.py", "w") as out:
        # if (IDA_VERSION < 7):
        #     out.write("import basicutils_6x as basicutils\n")
        # else:
        out.write("import basicutils_7x as basicutils\n")
        out.write("\ndef go():\n")

        for m in module_list:
            out.write("\tbasicutils.RenameRangeWithAddr(0x%x,0x%x,%r)\n"
                      % (m.start, m.end, m.name))

        out.write("\n")
        out.write("if __name__ == \"__main__\":\n")
        out.write("\treload(basicutils)\n")
        out.write("\tgo()\n")
|
||||
|
||||
# gen_map_file()
# Produce a .map file similar to that produced by the ld option -Map=foo.map
# Use map_read.py to test accuracy when a ground truth map file is available
def gen_map_file(module_list, suffix):
    """Write <root>_<suffix>_map.map with one ld-style ".text" line per
    module: start address, length, and module name.

    Fixes: the output file is now managed by a context manager (closed even
    on exceptions), and the variable no longer shadows the `file` builtin.
    """
    root_name = basicutils.GetRootName()
    with open(root_name + "_" + suffix + "_map.map", "w") as out:
        for m in module_list:
            # mlen = basicutils.NextFunction(m.end) - m.start
            mlen_str = "0x%x" % (m.end - m.start)
            out.write("%s0x%016x%s %s\n"
                      % (" .text".ljust(16), m.start, mlen_str.rjust(11), m.name))
|
||||
|
||||
# print_results():
# Write all of the results to <target>_cc_results.csv - which can be opened
# in your favorite spreadsheet program
def print_results(function_list, module_list1, module_list2):
    """Write per-function LFA/MaxCut scores, edges, and suggested module
    names to <root>_cc_results.csv.

    Fixes: the CSV file was never closed (context manager now guarantees
    it), and module lookups are guarded so a function falling outside a
    module list cannot crash the export.
    """
    root_name = basicutils.GetRootName()
    with open(root_name + "_cc_results.csv", "w") as out:
        # write header
        out.write("Function,Function #,LFA Score 1,LFA Score 2,LFA Total,LFA Edge,MC Edge,Function Name,Suggested Mod Name (LFA), Suggested Mod Name(MC),Source Str Ref\n")

        for c, f in enumerate(function_list):
            fname = basicutils.GetFunctionName(f.loc)
            m1 = locate_module(module_list1, f.loc)
            m2 = locate_module(module_list2, f.loc)
            mname1 = m1.name if m1 else ""
            mname2 = m2.name if m2 else ""
            # hacky - should actually find the extent of the function;
            # for now we'll just skip the last one
            if c < (len(function_list) - 1):
                nf = basicutils.NextFunction(f.loc)
                func_str_ref, _score = modnaming.source_file_strings(f.loc, nf - 1)
            else:
                func_str_ref = ""
            line = "0x%08x, %d , %f, %f, %f, %d, %d, %s, %s, %s, %s\n" % (
                f.loc, c + 1, f.score1, f.score2, f.total_score,
                f.edge[0], f.edge[1], fname, mname1, mname2, func_str_ref)
            out.write(line)
|
||||
@@ -1,63 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
import maxcut
|
||||
import lfa
|
||||
import module
|
||||
import modnaming
|
||||
import cc_base
|
||||
import basicutils_7x as basicutils
|
||||
import snap_cg
|
||||
import imp
|
||||
|
||||
def go():
    """Run the full CodeCut pipeline.

    1. LFA + MaxCut analysis to find module boundaries.
    2. NLP-based module-name guessing for both module lists.
    3. Emit the CSV results, GraphViz module graphs, rename scripts,
       and .map files (for ground-truth comparison).
    """
    # Do LFA and MaxCut analysis to find module boundaries.
    lfa_funclist, lfa_modlist = lfa.analyze()
    merge_flist, maxcut_modlist = maxcut.analyze(lfa_funclist)

    # Guess names for the modules using NLP.
    lfa_modlist = modnaming.guess_module_names(lfa_modlist)
    maxcut_modlist = modnaming.guess_module_names(maxcut_modlist)

    # Output all results as .csv.
    cc_base.print_results(merge_flist, lfa_modlist, maxcut_modlist)

    # Output module-to-module call graphs as GraphViz .gv files.
    cc_base.gen_mod_graph(lfa_modlist, "lfa")
    cc_base.gen_mod_graph(maxcut_modlist, "mc")

    # Output Python scripts that will rename modules.
    cc_base.gen_rename_script(lfa_modlist, "lfa")
    cc_base.gen_rename_script(maxcut_modlist, "mc")

    # Output .map files (for comparison against ground truth, when available).
    cc_base.gen_map_file(lfa_modlist, "lfa")
    cc_base.gen_map_file(maxcut_modlist, "mc")

    return True
|
||||
|
||||
if __name__ == "__main__":
    # Hot-reload every CodeCut module so edits take effect between runs
    # inside the same IDA session, then run the pipeline.
    for _m in (modnaming, module, cc_base, lfa, maxcut, snap_cg, basicutils):
        imp.reload(_m)
    go()
|
||||
@@ -1,103 +0,0 @@
|
||||
|
||||
import idc
|
||||
import ida_kernwin
|
||||
|
||||
import imp
|
||||
import snap_cg
|
||||
|
||||
import lfa
|
||||
import maxcut
|
||||
import module
|
||||
import cc_base
|
||||
import modnaming
|
||||
import basicutils_7x as basicutils
|
||||
|
||||
from PyQt5 import QtCore, QtGui, QtWidgets
|
||||
|
||||
from IDAMagicStrings import get_source_strings
|
||||
|
||||
#-------------------------------------------------------------------------------
def handler(item, column_no):
    """Tree double-click handler: jump to the item's address.

    Items flagged with .ignore (module headers) are skipped; the jump only
    happens when the address is mapped in the database.
    """
    if not item.ignore:
        target = item.ea
        if is_mapped(target):
            jumpto(target)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CBaseTreeViewer(ida_kernwin.PluginForm):
    """IDA dockable form showing a tree of detected modules and their functions.

    Top-level items are modules (double-click ignored); children are the
    functions inside each module's address range (double-click jumps there).
    """

    def populate_tree(self):
        """Rebuild the tree from LFA module boundaries and source-string names."""
        # Clear previous items
        self.tree.clear()

        # Get source file names: get_source_strings() maps keys to tuples
        # where values[0] is an address and values[2] a module/source name.
        self.dict, _ = get_source_strings()
        module_names = {}
        for key in self.dict:
            for values in self.dict[key]:
                ea, module_name = values[0], values[2]
                module_names[ea] = module_name

        # Cache of module-name -> top-level tree item (modules can repeat).
        self.modules_cache = {}
        # Do LFA analysis to find module boundaries (function list unused here).
        _, lfa_modlist = lfa.analyze()
        for module_data in lfa_modlist:
            # Default label is the address range; replaced by a real source
            # name if any known string address falls inside this module.
            module_name = "Module 0x%08x:0x%08x" % (module_data.start, module_data.end)
            for ea in module_names:
                if ea >= module_data.start and ea <= module_data.end:
                    module_name = module_names[ea]
                    break

            if module_name in self.modules_cache:
                item = self.modules_cache[module_name]
            else:
                item = QtWidgets.QTreeWidgetItem(self.tree)
                item.setText(0, module_name)
                # Module rows carry an address but are not jump targets.
                item.ea = module_data.start
                item.ignore = True
                self.modules_cache[module_name] = item

            # One child row per function in the module's range.
            for func in Functions(module_data.start, module_data.end):
                node = QtWidgets.QTreeWidgetItem(item)
                node.setText(0, "0x%08x: %s" % (func, idc.get_func_name(func)))
                node.ea = func
                node.ignore = False

        # Double-click jumps to the function (see module-level handler()).
        self.tree.itemDoubleClicked.connect(handler)

    def OnCreate(self, form):
        """PluginForm callback: build the Qt widget tree when the form opens."""
        # Get parent widget
        self.parent = ida_kernwin.PluginForm.FormToPyQtWidget(form)

        # Create tree control
        self.tree = QtWidgets.QTreeWidget()
        self.tree.setHeaderLabels(("Names",))
        self.tree.setColumnWidth(0, 100)

        # Create layout
        layout = QtWidgets.QVBoxLayout()
        layout.addWidget(self.tree)
        self.populate_tree()

        # Populate PluginForm
        self.parent.setLayout(layout)

    def Show(self, title):
        """Display the form; WOPN_PERSIST keeps it alive when closed."""
        return ida_kernwin.PluginForm.Show(self, title, options = ida_kernwin.PluginForm.WOPN_PERSIST)
|
||||
|
||||
#-------------------------------------------------------------------------------
def main():
    """Create and show the module/function tree viewer form."""
    viewer = CBaseTreeViewer()
    viewer.Show("Object Files")
|
||||
|
||||
if __name__ == "__main__":
    # Hot-reload every CodeCut module so edits take effect between runs
    # inside the same IDA session, then open the viewer.
    for _m in (modnaming, module, cc_base, lfa, maxcut, snap_cg, basicutils):
        imp.reload(_m)
    main()
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
This is a dataset for exploring other solutions to the CodeCut problem or
|
||||
improving LFA.
|
||||
|
||||
Each of the targets is an ELF file with DWARF debug information, as well as
|
||||
a .map file that was produced by ld at link time (showing object file
|
||||
boundaries). Some of the larger targets have a trimmed .map file
|
||||
(with "_trim") in the filename - this is basically just the object file
|
||||
boundaries to make parsing faster.
|
||||
|
||||
You can use map_read.py to test LFA output (or any other solution to the
|
||||
CodeCut problem)
|
||||
|
||||
Syntax:
|
||||
map_read.py <ground truth map file> <CodeCut/LFA map file>
|
||||
|
||||
This will output a 3 part score: match %, gap %, and underlap %
|
||||
|
||||
Expected results for LFA:
|
||||
|
||||
Gnuchess - Linux binary (x86) 76.1 3.2 20.7
|
||||
PX4 Firmware / NuttX (ARM) 82.2 13.6 4.2
|
||||
GoodFET 41 Firmware (msp430) 76.1 0 23.9
|
||||
Tmote Sky Firmware / Contiki (msp430) 93.3 0 6.7
|
||||
NXP Httpd Demo / FreeRTOS (ARM) 86.7 1.4 11.9
|
||||
|
||||
A perfect result would be 100% match with no gap and no underlap.
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -1,907 +0,0 @@
|
||||
.text 0x0000000000402470 0x2e012
|
||||
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 main.o
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 components.o
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(cmd.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(debug.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(epd.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(genmove.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(init.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(move.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(output.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(players.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(pgn.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(solve.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(swap.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(util.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(engine.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(lexpgn.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 frontend/libfrontend.a(atak.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 adapter/libadapter.a(main.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 adapter/libadapter.a(option.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 adapter/libadapter.a(piece.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 adapter/libadapter.a(square.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 adapter/libadapter.a(uci.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0x0 adapter/libadapter.a(util.o)
|
||||
.text.unlikely
|
||||
0x0000000000402470 0xd4 adapter/libadapter.a(adapter.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(attack.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(board.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(book.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(book_make.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(book_merge.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(colour.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(engine.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(epd.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(fen.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(game.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(hash.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(io.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(line.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(list.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(move.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(move_do.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(move_gen.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(move_legal.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(parse.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(pgn.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(posix.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(random.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 adapter/libadapter.a(san.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(main.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(move_do.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(option.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(pawn.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(piece.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(protocol.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(pst.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(random.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(search.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(search_full.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(see.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(sort.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(square.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x0 engine/libengine.a(trans.o)
|
||||
.text.unlikely
|
||||
0x0000000000402544 0x1c engine/libengine.a(util.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(value.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(vector.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(attack.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(board.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(book.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(eval.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(fen.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(hash.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(list.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(material.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(move.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(move_check.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(move_evasion.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(move_gen.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(move_legal.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(posix.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(pv.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 engine/libengine.a(recog.o)
|
||||
.text.unlikely
|
||||
0x0000000000402560 0x0 /usr/lib/x86_64-linux-gnu/libc_nonshared.a(elf-init.oS)
|
||||
*(.text.exit .text.exit.*)
|
||||
*(.text.startup .text.startup.*)
|
||||
.text.startup 0x0000000000402560 0x79b main.o
|
||||
0x0000000000402560 main
|
||||
*(.text.hot .text.hot.*)
|
||||
*(.text .stub .text.* .gnu.linkonce.t.*)
|
||||
*fill* 0x0000000000402cfb 0x5
|
||||
.text 0x0000000000402d00 0x2a /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o
|
||||
0x0000000000402d00 _start
|
||||
.text 0x0000000000402d2a 0x0 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o
|
||||
*fill* 0x0000000000402d2a 0x6
|
||||
.text 0x0000000000402d30 0xc6 /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o
|
||||
.text 0x0000000000402df6 0x0 main.o
|
||||
*fill* 0x0000000000402df6 0xa
|
||||
.text 0x0000000000402e00 0x1ab components.o
|
||||
0x0000000000402e00 engine_func(void*)
|
||||
0x0000000000402e20 InitAdapter()
|
||||
0x0000000000402eb0 InitEngine()
|
||||
0x0000000000402f10 adapter_func(void*)
|
||||
0x0000000000402f30 TerminateAdapterEngine()
|
||||
*fill* 0x0000000000402fab 0x5
|
||||
.text 0x0000000000402fb0 0x2863 frontend/libfrontend.a(cmd.o)
|
||||
0x0000000000402fb0 cmd_variant()
|
||||
0x0000000000402fc0 cmd_bk()
|
||||
0x0000000000402fd0 cmd_movenow()
|
||||
0x0000000000402fe0 cmd_protover()
|
||||
0x0000000000402ff0 cmd_otim()
|
||||
0x0000000000403000 cmd_ics()
|
||||
0x0000000000403010 cmd_hint()
|
||||
0x0000000000403020 cmd_hard()
|
||||
0x0000000000403030 cmd_easy()
|
||||
0x0000000000403040 cmd_accepted()
|
||||
0x0000000000403050 cmd_activate()
|
||||
0x0000000000403080 cmd_black()
|
||||
0x00000000004030b0 cmd_graphic()
|
||||
0x00000000004030e0 cmd_nographic()
|
||||
0x0000000000403110 cmd_random()
|
||||
0x0000000000403140 cmd_switch()
|
||||
0x0000000000403170 cmd_white()
|
||||
0x00000000004031a0 cmd_test()
|
||||
0x00000000004031d0 cmd_analyze()
|
||||
0x0000000000403200 cmd_force()
|
||||
0x0000000000403230 cmd_manual()
|
||||
0x0000000000403260 cmd_nopost()
|
||||
0x0000000000403290 cmd_quit()
|
||||
0x0000000000403370 cmd_hash()
|
||||
0x0000000000403420 cmd_null()
|
||||
0x00000000004034d0 cmd_time()
|
||||
0x00000000004035b0 cmd_depth()
|
||||
0x0000000000403660 cmd_ping()
|
||||
0x00000000004036a0 cmd_post()
|
||||
0x00000000004036e0 cmd_go()
|
||||
0x0000000000403780 cmd_level()
|
||||
0x00000000004038d0 cmd_st()
|
||||
0x0000000000403990 cmd_rating()
|
||||
0x0000000000403a10 cmd_new()
|
||||
0x0000000000403a90 cmd_undo()
|
||||
0x0000000000403b60 cmd_remove()
|
||||
0x0000000000403c40 cmd_result()
|
||||
0x0000000000403cf0 cmd_solve()
|
||||
0x0000000000403d00 cmd_usage()
|
||||
0x0000000000403f60 cmd_usermove()
|
||||
0x0000000000404110 cmd_help()
|
||||
0x0000000000404270 cmd_version()
|
||||
0x00000000004042b0 cmd_exit()
|
||||
0x00000000004042f0 cmd_xboard()
|
||||
0x0000000000404390 cmd_book()
|
||||
0x00000000004045f0 cmd_pgnload()
|
||||
0x0000000000404760 cmd_edit()
|
||||
0x0000000000404790 cmd_memory()
|
||||
0x0000000000404860 cmd_list()
|
||||
0x00000000004048f0 cmd_name()
|
||||
0x0000000000404a20 cmd_next()
|
||||
0x0000000000404af0 cmd_previous()
|
||||
0x0000000000404bc0 cmd_last()
|
||||
0x0000000000404d30 cmd_first()
|
||||
0x0000000000404d50 cmd_pgnreplay()
|
||||
0x0000000000404fb0 cmd_save()
|
||||
0x0000000000404fd0 cmd_pgnsave()
|
||||
0x0000000000404ff0 cmd_show()
|
||||
0x0000000000405270 cmd_rejected()
|
||||
0x0000000000405280 parse_input()
|
||||
0x0000000000405550 check_board()
|
||||
0x00000000004055d0 cmd_load()
|
||||
0x0000000000405780 cmd_setboard()
|
||||
*fill* 0x0000000000405813 0xd
|
||||
.text 0x0000000000405820 0x23 frontend/libfrontend.a(debug.o)
|
||||
0x0000000000405820 dbg_open(char const*)
|
||||
0x0000000000405830 dbg_printf(char const*, ...)
|
||||
0x0000000000405840 dbg_close()
|
||||
*fill* 0x0000000000405843 0xd
|
||||
.text 0x0000000000405850 0xeee frontend/libfrontend.a(epd.o)
|
||||
0x0000000000405850 ParseEPD(char*)
|
||||
0x0000000000406210 ReadEPDFile(char const*, short)
|
||||
0x00000000004063d0 LoadEPD(char*)
|
||||
0x00000000004064a0 SaveEPD(char*)
|
||||
*fill* 0x000000000040673e 0x2
|
||||
.text 0x0000000000406740 0x3460 frontend/libfrontend.a(genmove.o)
|
||||
0x0000000000406740 GenMoves(short)
|
||||
0x0000000000407890 GenNonCaptures(short)
|
||||
0x00000000004084a0 GenCaptures(short)
|
||||
0x0000000000409290 GenCheckEscapes(short)
|
||||
0x0000000000409ab0 FilterIllegalMoves(short)
|
||||
.text 0x0000000000409ba0 0x1091 frontend/libfrontend.a(init.o)
|
||||
0x0000000000409ba0 InitLzArray()
|
||||
0x0000000000409be0 InitBitPosArray()
|
||||
0x0000000000409c20 InitMoveArray()
|
||||
0x0000000000409d30 InitRay()
|
||||
0x0000000000409e80 InitFromToRay()
|
||||
0x0000000000409fc0 InitRankFileBit()
|
||||
0x000000000040a010 InitBitCount()
|
||||
0x000000000040a080 InitRotAtak()
|
||||
0x000000000040a720 InitVars()
|
||||
0x000000000040aaa0 Initialize()
|
||||
0x000000000040abd0 NewPosition()
|
||||
*fill* 0x000000000040ac31 0xf
|
||||
.text 0x000000000040ac40 0x1d9f frontend/libfrontend.a(move.o)
|
||||
0x000000000040ac40 MakeMove(int, int*)
|
||||
0x000000000040b600 UnmakeMove(int, int*)
|
||||
0x000000000040bb20 SANMove(int, int)
|
||||
0x000000000040be80 IsInMoveList(int, int, int, char)
|
||||
0x000000000040bf10 AlgbrMove(int)
|
||||
0x000000000040bfa0 ValidateMove(char*)
|
||||
*fill* 0x000000000040c9df 0x1
|
||||
.text 0x000000000040c9e0 0xd9a frontend/libfrontend.a(output.o)
|
||||
0x000000000040c9e0 ShowTime()
|
||||
0x000000000040c9f0 ShowMoveList(int)
|
||||
0x000000000040cac0 ShowSmallBoard()
|
||||
0x000000000040ce20 ShowBoard()
|
||||
0x000000000040d560 ShowCBoard()
|
||||
0x000000000040d5e0 ShowMvboard()
|
||||
0x000000000040d650 ShowGame()
|
||||
*fill* 0x000000000040d77a 0x6
|
||||
.text 0x000000000040d780 0x572 frontend/libfrontend.a(players.o)
|
||||
0x000000000040d870 DBSortPlayer(char const*)
|
||||
0x000000000040d910 DBWritePlayer()
|
||||
0x000000000040d9a0 DBReadPlayer()
|
||||
0x000000000040da30 DBListPlayer(char const*)
|
||||
0x000000000040db10 DBSearchPlayer(char const*)
|
||||
0x000000000040db90 DBUpdatePlayer(char const*, char const*)
|
||||
*fill* 0x000000000040dcf2 0xe
|
||||
.text 0x000000000040dd00 0x6da frontend/libfrontend.a(pgn.o)
|
||||
0x000000000040dd00 PGNSaveToFile(char const*, char const*)
|
||||
0x000000000040e270 PGNReadFromFile(char const*, int)
|
||||
0x000000000040e390 IsTrustedPlayer(char const*)
|
||||
*fill* 0x000000000040e3da 0x6
|
||||
.text 0x000000000040e3e0 0x205 frontend/libfrontend.a(solve.o)
|
||||
0x000000000040e3e0 Solve(char*)
|
||||
*fill* 0x000000000040e5e5 0xb
|
||||
.text 0x000000000040e5f0 0x513 frontend/libfrontend.a(swap.o)
|
||||
0x000000000040e5f0 AddXrayPiece(int, int, int, unsigned long*, unsigned long*)
|
||||
0x000000000040e700 SwapOff(int)
|
||||
*fill* 0x000000000040eb03 0xd
|
||||
.text 0x000000000040eb10 0x371 frontend/libfrontend.a(util.o)
|
||||
0x000000000040eb10 UpdateFriends()
|
||||
0x000000000040eb90 UpdateCBoard()
|
||||
0x000000000040ec70 UpdateMvboard()
|
||||
0x000000000040ecc0 ValidateBoard()
|
||||
*fill* 0x000000000040ee81 0xf
|
||||
.text 0x000000000040ee90 0xf42 frontend/libfrontend.a(engine.o)
|
||||
0x000000000040efa0 InitFrontend()
|
||||
0x000000000040efc0 SendToEngine(char*)
|
||||
0x000000000040f040 ReadFromEngine()
|
||||
0x000000000040f1c0 ReadFromUser()
|
||||
0x000000000040f310 SetDataToEngine(char const*)
|
||||
0x000000000040f330 ExpectAnswerFromEngine(int)
|
||||
0x000000000040f340 ShowPrompt()
|
||||
0x000000000040f430 NextUserCmd()
|
||||
0x000000000040f610 NextEngineCmd()
|
||||
0x000000000040f8b0 SetUserInputValidMove(int)
|
||||
0x000000000040f8c0 ChangeColor(int)
|
||||
0x000000000040f8d0 SetAutoGo(int)
|
||||
0x000000000040f8e0 GetAutoGo()
|
||||
0x000000000040f8f0 SolvePosition(char*, char const*)
|
||||
0x000000000040fae0 ForwardUserInputToEngine()
|
||||
0x000000000040fc90 ForwardEngineOutputToUser()
|
||||
*fill* 0x000000000040fdd2 0xe
|
||||
.text 0x000000000040fde0 0x22f5 frontend/libfrontend.a(lexpgn.o)
|
||||
0x000000000040ff80 return_append_str(char*, char const*)
|
||||
0x0000000000410020 append_str(char**, char const*)
|
||||
0x0000000000410040 append_comment(char const*)
|
||||
0x00000000004100a0 yy_switch_to_buffer(yy_buffer_state*)
|
||||
0x0000000000410150 yy_delete_buffer(yy_buffer_state*)
|
||||
0x00000000004101c0 yy_flush_buffer(yy_buffer_state*)
|
||||
0x00000000004102d0 yy_create_buffer(_IO_FILE*, int)
|
||||
0x0000000000410330 yyrestart(_IO_FILE*)
|
||||
0x0000000000410400 yylex()
|
||||
0x0000000000411cd0 yypush_buffer_state(yy_buffer_state*)
|
||||
0x0000000000411d90 yypop_buffer_state()
|
||||
0x0000000000411e20 yy_scan_buffer(char*, unsigned long)
|
||||
0x0000000000411ec0 yy_scan_bytes(char const*, unsigned long)
|
||||
0x0000000000411f40 yy_scan_string(char const*)
|
||||
0x0000000000411f60 yyget_lineno()
|
||||
0x0000000000411f70 yyget_in()
|
||||
0x0000000000411f80 yyget_out()
|
||||
0x0000000000411f90 yyget_leng()
|
||||
0x0000000000411fa0 yyget_text()
|
||||
0x0000000000411fb0 yyset_lineno(int)
|
||||
0x0000000000411fc0 yyset_in(_IO_FILE*)
|
||||
0x0000000000411fd0 yyset_out(_IO_FILE*)
|
||||
0x0000000000411fe0 yyget_debug()
|
||||
0x0000000000411ff0 yyset_debug(int)
|
||||
0x0000000000412000 yylex_destroy()
|
||||
0x00000000004120b0 yyalloc(unsigned long)
|
||||
0x00000000004120c0 yyrealloc(void*, unsigned long)
|
||||
0x00000000004120d0 yyfree(void*)
|
||||
*fill* 0x00000000004120d5 0xb
|
||||
.text 0x00000000004120e0 0x4ff frontend/libfrontend.a(atak.o)
|
||||
0x00000000004120e0 SqAtakd(short, short)
|
||||
0x00000000004122c0 AttackTo(int, int)
|
||||
0x0000000000412480 PinnedOnKing(int, int)
|
||||
*fill* 0x00000000004125df 0x1
|
||||
.text 0x00000000004125e0 0x5cc adapter/libadapter.a(main.o)
|
||||
0x00000000004129c0 adapter::main_adapter(int, char**)
|
||||
0x0000000000412b40 adapter::quit()
|
||||
*fill* 0x0000000000412bac 0x4
|
||||
.text 0x0000000000412bb0 0x3e5 adapter/libadapter.a(option.o)
|
||||
0x0000000000412bb0 adapter::option_set(char const*, char const*)
|
||||
0x0000000000412c20 adapter::option_init()
|
||||
0x0000000000412e50 adapter::option_get(char const*)
|
||||
0x0000000000412ec0 adapter::option_get_bool(char const*)
|
||||
0x0000000000412f50 adapter::option_get_double(char const*)
|
||||
0x0000000000412f70 adapter::option_get_int(char const*)
|
||||
0x0000000000412f90 adapter::option_get_string(char const*)
|
||||
*fill* 0x0000000000412f95 0xb
|
||||
.text 0x0000000000412fa0 0x1bb adapter/libadapter.a(piece.o)
|
||||
0x0000000000412fa0 adapter::piece_init()
|
||||
0x0000000000412ff0 adapter::piece_is_ok(int)
|
||||
0x0000000000413010 adapter::piece_make_pawn(int)
|
||||
0x0000000000413020 adapter::piece_pawn_opp(int)
|
||||
0x0000000000413030 adapter::piece_colour(int)
|
||||
0x0000000000413040 adapter::piece_type(int)
|
||||
0x0000000000413050 adapter::piece_is_pawn(int)
|
||||
0x0000000000413060 adapter::piece_is_knight(int)
|
||||
0x0000000000413070 adapter::piece_is_bishop(int)
|
||||
0x0000000000413080 adapter::piece_is_rook(int)
|
||||
0x0000000000413090 adapter::piece_is_queen(int)
|
||||
0x00000000004130a0 adapter::piece_is_king(int)
|
||||
0x00000000004130b0 adapter::piece_is_slider(int)
|
||||
0x00000000004130c0 adapter::piece_to_12(int)
|
||||
0x00000000004130d0 adapter::piece_from_12(int)
|
||||
0x00000000004130e0 adapter::piece_to_char(int)
|
||||
0x0000000000413100 adapter::piece_from_char(int)
|
||||
0x0000000000413140 adapter::char_is_piece(int)
|
||||
*fill* 0x000000000041315b 0x5
|
||||
.text 0x0000000000413160 0x1f7 adapter/libadapter.a(square.o)
|
||||
0x0000000000413160 adapter::square_init()
|
||||
0x00000000004131b0 adapter::square_is_ok(int)
|
||||
0x00000000004131d0 adapter::square_make(int, int)
|
||||
0x00000000004131e0 adapter::square_file(int)
|
||||
0x00000000004131f0 adapter::square_rank(int)
|
||||
0x0000000000413200 adapter::square_side_rank(int, int)
|
||||
0x0000000000413230 adapter::square_from_64(int)
|
||||
0x0000000000413240 adapter::square_to_64(int)
|
||||
0x0000000000413250 adapter::square_is_promote(int)
|
||||
0x0000000000413270 adapter::square_ep_dual(int)
|
||||
0x0000000000413280 adapter::square_colour(int)
|
||||
0x0000000000413290 adapter::file_from_char(int)
|
||||
0x00000000004132a0 adapter::rank_from_char(int)
|
||||
0x00000000004132b0 adapter::file_to_char(int)
|
||||
0x00000000004132c0 adapter::rank_to_char(int)
|
||||
0x00000000004132d0 adapter::char_is_file(int)
|
||||
0x00000000004132e0 adapter::char_is_rank(int)
|
||||
0x00000000004132f0 adapter::square_to_string(int, char*, int)
|
||||
0x0000000000413320 adapter::square_from_string(char const*)
|
||||
*fill* 0x0000000000413357 0x9
|
||||
.text 0x0000000000413360 0x1328 adapter/libadapter.a(uci.o)
|
||||
0x0000000000413360 adapter::uci_close(adapter::uci_t*)
|
||||
0x00000000004133d0 adapter::uci_clear(adapter::uci_t*)
|
||||
0x00000000004134a0 adapter::uci_send_isready(adapter::uci_t*)
|
||||
0x00000000004134c0 adapter::uci_send_stop(adapter::uci_t*)
|
||||
0x00000000004134e0 adapter::uci_send_ucinewgame(adapter::uci_t*)
|
||||
0x0000000000413510 adapter::uci_option_exist(adapter::uci_t*, char const*)
|
||||
0x0000000000413570 adapter::uci_send_option(adapter::uci_t*, char const*, char const*, ...)
|
||||
0x00000000004136e0 adapter::uci_parse(adapter::uci_t*, char const*)
|
||||
0x00000000004144b0 adapter::uci_open(adapter::uci_t*, adapter::engine_t*)
|
||||
0x0000000000414590 adapter::uci_send_isready_sync(adapter::uci_t*)
|
||||
0x0000000000414610 adapter::uci_send_stop_sync(adapter::uci_t*)
|
||||
*fill* 0x0000000000414688 0x8
|
||||
.text 0x0000000000414690 0x81f adapter/libadapter.a(util.o)
|
||||
0x0000000000414690 adapter::util_init()
|
||||
0x00000000004146d0 adapter::my_random_init()
|
||||
0x00000000004146f0 adapter::my_random_int(int)
|
||||
0x0000000000414720 adapter::my_random_double()
|
||||
0x0000000000414740 adapter::my_atoll(char const*)
|
||||
0x0000000000414780 adapter::my_round(double)
|
||||
0x00000000004147a0 adapter::my_free(void*)
|
||||
0x00000000004147b0 adapter::my_log_open(char const*)
|
||||
0x00000000004147f0 adapter::my_log_close()
|
||||
0x0000000000414810 adapter::my_log(char const*, ...)
|
||||
0x00000000004148e0 adapter::my_fatal(char const*, ...)
|
||||
0x0000000000414a30 adapter::my_malloc(int)
|
||||
0x0000000000414a60 adapter::my_realloc(void*, int)
|
||||
0x0000000000414aa0 adapter::my_file_read_line(_IO_FILE*, char*, int)
|
||||
0x0000000000414b30 adapter::my_string_empty(char const*)
|
||||
0x0000000000414b50 adapter::my_string_equal(char const*, char const*)
|
||||
0x0000000000414b70 adapter::my_string_case_equal(char const*, char const*)
|
||||
0x0000000000414bd0 adapter::my_strdup(char const*)
|
||||
0x0000000000414c20 adapter::my_string_clear(char const**)
|
||||
0x0000000000414c40 adapter::my_string_set(char const**, char const*)
|
||||
0x0000000000414c70 adapter::my_timer_reset(adapter::my_timer_t*)
|
||||
0x0000000000414c90 adapter::my_timer_start(adapter::my_timer_t*)
|
||||
0x0000000000414cb0 adapter::my_timer_stop(adapter::my_timer_t*)
|
||||
0x0000000000414cf0 adapter::my_timer_elapsed_real(adapter::my_timer_t const*)
|
||||
0x0000000000414d40 adapter::my_timer_elapsed_cpu(adapter::my_timer_t const*)
|
||||
0x0000000000414d90 adapter::my_timer_cpu_usage(adapter::my_timer_t const*)
|
||||
0x0000000000414e90 adapter::compute_pkgdatadir()
|
||||
*fill* 0x0000000000414eaf 0x1
|
||||
.text 0x0000000000414eb0 0x2a53 adapter/libadapter.a(adapter.o)
|
||||
0x0000000000417620 adapter::adapter_loop()
|
||||
*fill* 0x0000000000417903 0xd
|
||||
.text 0x0000000000417910 0x32e adapter/libadapter.a(attack.o)
|
||||
0x0000000000417910 adapter::attack_init()
|
||||
0x0000000000417a50 adapter::piece_attack(adapter::board_t const*, int, int, int)
|
||||
0x0000000000417ad0 adapter::is_attacked(adapter::board_t const*, int, int)
|
||||
0x0000000000417b30 adapter::is_in_check(adapter::board_t const*, int)
|
||||
0x0000000000417b60 adapter::is_pinned(adapter::board_t const*, int, int, int)
|
||||
*fill* 0x0000000000417c3e 0x2
|
||||
.text 0x0000000000417c40 0x70a adapter/libadapter.a(board.o)
|
||||
0x0000000000417c40 adapter::board_is_ok(adapter::board_t const*)
|
||||
0x0000000000417c50 adapter::board_clear(adapter::board_t*)
|
||||
0x0000000000417d50 adapter::board_start(adapter::board_t*)
|
||||
0x0000000000417d60 adapter::board_copy(adapter::board_t*, adapter::board_t const*)
|
||||
0x0000000000417d70 adapter::board_equal(adapter::board_t const*, adapter::board_t const*)
|
||||
0x0000000000417e40 adapter::board_init_list(adapter::board_t*)
|
||||
0x00000000004180a0 adapter::board_flags(adapter::board_t const*)
|
||||
0x00000000004180e0 adapter::board_can_play(adapter::board_t const*)
|
||||
0x0000000000418160 adapter::board_mobility(adapter::board_t const*)
|
||||
0x00000000004181b0 adapter::board_is_check(adapter::board_t const*)
|
||||
0x00000000004181c0 adapter::board_is_mate(adapter::board_t const*)
|
||||
0x00000000004181f0 adapter::board_is_stalemate(adapter::board_t const*)
|
||||
0x0000000000418220 adapter::king_pos(adapter::board_t const*, int)
|
||||
0x0000000000418230 adapter::board_disp(adapter::board_t const*)
|
||||
*fill* 0x000000000041834a 0x6
|
||||
.text 0x0000000000418350 0xa9f adapter/libadapter.a(book.o)
|
||||
0x00000000004185b0 adapter::book_clear()
|
||||
0x00000000004185d0 adapter::book_open(char const*, int)
|
||||
0x00000000004187a0 adapter::book_close()
|
||||
0x00000000004187e0 adapter::is_in_book(adapter::board_t const*)
|
||||
0x0000000000418870 adapter::book_move(adapter::board_t const*, bool)
|
||||
0x0000000000418980 adapter::book_move(adapter::board_t const*, bool, bool)
|
||||
0x0000000000418ae0 adapter::book_disp(adapter::board_t const*)
|
||||
0x0000000000418c50 adapter::book_learn_move(adapter::board_t const*, int, int)
|
||||
0x0000000000418db0 adapter::book_flush()
|
||||
*fill* 0x0000000000418def 0x1
|
||||
.text 0x0000000000418df0 0xb1a adapter/libadapter.a(book_make.o)
|
||||
0x00000000004192b0 adapter::book_make(int, char**)
|
||||
*fill* 0x000000000041990a 0x6
|
||||
.text 0x0000000000419910 0x6a1 adapter/libadapter.a(book_merge.o)
|
||||
0x0000000000419c10 adapter::book_merge(int, char**)
|
||||
*fill* 0x0000000000419fb1 0xf
|
||||
.text 0x0000000000419fc0 0x46 adapter/libadapter.a(colour.o)
|
||||
0x0000000000419fc0 adapter::colour_is_ok(int)
|
||||
0x0000000000419fd0 adapter::colour_is_white(int)
|
||||
0x0000000000419fe0 adapter::colour_is_black(int)
|
||||
0x0000000000419ff0 adapter::colour_equal(int, int)
|
||||
0x000000000041a000 adapter::colour_opp(int)
|
||||
*fill* 0x000000000041a006 0xa
|
||||
.text 0x000000000041a010 0x29d adapter/libadapter.a(engine.o)
|
||||
0x000000000041a010 adapter::engine_is_ok(adapter::engine_t const*)
|
||||
0x000000000041a030 adapter::engine_open(adapter::engine_t*)
|
||||
0x000000000041a050 adapter::engine_close(adapter::engine_t*)
|
||||
0x000000000041a060 adapter::engine_get(adapter::engine_t*, char*, int)
|
||||
0x000000000041a0b0 adapter::engine_send(adapter::engine_t*, char const*, ...)
|
||||
0x000000000041a1b0 adapter::engine_send_queue(adapter::engine_t*, char const*, ...)
|
||||
*fill* 0x000000000041a2ad 0x3
|
||||
.text 0x000000000041a2b0 0xb1c adapter/libadapter.a(epd.o)
|
||||
0x000000000041a430 adapter::epd_get_op(char const*, char const*, char*, int)
|
||||
0x000000000041aaf0 adapter::epd_test(int, char**)
|
||||
*fill* 0x000000000041adcc 0x4
|
||||
.text 0x000000000041add0 0xad0 adapter/libadapter.a(fen.o)
|
||||
0x000000000041add0 adapter::fen_fatal(char const*, int, char const*)
|
||||
0x000000000041af70 adapter::board_from_fen(adapter::board_t*, char const*)
|
||||
0x000000000041b560 adapter::board_to_fen(adapter::board_t const*, char*, int)
|
||||
.text 0x000000000041b8a0 0x518 adapter/libadapter.a(game.o)
|
||||
0x000000000041ba30 adapter::game_is_ok(adapter::game_t const*)
|
||||
0x000000000041ba70 adapter::game_init(adapter::game_t*, char const*)
|
||||
0x000000000041bac0 adapter::game_clear(adapter::game_t*)
|
||||
0x000000000041bad0 adapter::game_status(adapter::game_t const*)
|
||||
0x000000000041bae0 adapter::game_size(adapter::game_t const*)
|
||||
0x000000000041baf0 adapter::game_pos(adapter::game_t const*)
|
||||
0x000000000041bb00 adapter::game_move(adapter::game_t const*, int)
|
||||
0x000000000041bb10 adapter::game_get_board(adapter::game_t const*, adapter::board_t*, int)
|
||||
0x000000000041bb90 adapter::game_turn(adapter::game_t const*)
|
||||
0x000000000041bba0 adapter::game_move_nb(adapter::game_t const*)
|
||||
0x000000000041bbb0 adapter::game_add_move(adapter::game_t*, int)
|
||||
0x000000000041bc40 adapter::game_goto(adapter::game_t*, int)
|
||||
0x000000000041bcd0 adapter::game_rem_move(adapter::game_t*)
|
||||
0x000000000041bd00 adapter::game_disp(adapter::game_t const*)
|
||||
*fill* 0x000000000041bdb8 0x8
|
||||
.text 0x000000000041bdc0 0x1a6 adapter/libadapter.a(hash.o)
|
||||
0x000000000041bdc0 adapter::hash_init()
|
||||
0x000000000041be20 adapter::hash_piece_key(int, int)
|
||||
0x000000000041be50 adapter::hash_key(adapter::board_t const*)
|
||||
0x000000000041bf10 adapter::hash_castle_key(int)
|
||||
0x000000000041bf20 adapter::hash_ep_key(int)
|
||||
0x000000000041bf40 adapter::hash_turn_key(int)
|
||||
*fill* 0x000000000041bf66 0xa
|
||||
.text 0x000000000041bf70 0x65f adapter/libadapter.a(io.o)
|
||||
0x000000000041bf70 adapter::io_is_ok(adapter::io_t const*)
|
||||
0x000000000041bfb0 adapter::io_init(adapter::io_t*)
|
||||
0x000000000041bfd0 adapter::io_close(adapter::io_t*)
|
||||
0x000000000041c030 adapter::io_get_update(adapter::io_t*)
|
||||
0x000000000041c0e0 adapter::io_line_ready(adapter::io_t const*)
|
||||
0x000000000041c110 adapter::io_get_line(adapter::io_t*, char*, int)
|
||||
0x000000000041c220 adapter::io_send(adapter::io_t*, char const*, ...)
|
||||
0x000000000041c460 adapter::io_send_queue(adapter::io_t*, char const*, ...)
|
||||
*fill* 0x000000000041c5cf 0x1
|
||||
.text 0x000000000041c5d0 0x3cd adapter/libadapter.a(line.o)
|
||||
0x000000000041c5d0 adapter::line_is_ok(unsigned short const*)
|
||||
0x000000000041c610 adapter::line_clear(unsigned short*)
|
||||
0x000000000041c620 adapter::line_copy(unsigned short*, unsigned short const*)
|
||||
0x000000000041c640 adapter::line_from_can(unsigned short*, adapter::board_t const*, char const*, int)
|
||||
0x000000000041c760 adapter::line_to_can(unsigned short const*, adapter::board_t const*, char*, int)
|
||||
0x000000000041c870 adapter::line_to_san(unsigned short const*, adapter::board_t const*, char*, int)
|
||||
*fill* 0x000000000041c99d 0x3
|
||||
.text 0x000000000041c9a0 0x55a adapter/libadapter.a(list.o)
|
||||
0x000000000041c9a0 adapter::list_is_ok(adapter::list_t const*)
|
||||
0x000000000041c9c0 adapter::list_clear(adapter::list_t*)
|
||||
0x000000000041c9d0 adapter::list_add(adapter::list_t*, int, int)
|
||||
0x000000000041c9f0 adapter::list_remove(adapter::list_t*, int)
|
||||
0x000000000041ca30 adapter::list_is_empty(adapter::list_t const*)
|
||||
0x000000000041ca40 adapter::list_size(adapter::list_t const*)
|
||||
0x000000000041ca50 adapter::list_move(adapter::list_t const*, int)
|
||||
0x000000000041ca60 adapter::list_value(adapter::list_t const*, int)
|
||||
0x000000000041ca70 adapter::list_copy(adapter::list_t*, adapter::list_t const*)
|
||||
0x000000000041cad0 adapter::list_move_to_front(adapter::list_t*, int)
|
||||
0x000000000041cb20 adapter::list_note(adapter::list_t*)
|
||||
0x000000000041cb60 adapter::list_sort(adapter::list_t*)
|
||||
0x000000000041cc10 adapter::list_contain(adapter::list_t const*, int)
|
||||
0x000000000041cc50 adapter::list_equal(adapter::list_t*, adapter::list_t*)
|
||||
0x000000000041ce40 adapter::list_disp(adapter::list_t const*, adapter::board_t const*)
|
||||
*fill* 0x000000000041cefa 0x6
|
||||
.text 0x000000000041cf00 0x661 adapter/libadapter.a(move.o)
|
||||
0x000000000041cf00 adapter::move_is_ok(int)
|
||||
0x000000000041cf10 adapter::move_make(int, int)
|
||||
0x000000000041cf40 adapter::move_make_flags(int, int, int)
|
||||
0x000000000041cf70 adapter::move_from(int)
|
||||
0x000000000041cf80 adapter::move_to(int)
|
||||
0x000000000041cf90 adapter::move_promote_hack(int)
|
||||
0x000000000041cfa0 adapter::move_is_promote(int)
|
||||
0x000000000041cfb0 adapter::move_is_en_passant(int, adapter::board_t const*)
|
||||
0x000000000041d000 adapter::move_is_capture(int, adapter::board_t const*)
|
||||
0x000000000041d030 adapter::move_is_castle(int, adapter::board_t const*)
|
||||
0x000000000041d180 adapter::move_piece(int, adapter::board_t const*)
|
||||
0x000000000041d1a0 adapter::move_capture(int, adapter::board_t const*)
|
||||
0x000000000041d1f0 adapter::move_promote(int, adapter::board_t const*)
|
||||
0x000000000041d220 adapter::move_is_check(int, adapter::board_t const*)
|
||||
0x000000000041d280 adapter::move_is_mate(int, adapter::board_t const*)
|
||||
0x000000000041d2e0 adapter::move_to_can(int, adapter::board_t const*, char*, int)
|
||||
0x000000000041d300 adapter::move_from_can(char const*, adapter::board_t const*)
|
||||
0x000000000041d500 adapter::move_order(int)
|
||||
0x000000000041d510 adapter::move_disp(int, adapter::board_t const*)
|
||||
*fill* 0x000000000041d561 0xf
|
||||
.text 0x000000000041d570 0x649 adapter/libadapter.a(move_do.o)
|
||||
0x000000000041d7c0 adapter::move_do(adapter::board_t*, int)
|
||||
*fill* 0x000000000041dbb9 0x7
|
||||
.text 0x000000000041dbc0 0x832 adapter/libadapter.a(move_gen.o)
|
||||
0x000000000041dc30 adapter::gen_moves(adapter::list_t*, adapter::board_t const*)
|
||||
0x000000000041e3d0 adapter::gen_legal_moves(adapter::list_t*, adapter::board_t const*)
|
||||
*fill* 0x000000000041e3f2 0xe
|
||||
.text 0x000000000041e400 0x175 adapter/libadapter.a(move_legal.o)
|
||||
0x000000000041e400 adapter::move_is_pseudo(int, adapter::board_t const*)
|
||||
0x000000000041e450 adapter::pseudo_is_legal(int, adapter::board_t const*)
|
||||
0x000000000041e4c0 adapter::move_is_legal(int, adapter::board_t const*)
|
||||
0x000000000041e4f0 adapter::filter_legal(adapter::list_t*, adapter::board_t const*)
|
||||
*fill* 0x000000000041e575 0xb
|
||||
.text 0x000000000041e580 0x5c7 adapter/libadapter.a(parse.o)
|
||||
0x000000000041e680 adapter::match(char*, char const*)
|
||||
0x000000000041e690 adapter::parse_is_ok(adapter::parse_t const*)
|
||||
0x000000000041e6f0 adapter::parse_open(adapter::parse_t*, char const*)
|
||||
0x000000000041e710 adapter::parse_close(adapter::parse_t*)
|
||||
0x000000000041e760 adapter::parse_add_keyword(adapter::parse_t*, char const*)
|
||||
0x000000000041e790 adapter::parse_get_word(adapter::parse_t*, char*, int)
|
||||
0x000000000041e860 adapter::parse_get_string(adapter::parse_t*, char*, int)
|
||||
*fill* 0x000000000041eb47 0x9
|
||||
.text 0x000000000041eb50 0xb49 adapter/libadapter.a(pgn.o)
|
||||
0x000000000041f1b0 adapter::pgn_open(adapter::pgn_t*, char const*)
|
||||
0x000000000041f290 adapter::pgn_close(adapter::pgn_t*)
|
||||
0x000000000041f2a0 adapter::pgn_next_game(adapter::pgn_t*)
|
||||
0x000000000041f4f0 adapter::pgn_next_move(adapter::pgn_t*, char*, int)
|
||||
*fill* 0x000000000041f699 0x7
|
||||
.text 0x000000000041f6a0 0x1da adapter/libadapter.a(posix.o)
|
||||
0x000000000041f6a0 adapter::input_available()
|
||||
0x000000000041f750 adapter::now_real()
|
||||
0x000000000041f7f0 adapter::now_cpu()
|
||||
*fill* 0x000000000041f87a 0x6
|
||||
.text 0x000000000041f880 0x1c adapter/libadapter.a(random.o)
|
||||
0x000000000041f880 adapter::random_init()
|
||||
0x000000000041f890 adapter::random_64(int)
|
||||
*fill* 0x000000000041f89c 0x4
|
||||
.text 0x000000000041f8a0 0xc99 adapter/libadapter.a(san.o)
|
||||
0x000000000041fd50 adapter::move_to_san(int, adapter::board_t const*, char*, int)
|
||||
0x000000000041fd70 adapter::move_from_san(char const*, adapter::board_t const*)
|
||||
0x0000000000420490 adapter::move_from_san_debug(char const*, adapter::board_t const*)
|
||||
*fill* 0x0000000000420539 0x7
|
||||
.text 0x0000000000420540 0xae engine/libengine.a(main.o)
|
||||
0x0000000000420540 engine::main_engine(int, char**)
|
||||
*fill* 0x00000000004205ee 0x2
|
||||
.text 0x00000000004205f0 0xc77 engine/libengine.a(move_do.o)
|
||||
0x0000000000420b90 engine::move_do_init()
|
||||
0x0000000000420bf0 engine::move_do(engine::board_t*, int, engine::undo_t*)
|
||||
0x0000000000420fc0 engine::move_undo(engine::board_t*, int, engine::undo_t const*)
|
||||
0x0000000000421190 engine::move_do_null(engine::board_t*, engine::undo_t*)
|
||||
0x0000000000421230 engine::move_undo_null(engine::board_t*, engine::undo_t const*)
|
||||
*fill* 0x0000000000421267 0x9
|
||||
.text 0x0000000000421270 0x245 engine/libengine.a(option.o)
|
||||
0x0000000000421270 engine::option_list()
|
||||
0x00000000004212f0 engine::option_set(char const*, char const*)
|
||||
0x0000000000421360 engine::option_init()
|
||||
0x0000000000421390 engine::option_get(char const*)
|
||||
0x0000000000421400 engine::option_get_bool(char const*)
|
||||
0x0000000000421490 engine::option_get_int(char const*)
|
||||
0x00000000004214b0 engine::option_get_string(char const*)
|
||||
*fill* 0x00000000004214b5 0xb
|
||||
.text 0x00000000004214c0 0x9e0 engine/libengine.a(pawn.o)
|
||||
0x00000000004214c0 engine::pawn_init_bit()
|
||||
0x0000000000421630 engine::pawn_init()
|
||||
0x00000000004216c0 engine::pawn_clear()
|
||||
0x0000000000421720 engine::pawn_alloc()
|
||||
0x0000000000421760 engine::pawn_get_info(engine::pawn_info_t*, engine::board_t const*)
|
||||
0x0000000000421e80 engine::quad(int, int, int)
|
||||
.text 0x0000000000421ea0 0x1a7 engine/libengine.a(piece.o)
|
||||
0x0000000000421ea0 engine::piece_init()
|
||||
0x0000000000421fc0 engine::piece_is_ok(int)
|
||||
0x0000000000421fe0 engine::piece_from_12(int)
|
||||
0x0000000000421ff0 engine::piece_to_char(int)
|
||||
0x0000000000422010 engine::piece_from_char(int)
|
||||
*fill* 0x0000000000422047 0x9
|
||||
.text 0x0000000000422050 0xeb0 engine/libengine.a(protocol.o)
|
||||
0x00000000004220b0 engine::get(char*, int)
|
||||
0x00000000004220e0 engine::send(char const*, ...)
|
||||
0x0000000000422e70 engine::loop()
|
||||
0x0000000000422ec0 engine::event()
|
||||
.text 0x0000000000422f00 0x59b engine/libengine.a(pst.o)
|
||||
0x0000000000422f00 engine::pst_init()
|
||||
*fill* 0x000000000042349b 0x5
|
||||
.text 0x00000000004234a0 0x2 engine/libengine.a(random.o)
|
||||
0x00000000004234a0 engine::random_init()
|
||||
*fill* 0x00000000004234a2 0xe
|
||||
.text 0x00000000004234b0 0x8e3 engine/libengine.a(search.o)
|
||||
0x00000000004234b0 engine::depth_is_ok(int)
|
||||
0x00000000004234c0 engine::height_is_ok(int)
|
||||
0x00000000004234d0 engine::search_clear()
|
||||
0x00000000004235e0 engine::search_update_current()
|
||||
0x0000000000423660 engine::search_update_best()
|
||||
0x00000000004238e0 engine::search()
|
||||
0x0000000000423bf0 engine::search_update_root()
|
||||
0x0000000000423c70 engine::search_check()
|
||||
*fill* 0x0000000000423d93 0xd
|
||||
.text 0x0000000000423da0 0x18c5 engine/libengine.a(search_full.o)
|
||||
0x00000000004253f0 engine::search_full_init(engine::list_t*, engine::board_t*)
|
||||
0x0000000000425660 engine::search_full_root(engine::list_t*, engine::board_t*, int, int)
|
||||
*fill* 0x0000000000425665 0xb
|
||||
.text 0x0000000000425670 0x663 engine/libengine.a(see.o)
|
||||
0x00000000004259b0 engine::see_move(int, engine::board_t const*)
|
||||
0x0000000000425c20 engine::see_square(engine::board_t const*, int, int)
|
||||
*fill* 0x0000000000425cd3 0xd
|
||||
.text 0x0000000000425ce0 0xc87 engine/libengine.a(sort.o)
|
||||
0x0000000000425e40 engine::sort_init()
|
||||
0x0000000000425fa0 engine::sort_next(engine::sort_t*)
|
||||
0x0000000000426340 engine::sort_init_qs(engine::sort_t*, engine::board_t*, engine::attack_t const*, bool)
|
||||
0x00000000004263b0 engine::sort_next_qs(engine::sort_t*)
|
||||
0x0000000000426590 engine::good_move(int, engine::board_t const*, int, int)
|
||||
0x0000000000426650 engine::history_good(int, engine::board_t const*)
|
||||
0x00000000004266f0 engine::history_bad(int, engine::board_t const*)
|
||||
0x0000000000426780 engine::note_moves(engine::list_t*, engine::board_t const*, int, int)
|
||||
0x00000000004268c0 engine::sort_init(engine::sort_t*, engine::board_t*, engine::attack_t const*, int, int, int)
|
||||
*fill* 0x0000000000426967 0x9
|
||||
.text 0x0000000000426970 0x120 engine/libengine.a(square.o)
|
||||
0x0000000000426970 engine::square_init()
|
||||
0x00000000004269f0 engine::file_from_char(int)
|
||||
0x0000000000426a00 engine::rank_from_char(int)
|
||||
0x0000000000426a10 engine::file_to_char(int)
|
||||
0x0000000000426a20 engine::rank_to_char(int)
|
||||
0x0000000000426a30 engine::square_to_string(int, char*, int)
|
||||
0x0000000000426a60 engine::square_from_string(char const*)
|
||||
.text 0x0000000000426a90 0x4a3 engine/libengine.a(trans.o)
|
||||
0x0000000000426a90 engine::trans_is_ok(engine::trans const*)
|
||||
0x0000000000426af0 engine::trans_free(engine::trans*)
|
||||
0x0000000000426b20 engine::trans_clear(engine::trans*)
|
||||
0x0000000000426bd0 engine::trans_init(engine::trans*)
|
||||
0x0000000000426c50 engine::trans_alloc(engine::trans*)
|
||||
0x0000000000426cd0 engine::trans_inc_date(engine::trans*)
|
||||
0x0000000000426d40 engine::trans_store(engine::trans*, unsigned long long, int, int, int, int)
|
||||
0x0000000000426e90 engine::trans_retrieve(engine::trans*, unsigned long long, int*, int*, int*, int*, int*)
|
||||
0x0000000000426f00 engine::trans_stats(engine::trans const*)
|
||||
*fill* 0x0000000000426f33 0xd
|
||||
.text 0x0000000000426f40 0x586 engine/libengine.a(util.o)
|
||||
0x0000000000426f40 engine::util_init()
|
||||
0x0000000000426f80 engine::my_random_init()
|
||||
0x0000000000426fa0 engine::my_random(int)
|
||||
0x0000000000426fd0 engine::my_atoll(char const*)
|
||||
0x0000000000427010 engine::my_round(double)
|
||||
0x0000000000427030 engine::my_free(void*)
|
||||
0x0000000000427040 engine::my_fatal(char const*, ...)
|
||||
0x0000000000427100 engine::my_malloc(int)
|
||||
0x0000000000427120 engine::my_file_read_line(_IO_FILE*, char*, int)
|
||||
0x00000000004271a0 engine::my_string_empty(char const*)
|
||||
0x00000000004271c0 engine::my_string_equal(char const*, char const*)
|
||||
0x0000000000427220 engine::my_strdup(char const*)
|
||||
0x0000000000427260 engine::my_string_clear(char const**)
|
||||
0x0000000000427280 engine::my_string_set(char const**, char const*)
|
||||
0x00000000004272b0 engine::my_timer_reset(engine::my_timer_t*)
|
||||
0x00000000004272d0 engine::my_timer_start(engine::my_timer_t*)
|
||||
0x00000000004272f0 engine::my_timer_stop(engine::my_timer_t*)
|
||||
0x0000000000427330 engine::my_timer_elapsed_real(engine::my_timer_t const*)
|
||||
0x0000000000427380 engine::my_timer_elapsed_cpu(engine::my_timer_t const*)
|
||||
0x00000000004273d0 engine::my_timer_cpu_usage(engine::my_timer_t const*)
|
||||
*fill* 0x00000000004274c6 0xa
|
||||
.text 0x00000000004274d0 0x19a engine/libengine.a(value.o)
|
||||
0x00000000004274d0 engine::value_init()
|
||||
0x0000000000427580 engine::value_is_ok(int)
|
||||
0x0000000000427590 engine::range_is_ok(int, int)
|
||||
0x00000000004275c0 engine::value_is_mate(int)
|
||||
0x00000000004275d0 engine::value_to_trans(int, int)
|
||||
0x0000000000427600 engine::value_from_trans(int, int)
|
||||
0x0000000000427630 engine::value_to_mate(int)
|
||||
*fill* 0x000000000042766a 0x6
|
||||
.text 0x0000000000427670 0xc6 engine/libengine.a(vector.o)
|
||||
0x0000000000427670 engine::vector_init()
|
||||
0x00000000004276f0 engine::delta_is_ok(int)
|
||||
0x0000000000427710 engine::inc_is_ok(int)
|
||||
*fill* 0x0000000000427736 0xa
|
||||
.text 0x0000000000427740 0xae9 engine/libengine.a(attack.o)
|
||||
0x0000000000427740 engine::attack_init()
|
||||
0x0000000000427e00 engine::is_attacked(engine::board_t const*, int, int)
|
||||
0x0000000000427ea0 engine::line_is_empty(engine::board_t const*, int, int)
|
||||
0x0000000000427ee0 engine::is_pinned(engine::board_t const*, int, int)
|
||||
0x0000000000427f60 engine::attack_is_ok(engine::attack_t const*)
|
||||
0x0000000000428010 engine::attack_set(engine::attack_t*, engine::board_t const*)
|
||||
0x0000000000428160 engine::piece_attack_king(engine::board_t const*, int, int, int)
|
||||
*fill* 0x0000000000428229 0x7
|
||||
.text 0x0000000000428230 0x8cb engine/libengine.a(board.o)
|
||||
0x0000000000428230 engine::board_is_ok(engine::board_t const*)
|
||||
0x0000000000428240 engine::board_clear(engine::board_t*)
|
||||
0x00000000004282b0 engine::board_copy(engine::board_t*, engine::board_t const*)
|
||||
0x00000000004282d0 engine::board_is_legal(engine::board_t const*)
|
||||
0x0000000000428300 engine::board_is_check(engine::board_t const*)
|
||||
0x0000000000428320 engine::board_is_mate(engine::board_t const*)
|
||||
0x0000000000428380 engine::board_is_stalemate(engine::board_t*)
|
||||
0x0000000000428440 engine::board_is_repetition(engine::board_t const*)
|
||||
0x00000000004284e0 engine::board_opening(engine::board_t const*)
|
||||
0x0000000000428590 engine::board_endgame(engine::board_t const*)
|
||||
0x0000000000428640 engine::board_init_list(engine::board_t*)
|
||||
*fill* 0x0000000000428afb 0x5
|
||||
.text 0x0000000000428b00 0x586 engine/libengine.a(book.o)
|
||||
0x0000000000428c40 engine::book_init()
|
||||
0x0000000000428c60 engine::book_open(char const*)
|
||||
0x0000000000428db0 engine::book_close()
|
||||
0x0000000000428df0 engine::book_move(engine::board_t*)
|
||||
*fill* 0x0000000000429086 0xa
|
||||
.text 0x0000000000429090 0x2012 engine/libengine.a(eval.o)
|
||||
0x0000000000429890 engine::eval_init()
|
||||
0x0000000000429ab0 engine::eval(engine::board_t const*)
|
||||
*fill* 0x000000000042b0a2 0xe
|
||||
.text 0x000000000042b0b0 0x639 engine/libengine.a(fen.o)
|
||||
0x000000000042b0b0 engine::board_from_fen(engine::board_t*, char const*)
|
||||
0x000000000042b4e0 engine::board_to_fen(engine::board_t const*, char*, int)
|
||||
*fill* 0x000000000042b6e9 0x7
|
||||
.text 0x000000000042b6f0 0x260 engine/libengine.a(hash.o)
|
||||
0x000000000042b6f0 engine::hash_init()
|
||||
0x000000000042b730 engine::hash_key(engine::board_t const*)
|
||||
0x000000000042b830 engine::hash_pawn_key(engine::board_t const*)
|
||||
0x000000000042b890 engine::hash_material_key(engine::board_t const*)
|
||||
0x000000000042b8e0 engine::hash_piece_key(int, int)
|
||||
0x000000000042b910 engine::hash_castle_key(int)
|
||||
0x000000000042b930 engine::hash_ep_key(int)
|
||||
0x000000000042b940 engine::hash_turn_key(int)
|
||||
.text 0x000000000042b950 0x287 engine/libengine.a(list.o)
|
||||
0x000000000042b950 engine::list_is_ok(engine::list_t const*)
|
||||
0x000000000042b970 engine::list_remove(engine::list_t*, int)
|
||||
0x000000000042b9b0 engine::list_copy(engine::list_t*, engine::list_t const*)
|
||||
0x000000000042ba00 engine::list_sort(engine::list_t*)
|
||||
0x000000000042baa0 engine::list_contain(engine::list_t const*, int)
|
||||
0x000000000042bae0 engine::list_note(engine::list_t*)
|
||||
0x000000000042bb20 engine::list_filter(engine::list_t*, engine::board_t*, bool (*)(int, engine::board_t*), bool)
|
||||
*fill* 0x000000000042bbd7 0x9
|
||||
.text 0x000000000042bbe0 0xdac engine/libengine.a(material.o)
|
||||
0x000000000042bbe0 engine::material_init()
|
||||
0x000000000042bc30 engine::material_clear()
|
||||
0x000000000042bc90 engine::material_alloc()
|
||||
0x000000000042bcd0 engine::material_get_info(engine::material_info_t*, engine::board_t const*)
|
||||
*fill* 0x000000000042c98c 0x4
|
||||
.text 0x000000000042c990 0x34a engine/libengine.a(move.o)
|
||||
0x000000000042c990 engine::move_is_ok(int)
|
||||
0x000000000042c9b0 engine::move_promote(int)
|
||||
0x000000000042c9e0 engine::move_order(int)
|
||||
0x000000000042ca00 engine::move_is_capture(int, engine::board_t const*)
|
||||
0x000000000042ca30 engine::move_is_under_promote(int)
|
||||
0x000000000042ca60 engine::move_is_tactical(int, engine::board_t const*)
|
||||
0x000000000042ca90 engine::move_capture(int, engine::board_t const*)
|
||||
0x000000000042cad0 engine::move_to_string(int, char*, int)
|
||||
0x000000000042cbc0 engine::move_from_string(char const*, engine::board_t const*)
|
||||
*fill* 0x000000000042ccda 0x6
|
||||
.text 0x000000000042cce0 0xa96 engine/libengine.a(move_check.o)
|
||||
0x000000000042cd70 engine::gen_quiet_checks(engine::list_t*, engine::board_t*)
|
||||
0x000000000042d640 engine::move_is_check(int, engine::board_t*)
|
||||
*fill* 0x000000000042d776 0xa
|
||||
.text 0x000000000042d780 0x7c3 engine/libengine.a(move_evasion.o)
|
||||
0x000000000042ded0 engine::gen_legal_evasions(engine::list_t*, engine::board_t const*, engine::attack_t const*)
|
||||
0x000000000042dee0 engine::gen_pseudo_evasions(engine::list_t*, engine::board_t const*, engine::attack_t const*)
|
||||
0x000000000042def0 engine::legal_evasion_exist(engine::board_t const*, engine::attack_t const*)
|
||||
*fill* 0x000000000042df43 0xd
|
||||
.text 0x000000000042df50 0x1a8a engine/libengine.a(move_gen.o)
|
||||
0x000000000042e1f0 engine::gen_quiet_moves(engine::list_t*, engine::board_t const*)
|
||||
0x000000000042ebd0 engine::add_pawn_move(engine::list_t*, int, int)
|
||||
0x000000000042ec40 engine::gen_moves(engine::list_t*, engine::board_t const*)
|
||||
0x000000000042ef00 engine::gen_legal_moves(engine::list_t*, engine::board_t*)
|
||||
0x000000000042ef80 engine::gen_captures(engine::list_t*, engine::board_t const*)
|
||||
0x000000000042f9a0 engine::add_promote(engine::list_t*, int)
|
||||
*fill* 0x000000000042f9da 0x6
|
||||
.text 0x000000000042f9e0 0x3a3 engine/libengine.a(move_legal.o)
|
||||
0x000000000042fa30 engine::move_is_pseudo(int, engine::board_t*)
|
||||
0x000000000042fb50 engine::quiet_is_pseudo(int, engine::board_t*)
|
||||
0x000000000042fc50 engine::pseudo_is_legal(int, engine::board_t*)
|
||||
*fill* 0x000000000042fd83 0xd
|
||||
.text 0x000000000042fd90 0x20a engine/libengine.a(posix.o)
|
||||
0x000000000042fd90 engine::input_available()
|
||||
0x000000000042fe70 engine::now_real()
|
||||
0x000000000042ff10 engine::now_cpu()
|
||||
*fill* 0x000000000042ff9a 0x6
|
||||
.text 0x000000000042ffa0 0x127 engine/libengine.a(pv.o)
|
||||
0x000000000042ffa0 engine::pv_is_ok(unsigned short const*)
|
||||
0x0000000000430000 engine::pv_copy(unsigned short*, unsigned short const*)
|
||||
0x0000000000430020 engine::pv_cat(unsigned short*, unsigned short const*, int)
|
||||
0x0000000000430040 engine::pv_to_string(unsigned short const*, char*, int)
|
||||
*fill* 0x00000000004300c7 0x9
|
||||
.text 0x00000000004300d0 0x340 engine/libengine.a(recog.o)
|
||||
0x00000000004301f0 engine::recog_draw(engine::board_t const*)
|
||||
.text 0x0000000000430410 0x72 /usr/lib/x86_64-linux-gnu/libc_nonshared.a(elf-init.oS)
|
||||
0x0000000000430410 __libc_csu_init
|
||||
0x0000000000430480 __libc_csu_fini
|
||||
.text 0x0000000000430482 0x0 /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o
|
||||
.text 0x0000000000430482 0x0 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o
|
||||
*(.gnu.warning)
|
||||
|
||||
.fini 0x0000000000430484 0x9
|
||||
*(SORT(.fini))
|
||||
.fini 0x0000000000430484 0x4 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o
|
||||
0x0000000000430484 _fini
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,78 +0,0 @@
|
||||
|
||||
import idc
|
||||
import ida_kernwin
|
||||
|
||||
import imp
|
||||
import snap_cg
|
||||
|
||||
import lfa
|
||||
import maxcut
|
||||
import module
|
||||
import cc_base
|
||||
import modnaming
|
||||
import basicutils_7x as basicutils
|
||||
|
||||
from PyQt5 import QtCore, QtGui, QtWidgets
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def handler(item, column_no):
    """Double-click callback for tree items: jump the IDA view to the item's address."""
    target = item.ea
    if not is_mapped(target):
        return
    jumpto(target)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
class CBaseTreeViewer(ida_kernwin.PluginForm):
    """IDA plugin form that shows LFA-detected module boundaries as a tree.

    Top-level items are modules (address ranges); children are the functions
    inside each range. Double-clicking any item navigates to its address.
    """

    def populate_tree(self):
        """Run LFA analysis and rebuild the tree from its module list."""
        # Clear previous items
        self.tree.clear()

        # LFA / MaxCut analysis yields the module (object-file) boundaries.
        _, lfa_modlist = lfa.analyze()
        for mod in lfa_modlist:
            top = QtWidgets.QTreeWidgetItem(self.tree)
            top.setText(0, "Module 0x%08x:0x%08x" % (mod.start, mod.end))
            top.ea = mod.start

            # One child node per function within the module's address range.
            for func_ea in Functions(mod.start, mod.end):
                child = QtWidgets.QTreeWidgetItem(top)
                child.setText(0, "0x%08x: %s" % (func_ea, idc.get_func_name(func_ea)))
                child.ea = func_ea

        self.tree.itemDoubleClicked.connect(handler)

    def OnCreate(self, form):
        """Build the Qt widget hierarchy when IDA instantiates the form."""
        # Get parent widget
        self.parent = ida_kernwin.PluginForm.FormToPyQtWidget(form)

        # Create tree control
        self.tree = QtWidgets.QTreeWidget()
        self.tree.setHeaderLabels(("Names",))
        self.tree.setColumnWidth(0, 100)

        # Lay out, fill, and attach to the form.
        layout = QtWidgets.QVBoxLayout()
        layout.addWidget(self.tree)
        self.populate_tree()

        self.parent.setLayout(layout)

    def Show(self, title):
        """Display the form; WOPN_PERSIST keeps it docked across closes."""
        return ida_kernwin.PluginForm.Show(self, title, options=ida_kernwin.PluginForm.WOPN_PERSIST)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
def main():
    """Create the module-boundary tree viewer and display it."""
    viewer = CBaseTreeViewer()
    viewer.Show("Object Files")
|
||||
|
||||
if __name__ == "__main__":
    # Reload every CodeCut module so edits are picked up when the script is
    # re-run inside a live IDA session (same order as before).
    for _mod in (modnaming, module, cc_base, lfa, maxcut, snap_cg, basicutils):
        imp.reload(_mod)
    main()
|
||||
|
||||
@@ -1,292 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
################################################################################
|
||||
### Object File Boundary Detection in IDA Pro with Local Function Affinity ###
|
||||
################################################################################
|
||||
|
||||
# LFA Metric
|
||||
# Local Function Affinity (LFA) is a measurement of the direction a function
|
||||
# is being "pulled" by the functions it calls and the functions that call it.
|
||||
# By looking at an average of the log of the distance between these functions
|
||||
# we get a measurement of whether the function is related to functions in the
|
||||
# positive or negative direction.
|
||||
|
||||
# Edge Detection
|
||||
# In a standard C/C++ development environment, the project is divided into
|
||||
# multiple source files, which are compiled to object files, then linked into
|
||||
# the final binary in order. If external references are eliminated (LFA does
|
||||
# this imperfectly by just eliminating calls whose distance is above a chosen
|
||||
# threshold) we would expect to see LFA starting positive, switching to
|
||||
# negative over the course of a source file, then switching back to positive
|
||||
# at the beginning of the next file. So object file boundaries appear as the
# points where the LFA score flips from negative back to positive.
|
||||
|
||||
# What is code anyway?
|
||||
# Don't get too hung up on "object file boundaries" - for LFA (or any other
|
||||
# attempt to solve the problem) to be perfect, the design and implementation
|
||||
# of the code would have to be perfect. What LFA is really finding is clusters
|
||||
# of functionality, that should be more or less related to object files
|
||||
# but it will often break up large object files into multiple clusters or
|
||||
# detect 2 or 3 related object files as one file.
|
||||
|
||||
IDA_VERSION = 7
|
||||
import basicutils_7x as basicutils
|
||||
|
||||
#External dependencies
|
||||
import math
|
||||
|
||||
#CodeCut dependencies
|
||||
import cc_base
|
||||
import module
|
||||
|
||||
#Threshold above which a function call is considered "external"
|
||||
#For published research - 0x1000 = 4K
|
||||
MAX_CALL = 0x1000
|
||||
|
||||
#This is a list of the LFA scores for all functions
|
||||
g_function_list = []
|
||||
|
||||
#This is a list of modules a.k.a. object files after the edge_detect()
|
||||
#function is executed
|
||||
g_module_list = []
|
||||
|
||||
|
||||
#func_callers_weight(f):
|
||||
#Return the LFA score for functions that this functions calls (i.e. the "calls from" score)
|
||||
#If there are no references, return 0
|
||||
def func_callers_weight(f):
    """Return the LFA "calls from" score for the function at address *f*.

    Averages the signed log of the distance to each function *f* calls.
    Calls farther than MAX_CALL are treated as external and ignored;
    recursive calls (distance 0) contribute 0. Returns 0 when no
    qualifying references exist.
    """
    count = 0
    total = 0
    for target in basicutils.FuncXrefsFrom(f):
        delta = target - f
        if abs(delta) > MAX_CALL:
            # Treat far calls as external references: excluded from the score.
            continue
        # Signed log-distance; 0 for a recursive (self) call.
        weight = math.log(abs(delta)) if delta != 0 else 0
        if delta < 0:
            weight = -weight
        total += weight
        count += 1

    if count == 0:
        return 0
    return total / count
|
||||
|
||||
#func_callee_weight(f):
|
||||
#Return the LFA score for calls where this function is the "callee" (i.e. the "calls to" score)
|
||||
#If there are no references, return 0
|
||||
def func_callee_weight(f):
    """Return the LFA "calls to" score for the function at address *f*.

    Averages the signed log of the distance from each caller of *f*.
    References farther than MAX_CALL are treated as external and ignored;
    recursive calls (distance 0) contribute 0. Returns 0 when no
    qualifying references exist.

    Fix: removed the unused local `a = 0` that the original declared.
    """
    fc = 0
    fs = 0
    for xref in basicutils.CodeRefsTo(f):
        dist = abs(xref - f)
        if dist > MAX_CALL:
            # Treat far references as external: excluded from the score.
            continue
        if dist != 0:
            logdist = math.log(dist)
        else:
            # Recursive function call: contributes nothing either way.
            logdist = 0
        # Sign the weight by the direction of the pull.
        if xref - f < 0:
            o = -logdist
        else:
            o = logdist
        fs += o
        fc += 1

    if fc == 0:
        return 0
    return fs / fc
|
||||
|
||||
#func_call_weight(start,end):
|
||||
#Iterate over each function in the range and calculated the LFA scores
|
||||
# If both scores are 0, skip the function altogether, exclude it from the list
|
||||
# If one score is 0, interpolate that score from the previous score
|
||||
def func_call_weight(f_start, f_end):
    """Score every function in [f_start, f_end) and fill g_function_list.

    For each function, both LFA scores (callers-from and callees-to) are
    computed. Functions with no usable references are appended with
    lfa_skip=1; a single missing score is interpolated from the previous
    score assuming a negative linear slope.

    Passing f_start == 0 means "whole database": scanning starts at the
    first function and runs to basicutils.BADADDR.

    Fixes vs. original:
    - The loop bound was captured into `fe` BEFORE the f_start == 0 branch
      rewrote f_end, so the whole-database path compared against a stale
      bound. The bound is now taken after the adjustment.
    - Removed dead locals (`prevscore`, `c`, `line`, `total_score`) that only
      fed commented-out debug output.
    """
    global g_function_list

    f = f_start
    if f == 0:
        # No explicit range: scan from the first function to the end of DB.
        f = basicutils.NextFunction(0)
        f_end = basicutils.BADADDR
    # BUG FIX: bound must reflect the (possibly adjusted) f_end.
    fe = f_end

    prevscore_1 = 0
    prevscore_2 = 0
    z1 = 0
    z2 = 0

    # For each function in range:
    while f < fe:
        # Get both LFA scores for the function.
        score_1 = func_callers_weight(f)
        score_2 = func_callee_weight(f)

        # Both scores 0 (no references, or all above the threshold):
        # record the function as skipped and move on.
        if score_1 == 0 and score_2 == 0:
            prevscore_1 = 0
            prevscore_2 = 0
            z1 = 1
            z2 = 1
            finf = module.func_info(f, 0, 0)
            finf.lfa_skip = 1
            g_function_list.append(finf)
            f = basicutils.NextFunction(f)
            continue

        # If exactly one score is zero, interpolate it from the previous
        # score assuming a negative linear slope of 0.4 per function;
        # otherwise use the score and reset the interpolation counter.
        if score_1 == 0:
            score_1 = prevscore_1 - z1 * .4
            z1 += 1
        else:
            prevscore_1 = score_1
            z1 = 1
        if score_2 == 0:
            score_2 = prevscore_2 - z2 * .4
            z2 += 1
        else:
            prevscore_2 = score_2
            z2 = 1

        # Add the scored function to the global list.
        finf = module.func_info(f, score_1, score_2)
        finf.lfa_skip = 0
        g_function_list.append(finf)

        f = basicutils.NextFunction(f)
|
||||
|
||||
#get_last_three and get_lfa_start:
|
||||
#Previously LFA would just skip functions if they had no caller or callee score
|
||||
#it would effectively drop them. This meant that when doing edge detection we
|
||||
#knew every function in the function list had a score. Now we're putting all
|
||||
#functions in the function list, and we have a "skip" field if LFA should skip it
|
||||
#for scoring purposes. So these functions help parse that skip field, since for
|
||||
#edge detection we look at the previous three scores.
|
||||
def get_last_three(index):
    """Return the last three non-skipped func_info entries before *index*.

    Walks backwards from index-1, ignoring entries whose lfa_skip flag is
    set. Returns them nearest-first. On failure (fewer than three scored
    entries precede *index*) prints an error and returns the sentinel
    (0, 0, 0), which callers must check before attribute access.

    Fix: the original loop condition `i > 0` stopped before examining
    g_function_list[0], so the first function could never be returned;
    the bound is now inclusive of index 0.
    """
    found = []
    i = index - 1
    while len(found) < 3 and i >= 0:
        if g_function_list[i].lfa_skip == 0:
            found.append(g_function_list[i])
        i -= 1
    if len(found) == 3:
        return found[0], found[1], found[2]
    print("Error: could not find 3 scored entries before index: %d (%d,%d)" % (index, i, len(found)))
    return 0, 0, 0
|
||||
|
||||
def get_lfa_start():
    """Return the index in g_function_list where edge detection may begin.

    Edge detection compares a score against the previous three scored
    entries, so it can only start once four non-skipped entries have been
    seen.

    Fix: added a bounds guard — the original `while (c < 4)` raised
    IndexError when the whole list held fewer than four scored entries.
    Now len(g_function_list) is returned in that case, which makes the
    edge-detection loop a no-op.
    """
    seen = 0
    i = 0
    while seen < 4 and i < len(g_function_list):
        if g_function_list[i].lfa_skip == 0:
            seen += 1
        i += 1
    return i
|
||||
|
||||
#edge_detect():
|
||||
# Determine boundaries between object files
|
||||
# Edge condition is a delta of at least 2 where the current score is positive
|
||||
# and 2 of the last 3 scores were negative (negative trend)
|
||||
def edge_detect():
    """Mark object-file edges in g_function_list and build g_module_list.

    An edge is declared at a scored function whose total score is positive,
    exceeds the previous scored function's by more than EDGE_THRESHOLD, and
    where the median of the previous three scores is negative (i.e. at
    least two of the last three were negative). Each edge then starts a new
    contiguous module in g_module_list.
    """
    global g_function_list
    global g_module_list

    # Threshold used for the published research results.
    EDGE_THRESHOLD = 2

    # Pass 1: flag edges, starting where three prior scored entries exist.
    idx = get_lfa_start()
    while idx < len(g_function_list):
        cur = g_function_list[idx]
        if cur.lfa_skip == 0:
            f_1, f_2, f_3 = get_last_three(idx)
            p_1 = f_1.total_score
            p_2 = f_2.total_score
            p_3 = f_3.total_score
            s = cur.total_score
            # Positive score, jump of more than EDGE_THRESHOLD over the
            # previous scored function, and that function was not itself
            # already marked as an edge.
            if f_1.edge[0] != 1 and s > 0 and (s - p_1) > EDGE_THRESHOLD:
                # Negative median => 2 of the last 3 scores were negative.
                if sorted([p_1, p_2, p_3])[1] < 0:
                    cur.edge[0] = 1
        idx += 1

    # Pass 2: turn edges into contiguous modules; each edge both closes the
    # previous module and opens the next one at its own address.
    # NOTE(review): functions after the final edge are not wrapped into a
    # trailing module here — confirm callers expect that.
    mod_start = g_function_list[0].loc
    for finf in g_function_list:
        if finf.edge[0] == 1:
            g_module_list.append(module.bin_module(mod_start, finf.loc - 1, 0, ""))
            mod_start = finf.loc
|
||||
|
||||
#Main entry point - returns an LFA module list and a global function list (with the LFA module edges marked)
|
||||
def analyze():
    """LFA entry point: score every function and detect module boundaries.

    Returns (g_function_list, g_module_list): every function with its LFA
    scores and edge flags, plus the inferred object-file module list.
    """
    global g_function_list
    global g_module_list

    # Prefer the .text segment when one exists; otherwise fall back to
    # scanning every function in the database.
    start, end = basicutils.SegByName(".text")
    if start == basicutils.BADADDR:
        start = basicutils.NextFunction(0)
        end = basicutils.BADADDR

    # Calculate the LFA score for all functions in range.
    func_call_weight(start, end)
    # Detect edges — the object file boundaries.
    edge_detect()

    return g_function_list, g_module_list
|
||||
@@ -1,343 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
##############################################################################################
|
||||
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
import sys
|
||||
|
||||
#Syntax: map_read.py <ground truth .map file> <LFA produced .map file>
|
||||
#Reads the two map files and outputs a score
|
||||
#Score is % overlap, % underlap, and % gap (the sum of which should be 100%)
|
||||
|
||||
#Raw list of modules
|
||||
g_mod_list1 = []
|
||||
g_mod_list2 = []
|
||||
#"Reconciled" module list - after modules have been combined to represent best alignment
|
||||
g_rec_list1 = []
|
||||
g_rec_list2 = []
|
||||
|
||||
#name
|
||||
#offset - starting address of the module
|
||||
#mlen - length of the module
|
||||
#reach - end address of the module (offset + mlen)
|
||||
#gap - when collapsing two modules,
|
||||
|
||||
class bin_mod:
    """One module (object file) parsed from a linker .map file.

    Attributes:
        name:   module name
        offset: starting address of the module
        mlen:   length of the module in bytes
        reach:  end address of the module (offset + mlen)
        gap:    gap accumulated when two modules are collapsed into one
    """

    def __init__(self, n, o, ml):
        self.name = n
        self.offset = o
        self.mlen = ml
        # Precompute the end address so range comparisons are cheap.
        self.reach = o + ml
        # No gap yet; collapsing code accumulates into this later.
        self.gap = 0
|
||||
|
||||
#map_parse(function, mlist):
|
||||
#Parse a gcc/ld formatted .map file
|
||||
# (mlist == 1): ground truth map, saved to g_mod_list1
|
||||
# (mlist == 2): LFA map, saved to g_mod_list2
|
||||
def map_parse(f, mlist):
    """Parse a gcc/ld formatted .map file into a global module list.

    f:     open file object for the .map file
    mlist: 1 -> ground truth map, saved to g_mod_list1
           2 -> LFA-produced map, saved to g_mod_list2

    Consecutive .text entries with the same module name are merged into a
    single bin_mod covering their combined range.
    """
    global g_mod_list1
    global g_mod_list2

    line = f.readline()
    prev_name = ""
    while line != "":
        # Only " .text" entries describe code; the line must also be long
        # enough to hold the "0x" address column at indices 16-17.
        # BUGFIX: guard was len(line) < 17, but line[17] is read below, so a
        # 17-character line would raise IndexError; require at least 18.
        if (not line.startswith(" .text")) or (len(line) < 18):
            line = f.readline()
            continue

        # Line-wrap case: a long section name pushes the address columns
        # onto the following line, so read ahead before parsing them.
        if not ((line[16] == '0') and (line[17] == 'x')):
            seg = line.strip()
            line = f.readline()
        else:
            seg = line[0:15].strip()

        # Fixed-column fields: address, length, module name.
        offset = int(line[16:34], 16)
        mlen = int(line[35:45].strip(), 16)
        name = line[46:].strip()

        # Skip zero-address / zero-length entries (discarded sections).
        if (offset == 0) or (mlen == 0):
            line = f.readline()
            continue

        if name == prev_name:
            # Same object file as the previous entry: extend the last
            # module instead of appending a duplicate.
            mods = g_mod_list1 if mlist == 1 else g_mod_list2
            new_reach = offset + mlen
            begin = mods[-1].offset
            mods[-1].mlen = new_reach - begin
            mods[-1].reach = new_reach
        else:
            bm = bin_mod(name, offset, mlen)
            if mlist == 1:
                g_mod_list1.append(bm)
            else:
                g_mod_list2.append(bm)

        # Read the next line and remember the name for the merge check.
        line = f.readline()
        prev_name = name
|
||||
|
||||
#map_print():
|
||||
#Print both ground truth and LFA map output
|
||||
def map_print(n):
    """Print one of the parsed maps: n == 1 for ground truth, else the LFA map."""
    if n == 1:
        print("Map 1 (ground truth):")
        mod_list = g_mod_list1
    else:
        print("Map 2:")
        mod_list = g_mod_list2

    print("# of modules: %d" % len(mod_list))
    for mod in mod_list:
        print("Name: %s Offset: %x Len: %x" % (mod.name, mod.offset, mod.mlen))
|
||||
|
||||
|
||||
#score_underlap(module1,module2):
|
||||
#opposite of overlap - actually "disjoint areas" might be more accurate
|
||||
#For the purposes of scoring this is the area of m1 that m2 doesn't cover
|
||||
#to ensure that the underlap does not get counted twice
|
||||
def score_underlap(m1, m2):
    """Return the size of the areas of m1 not covered by m2.

    "Underlap" is the opposite of overlap (disjoint areas).  Since the m1
    modules are contiguous (they come from .map files), measuring only the
    portion of m1 that m2 fails to cover ensures disjoint areas are not
    counted twice across successive calls.
    """
    # Clamp m2's bounds into m1's range, then sum the uncovered ends.
    clipped_start = max(m1.offset, m2.offset)
    clipped_end = min(m1.reach, m2.reach)
    return abs(m1.offset - clipped_start) + abs(m1.reach - clipped_end)
|
||||
|
||||
#mod_underlap(m1,m2):
|
||||
#Like score underlap but this is a simpler calculation for use with module list reconciliation
|
||||
def mod_underlap(m1, m2):
    """Simpler underlap measure used during module-list reconciliation:
    distance between the start addresses plus distance between the ends."""
    return abs(m1.offset - m2.offset) + abs(m1.reach - m2.reach)
|
||||
|
||||
|
||||
#mod_collapse(module1,module2):
|
||||
#Return a module object that is the combination of the two modules
|
||||
#Does not update either of the global module lists
|
||||
def mod_collapse(m1, m2):
    """Return a new bin_mod that spans both m1 and m2.

    Does not update either of the global module lists.  The gap between the
    two modules (plus any gaps they already carried) is accumulated into the
    result's .gap so scoring can account for it.
    """
    combined_start = min(m1.offset, m2.offset)
    combined_end = max(m1.reach, m2.reach)
    merged = bin_mod(m1.name + "_and_" + m2.name,
                     combined_start,
                     combined_end - combined_start)

    # Carry forward gaps from both inputs plus the gap between them.  The
    # max() works regardless of module order: the correct difference is
    # positive, the wrong one negative.
    merged.gap = m1.gap + m2.gap + max(m2.offset - m1.reach, m1.offset - m2.reach)
    return merged
|
||||
|
||||
#mod_print(m):
|
||||
#Print a single module
|
||||
def mod_print(m):
    """Print a single module's address range, plus its gap when nonzero.

    Output is left unterminated (end=' ') so callers can show several
    modules on one line.
    """
    span = "%08x - %08x" % (m.offset, m.reach)
    print(span, end=' ')
    if m.gap:
        print(" gap: %x" % m.gap, end=' ')
|
||||
|
||||
#rec_list_print():
|
||||
#Print side by side the reconciled module lists
|
||||
def rec_list_print():
    """Print the two reconciled module lists side by side with their underlap."""
    n1 = len(g_rec_list1)
    n2 = len(g_rec_list2)
    if n1 != n2:
        print("Error: List lengths don't match, not fully reconciled (%d and %d)." % (n1, n2))
        return
    for left, right in zip(g_rec_list1, g_rec_list2):
        mod_print(left)
        mod_print(right)
        print("u: %x" % (score_underlap(left, right)))
|
||||
|
||||
#final_score():
|
||||
#Determine the scores by iterating through the reconciled module lists
|
||||
#and tallying underlap areas and gap areas
|
||||
def final_score():
    """Tally underlap and gap areas across the reconciled lists.

    Prints a summary line and returns (underlap + gap) as a fraction of the
    total span covered by either map.  Returns None (after printing an
    error) if the lists were not fully reconciled.
    """
    start = min(g_rec_list1[0].offset, g_rec_list2[0].offset)
    end = max(g_rec_list1[-1].reach, g_rec_list2[-1].reach)

    n1 = len(g_rec_list1)
    n2 = len(g_rec_list2)
    if n1 != n2:
        print("Error: List lengths don't match, not fully reconciled (%d and %d)." % (n1, n2))
        return

    underlap = 0
    gaps = 0
    for left, right in zip(g_rec_list1, g_rec_list2):
        underlap += score_underlap(left, right)
        # Only count gaps from the "compare" map file (the LFA-generated one).
        gaps += right.gap

    # Area of overlap = total area - (underlaps + gaps).
    total = end - start
    good_area = total - (underlap + gaps)
    print("Length: 0x%x Good: 0x%x (%2f) Underlap: 0x%x (%2f) Gaps: 0x%x (%2f)" % (
        total, good_area, good_area * 100.0 / total,
        underlap, underlap * 100.0 / total,
        gaps, gaps * 100.0 / total))
    return (underlap + gaps) / 1.0 / total
|
||||
|
||||
#map_reconcile():
|
||||
#Attempt to combine modules in either list to make the maps more similar
|
||||
#When combining modules, keep track of gaps between the modules so we can account for that in the overall score
|
||||
#This might seem like cheating, but here's why it's not:
|
||||
# - we want to give the algorithm credit if it finds a couple of clusters of functionality within a .o file
|
||||
# (i.e. it says one .o file is really 2 or 3 .o files)
|
||||
# - we want to give the algorithm credit if it says nearby .o files are so inter-related that they are essentially one
|
||||
# (i.e. it says that 2 or 3 adjacent .o files are really one .o file
|
||||
#
|
||||
#I'm definitely open to suggestions on better ways to do this
|
||||
def map_reconcile():
    """Combine modules in either list to make the two maps more similar.

    When combining modules, the gaps between them are tracked so the final
    score can account for them.  This is not cheating because:
      - the algorithm gets credit if it finds a couple of clusters of
        functionality within one .o file (one .o is really 2-3 .o files), and
      - it gets credit if it decides nearby .o files are so inter-related
        that they are essentially one.
    Results are appended to g_rec_list1 / g_rec_list2.
    """
    i1 = 0
    i2 = 0

    while (i1 < len(g_mod_list1)) and (i2 < len(g_mod_list2)):
        m1 = g_mod_list1[i1]
        m2 = g_mod_list2[i2]

        # "reach" - the end of the current modules under consideration.
        m1r = m1.reach
        m2r = m2.reach

        # Current underlap of the candidate pair; collapsing continues only
        # while it keeps shrinking.
        po = mod_underlap(m1, m2)

        print(" m1 (%d): " % i1, end=' ')
        mod_print(m1)
        print(" m2 (%d): " % i2, end=' ')
        mod_print(m2)
        print(" underlap: %x" % (po))

        d = 0
        if m1r > m2r:
            # Module 1 is longer: collapse modules in list 2 to optimize,
            # checking at each step that the underlap actually improves.
            nm2 = g_mod_list2[i2]
            # BUGFIX: pnm2 was only assigned inside the loop below; if the
            # loop body never runs (i2 already at the last module) the
            # append at the bottom raised UnboundLocalError.
            pnm2 = nm2
            while (d == 0) and (i2 + 1 < len(g_mod_list2)):
                pnm2 = nm2
                nm2 = mod_collapse(nm2, g_mod_list2[i2 + 1])
                pc = mod_underlap(m1, nm2)
                print("nm2 (%d): (%x)" % (i2 + 1, pc), end=' ')
                mod_print(nm2)
                print("")
                if pc < po:
                    po = pc
                    i2 += 1
                else:
                    d = 1
            print("Collapsed m2 (%d): " % i2, end=' ')
            mod_print(pnm2)
            print("")

            # Add the final collapsed modules to the reconciled lists.
            g_rec_list1.append(m1)
            g_rec_list2.append(pnm2)
        else:
            # Module 2 is longer: collapse modules in list 1 instead.
            nm1 = g_mod_list1[i1]
            # BUGFIX: same initialization as above, for list 1.
            pnm1 = nm1
            while (d == 0) and (i1 + 1 < len(g_mod_list1)):
                pnm1 = nm1
                nm1 = mod_collapse(nm1, g_mod_list1[i1 + 1])
                pc = mod_underlap(nm1, m2)
                print("nm1 (%d): (%x)" % (i1 + 1, pc), end=' ')
                mod_print(nm1)
                print("")
                if pc < po:
                    po = pc
                    i1 += 1
                else:
                    d = 1
            print("Collapsed m1 (%d): " % i1, end=' ')
            mod_print(pnm1)
            print("")
            g_rec_list1.append(pnm1)
            g_rec_list2.append(m2)

        i1 += 1
        i2 += 1
        print("")

    # End case: if we've got one module left on either side, collapse all
    # the remaining modules on the other side to match it.
    if i1 == len(g_mod_list1) - 1:
        m1 = g_mod_list1[i1]
        print("end m1 (%d):" % (i1), end=' ')
        mod_print(m1)
        print("")
        nm2 = g_mod_list2[i2]
        i2 += 1
        while i2 < len(g_mod_list2):
            nm2 = mod_collapse(nm2, g_mod_list2[i2])
            print("end nm2 (%d):" % (i2), end=' ')
            mod_print(nm2)
            print("")
            i2 += 1
        g_rec_list1.append(m1)
        g_rec_list2.append(nm2)
    if i2 == len(g_mod_list2) - 1:
        m2 = g_mod_list2[i2]
        print("end m2 (%d):" % (i2), end=' ')
        mod_print(m2)
        print("")
        nm1 = g_mod_list1[i1]
        i1 += 1
        while i1 < len(g_mod_list1):
            nm1 = mod_collapse(nm1, g_mod_list1[i1])
            print("end nm1 (%d):" % (i1), end=' ')
            mod_print(nm1)
            print("")
            i1 += 1
        g_rec_list1.append(nm1)
        g_rec_list2.append(m2)
|
||||
|
||||
|
||||
|
||||
#"ground truth" map file
|
||||
# Syntax: map_read.py <ground truth .map file> <LFA produced .map file>
# Parse the ground-truth map and the map to compare (the LFA output).
# Context managers guarantee the files are closed even if parsing fails.
with open(sys.argv[1], 'r') as f:
    map_parse(f, 1)
with open(sys.argv[2], 'r') as f2:
    map_parse(f2, 2)

map_print(1)
map_print(2)

# "Reconcile" the maps to make them more similar - see the comment on
# map_reconcile() for why this is not cheating.
map_reconcile()

# Print the reconciled map side by side.
rec_list_print()

# Print the overall score.
print("Score: %f" % (final_score()))
|
||||
@@ -1,180 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
###############################################################
|
||||
### Object File Boundary Detection in IDA Pro with MaxCut ###
|
||||
###############################################################
|
||||
|
||||
import snap
|
||||
import sys
|
||||
import snap_cg
|
||||
import module
|
||||
|
||||
g_maxcut_modlist = []
|
||||
|
||||
#make_subgraph()
|
||||
#returns a Snap subgraph for just the address region specified
|
||||
#(i.e. the subgraph will not have any edges that originate outside the region
|
||||
#or terminate outside the region)
|
||||
|
||||
def make_subgraph(region_start, region_end, graph):
    """Return a Snap subgraph for just [region_start, region_end].

    The subgraph will not have any edges that originate or terminate
    outside the region (snap.GetSubGraph drops them).
    """
    print("make_subgraph: start: 0x%x and end: 0x%x" % (region_start, region_end))
    node_ids = snap.TIntV()
    # Node IDs are function start addresses.  (This would be much faster
    # with a linear list of functions.)
    for node in graph.Nodes():
        addr = node.GetId()
        if region_start <= addr <= region_end:
            node_ids.Add(addr)
        if addr > region_end:
            # Nodes iterate in ascending ID order, so we can stop early.
            break
    return snap.GetSubGraph(graph, node_ids)
|
||||
|
||||
#make_cut()
|
||||
#This function analyzes the region specified and returns the cut address for the address with the
|
||||
#maximum score, i.e. the address that has the highest average distance call length of function calls
|
||||
#that go across the address. If multiple addresses with zero calls are found (inf. score) the one
|
||||
#closest to the middle of the region is returned.
|
||||
def make_cut(region_start, region_end, graph):
    """Return the best cut address within [region_start, region_end].

    The score of a candidate address is the average call distance of the
    function calls that cross it; the address with the maximum score is
    returned.  If one or more addresses have zero crossing calls (infinite
    score), the zero-cross address closest to the middle of the region is
    returned instead.
    """

    print("make_cut: start: 0x%x end: 0x%x" % (region_start,region_end))

    weight = {}   # candidate cut address -> average crossing-call distance
    z = 0         # count of zero-crossing candidates found
    zeroes = []   # the zero-crossing candidate addresses
    for Node in graph.Nodes():
        start = Node.GetId()
        #iterate only over nodes in this region
        # A candidate cut sits just below each function's start address.
        cut_address = start - 1
        if cut_address < region_start:
            continue

        weight[cut_address] = 0
        edge_count = 0

        # NOTE(review): every edge is scanned for every node (O(N*E));
        # fine for modest call graphs but a sorted edge index would be faster.
        for Edge in graph.Edges():
            edge_start = Edge.GetSrcNId()
            edge_end = Edge.GetDstNId()
            #only look at edges that cross the possible cut address
            #handle both cases for the directed graph
            if (edge_start < cut_address and edge_end > cut_address) or (edge_end < cut_address and edge_start > cut_address):
                #print " cut %x, %x to %x cross" % (cut_address,edge_start,edge_end)
                weight[cut_address] += abs(edge_end - edge_start)
                edge_count +=1

        #If we have a place where we have no edges crossing - keep track of it
        #We will pick the place closest to the center of the module
        if edge_count == 0:
            print(" returning 0 weight count at: 0x%0x" % cut_address)
            z+=1
            zeroes.append(cut_address)
            weight[cut_address] = 0
        else:
            # Average distance; true division, so weights may be floats.
            weight[cut_address] = weight[cut_address]/ edge_count
            #print "w: %x: %x" % (cut_address, weight[cut_address])

    #if we had edges with zero crossings, pick the one closest to the center
    if (z > 0):
        print(" total of %d zero weight counts" % (z))
        # center may be fractional (true division); only used for distance.
        center = region_start + ((region_end-region_start)/2)
        min_dist = sys.maxsize
        for i in range(z):
            dist = abs(center - zeroes[i])
            if dist < min_dist:
                min_dist = dist
                min_zero = zeroes[i]
        print(" returning zero cut at addr: %x" % min_zero)
        return min_zero

    #otherwise pick the edge with the maximum weight score
    max_weight=0
    #print " weight table:"
    for addr,w in weight.items():
        #print " %x: %x" % (addr,w)
        if w > max_weight:
            max_addr = addr
            max_weight = w

    # NOTE(review): if the weight table were empty, max_addr would be
    # unbound here - presumably the region always yields candidates; verify.
    print(" returning max weight: %f at addr: 0x%x" % (max_weight,max_addr))
    return max_addr
|
||||
|
||||
#do_cutting()
|
||||
#This is the main recursive algorithm for MaxCut
|
||||
#Find a cut address, split the graph into two subgraphs, and recurse on those subgraphs
|
||||
#Stop if the area being cut is below a particular threshold
|
||||
def do_cutting(start, end, graph):
    """Main recursive algorithm for MaxCut.

    Find a cut address, split the graph into two subgraphs, and recurse on
    them.  Recursion stops when the region is below the size threshold (or
    has at most one node); the region is then recorded as one module in
    g_maxcut_modlist.
    """
    node_count = graph.GetNodes()
    print("do_cutting: start: 0x%x end: 0x%x nodes: 0x%x" % (start, end, node_count))

    # Minimum region size worth splitting further.
    THRESHOLD = 0x1000
    #THRESHOLD = 0x2000

    if (end - start) <= THRESHOLD or node_count <= 1:
        # Base case: record the whole region as a single module.
        print("Module 0x%x to 0x%x" % (start, end))
        g_maxcut_modlist.append(module.bin_module(start, end, 0, ""))
        return

    cut_address = make_cut(start, end, graph)

    # Split at the cut: [start, cut] and [cut+1, end].
    lower = make_subgraph(start, cut_address, graph)
    upper = make_subgraph(cut_address + 1, end, graph)

    do_cutting(start, cut_address, lower)
    do_cutting(cut_address + 1, end, upper)
|
||||
|
||||
#func_list_annotate()
|
||||
#This function copies our list of modules into the function list
|
||||
#This allows us to have a single function list with modules from multiple algorithms (LFA and MaxCut)
|
||||
def func_list_annotate(flist):
    """Copy the MaxCut module boundaries into the function list.

    This allows a single function list to carry modules from multiple
    algorithms (LFA and MaxCut).  Sets edge[1] (the MaxCut slot) on the
    first function at or after each module start.  Returns the annotated
    list, or None if a module start lies past the last function.
    """
    c = 0
    for mc_mod in g_maxcut_modlist:
        # Advance to the first function at or past this module's start.
        while flist[c].loc < mc_mod.start:
            c += 1
            if c == len(flist):
                print("Error: Maxcut module list does not reconcile with function list")
                return None
        # Index 0 of the edge pair is LFA's; index 1 is MaxCut's.
        flist[c].edge[1] = 1
    return flist
|
||||
|
||||
#Main entry point
|
||||
#Returns a global function list (annotated with MaxCut edges) and a global module list
|
||||
def analyze(flist):
    """MaxCut main entry point.

    Builds the call graph, recursively cuts it, and returns
    (annotated function list, MaxCut module list).
    """
    # The recursive cutting can go deep on large binaries.
    sys.setrecursionlimit(5000)
    call_graph = snap_cg.create_snap_cg()

    # Find the lowest and highest function addresses (node IDs).
    lo = sys.maxsize
    hi = 0
    for node in call_graph.Nodes():
        node_id = node.GetId()
        lo = min(lo, node_id)
        hi = max(hi, node_id)

    do_cutting(lo, hi, call_graph)

    return func_list_annotate(flist), g_maxcut_modlist
|
||||
|
||||
|
||||
@@ -1,309 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
IDA_VERSION = 7
|
||||
|
||||
if (IDA_VERSION < 7):
|
||||
import idc
|
||||
import struct
|
||||
import idautils
|
||||
import basicutils_6x as basicutils
|
||||
else:
|
||||
import ida_idaapi
|
||||
import ida_idc
|
||||
import ida_funcs
|
||||
import ida_nalt
|
||||
import ida_segment
|
||||
import idautils
|
||||
import basicutils_7x as basicutils
|
||||
|
||||
import math
|
||||
import nltk
|
||||
import nltk.collocations
|
||||
import re
|
||||
|
||||
|
||||
### NLP Section ###
|
||||
|
||||
# This section of code attempts to name the modules based on common strings in the string references
|
||||
# Not really based on any sound science or anything - your mileage may heavily vary. :-D
|
||||
|
||||
#string_range_tokenize(start,end,sep):
|
||||
#Compile all string references between start and end as a list of strings (called "tokens")
|
||||
# <sep> should be a nonsense word, and will show up in the list
|
||||
def string_range_tokenize(start, end, sep):
    """Tokenize all string references between start and end.

    <sep> should be a nonsense word (used as a record separator) and will
    still show up in the returned list.  Returns lowercase tokens with
    printf format strings, punctuation and stop words removed.
    """
    # All string references in this range concatenated into one string.
    t = basicutils.CompileTextFromRange(start, end, sep)

    # Enable this if you already have a bunch of function names and want to
    # include them in the mix:
    #t+= basicutils.CompileFuncNamesFromRangeAsText(start,end,sep)

    # Remove printf/sprintf format strings.
    tc = re.sub("%[0-9A-Za-z]+", " ", t)
    # Convert dash to underscore.
    tc = re.sub("-", "_", tc)
    # Replace _ with space - may want to turn this off sometimes.  This
    # breaks up snake case (and paths below); the problem is that a path
    # used throughout the binary would otherwise dominate the results.
    tc = re.sub("_", " ", tc)
    # Replace / and \ with a space.
    tc = re.sub("[/\\\\]", " ", tc)
    # Keep only alphanumerics, whitespace, . (for .c, .cpp, etc) and _.
    tc = re.sub("[^A-Za-z0-9_\.\s]", " ", tc)

    # Lowercase - this is the original set of tokens to work with.
    tokens = [tk.lower() for tk in tc.split()]

    # English stop words (the list from the MIT *bow project).
    eng_stopw = {"about","all","am","an","and","are","as","at","be","been","but","by","can","cannot","did","do","does","doing","done","for","from","had","has","have","having","if","in","is","it","its","of","on","that","the","these","they","this","those","to","too","want","wants","was","what","which","will","with","would"}
    # "Code" stop words, e.g. common words in debugging strings.
    code_sw = {"error","err","errlog","log","return","returned","byte","bytes","status","len","length","size","ok","0x","warning","fail","failed","failure","invalid","illegal","param","parameter","done","complete","assert","assertion","cant","didnt","class","foundation","cdecl","stdcall","thiscall"}
    stopw = eng_stopw.union(code_sw)

    # Drop the stop words and return what's left.
    return [tk for tk in tokens if tk not in stopw]
|
||||
|
||||
#bracket_strings(start,end,b_brack,e_brack):
|
||||
#Return the most common string in the range <star,end> that begins with b_brack and ends with e_brack
|
||||
# The count of how many times this string appeared is also returned
|
||||
#I find somewhat often people format debug strings like "[MOD_NAME] Function X did Y!"
|
||||
#This function is called by guess_module_names() - if you see this format with different brackets
|
||||
#you can edit that call
|
||||
def bracket_strings(start, end, b_brack, e_brack):
    """Return (string, count) for the most common bracketed string in the range.

    People often format debug strings like "[MOD_NAME] Function X did Y!";
    this looks for strings starting with b_brack and extracts the text up to
    e_brack.  Called by guess_module_names() - edit that call if your binary
    uses different bracket characters.
    """
    sep = "tzvlw"
    raw = basicutils.CompileTextFromRange(start, end, sep)
    tokens = [tk.lower() for tk in raw.split(sep)]

    candidates = []
    for tk in tokens:
        tk = tk.strip()
        if not tk.startswith(b_brack):
            continue
        contents = tk[1:tk.find(e_brack)]
        # Hack to get rid of [-],[+],[*] - could also strip non-alpha.
        if len(contents) > 3:
            # Hack for debug prints that started with [0x%x].
            if contents != "0x%x":
                candidates.append(contents)

    print("bracket_strings tokens:")
    print(tokens)
    print(candidates)

    best = ""
    best_count = 0
    if len(candidates) > 0:
        freq = nltk.FreqDist(candidates)
        best = freq.most_common(1)[0][0]
        best_count = freq.most_common(1)[0][1]

    return (best, best_count)
|
||||
|
||||
#source_file_strings(start,end):
|
||||
#Return the most common string that looks like a source file name in the given range
|
||||
# The count of how many times this string appeared is also returned
|
||||
def source_file_strings(start, end):
    """Return (string, count) for the most common source-file-looking string
    in the range (most often left over from calls to assert())."""
    sep = "tzvlw"
    raw = basicutils.CompileTextFromRange(start, end, sep)
    # Normally we would lowercase here to normalize, but that loses camel case.
    tokens = [tk for tk in raw.split(sep)]

    # For each string, remove quotes and commas, then split on spaces to
    # generate the final word list.  Leave in _ for filenames and / \ for paths.
    words = []
    for tk in tokens:
        cleaned = re.sub("[\"\'\,]", " ", tk.strip())
        words.extend(cleaned.split(" "))

    filenames = []
    for word in words:
        word = word.strip()
        if word.endswith((".c", ".cpp", ".cc")):
            # If there's a dir path, only use the trailing filename.  This
            # could be tweaked if the directory structure is part of the
            # software architecture (e.g. multiple meaningful source dirs).
            if word.rfind("/") != -1:
                filenames.append(word[word.rfind("/") + 1:])
            elif word.rfind("\\") != -1:
                filenames.append(word[word.rfind("\\") + 1:])
            else:
                filenames.append(word)

    print("source_file_strings tokens:")
    #print tokens
    print(filenames)

    # A better way (if there are multiple) would be to sort, uniquify, and
    # then make the name foo.c_and_bar.c.
    best = ""
    best_count = 0
    if len(filenames) > 0:
        freq = nltk.FreqDist(filenames)
        best = freq.most_common(1)[0][0]
        best_count = freq.most_common(1)[0][1]

    return (best, best_count)
|
||||
|
||||
#common_strings(start,end):
|
||||
#Return a list of the common strings in the given range
|
||||
#Uses NLTK to generate a list of unigrams, bigrams, and trigrams (1 word, 2 word phrase, 3 word phrase)
|
||||
#If the trigram score > 1/2 * bigram score, the most common trigram is used
|
||||
#If the bigram score > 1/2 * unigram score, the most common bigram is used
|
||||
#Otherwise the most common unigram (single word is used)
|
||||
def common_strings(start, end):
    """Return (string, count) for the most common phrase in the range.

    Uses NLTK to rank unigrams, bigrams and trigrams over the tokenized
    string references.  If the trigram score is at least half the bigram
    score the most common trigram is used; if the bigram score is at least
    half the unigram score the most common bigram is used; otherwise the
    most common single word wins.
    """
    # Minimum number of (non-separator) tokens needed to attempt naming.
    CS_THRESHOLD = 6
    sep = "tvlwz"

    tokens = string_range_tokenize(start, end, sep)

    # BUGFIX: take a real copy for the unigram pass.  The original aliased
    # the list ("u_tokens = tokens") despite its "make a copy" comment, so
    # deleting separators here also stripped them from `tokens`, which made
    # the bigram/trigram separator filters below dead code and allowed
    # phrases to span separator (string) boundaries.
    u_tokens = [tk for tk in tokens if tk != sep]

    print("common_strings tokens:")
    print(tokens)

    if len(u_tokens) < CS_THRESHOLD:
        # Not enough material to pick a meaningful name.
        return ("", 0)

    f = nltk.FreqDist(u_tokens)
    u_gram = f.most_common(1)[0][0]
    u_gram_score = f.most_common(1)[0][1]

    # Bigrams, dropping any pair that crosses a separator boundary.
    bgs = [bg for bg in nltk.bigrams(tokens) if sep not in bg]
    if bgs:
        fs = nltk.FreqDist(bgs)
        b_gram = fs.most_common(1)[0][0]
        b_str = b_gram[0] + "_" + b_gram[1]
        b_gram_score = fs.most_common(1)[0][1]
    else:
        b_str = ""
        b_gram_score = 0

    # Trigrams, with the same separator filtering.
    tgs = [tg for tg in nltk.trigrams(tokens) if sep not in tg]
    if tgs:
        ft = nltk.FreqDist(tgs)
        t_gram = ft.most_common(1)[0][0]
        t_str = t_gram[0] + "_" + t_gram[1] + "_" + t_gram[2]
        t_gram_score = ft.most_common(1)[0][1]
    else:
        t_str = ""
        t_gram_score = 0

    # Prefer longer phrases when they are at least half as frequent as the
    # next shorter form.
    if b_gram_score * 2 >= u_gram_score:
        if t_gram_score * 2 >= b_gram_score:
            return (t_str, t_gram_score)
        return (b_str, b_gram_score)
    return (u_gram, u_gram_score)
|
||||
|
||||
### End of NLP Section ###
|
||||
|
||||
|
||||
|
||||
#guess_module_names():
|
||||
#Use the NLP section (above) to guess the names of modules and add them to the global module list
|
||||
#Attempts to find common bracket strings (e.g. "[MOD_NAME] Debug print!")
|
||||
#then source file names (most often left over from calls to assert())
|
||||
#then common trigram/bigram/unigrams
|
||||
#You can tweak the switchover thresholds below.
|
||||
def guess_module_names(module_list):
    """Guess a name for each module using the NLP section above.

    Tries, in order: bracket strings (e.g. "[MOD_NAME] Debug print!"),
    source file names (often left by assert()), then common
    trigram/bigram/unigrams.  Each threshold below is the number of times a
    string must repeat before it is accepted as the module name; modules
    that defeat all three strategies are named umod0, umod1, ...
    """
    # Idea: make the score threshold depend on module size (e.g. smaller
    # modules should have a smaller threshold).
    C_SCORE_THRESHOLD = 3
    S_SCORE_THRESHOLD = 1
    B_SCORE_THRESHOLD = 1

    unk_mod = 0
    for idx, mod in enumerate(module_list):
        # Each fallback only fires when the previous strategy's best string
        # repeated fewer times than that strategy's threshold.
        (name, scr) = bracket_strings(mod.start, mod.end, "[", "]")
        if scr < B_SCORE_THRESHOLD:
            (name, scr) = source_file_strings(mod.start, mod.end)
            if scr < S_SCORE_THRESHOLD:
                (name, scr) = common_strings(mod.start, mod.end)
                if scr < C_SCORE_THRESHOLD:
                    # Couldn't come up with a name, so use umod1, umod2, etc.
                    # A "word cloud" might help characterize the module:
                    #print basicutils.CompileTextFromRange(m.start,m.end," ")
                    name = "umod%d" % (unk_mod)
                    unk_mod += 1
        module_list[idx].name = name
        module_list[idx].score = scr
        print("%08x - %08x : %s (%d)" % (mod.start, mod.end, name, scr))

    return module_list
|
||||
@@ -1,52 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
#This represents the information we want to record about an individual function
|
||||
#The function lists returned by LFA and MaxCut are made up of these
|
||||
class func_info():
|
||||
def __init__(self,loc,score1,score2):
|
||||
self.loc = loc #the effective address of the function
|
||||
self.score1=score1 #"Calls from" local function affinity score
|
||||
self.score2=score2 #"Calls to" local function affinity score
|
||||
self.total_score=score1+score2
|
||||
self.lfa_skip=0 #Set to 1 if "skipped" (not scored) by LFA
|
||||
self.edge=[0,0] #Set by edge_detect() - if 1, this is the start of a new module
|
||||
#index 0 for LFA, 1 for MaxCut
|
||||
|
||||
def __repr__(self):
|
||||
return "Function: 0x%08x" % (self.loc)
|
||||
|
||||
def __str__(self):
|
||||
return self.__repr__()
|
||||
|
||||
#This represents the object files (aka modules) identified by LFA and MaxCut
|
||||
class bin_module():
|
||||
def __init__(self,start,end,score,name):
|
||||
self.start=start
|
||||
self.end=end
|
||||
self.score=score #Currently unused
|
||||
self.name=name
|
||||
|
||||
def __repr__(self):
|
||||
line = "Module at 0x%08x:0x%08x" % (self.start, self.end)
|
||||
if self.name != "" and self.name is not None:
|
||||
line += " (name %s)" % self.name
|
||||
return line
|
||||
|
||||
def __str__(self):
|
||||
return self.__repr__()
|
||||
@@ -1,67 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
## This code creates a Snap PNGraph object that represents the call graph of a binary
|
||||
## (the .text section)
|
||||
|
||||
import snap
|
||||
import sys
|
||||
|
||||
import idc
|
||||
import struct
|
||||
import idautils
|
||||
import basicutils_7x as basicutils
|
||||
|
||||
MAX_DIST = 0
|
||||
|
||||
|
||||
UGraph = []
|
||||
|
||||
def add_edge(f, t):
|
||||
global UGraph
|
||||
n = basicutils.GetFunctionName(f)
|
||||
if n != "":
|
||||
#since we're only doing one edge for each xref, we'll do weight based on distance from the middle of the caller to the callee
|
||||
f_start = idc.get_func_attr(f, idc.FUNCATTR_START)
|
||||
|
||||
if (not UGraph.IsNode(f_start)):
|
||||
print("Error: had to add node (to): %08x" % f_start)
|
||||
UGraph.AddNode(f_start)
|
||||
|
||||
print("%08x -> %08x" % (f_start, t))
|
||||
UGraph.AddEdge(t,f_start)
|
||||
|
||||
#print "s_%#x -> s_%#x" % (f_start,t)," [len = ",get_weight(func_mid, t), "]"
|
||||
|
||||
|
||||
def add_node(f):
|
||||
basicutils.ForEveryXrefToD(f, add_edge)
|
||||
|
||||
def create_snap_cg():
|
||||
global UGraph
|
||||
UGraph= snap.PNGraph.New()
|
||||
|
||||
#Add every function linearly, this makes sure the nodes are in order
|
||||
basicutils.ForEveryFuncInSeg(".text",UGraph.AddNode)
|
||||
basicutils.ForEveryFuncInSeg(".text",add_node)
|
||||
|
||||
for NI in UGraph.Nodes():
|
||||
print("node id 0x%x with out-degree %d and in-degree %d" %(
|
||||
NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()))
|
||||
|
||||
return UGraph
|
||||
@@ -1,15 +0,0 @@
|
||||
The "data" directory is intended to hold data files that will be used by this module and will
|
||||
not end up in the .jar file, but will be present in the zip or tar file. Typically, data
|
||||
files are placed here rather than in the resources directory if the user may need to edit them.
|
||||
|
||||
An optional data/languages directory can exist for the purpose of containing various Sleigh language
|
||||
specification files and importer opinion files.
|
||||
|
||||
The data/buildLanguage.xml is used for building the contents of the data/languages directory.
|
||||
|
||||
The skel language definition has been commented-out within the skel.ldefs file so that the
|
||||
skeleton language does not show-up within Ghidra.
|
||||
|
||||
See the Sleigh language documentation (docs/languages/index.html) for details Sleigh language
|
||||
specification syntax.
|
||||
|
||||
@@ -1,348 +0,0 @@
|
||||
##############################################################################################
|
||||
# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
#
|
||||
# This material is based upon work supported by the Defense Advanced Research
|
||||
# Projects Agency (DARPA) and Naval Information Warfare Center Pacific (NIWC Pacific)
|
||||
# under Contract Number N66001-20-C-4024.
|
||||
#
|
||||
|
||||
import sys
|
||||
import math
|
||||
import nltk
|
||||
import nltk.collocations
|
||||
import re
|
||||
|
||||
#uncomment "print" to get debug prints
|
||||
def debug_print(x):
|
||||
#print(x)
|
||||
return
|
||||
|
||||
### NLP Section ###
|
||||
|
||||
# This section of code attempts to name the modules based on common strings in the string references
|
||||
# Not really based on any sound science or anything - your mileage may heavily vary. :-D
|
||||
|
||||
#string_range_tokenize(t):
|
||||
#Take a long string and convert it into a list of tokens. If using a separator, this will appear in the token list
|
||||
def string_range_tokenize(t):
|
||||
|
||||
#print "string_range_tokenize: raw text:"
|
||||
#print t
|
||||
#remove printf/sprintf format strings
|
||||
#tc = re.sub("%[0-9A-Za-z]+"," ",t)
|
||||
#convert dash to underscore
|
||||
#tc = re.sub("-","_",tc)
|
||||
#replace _ and / with space - may want to turn this off sometimes
|
||||
#this will break up snake case and paths
|
||||
#problem is that if you have a path that is used throughout the binary it will probably dominate results
|
||||
#tc = re.sub("_"," ",tc)
|
||||
#replace / and \\ with a space
|
||||
#tc = re.sub("[/\\\\]"," ",tc)
|
||||
#remove anything except alphanumeric, spaces, . (for .c, .cpp, etc) and _
|
||||
#tc = re.sub("[^A-Za-z0-9_\.\s]"," ",tc)
|
||||
|
||||
#lowercase it - and store this as the original set of tokens to work with
|
||||
tokens = [tk.lower() for tk in t.split()]
|
||||
|
||||
#remove English stop words
|
||||
#this is the list from the MIT *bow project
|
||||
eng_stopw = {"about","all","am","an","and","are","as","at","be","been","but","by","can","cannot","did","do","does","doing","done","for","from","had","has","have","having","if","in","is","it","its","of","on","that","the","these","they","this","those","to","too","want","wants","was","what","which","will","with","would"}
|
||||
#remove "code" stop words
|
||||
#e.g. common words in debugging strings
|
||||
code_sw = {"error","err","errlog","log","return","returned","byte","bytes","status","len","length","size","ok","0x","warning","fail","failed","failure","invalid","illegal","param","parameter","done","complete","assert","assertion","cant","didnt","class","foundation","cdecl","stdcall","thiscall"}
|
||||
#remove code stop words (from Joxean Koret's "IDAMagicStrings")
|
||||
jk_sw = {"copyright", "char", "bool", "int", "unsigned", "long",
|
||||
"double", "float", "signed", "license", "version", "cannot", "error",
|
||||
"invalid", "null", "warning", "general", "argument", "written", "report",
|
||||
"failed", "assert", "object", "integer", "unknown", "localhost", "native",
|
||||
"memory", "system", "write", "read", "open", "close", "help", "exit", "test",
|
||||
"return", "libs", "home", "ambiguous", "internal", "request", "inserting",
|
||||
"deleting", "removing", "updating", "adding", "assertion", "flags",
|
||||
"overflow", "enabled", "disabled", "enable", "disable", "virtual", "client",
|
||||
"server", "switch", "while", "offset", "abort", "panic", "static", "updated",
|
||||
"pointer", "reason", "month", "year", "week", "hour", "minute", "second",
|
||||
'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
|
||||
'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
|
||||
'september', 'october', 'november', 'december', "arguments", "corrupt",
|
||||
"corrupted", "default", "success", "expecting", "missing", "phrase",
|
||||
"unrecognized", "undefined"}
|
||||
|
||||
stopw = eng_stopw.union(code_sw)
|
||||
stopw = stopw.union(jk_sw)
|
||||
|
||||
c = 0
|
||||
|
||||
tokens_f = []
|
||||
|
||||
for t in tokens:
|
||||
if t not in stopw:
|
||||
tokens_f.append(t)
|
||||
|
||||
return tokens_f
|
||||
|
||||
#bracket_strings(t,b_brack,e_brack):
|
||||
#Return the most common string in the text that begins with b_brack and ends with e_brack
|
||||
# The count of how many times this string appeared is also returned
|
||||
#I find somewhat often people format debug strings like "[MOD_NAME] Function X did Y!"
|
||||
#This function is called by guess_module_names() - if you see this format with different brackets
|
||||
#you can edit that call
|
||||
def bracket_strings(t, b_brack,e_brack, sep):
|
||||
#sep = "tzvlw"
|
||||
#t = basicutils.CompileTextFromRange(start,end,sep)
|
||||
tokens = [tk.lower() for tk in t.split(sep)]
|
||||
#don't want to use tokenize here because it removes brackets
|
||||
|
||||
b=[]
|
||||
for tk in tokens:
|
||||
tk = tk.strip()
|
||||
|
||||
if tk.startswith(b_brack) :
|
||||
b_contents = tk[1:tk.find(e_brack)]
|
||||
#print("found bracket string, content: %s" % b_contents)
|
||||
#Hack to get rid of [-],[+],[*] - could also try to remove non alpha
|
||||
if (len(b_contents) > 3):
|
||||
#Hack for debug prints that started with [0x%x]
|
||||
if (b_contents != "0x%x"):
|
||||
b.append(b_contents)
|
||||
|
||||
debug_print("bracket_strings tokens:")
|
||||
debug_print(tokens)
|
||||
debug_print(b)
|
||||
|
||||
u_gram=""
|
||||
u_gram_score=0
|
||||
if (len(b) > 0):
|
||||
f = nltk.FreqDist(b)
|
||||
u_gram = f.most_common(1)[0][0]
|
||||
u_gram_score = f.most_common(1)[0][1]
|
||||
|
||||
return (u_gram,u_gram_score)
|
||||
|
||||
#is_source_file_str(f):
|
||||
#return True if the file string ends with one of the source file extensions
|
||||
#This uses structure borrowed from Joxean Koret's IDAMagicStrings
|
||||
LANGS = {}
|
||||
LANGS["C/C++"] = ["c", "cc", "cxx", "cpp", "h", "hpp"]
|
||||
LANGS["C"] = ["c"]
|
||||
LANGS["C++"] = ["cc", "cxx", "cpp", "hpp", "c++"]
|
||||
LANGS["Obj-C"] = ["m"]
|
||||
LANGS["Rust"] = ["rs"]
|
||||
LANGS["Golang"] = ["go"]
|
||||
LANGS["OCaml"] = ["ml"]
|
||||
def is_source_file_str(f):
|
||||
for key in LANGS:
|
||||
for ext in LANGS[key]:
|
||||
if f.endswith("." + ext):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
#source_file_strings(start,end):
|
||||
#Return the most common string that looks like a source file name in the given text string
|
||||
# The count of how many times this string appeared is also returned
|
||||
def source_file_strings(t,sep):
|
||||
#sep = "tzvlw"
|
||||
#t = basicutils.CompileTextFromRange(start,end,sep)
|
||||
#normally would do lower here to normalize but we lose camel case that way
|
||||
tokens = [tk for tk in t.split(sep)]
|
||||
|
||||
#for each string, remove quotes and commas, then tokenize based on spaces to generate the final list
|
||||
tokens2=[]
|
||||
for tk in tokens:
|
||||
tk = tk.strip()
|
||||
#strip punctuation, need to leave in _ for filenames and / and \ for paths
|
||||
tk = re.sub("[\"\'\,]"," ",tk)
|
||||
for tk2 in tk.split(" "):
|
||||
tokens2.append(tk2)
|
||||
|
||||
debug_print("source_file_strings tokens2:")
|
||||
debug_print(tokens2)
|
||||
|
||||
b=[]
|
||||
for tk in tokens2:
|
||||
tk = tk.strip()
|
||||
if is_source_file_str(tk):
|
||||
#If there's a dir path, only use the end filename
|
||||
#This could be tweaked if the directory structure is part of the software architecture
|
||||
#e.g. if there are multiple source directories with meaningful names
|
||||
if tk.rfind("/") != -1:
|
||||
ntk = tk[tk.rfind("/")+1:]
|
||||
elif tk.rfind("\\") != -1:
|
||||
ntk = tk[tk.rfind("\\")+1:]
|
||||
else:
|
||||
ntk = tk
|
||||
b.append(ntk)
|
||||
|
||||
debug_print("source_file_strings tokens:")
|
||||
debug_print(tokens)
|
||||
debug_print(b)
|
||||
|
||||
#a better way to do this (if there are multiple)
|
||||
#would be to sort, uniquify, and then make the name foo.c_and_bar.c
|
||||
u_gram=""
|
||||
u_gram_score=0
|
||||
if (len(b) > 0):
|
||||
f = nltk.FreqDist(b)
|
||||
u_gram = f.most_common(1)[0][0]
|
||||
u_gram_score = f.most_common(1)[0][1]
|
||||
|
||||
return (u_gram,u_gram_score)
|
||||
|
||||
#common_strings(t, sep):
|
||||
#Return a list of the common strings in the string "t" - lines separated by "sep"
|
||||
#Uses NLTK to generate a list of unigrams, bigrams, and trigrams (1 word, 2 word phrase, 3 word phrase)
|
||||
#If the trigram score > 1/2 * bigram score, the most common trigram is used
|
||||
#If the bigram score > 1/2 * unigram score, the most common bigram is used
|
||||
#Otherwise the most common unigram (single word is used)
|
||||
def common_strings(t,sep):
|
||||
CS_THRESHOLD = 6
|
||||
|
||||
tokens = string_range_tokenize(t)
|
||||
|
||||
#make a copy since we're going to edit it
|
||||
u_tokens = tokens
|
||||
c=0
|
||||
while (c<len(u_tokens)):
|
||||
if u_tokens[c] == sep:
|
||||
del u_tokens[c]
|
||||
else:
|
||||
c+=1
|
||||
|
||||
debug_print("common_strings tokens:")
|
||||
debug_print(tokens)
|
||||
|
||||
if len(u_tokens) < CS_THRESHOLD:
|
||||
#print("less than threshold")
|
||||
return ("",0)
|
||||
|
||||
f = nltk.FreqDist(u_tokens)
|
||||
u_gram = f.most_common(1)[0][0]
|
||||
u_gram_score = f.most_common(1)[0][1]
|
||||
|
||||
#print "Tokens:"
|
||||
#print tokens
|
||||
#print len(tokens)
|
||||
|
||||
bgs = list(nltk.bigrams(tokens))
|
||||
c=0
|
||||
while (c<len(bgs)):
|
||||
if sep in bgs[c]:
|
||||
del bgs[c]
|
||||
else:
|
||||
c+=1
|
||||
|
||||
debug_print("Bigrams:")
|
||||
debug_print(bgs)
|
||||
if (len(bgs) != 0):
|
||||
fs = nltk.FreqDist(bgs)
|
||||
b_gram = fs.most_common(1)[0][0]
|
||||
#print "Most Common:"
|
||||
#print b_gram
|
||||
b_str = b_gram[0] + "_" + b_gram[1]
|
||||
b_gram_score = fs.most_common(1)[0][1]
|
||||
else:
|
||||
b_str =""
|
||||
b_gram_score = 0
|
||||
|
||||
tgs = list(nltk.trigrams(tokens))
|
||||
c=0
|
||||
while (c<len(tgs)):
|
||||
if sep in tgs[c]:
|
||||
del tgs[c]
|
||||
else:
|
||||
c+=1
|
||||
debug_print("Trigrams:")
|
||||
debug_print(tgs)
|
||||
if (len(tgs) != 0):
|
||||
ft = nltk.FreqDist(tgs)
|
||||
t_gram = ft.most_common(1)[0][0]
|
||||
t_str = t_gram[0] + "_" + t_gram[1] + "_" + t_gram[2]
|
||||
t_gram_score = ft.most_common(1)[0][1]
|
||||
else:
|
||||
t_str = ""
|
||||
t_gram_score = 0
|
||||
|
||||
|
||||
debug_print("1: %s - %d 2: %s - %d 3: %s - %d\n" % (u_gram,u_gram_score,b_str,b_gram_score,t_str,t_gram_score))
|
||||
|
||||
if (b_gram_score > 1) and (b_gram_score * 2 >= u_gram_score):
|
||||
if (t_gram_score > 1) and (t_gram_score * 2 >= b_gram_score):
|
||||
ret = t_str
|
||||
ret_s = t_gram_score
|
||||
else:
|
||||
ret = b_str
|
||||
ret_s = b_gram_score
|
||||
else:
|
||||
ret = u_gram
|
||||
ret_s = u_gram_score
|
||||
|
||||
return (ret,ret_s)
|
||||
|
||||
### End of NLP Section ###
|
||||
|
||||
|
||||
|
||||
#guess_module_names():
|
||||
#Use the NLP section (above) to guess the names of modules and add them to the global module list
|
||||
#Attempts to find common bracket strings (e.g. "[MOD_NAME] Debug print!")
|
||||
#then source file names (most often left over from calls to assert())
|
||||
#then common trigram/bigram/unigrams
|
||||
#You can tweak the switchover thresholds below.
|
||||
|
||||
def guess_module_names(t,sep):
|
||||
#idea - make score threshold based on the size of the module
|
||||
# (e.g. smaller modules should have a smaller threshold
|
||||
C_SCORE_THRESHOLD = 4 #we need to see at least <N> occurrences of a string set in order to pick that name
|
||||
S_SCORE_THRESHOLD = 2 #if we see <N> occurrences of foo.c we'll pick "foo.c"
|
||||
B_SCORE_THRESHOLD = 2 #if we see <N> occurrences of [foo] we'll pick "foo"
|
||||
|
||||
# first look for strings that start with [FOO], (bracket strings)
|
||||
# then look for strings that contain source files (.c,.cpp,etc.)
|
||||
# then try common strings
|
||||
# above thresholds can be tweaked - they represent the number of strings that have to be repeated
|
||||
# in order to use that string as the module name
|
||||
(name,scr) = bracket_strings(t,"[","]",sep)
|
||||
debug_print("bracket name: %s score: %d" %(name, scr))
|
||||
#if (True):
|
||||
if (scr < B_SCORE_THRESHOLD):
|
||||
(name,scr) = source_file_strings(t,sep)
|
||||
debug_print("source name: %s score: %d" % (name, scr))
|
||||
#if (True):e
|
||||
if (scr < S_SCORE_THRESHOLD):
|
||||
(name,scr) = common_strings(t,sep)
|
||||
debug_print("common name: %s score: %d" % (name, scr))
|
||||
if (scr < C_SCORE_THRESHOLD):
|
||||
#Couldn't come up with a name
|
||||
name = "unknown"
|
||||
|
||||
return name
|
||||
|
||||
def main():
|
||||
#t=""
|
||||
sep = "tzvlw"
|
||||
# java side handles adding sep between strings,
|
||||
# read all in at once (no newlines between strings)
|
||||
#t = sys.stdin.readline()
|
||||
t = input()
|
||||
#print ("text in: %s" % t)
|
||||
name = guess_module_names(t,sep)
|
||||
print(name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
74
codecut-gui/ghidra_scripts/ModNamingRun.py
Normal file
74
codecut-gui/ghidra_scripts/ModNamingRun.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# @category CodeCut
|
||||
# @menupath CodeCut.ModNaming (Run)
|
||||
# @toolbar codecut.png
|
||||
# @runtime PyGhidra
|
||||
|
||||
# (C) 2022 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# (JHU/APL). All Rights Reserved.
|
||||
#
|
||||
# This material may be only be used, modified, or reproduced by or for
|
||||
# the U.S. Government pursuant to the license rights granted under the
|
||||
# clauses at DFARS 252.227-7013/7014 or FAR 52.227-14. For any other
|
||||
# permission, please contact the Office of Technology Transfer at
|
||||
# JHU/APL.
|
||||
#
|
||||
# NO WARRANTY, NO LIABILITY. THIS MATERIAL IS PROVIDED "AS IS." JHU/APL
|
||||
# MAKES NO REPRESENTATION OR WARRANTY WITH RESPECT TO THE PERFORMANCE OF
|
||||
# THE MATERIALS, INCLUDING THEIR SAFETY, EFFECTIVENESS, OR COMMERCIAL
|
||||
# VIABILITY, AND DISCLAIMS ALL WARRANTIES IN THE MATERIAL, WHETHER
|
||||
# EXPRESS OR IMPLIED, INCLUDING (BUT NOT LIMITED TO) ANY AND ALL IMPLIED
|
||||
# WARRANTIES OF PERFORMANCE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE, AND NON-INFRINGEMENT OF INTELLECTUAL PROPERTY OR OTHER THIRD
|
||||
# PARTY RIGHTS. ANY USER OF THE MATERIAL ASSUMES THE ENTIRE RISK AND
|
||||
# LIABILITY FOR USING THE MATERIAL. IN NO EVENT SHALL JHU/APL BE LIABLE
|
||||
# TO ANY USER OF THE MATERIAL FOR ANY ACTUAL, INDIRECT, CONSEQUENTIAL,
|
||||
# SPECIAL OR OTHER DAMAGES ARISING FROM THE USE OF, OR INABILITY TO USE,
|
||||
# THE MATERIAL, INCLUDING, BUT NOT LIMITED TO, ANY DAMAGES FOR LOST
|
||||
# PROFITS.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
|
||||
# This material is based upon work supported by the Defense Advanced Research
|
||||
# Projects Agency (DARPA) and Naval Information Warfare Center Pacific (NIWC Pacific)
|
||||
# under Contract Number N66001-20-C-4024.
|
||||
|
||||
|
||||
from dependency_bootstrap import DependencyManager
|
||||
|
||||
# list the packages you need
|
||||
# dictionary of "import name" : "pip name"
|
||||
# for when they differ, e.g. "sklearn": "scikit-learn"
|
||||
deps = DependencyManager(
|
||||
{"nltk": "nltk"})
|
||||
|
||||
# make sure they're installed
|
||||
if not deps.ensure_or_prompt():
|
||||
println("[ModNaming] Required Python packages not available, exiting.")
|
||||
exit(1)
|
||||
|
||||
|
||||
from modnaming import *
|
||||
|
||||
import sys
|
||||
import json
|
||||
|
||||
|
||||
# Pass Ghidra context + args into your package entry point
|
||||
# run(currentProgram, state, monitor, *args)
|
||||
|
||||
def main():
|
||||
args = list(getScriptArgs())
|
||||
|
||||
with open(args[0], "r") as f:
|
||||
t = f.read()
|
||||
|
||||
sep = "tzvlw"
|
||||
name = guess_module_names(t, sep)
|
||||
|
||||
with open(args[1], "w") as f:
|
||||
f.write(name)
|
||||
|
||||
print("Successfully guessed module name: ", name)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,3 +1,6 @@
|
||||
#@category CodeCut
|
||||
#@runtime PyGhidra
|
||||
#
|
||||
## Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
## (JHU/APL). All Rights Reserved.
|
||||
#
|
||||
|
||||
@@ -35,11 +35,11 @@ def decompile_user_functions_in_range(
|
||||
current_program.getAddressFactory().getAddress(end_address_str)
|
||||
|
||||
if start_address is None or end_address is None:
|
||||
print 'Invalid address range specified.'
|
||||
print('Invalid address range specified.')
|
||||
return
|
||||
|
||||
if start_address >= end_address:
|
||||
print 'Invalid address range: start address should be less than end address.'
|
||||
print('Invalid address range: start address should be less than end address.')
|
||||
return
|
||||
|
||||
decompiler = DecompInterface()
|
||||
|
||||
100
codecut-gui/ghidra_scripts/dependency_bootstrap.py
Normal file
100
codecut-gui/ghidra_scripts/dependency_bootstrap.py
Normal file
@@ -0,0 +1,100 @@
|
||||
from __future__ import annotations
|
||||
import sys, os, io, importlib, subprocess
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
# list the packages you need
|
||||
# dictionary of "import name" : "pip name"
|
||||
# for when they differ, e.g. "sklearn": "scikit-learn"
|
||||
|
||||
class DependencyManager:
|
||||
"""
|
||||
Minimal dependency manager for Ghidra Python (PyGhidra/CPython).
|
||||
- Takes a dict {import_name: pip_name}.
|
||||
- Prompts the user to install missing ones via a Swing/Ghidra popup.
|
||||
- Reloads site/import caches so new installs are importable immediately.
|
||||
"""
|
||||
|
||||
def __init__(self, packages: Dict[str, str], *, title: str = "Missing Python Packages"):
|
||||
self.packages = packages
|
||||
self.title = title
|
||||
|
||||
# -------- public API --------
|
||||
def ensure_or_prompt(self) -> bool:
|
||||
_, missing = self._try_imports(list(self.packages.keys()))
|
||||
if not missing:
|
||||
return True
|
||||
|
||||
if not self._ask_to_install(missing):
|
||||
return False
|
||||
|
||||
pip_names = [self.packages[name] for name in missing]
|
||||
if not self._pip_install(pip_names):
|
||||
return False
|
||||
|
||||
self._reload_paths()
|
||||
_, still = self._try_imports(missing)
|
||||
if still:
|
||||
print("[deps] Still missing after install:", still)
|
||||
return False
|
||||
return True
|
||||
|
||||
# -------- internals --------
|
||||
def _try_imports(self, names: List[str]) -> Tuple[List[str], List[str]]:
|
||||
ok, missing = [], []
|
||||
for n in names:
|
||||
try:
|
||||
importlib.import_module(n)
|
||||
ok.append(n)
|
||||
except Exception:
|
||||
missing.append(n)
|
||||
return ok, missing
|
||||
|
||||
def _ask_to_install(self, missing: List[str]) -> bool:
|
||||
# Prefer Ghidra OptionDialog (GUI-safe)
|
||||
try:
|
||||
from docking.widgets import OptionDialog
|
||||
lines = ["The following Python packages are required and missing:\n"]
|
||||
lines += [f" • import '{name}' (pip install {self.packages[name]})" for name in missing]
|
||||
lines += ["", "Install them now with pip?"]
|
||||
msg = "\n".join(lines)
|
||||
return OptionDialog.showYesNoDialog(None, self.title, msg) == OptionDialog.YES_OPTION
|
||||
except Exception:
|
||||
# Headless fallback is unlikely in-tool, but just in case:
|
||||
print(f"{self.title}: will install {', '.join(self.packages[n] for n in missing)}")
|
||||
return True
|
||||
|
||||
def _pip_install(self, pip_names: List[str]) -> bool:
|
||||
args = ["install", "--upgrade", "--no-input"] + pip_names
|
||||
print(f"[deps] pip {' '.join(args)}")
|
||||
|
||||
# Suppress pip’s version check and ensure no interactive prompts
|
||||
env = dict(os.environ)
|
||||
env.setdefault("PIP_DISABLE_PIP_VERSION_CHECK", "1")
|
||||
env.setdefault("PYTHONWARNINGS", "ignore") # optional: quiet noisy warnings
|
||||
|
||||
|
||||
# pip 20+: use cli.main
|
||||
from pip._internal.cli.main import main as pip_main # type: ignore
|
||||
|
||||
try:
|
||||
code = pip_main(args)
|
||||
except SystemExit as e: # pip may call sys.exit()
|
||||
code = int(e.code) if e.code is not None else 0
|
||||
|
||||
if int(code) == 0:
|
||||
return True
|
||||
print(f"[deps] pip (in-process) failed with code {code}")
|
||||
|
||||
|
||||
def _reload_paths(self) -> None:
|
||||
importlib.invalidate_caches()
|
||||
try:
|
||||
import site
|
||||
importlib.reload(site) # process site-packages & .pth files
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
import pkg_resources # type: ignore
|
||||
pkg_resources.working_set.__init__() # rebuild dist cache
|
||||
except Exception:
|
||||
pass
|
||||
@@ -1,3 +1,4 @@
|
||||
# @category CodeCut
|
||||
from ghidra.program.model.listing import Function
|
||||
from ghidra.program.model.symbol import SourceType
|
||||
|
||||
@@ -39,6 +40,7 @@ def get_referenced_function_signatures_base(function, monitor):
|
||||
return signatures
|
||||
|
||||
|
||||
|
||||
def getFunctionReferences(function, monitor):
|
||||
refs = set()
|
||||
instructions = \
|
||||
@@ -47,9 +49,10 @@ def getFunctionReferences(function, monitor):
|
||||
for instr in instructions:
|
||||
flowType = instr.getFlowType()
|
||||
if flowType.isCall():
|
||||
target = instr.getOperandReferences(0)[0].getToAddress()
|
||||
func = \
|
||||
function.getProgram().getFunctionManager().getFunctionAt(target)
|
||||
oprefs = instr.getOperandReferences(0)
|
||||
if not oprefs: continue
|
||||
target = oprefs[0].getToAddress()
|
||||
func = function.getProgram().getFunctionManager().getFunctionAt(target)
|
||||
if func is not None:
|
||||
refs.add(func)
|
||||
return refs
|
||||
|
||||
@@ -17,7 +17,7 @@ def get_global_variables(program, start_addr, end_addr):
|
||||
#set.addRange(start_addr, end_addr)
|
||||
print(start_address, end_address)
|
||||
print(addrset)
|
||||
|
||||
|
||||
#for symbol in symbol_table.getAllSymbols(False):
|
||||
for symbol in symbol_table.getSymbols(addrset,SymbolType.LABEL,True):
|
||||
print(symbol)
|
||||
@@ -27,30 +27,30 @@ def get_global_variables(program, start_addr, end_addr):
|
||||
if (program.getListing().getDataAt(symbol.getAddress())):
|
||||
global_vars.append(symbol)
|
||||
|
||||
'''
|
||||
'''
|
||||
def is_user_defined(var):
|
||||
var_name = var.getName()
|
||||
var_addr = var.getAddress()
|
||||
|
||||
|
||||
if var_name.startswith('__') or var_name.startswith('_'):
|
||||
return False
|
||||
|
||||
|
||||
if var_name.startswith('imp_') or var_name.startswith('thunk_'):
|
||||
return False
|
||||
|
||||
|
||||
if var_name.startswith('fde_') or var_name.startswith('cie_'):
|
||||
return False
|
||||
|
||||
|
||||
if var_name.startswith('completed.0') \
|
||||
or var_name.startswith('data_start'):
|
||||
return False
|
||||
|
||||
|
||||
if var_addr.toString().startswith('EXTERNAL:'):
|
||||
return False
|
||||
section_name = program.getMemory().getBlock(var_addr).getName()
|
||||
#if section_name not in ['.data', '.bss']:
|
||||
# return False
|
||||
|
||||
|
||||
return True
|
||||
'''
|
||||
|
||||
|
||||
351
codecut-gui/ghidra_scripts/modnaming.py
Normal file
351
codecut-gui/ghidra_scripts/modnaming.py
Normal file
@@ -0,0 +1,351 @@
|
||||
##############################################################################################
|
||||
# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC
|
||||
# All rights reserved.
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
# software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify,
|
||||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
# OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# HAVE A NICE DAY.
|
||||
#
|
||||
# This material is based upon work supported by the Defense Advanced Research
|
||||
# Projects Agency (DARPA) and Naval Information Warfare Center Pacific (NIWC Pacific)
|
||||
# under Contract Number N66001-20-C-4024.
|
||||
#
|
||||
|
||||
import sys

# Debug aid: echoes the interpreter path before anything else runs.
# NOTE(review): this writes an extra line to stdout ahead of the module name
# printed by main() -- the Java side reads this script's stdout, so confirm
# the consumer skips/tolerates this line before removing it.
print(sys.executable)

import sys  # NOTE(review): duplicate of the import above; redundant but harmless
import math
import nltk
import nltk.collocations
import re
|
||||
|
||||
# Debug tracing hook for this script; re-enable the print below to see traces.
def debug_print(x):
    """No-op debug sink. Uncomment the print call to get debug output."""
    # print(x)
    return None
|
||||
|
||||
### NLP Section ###
|
||||
|
||||
# This section of code attempts to name the modules based on common strings in the string references
|
||||
# Not really based on any sound science or anything - your mileage may heavily vary. :-D
|
||||
|
||||
#string_range_tokenize(t):
#Take a long string and convert it into a list of tokens. If using a separator, this will appear in the token list
def string_range_tokenize(t):
    """Split a text blob into lowercase word tokens with stop words removed.

    The input is split on whitespace and lowercased, then three stop-word
    lists are filtered out: common English words (list from the MIT *bow
    project), common "code" words seen in debugging strings, and the list
    from Joxean Koret's "IDAMagicStrings".  If a separator token is embedded
    in the text it is NOT removed here and will appear in the result.
    """
    # Optional normalizations, deliberately disabled; re-enable as needed:
    #   re.sub("%[0-9A-Za-z]+"," ",t)     - strip printf/sprintf format strings
    #   re.sub("-","_",t)                 - convert dash to underscore
    #   re.sub("_"," ",t)                 - break up snake case (a path used
    #                                       throughout the binary may then
    #                                       dominate results)
    #   re.sub("[/\\\\]"," ",t)           - replace / and \ with a space
    #   re.sub("[^A-Za-z0-9_\.\s]"," ",t) - keep only alphanumeric, space,
    #                                       . (for .c, .cpp, etc) and _

    #lowercase it - and store this as the original set of tokens to work with
    tokens = [tk.lower() for tk in t.split()]

    #English stop words - this is the list from the MIT *bow project
    eng_stopw = {"about","all","am","an","and","are","as","at","be","been","but","by","can","cannot","did","do","does","doing","done","for","from","had","has","have","having","if","in","is","it","its","of","on","that","the","these","they","this","those","to","too","want","wants","was","what","which","will","with","would"}
    #"code" stop words, e.g. common words in debugging strings
    code_sw = {"error","err","errlog","log","return","returned","byte","bytes","status","len","length","size","ok","0x","warning","fail","failed","failure","invalid","illegal","param","parameter","done","complete","assert","assertion","cant","didnt","class","foundation","cdecl","stdcall","thiscall"}
    #code stop words from Joxean Koret's "IDAMagicStrings"
    jk_sw = {"copyright", "char", "bool", "int", "unsigned", "long",
        "double", "float", "signed", "license", "version", "cannot", "error",
        "invalid", "null", "warning", "general", "argument", "written", "report",
        "failed", "assert", "object", "integer", "unknown", "localhost", "native",
        "memory", "system", "write", "read", "open", "close", "help", "exit", "test",
        "return", "libs", "home", "ambiguous", "internal", "request", "inserting",
        "deleting", "removing", "updating", "adding", "assertion", "flags",
        "overflow", "enabled", "disabled", "enable", "disable", "virtual", "client",
        "server", "switch", "while", "offset", "abort", "panic", "static", "updated",
        "pointer", "reason", "month", "year", "week", "hour", "minute", "second",
        'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
        'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
        'september', 'october', 'november', 'december', "arguments", "corrupt",
        "corrupted", "default", "success", "expecting", "missing", "phrase",
        "unrecognized", "undefined"}

    # one combined stop-word set (the unused counter from the original
    # implementation has been dropped)
    stopw = eng_stopw | code_sw | jk_sw

    # keep only tokens that are not stop words
    return [tk for tk in tokens if tk not in stopw]
|
||||
|
||||
#bracket_strings(t,b_brack,e_brack,sep):
#People often format debug strings like "[MOD_NAME] Function X did Y!".
#This is called by guess_module_names() - if your binary uses a different
#bracket style, edit that call.
def bracket_strings(t, b_brack, e_brack, sep):
    """Return (tag, count) for the most common bracketed prefix tag in t.

    t is a blob of strings joined by sep.  A line that starts with b_brack
    contributes the text up to e_brack (or the rest of the line if e_brack
    is absent).  Returns ("", 0) when no candidate tags are found.
    """
    # Plain split on the separator -- the NLP tokenizer is not used here
    # because it would strip the brackets we are looking for.
    tokens = [tk.lower() for tk in t.split(sep)]

    candidates = []
    for tk in tokens:
        tk = tk.strip()
        if tk.startswith(b_brack):
            b_contents = tk[1:tk.find(e_brack)] if e_brack in tk else tk[1:]
            #print("found bracket string, content: %s" % b_contents)
            # len > 3 is a hack to drop [-], [+], [*]; the "0x%x" check drops
            # debug prints that started with [0x%x]
            if len(b_contents) > 3 and b_contents != "0x%x":
                candidates.append(b_contents)

    debug_print("bracket_strings tokens:")
    debug_print(tokens)
    debug_print(candidates)

    u_gram = ""
    u_gram_score = 0
    if len(candidates) > 0:
        # compute most_common once instead of twice
        (u_gram, u_gram_score) = nltk.FreqDist(candidates).most_common(1)[0]

    return (u_gram, u_gram_score)
|
||||
|
||||
#is_source_file_str(f):
#return True if the file string ends with one of the source file extensions
#This uses structure borrowed from Joxean Koret's IDAMagicStrings
LANGS = {}
LANGS["C/C++"] = ["c", "cc", "cxx", "cpp", "h", "hpp"]
LANGS["C"] = ["c"]
LANGS["C++"] = ["cc", "cxx", "cpp", "hpp", "c++"]
LANGS["Obj-C"] = ["m"]
LANGS["Rust"] = ["rs"]
LANGS["Golang"] = ["go"]
LANGS["OCaml"] = ["ml"]

# All recognized suffixes ("." + ext), flattened from LANGS once at import
# time so is_source_file_str() is a single endswith() call instead of a
# nested loop per query.
_SOURCE_EXTS = tuple("." + ext for exts in LANGS.values() for ext in exts)

def is_source_file_str(f):
    """Return True if f ends with a known source-file extension (see LANGS)."""
    # str.endswith accepts a tuple of suffixes
    return f.endswith(_SOURCE_EXTS)
|
||||
|
||||
|
||||
#source_file_strings(t, sep):
def source_file_strings(t, sep):
    """Return (filename, count) for the most common source-file-looking
    string in t (a blob of strings joined by sep), e.g. leftovers from
    assert() calls.  Returns ("", 0) when none are found.
    """
    # normally would lower() here to normalize, but that loses camel case
    tokens = t.split(sep)

    # for each string remove quotes and commas, then split on spaces to
    # generate the final candidate list
    tokens2 = []
    for tk in tokens:
        tk = tk.strip()
        # strip quote punctuation; keep _ for filenames and / \ for paths
        tk = re.sub(r"[\"\',]", " ", tk)
        tokens2.extend(tk.split(" "))

    debug_print("source_file_strings tokens2:")
    debug_print(tokens2)

    b = []
    for tk in tokens2:
        tk = tk.strip()
        if is_source_file_str(tk):
            # If there's a dir path, only use the trailing filename.  This
            # could be tweaked if the directory structure is part of the
            # software architecture (e.g. multiple source directories with
            # meaningful names).  "/" is checked before "\\", matching the
            # original behavior for mixed-separator paths.
            if tk.rfind("/") != -1:
                ntk = tk[tk.rfind("/") + 1:]
            elif tk.rfind("\\") != -1:
                ntk = tk[tk.rfind("\\") + 1:]
            else:
                ntk = tk
            b.append(ntk)

    debug_print("source_file_strings tokens:")
    debug_print(tokens)
    debug_print(b)

    # A better approach when multiple files dominate would be to sort,
    # uniquify, and join the names, e.g. "foo.c_and_bar.c".
    u_gram = ""
    u_gram_score = 0
    if len(b) > 0:
        # compute most_common once instead of twice
        (u_gram, u_gram_score) = nltk.FreqDist(b).most_common(1)[0]

    return (u_gram, u_gram_score)
|
||||
|
||||
#common_strings(t, sep):
#Return the most common phrase in the string "t" - lines separated by "sep"
#Uses NLTK unigrams, bigrams, and trigrams (1 word, 2 word phrase, 3 word phrase):
#  if trigram score > 1/2 * bigram score, the most common trigram is used
#  if bigram score > 1/2 * unigram score, the most common bigram is used
#  otherwise the most common unigram (single word) is used
def common_strings(t, sep):
    """Return (phrase, count) for the most representative common phrase in t.

    Returns ("", 0) when fewer than CS_THRESHOLD usable tokens remain after
    stop-word removal.
    """
    CS_THRESHOLD = 6

    tokens = string_range_tokenize(t)

    # BUGFIX: the original did "u_tokens = tokens", which aliases rather than
    # copies, so deleting separators from u_tokens also stripped them from
    # the list used for the bigram/trigram passes (letting n-grams span two
    # unrelated strings).  Build a genuinely separate, separator-free list
    # for the unigram pass instead.
    u_tokens = [tk for tk in tokens if tk != sep]

    debug_print("common_strings tokens:")
    debug_print(tokens)

    if len(u_tokens) < CS_THRESHOLD:
        # not enough signal to pick a name
        return ("", 0)

    f = nltk.FreqDist(u_tokens)
    (u_gram, u_gram_score) = f.most_common(1)[0]

    # bigrams: drop any pair that crosses a separator boundary
    bgs = [bg for bg in nltk.bigrams(tokens) if sep not in bg]
    debug_print("Bigrams:")
    debug_print(bgs)
    if len(bgs) != 0:
        fs = nltk.FreqDist(bgs)
        (b_gram, b_gram_score) = fs.most_common(1)[0]
        b_str = b_gram[0] + "_" + b_gram[1]
    else:
        b_str = ""
        b_gram_score = 0

    # trigrams: same separator filtering
    tgs = [tg for tg in nltk.trigrams(tokens) if sep not in tg]
    debug_print("Trigrams:")
    debug_print(tgs)
    if len(tgs) != 0:
        ft = nltk.FreqDist(tgs)
        (t_gram, t_gram_score) = ft.most_common(1)[0]
        t_str = t_gram[0] + "_" + t_gram[1] + "_" + t_gram[2]
    else:
        t_str = ""
        t_gram_score = 0

    debug_print("1: %s - %d 2: %s - %d 3: %s - %d\n" % (u_gram, u_gram_score, b_str, b_gram_score, t_str, t_gram_score))

    # Prefer the longest phrase whose frequency is at least half the next
    # shorter phrase's frequency (and which occurred more than once).
    if (b_gram_score > 1) and (b_gram_score * 2 >= u_gram_score):
        if (t_gram_score > 1) and (t_gram_score * 2 >= b_gram_score):
            return (t_str, t_gram_score)
        return (b_str, b_gram_score)
    return (u_gram, u_gram_score)
|
||||
|
||||
### End of NLP Section ###
|
||||
|
||||
|
||||
|
||||
#guess_module_names(t,sep):
#Use the NLP section (above) to guess a module's name from its strings.
#Tries, in order: common bracket tags (e.g. "[MOD_NAME] Debug print!"),
#source file names (most often left over from calls to assert()),
#then common trigram/bigram/unigrams.
#You can tweak the switchover thresholds below.

def guess_module_names(t, sep):
    #idea - scale the score thresholds by module size
    # (e.g. smaller modules should have a smaller threshold)
    C_SCORE_THRESHOLD = 4  #need at least <N> occurrences of a string set to pick that name
    S_SCORE_THRESHOLD = 2  #need <N> occurrences of foo.c to pick "foo.c"
    B_SCORE_THRESHOLD = 2  #need <N> occurrences of [foo] to pick "foo"

    # Each guesser returns (name, score); accept the first whose score meets
    # its threshold, otherwise fall through to the next guesser.
    (name, scr) = bracket_strings(t, "[", "]", sep)
    debug_print("bracket name: %s score: %d" % (name, scr))
    if scr >= B_SCORE_THRESHOLD:
        return name

    (name, scr) = source_file_strings(t, sep)
    debug_print("source name: %s score: %d" % (name, scr))
    if scr >= S_SCORE_THRESHOLD:
        return name

    (name, scr) = common_strings(t, sep)
    debug_print("common name: %s score: %d" % (name, scr))
    if scr >= C_SCORE_THRESHOLD:
        return name

    #couldn't come up with a name
    return "unknown"
|
||||
|
||||
def main():
    """Read one separator-joined line of strings from stdin and print the
    guessed module name."""
    sep = "tzvlw"
    # The Java side joins the strings with sep and writes a single line
    # (no embedded newlines), so one read gets everything.
    text = input()
    name = guess_module_names(text, sep)
    print(name)


if __name__ == "__main__":
    main()
|
||||
@@ -1,122 +0,0 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
+ Version 0.28 (2022.02.03)
|
||||
|
||||
- Added a method for returning the index of a section by name (#331)
|
||||
- Allow filtering by section types in iter_sections (#345)
|
||||
- Support Android compressed rel/rela sections (#357)
|
||||
- Initial support for PPC64LE (#360)
|
||||
- Initial DWARF v5 support (#363 with several follow-ups)
|
||||
- Fixed parsing for structures containing uids or gids in core
|
||||
dumps (#354)
|
||||
- Allow filtering by segment types in iter_segments (#375)
|
||||
- Add support for .note.gnu.property (#386)
|
||||
- Update readelf tests to work with more recent version of
|
||||
readelf (#387)
|
||||
- Add support for note GNU_PROPERTY_X86_FEATURE_1_AND (#388)
|
||||
|
||||
+ Version 0.27 (2020.10.27)
|
||||
|
||||
- Print addend for RELA relocations without symbol (#292)
|
||||
- Implement symbol lookup for {GNU,}HashSection (#290)
|
||||
- Major rewrite of expression parsing
|
||||
- Cached random access to CUs and DIEs (#264)
|
||||
- GNU expressions (#303)
|
||||
- Support parsing LSDA pointers from FDEs (#308)
|
||||
- Add support for DWA_OP_GNU_push_tls_address in expressions (#315)
|
||||
- Some initial support for AArch64 little-endian (#318)
|
||||
- Support for ELF files with a large number of sections (#333)
|
||||
- Some minimal support for DWARFv1 (#335)
|
||||
- Many small bug fixes; see git log.
|
||||
|
||||
+ Version 0.26 (2019.12.05)
|
||||
|
||||
- Call relocation for ARM v3 (#194)
|
||||
- More complete architecture coverage for ENUM_E_MACHINE (#206)
|
||||
- Support for .debug_pubtypes and .debug_pubnames sections (#208)
|
||||
- Support for DWARF v4 location lists (#214)
|
||||
- Decode strings in dynamic string tables (#217)
|
||||
- Improve symbol table handling in dynamic segments (#219)
|
||||
- Improved handling of location information (#225)
|
||||
- Avoid deprecation warnings in Python 3.7+
|
||||
- Add DWARF v5 OPs (#240)
|
||||
- Handle many new translation forms and constants
|
||||
- Lazy DIE parsing to speed up partial parsing of DWARF info (#249)
|
||||
|
||||
+ Version 0.25 (2018.09.01)
|
||||
|
||||
- Make parsing of SH_TYPE and PT_TYPE fields dependent on the machine
|
||||
(e_machine header field), making it possible to support conflicting type
|
||||
enums between different machines (#71 and #121).
|
||||
- Add parsing and readelf dumping for .eh_frame (#155)
|
||||
- Support compressed sections (#152)
|
||||
- Better support for parsing core dumps (#147)
|
||||
- More comprehensive handling of ARM relocations (#121)
|
||||
- Convert all ascii encoding to utf-8 encoding (#182)
|
||||
- Don't attempt to hex/string dump SHT_NOBITS sections in readelf (#119).
|
||||
- Test with Python 3.6
|
||||
- Minor bugfixes (#118)
|
||||
- Cleanup: Use argparse instead of optparse
|
||||
- Make readelf comparison tests run in parallel using multiprocessing; cuts
|
||||
testing time 3-5x
|
||||
- Improvements in MIPS flags handling (#165)
|
||||
|
||||
+ Version 0.24 (2016.08.04)
|
||||
|
||||
- Retrieve symbols by name - get_symbol_by_name (#58).
|
||||
- Symbol/section names are strings internally now, not bytestrings (this may
|
||||
affect API usage in Python 3) (#76).
|
||||
- Added DT_MIPS_* constants to ENUM_D_TAG (#79)
|
||||
- Made dwarf_decode_address example a bit more useful for command-line
|
||||
invocation.
|
||||
- More DWARF v4 support w.r.t decoding function ranges; DW_AT_high_pc value
|
||||
is now either absolute or relative to DW_AT_low_pc, depending on the class
|
||||
of the form encoded in the file. Also #89.
|
||||
- Support for SHT_NOTE sections (#109)
|
||||
- Support for .debug_aranges section (#108)
|
||||
- Support for zlib-compressed debug sections (#102)
|
||||
- Support for DWARF v4 line programs (#82)
|
||||
|
||||
+ Version 0.23 (2014.11.08)
|
||||
|
||||
- Minimal Python 2.x version raised to 2.7
|
||||
- Basic support for MIPS (contributed by Karl Vogel).
|
||||
- Support for PT_NOTE segment parsing (contributed by Alex Deymo).
|
||||
- Support for parsing symbol table in dynamic segment
|
||||
(contributed by Nam T. Nguyen).
|
||||
|
||||
+ Version 0.22 (2014.03.30)
|
||||
|
||||
- pyelftools repository moved to https://github.com/eliben/pyelftools
|
||||
- Support for version sections - contributed by Yann Rouillard.
|
||||
- Better ARM support (including AArch64) - contributed by Dobromir Stefanov.
|
||||
- Added some initial support for parsing Solaris OpenCSW ELF files
|
||||
(contributed by Yann Rouillard).
|
||||
- Added some initial support for DWARF4 (as generated by gcc 4.8)
|
||||
and DWARF generated by recent versions of Clang (3.3).
|
||||
- Added the get_full_path utility method to DIEs that have an associated
|
||||
file name / path (based on pull request #16 by Shaheed Haque).
|
||||
- Set up Travis CI integration.
|
||||
|
||||
+ Version 0.21 (2013.04.17)
|
||||
|
||||
- Added new example: dwarf_decode_address - decode function name and
|
||||
file & line information from an address.
|
||||
- Issue #7: parsing incorrect DWARF was made a bit more forgiving for cases
|
||||
where serialized DIE trees have extra NULLs at the end.
|
||||
- Very initial support for ARM ELF files (Matthew Fernandez - pull
|
||||
request #6).
|
||||
- Support for dumping the dynamic section (Mike Frysinger - pull
|
||||
request #7).
|
||||
- Output of scripts/readelf.py now matches that of binutils 2.23.52.
|
||||
- Added more machine EM_ values to ENUM_E_TYPE.
|
||||
|
||||
+ Version 0.20 (2012.01.27)
|
||||
|
||||
- Python 3 support
|
||||
- Fixed some problems with running tests
|
||||
- Issue #2: made all examples run (and test/run_examples_test.py pass)
|
||||
on Windows.
|
||||
|
||||
+ Version 0.10 - Initial public release (2012.01.06)
|
||||
@@ -1,32 +0,0 @@
|
||||
pyelftools is in the public domain (see below if you need more details).
|
||||
|
||||
pyelftools uses the construct library for structured parsing of a binary
|
||||
stream. construct is packaged in pyelftools/construct - see its LICENSE
|
||||
file for the license.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
||||
@@ -1,8 +0,0 @@
|
||||
recursive-include elftools *.py
|
||||
recursive-include scripts *.py
|
||||
recursive-include examples *.py *.elf *.out
|
||||
recursive-include test *.py *.elf *.arm *.mips *.o
|
||||
include README.rst
|
||||
include LICENSE
|
||||
include CHANGES
|
||||
include tox.ini
|
||||
@@ -1,55 +0,0 @@
|
||||
==========
|
||||
pyelftools
|
||||
==========
|
||||
|
||||
.. image:: https://github.com/eliben/pyelftools/workflows/pyelftools-tests/badge.svg
|
||||
:align: center
|
||||
:target: https://github.com/eliben/pyelftools/actions
|
||||
|
||||
**pyelftools** is a pure-Python library for parsing and analyzing ELF files
|
||||
and DWARF debugging information. See the
|
||||
`User's guide <https://github.com/eliben/pyelftools/wiki/User's-guide>`_
|
||||
for more details.
|
||||
|
||||
Pre-requisites
|
||||
--------------
|
||||
|
||||
As a user of **pyelftools**, one only needs Python to run. It works with
|
||||
Python versions 2.7 and 3.x (x >= 5). For hacking on **pyelftools** the
|
||||
requirements are a bit more strict, please see the
|
||||
`hacking guide <https://github.com/eliben/pyelftools/wiki/Hacking-guide>`_.
|
||||
|
||||
Installing
|
||||
----------
|
||||
|
||||
**pyelftools** can be installed from PyPI (Python package index)::
|
||||
|
||||
> pip install pyelftools
|
||||
|
||||
Alternatively, you can download the source distribution for the most recent and
|
||||
historic versions from the *Downloads* tab on the `pyelftools project page
|
||||
<https://github.com/eliben/pyelftools>`_ (by going to *Tags*). Then, you can
|
||||
install from source, as usual::
|
||||
|
||||
> python setup.py install
|
||||
|
||||
Since **pyelftools** is a work in progress, it's recommended to have the most
|
||||
recent version of the code. This can be done by downloading the `master zip
|
||||
file <https://github.com/eliben/pyelftools/archive/master.zip>`_ or just
|
||||
cloning the Git repository.
|
||||
|
||||
Since **pyelftools** has no external dependencies, it's also easy to use it
|
||||
without installing, by locally adjusting ``PYTHONPATH``.
|
||||
|
||||
How to use it?
|
||||
--------------
|
||||
|
||||
**pyelftools** is a regular Python library: you import and invoke it from your
|
||||
own code. For a detailed usage guide and links to examples, please consult the
|
||||
`user's guide <https://github.com/eliben/pyelftools/wiki/User's-guide>`_.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
**pyelftools** is open source software. Its code is in the public domain. See
|
||||
the ``LICENSE`` file for more details.
|
||||
@@ -1,36 +0,0 @@
|
||||
New version
|
||||
-----------
|
||||
|
||||
* Update elftools/__init__.py
|
||||
* Update setup.py
|
||||
* Update CHANGES
|
||||
* Tag in git (v0.xx)
|
||||
|
||||
construct
|
||||
---------
|
||||
|
||||
construct seems to be maintained again - they also backported my Python 3 fixes.
|
||||
Theoretically, I can remove construct from pyelftools and use it as a dependency
|
||||
instead. I don't really have time to play with this now, but may do so in the
|
||||
future.
|
||||
|
||||
Preparing a new release
|
||||
-----------------------
|
||||
|
||||
* Run 'tox' tests (with '-r' to create new venvs)
|
||||
* Make sure new version was updated everywhere appropriate
|
||||
* Run ``python setup.py build sdist bdist_wheel`` (no 'upload' yet)
|
||||
* Untar the created ``dist/pyelftools-x.y.tar.gz`` and make sure
|
||||
everything looks ok
|
||||
* Now build with upload to send it to PyPi
|
||||
* Tag new version in git
|
||||
* Test with pip install from some new virtualenv
|
||||
|
||||
Distribution
|
||||
------------
|
||||
|
||||
1. First install Twine (https://packaging.python.org/tutorials/packaging-projects/)
|
||||
2. python3 -m twine upload dist/*, but make sure ``setup.py`` was already run
|
||||
and the updated whl and tarball are in dist/.
|
||||
|
||||
Credentials for PyPI are stored in ~/.pypirc
|
||||
@@ -1,7 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
__version__ = '0.28'  # bump here (and in setup.py / CHANGES) for each release
|
||||
@@ -1,91 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: common/construct_utils.py
|
||||
#
|
||||
# Some complementary construct utilities
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from ..construct import (
|
||||
Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil,
|
||||
Rename, SizeofError
|
||||
)
|
||||
|
||||
|
||||
class RepeatUntilExcluding(Subconstruct):
    """ A version of construct's RepeatUntil that doesn't include the last
        element (which caused the repeat to exit) in the return value.

        Only parsing is currently implemented.

        P.S. removed some code duplication
    """
    __slots__ = ["predicate"]
    def __init__(self, predicate, subcon):
        # predicate: callable (subobj, context) -> bool; True stops the repeat
        Subconstruct.__init__(self, subcon)
        self.predicate = predicate
        self._clear_flag(self.FLAG_COPY_CONTEXT)
        self._set_flag(self.FLAG_DYNAMIC)
    def _parse(self, stream, context):
        obj = []
        try:
            # Copy the context once up front if the subconstruct asks for a
            # private copy, rather than on every iteration.
            context_for_subcon = context
            if self.subcon.conflags & self.FLAG_COPY_CONTEXT:
                context_for_subcon = context.__copy__()

            while True:
                subobj = self.subcon._parse(stream, context_for_subcon)
                if self.predicate(subobj, context):
                    # terminator element is deliberately NOT appended
                    break
                obj.append(subobj)
        except ConstructError as ex:
            # stream ran out before the predicate matched
            raise ArrayError("missing terminator", ex)
        return obj
    def _build(self, obj, stream, context):
        # building is intentionally unsupported (parse-only construct)
        raise NotImplementedError('no building')
    def _sizeof(self, context):
        # variable-length by definition, so no static size exists
        raise SizeofError("can't calculate size")
|
||||
|
||||
|
||||
def _LEB128_reader():
    """ Read LEB128 variable-length data from the stream. The data is terminated
        by a byte with 0 in its highest bit.
    """
    # Consume one byte per iteration until a byte with the continuation bit
    # (0x80) clear is seen; that final byte is included in the result.
    return RepeatUntil(
        lambda obj, ctx: ord(obj) < 0x80,
        Field(None, 1))
|
||||
|
||||
|
||||
class _ULEB128Adapter(Adapter):
    """ Decode an unsigned LEB128 value from the byte sequence produced by a
        sub-construct.
    """
    def _decode(self, obj, context):
        # Bytes arrive least-significant 7-bit group first; walk them from
        # the most-significant end, shifting the accumulator 7 bits per byte.
        result = 0
        for byte in obj[::-1]:
            result = (result << 7) | (ord(byte) & 0x7F)
        return result
|
||||
|
||||
|
||||
class _SLEB128Adapter(Adapter):
    """ Decode a signed LEB128 value from the byte sequence produced by a
        sub-construct.
    """
    def _decode(self, obj, context):
        # Same accumulation as ULEB128: least-significant group first.
        result = 0
        for byte in obj[::-1]:
            result = (result << 7) | (ord(byte) & 0x7F)
        # Sign bit (0x40) of the final (most significant) group set means
        # negative: sign-extend by subtracting 2^(7 * number_of_groups).
        if ord(obj[-1]) & 0x40:
            result -= 1 << (7 * len(obj))
        return result
|
||||
|
||||
|
||||
def ULEB128(name):
    """ A construct creator for ULEB128 encoding.
    """
    adapter = _ULEB128Adapter(_LEB128_reader())
    return Rename(name, adapter)
|
||||
|
||||
|
||||
def SLEB128(name):
    """ A construct creator for SLEB128 encoding.
    """
    adapter = _SLEB128Adapter(_LEB128_reader())
    return Rename(name, adapter)
|
||||
@@ -1,22 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: common/exceptions.py
|
||||
#
|
||||
# Exception classes for elftools
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
# Exception hierarchy: ELFError is the root for ELF-format failures, with
# relocation / parse / compression subtypes below it; DWARFError is a
# separate root for DWARF-related failures.
class ELFError(Exception):
    """ Root of the ELF-related exception hierarchy. """
    pass

class ELFRelocationError(ELFError):
    pass

class ELFParseError(ELFError):
    pass

class ELFCompressionError(ELFError):
    pass

class DWARFError(Exception):
    """ Root for DWARF-related errors; deliberately not an ELFError. """
    pass
|
||||
@@ -1,82 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: common/py3compat.py
|
||||
#
|
||||
# Python 2/3 compatibility code
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
import sys

# True when running under Python 3; selects which set of shims is defined.
PY3 = sys.version_info[0] == 3


if PY3:
    import io
    StringIO = io.StringIO
    BytesIO = io.BytesIO

    # Functions for acting on bytestrings and strings. In Python 2,
    # strings and bytes are the same and chr/ord can be used to convert between
    # numeric byte values and their string representations. In Python 3, bytes
    # and strings are different types and bytes hold numeric values when
    # iterated over.

    def bytes2hex(b, sep=''):
        # hex string of b; with sep, two hex digits per byte joined by sep
        if not sep:
            return b.hex()
        return sep.join(map('{:02x}'.format, b))

    def bytes2str(b): return b.decode('latin-1')
    def str2bytes(s): return s.encode('latin-1')
    def int2byte(i): return bytes((i,))
    def byte2int(b): return b

    def iterbytes(b):
        """Return an iterator over the elements of a bytes object.

        For example, for b'abc' yields b'a', b'b' and then b'c'.
        """
        for i in range(len(b)):
            yield b[i:i+1]

    # Python 3's filter is already lazy, matching Python 2's itertools.ifilter
    ifilter = filter

    maxint = sys.maxsize
else:
    import cStringIO
    StringIO = BytesIO = cStringIO.StringIO

    def bytes2hex(b, sep=''):
        res = b.encode('hex')
        if not sep:
            return res
        return sep.join(res[i:i+2] for i in range(0, len(res), 2))

    # On Python 2, str and bytes are the same type, so these are identities
    def bytes2str(b): return b
    def str2bytes(s): return s
    int2byte = chr
    byte2int = ord
    def iterbytes(b):
        return iter(b)

    from itertools import ifilter

    maxint = sys.maxint
|
||||
|
||||
|
||||
def iterkeys(d):
    """Return an iterator over the keys of a dictionary."""
    accessor = 'keys' if PY3 else 'iterkeys'
    return getattr(d, accessor)()
|
||||
|
||||
def itervalues(d):
    """Return an iterator over the values of a dictionary."""
    accessor = 'values' if PY3 else 'itervalues'
    return getattr(d, accessor)()
|
||||
|
||||
def iteritems(d):
    """Return an iterator over the items of a dictionary."""
    accessor = 'items' if PY3 else 'iteritems'
    return getattr(d, accessor)()
|
||||
|
||||
# Mapping moved to collections.abc in Python 3.3; fall back to the old
# location for older interpreters (including Python 2).
try:
    from collections.abc import Mapping # python >= 3.3
except ImportError:
    from collections import Mapping # python < 3.3
|
||||
@@ -1,114 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: common/utils.py
|
||||
#
|
||||
# Miscellaneous utilities for elftools
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from contextlib import contextmanager
|
||||
from .exceptions import ELFParseError, ELFError, DWARFError
|
||||
from .py3compat import int2byte
|
||||
from ..construct import ConstructError, ULInt8
|
||||
|
||||
|
||||
def merge_dicts(*dicts):
    """ Given any number of dicts, merge them into a new one.

        Later dicts take precedence over earlier ones for duplicate keys.
    """
    # (Fixed the original docstring, whose mismatched quotes made it two
    # concatenated string literals rather than a clean triple-quoted one.)
    result = {}
    for d in dicts:
        result.update(d)
    return result
|
||||
|
||||
|
||||
def bytelist2string(bytelist):
    """ Convert a list of byte values (e.g. [0x10 0x20 0x00]) to a bytes
        object (e.g. b'\\x10\\x20\\x00').
    """
    return b''.join(map(int2byte, bytelist))
|
||||
|
||||
|
||||
def struct_parse(struct, stream, stream_pos=None):
|
||||
""" Convenience function for using the given struct to parse a stream.
|
||||
If stream_pos is provided, the stream is seeked to this position before
|
||||
the parsing is done. Otherwise, the current position of the stream is
|
||||
used.
|
||||
Wraps the error thrown by construct with ELFParseError.
|
||||
"""
|
||||
try:
|
||||
if stream_pos is not None:
|
||||
stream.seek(stream_pos)
|
||||
return struct.parse_stream(stream)
|
||||
except ConstructError as e:
|
||||
raise ELFParseError(str(e))
|
||||
|
||||
|
||||
def parse_cstring_from_stream(stream, stream_pos=None):
|
||||
""" Parse a C-string from the given stream. The string is returned without
|
||||
the terminating \x00 byte. If the terminating byte wasn't found, None
|
||||
is returned (the stream is exhausted).
|
||||
If stream_pos is provided, the stream is seeked to this position before
|
||||
the parsing is done. Otherwise, the current position of the stream is
|
||||
used.
|
||||
Note: a bytes object is returned here, because this is what's read from
|
||||
the binary file.
|
||||
"""
|
||||
if stream_pos is not None:
|
||||
stream.seek(stream_pos)
|
||||
CHUNKSIZE = 64
|
||||
chunks = []
|
||||
found = False
|
||||
while True:
|
||||
chunk = stream.read(CHUNKSIZE)
|
||||
end_index = chunk.find(b'\x00')
|
||||
if end_index >= 0:
|
||||
chunks.append(chunk[:end_index])
|
||||
found = True
|
||||
break
|
||||
else:
|
||||
chunks.append(chunk)
|
||||
if len(chunk) < CHUNKSIZE:
|
||||
break
|
||||
return b''.join(chunks) if found else None
|
||||
|
||||
|
||||
def elf_assert(cond, msg=''):
|
||||
""" Assert that cond is True, otherwise raise ELFError(msg)
|
||||
"""
|
||||
_assert_with_exception(cond, msg, ELFError)
|
||||
|
||||
|
||||
def dwarf_assert(cond, msg=''):
|
||||
""" Assert that cond is True, otherwise raise DWARFError(msg)
|
||||
"""
|
||||
_assert_with_exception(cond, msg, DWARFError)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def preserve_stream_pos(stream):
|
||||
""" Usage:
|
||||
# stream has some position FOO (return value of stream.tell())
|
||||
with preserve_stream_pos(stream):
|
||||
# do stuff that manipulates the stream
|
||||
# stream still has position FOO
|
||||
"""
|
||||
saved_pos = stream.tell()
|
||||
yield
|
||||
stream.seek(saved_pos)
|
||||
|
||||
|
||||
def roundup(num, bits):
|
||||
""" Round up a number to nearest multiple of 2^bits. The result is a number
|
||||
where the least significant bits passed in bits are 0.
|
||||
"""
|
||||
return (num - 1 | (1 << bits) - 1) + 1
|
||||
|
||||
def read_blob(stream, length):
|
||||
"""Read length bytes from stream, return a list of ints
|
||||
"""
|
||||
return [struct_parse(ULInt8(''), stream) for i in range(length)]
|
||||
|
||||
#------------------------- PRIVATE -------------------------
|
||||
|
||||
def _assert_with_exception(cond, msg, exception_type):
|
||||
if not cond:
|
||||
raise exception_type(msg)
|
||||
@@ -1,19 +0,0 @@
|
||||
Copyright (C) 2009 Tomer Filiba, 2010-2011 Corbin Simpson
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -1,13 +0,0 @@
|
||||
construct is a Python library for declarative parsing and building of binary
|
||||
data. This is my fork of construct 2, with some modifications for Python 3
|
||||
and bug fixes. The construct website is http://construct.readthedocs.org
|
||||
|
||||
pyelftools carries construct around because construct has been abandoned for
|
||||
a long time and didn't get bugfixes; it also didn't work with Python 3.
|
||||
|
||||
These days (Feb 2018) construct is maintained again, but its APIs have
|
||||
underwent extensive changes that would require rewriting all of the
|
||||
construct-facing code in pyelftools. I'm still evaluating the pros/cons of
|
||||
this effort. See https://github.com/eliben/pyelftools/issues/180 for details.
|
||||
|
||||
LICENSE is the original license.
|
||||
@@ -1,110 +0,0 @@
|
||||
"""
|
||||
#### ####
|
||||
## #### ## ## #### ###### ##### ## ## #### ###### ## ##
|
||||
## ## ## ### ## ## ## ## ## ## ## ## ## #### ##
|
||||
## ## ## ###### ### ## ##### ## ## ## ## ##
|
||||
## ## ## ## ### ## ## ## ## ## ## ## ## ##
|
||||
#### #### ## ## #### ## ## ## ##### #### ## ######
|
||||
|
||||
Parsing made even more fun (and faster too)
|
||||
|
||||
Homepage:
|
||||
http://construct.wikispaces.com (including online tutorial)
|
||||
|
||||
Typical usage:
|
||||
>>> from construct import *
|
||||
|
||||
Hands-on example:
|
||||
>>> from construct import *
|
||||
>>> s = Struct("foo",
|
||||
... UBInt8("a"),
|
||||
... UBInt16("b"),
|
||||
... )
|
||||
>>> s.parse("\\x01\\x02\\x03")
|
||||
Container(a = 1, b = 515)
|
||||
>>> print s.parse("\\x01\\x02\\x03")
|
||||
Container:
|
||||
a = 1
|
||||
b = 515
|
||||
>>> s.build(Container(a = 1, b = 0x0203))
|
||||
"\\x01\\x02\\x03"
|
||||
"""
|
||||
|
||||
from .core import *
|
||||
from .adapters import *
|
||||
from .macros import *
|
||||
from .debug import Probe, Debugger
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# Metadata
|
||||
#===============================================================================
|
||||
__author__ = "tomer filiba (tomerfiliba [at] gmail.com)"
|
||||
__maintainer__ = "Corbin Simpson <MostAwesomeDude@gmail.com>"
|
||||
__version__ = "2.06"
|
||||
|
||||
#===============================================================================
|
||||
# Shorthand expressions
|
||||
#===============================================================================
|
||||
Bits = BitField
|
||||
Byte = UBInt8
|
||||
Bytes = Field
|
||||
Const = ConstAdapter
|
||||
Tunnel = TunnelAdapter
|
||||
Embed = Embedded
|
||||
|
||||
#===============================================================================
|
||||
# Deprecated names
|
||||
# Next scheduled name cleanout: 2.1
|
||||
#===============================================================================
|
||||
import functools, warnings
|
||||
|
||||
def deprecated(f):
|
||||
@functools.wraps(f)
|
||||
def wrapper(*args, **kwargs):
|
||||
warnings.warn(
|
||||
"This name is deprecated, use %s instead" % f.__name__,
|
||||
DeprecationWarning, stacklevel=2)
|
||||
return f(*args, **kwargs)
|
||||
return wrapper
|
||||
|
||||
MetaBytes = deprecated(MetaField)
|
||||
GreedyRepeater = deprecated(GreedyRange)
|
||||
OptionalGreedyRepeater = deprecated(OptionalGreedyRange)
|
||||
Repeater = deprecated(Range)
|
||||
StrictRepeater = deprecated(Array)
|
||||
MetaRepeater = deprecated(Array)
|
||||
OneOfValidator = deprecated(OneOf)
|
||||
NoneOfValidator = deprecated(NoneOf)
|
||||
|
||||
#===============================================================================
|
||||
# exposed names
|
||||
#===============================================================================
|
||||
__all__ = [
|
||||
'AdaptationError', 'Adapter', 'Alias', 'Aligned', 'AlignedStruct',
|
||||
'Anchor', 'Array', 'ArrayError', 'BFloat32', 'BFloat64', 'Bit', 'BitField',
|
||||
'BitIntegerAdapter', 'BitIntegerError', 'BitStruct', 'Bits', 'Bitwise',
|
||||
'Buffered', 'Byte', 'Bytes', 'CString', 'CStringAdapter', 'Const',
|
||||
'ConstAdapter', 'ConstError', 'Construct', 'ConstructError', 'Container',
|
||||
'Debugger', 'Embed', 'Embedded', 'EmbeddedBitStruct', 'Enum', 'ExprAdapter',
|
||||
'Field', 'FieldError', 'Flag', 'FlagsAdapter', 'FlagsContainer',
|
||||
'FlagsEnum', 'FormatField', 'GreedyRange', 'GreedyRepeater',
|
||||
'HexDumpAdapter', 'If', 'IfThenElse', 'IndexingAdapter', 'LFloat32',
|
||||
'LFloat64', 'LazyBound', 'LengthValueAdapter', 'ListContainer',
|
||||
'MappingAdapter', 'MappingError', 'MetaArray', 'MetaBytes', 'MetaField',
|
||||
'MetaRepeater', 'NFloat32', 'NFloat64', 'Nibble', 'NoneOf',
|
||||
'NoneOfValidator', 'Octet', 'OnDemand', 'OnDemandPointer', 'OneOf',
|
||||
'OneOfValidator', 'OpenRange', 'Optional', 'OptionalGreedyRange',
|
||||
'OptionalGreedyRepeater', 'PaddedStringAdapter', 'Padding',
|
||||
'PaddingAdapter', 'PaddingError', 'PascalString', 'Pass', 'Peek',
|
||||
'Pointer', 'PrefixedArray', 'Probe', 'Range', 'RangeError', 'Reconfig',
|
||||
'Rename', 'RepeatUntil', 'Repeater', 'Restream', 'SBInt16', 'SBInt32',
|
||||
'SBInt64', 'SBInt8', 'SLInt16', 'SLInt32', 'SLInt64', 'SLInt8', 'SNInt16',
|
||||
'SNInt32', 'SNInt64', 'SNInt8', 'Select', 'SelectError', 'Sequence',
|
||||
'SizeofError', 'SlicingAdapter', 'StaticField', 'StrictRepeater', 'String',
|
||||
'StringAdapter', 'Struct', 'Subconstruct', 'Switch', 'SwitchError',
|
||||
'SymmetricMapping', 'Terminator', 'TerminatorError', 'Tunnel',
|
||||
'TunnelAdapter', 'UBInt16', 'UBInt32', 'UBInt64', 'UBInt8', 'ULInt16',
|
||||
'ULInt32', 'ULInt64', 'ULInt8', 'UNInt16', 'UNInt32', 'UNInt64', 'UNInt8',
|
||||
'Union', 'ValidationError', 'Validator', 'Value', "Magic",
|
||||
]
|
||||
@@ -1,470 +0,0 @@
|
||||
from .core import Adapter, AdaptationError, Pass
|
||||
from .lib import int_to_bin, bin_to_int, swap_bytes
|
||||
from .lib import FlagsContainer, HexString
|
||||
from .lib.py3compat import BytesIO, decodebytes
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# exceptions
|
||||
#===============================================================================
|
||||
class BitIntegerError(AdaptationError):
|
||||
__slots__ = []
|
||||
class MappingError(AdaptationError):
|
||||
__slots__ = []
|
||||
class ConstError(AdaptationError):
|
||||
__slots__ = []
|
||||
class ValidationError(AdaptationError):
|
||||
__slots__ = []
|
||||
class PaddingError(AdaptationError):
|
||||
__slots__ = []
|
||||
|
||||
#===============================================================================
|
||||
# adapters
|
||||
#===============================================================================
|
||||
class BitIntegerAdapter(Adapter):
|
||||
"""
|
||||
Adapter for bit-integers (converts bitstrings to integers, and vice versa).
|
||||
See BitField.
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to adapt
|
||||
* width - the size of the subcon, in bits
|
||||
* swapped - whether to swap byte order (little endian/big endian).
|
||||
default is False (big endian)
|
||||
* signed - whether the value is signed (two's complement). the default
|
||||
is False (unsigned)
|
||||
* bytesize - number of bits per byte, used for byte-swapping (if swapped).
|
||||
default is 8.
|
||||
"""
|
||||
__slots__ = ["width", "swapped", "signed", "bytesize"]
|
||||
def __init__(self, subcon, width, swapped = False, signed = False,
|
||||
bytesize = 8):
|
||||
Adapter.__init__(self, subcon)
|
||||
self.width = width
|
||||
self.swapped = swapped
|
||||
self.signed = signed
|
||||
self.bytesize = bytesize
|
||||
def _encode(self, obj, context):
|
||||
if obj < 0 and not self.signed:
|
||||
raise BitIntegerError("object is negative, but field is not signed",
|
||||
obj)
|
||||
obj2 = int_to_bin(obj, width = self.width)
|
||||
if self.swapped:
|
||||
obj2 = swap_bytes(obj2, bytesize = self.bytesize)
|
||||
return obj2
|
||||
def _decode(self, obj, context):
|
||||
if self.swapped:
|
||||
obj = swap_bytes(obj, bytesize = self.bytesize)
|
||||
return bin_to_int(obj, signed = self.signed)
|
||||
|
||||
class MappingAdapter(Adapter):
|
||||
"""
|
||||
Adapter that maps objects to other objects.
|
||||
See SymmetricMapping and Enum.
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to map
|
||||
* decoding - the decoding (parsing) mapping (a dict)
|
||||
* encoding - the encoding (building) mapping (a dict)
|
||||
* decdefault - the default return value when the object is not found
|
||||
in the decoding mapping. if no object is given, an exception is raised.
|
||||
if `Pass` is used, the unmapped object will be passed as-is
|
||||
* encdefault - the default return value when the object is not found
|
||||
in the encoding mapping. if no object is given, an exception is raised.
|
||||
if `Pass` is used, the unmapped object will be passed as-is
|
||||
"""
|
||||
__slots__ = ["encoding", "decoding", "encdefault", "decdefault"]
|
||||
def __init__(self, subcon, decoding, encoding,
|
||||
decdefault = NotImplemented, encdefault = NotImplemented):
|
||||
Adapter.__init__(self, subcon)
|
||||
self.decoding = decoding
|
||||
self.encoding = encoding
|
||||
self.decdefault = decdefault
|
||||
self.encdefault = encdefault
|
||||
def _encode(self, obj, context):
|
||||
try:
|
||||
return self.encoding[obj]
|
||||
except (KeyError, TypeError):
|
||||
if self.encdefault is NotImplemented:
|
||||
raise MappingError("no encoding mapping for %r [%s]" % (
|
||||
obj, self.subcon.name))
|
||||
if self.encdefault is Pass:
|
||||
return obj
|
||||
return self.encdefault
|
||||
def _decode(self, obj, context):
|
||||
try:
|
||||
return self.decoding[obj]
|
||||
except (KeyError, TypeError):
|
||||
if self.decdefault is NotImplemented:
|
||||
raise MappingError("no decoding mapping for %r [%s]" % (
|
||||
obj, self.subcon.name))
|
||||
if self.decdefault is Pass:
|
||||
return obj
|
||||
return self.decdefault
|
||||
|
||||
class FlagsAdapter(Adapter):
|
||||
"""
|
||||
Adapter for flag fields. Each flag is extracted from the number, resulting
|
||||
in a FlagsContainer object. Not intended for direct usage.
|
||||
See FlagsEnum.
|
||||
|
||||
Parameters
|
||||
* subcon - the subcon to extract
|
||||
* flags - a dictionary mapping flag-names to their value
|
||||
"""
|
||||
__slots__ = ["flags"]
|
||||
def __init__(self, subcon, flags):
|
||||
Adapter.__init__(self, subcon)
|
||||
self.flags = flags
|
||||
def _encode(self, obj, context):
|
||||
flags = 0
|
||||
for name, value in self.flags.items():
|
||||
if getattr(obj, name, False):
|
||||
flags |= value
|
||||
return flags
|
||||
def _decode(self, obj, context):
|
||||
obj2 = FlagsContainer()
|
||||
for name, value in self.flags.items():
|
||||
setattr(obj2, name, bool(obj & value))
|
||||
return obj2
|
||||
|
||||
class StringAdapter(Adapter):
|
||||
"""
|
||||
Adapter for strings. Converts a sequence of characters into a python
|
||||
string, and optionally handles character encoding.
|
||||
See String.
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to convert
|
||||
* encoding - the character encoding name (e.g., "utf8"), or None to
|
||||
return raw bytes (usually 8-bit ASCII).
|
||||
"""
|
||||
__slots__ = ["encoding"]
|
||||
def __init__(self, subcon, encoding = None):
|
||||
Adapter.__init__(self, subcon)
|
||||
self.encoding = encoding
|
||||
def _encode(self, obj, context):
|
||||
if self.encoding:
|
||||
obj = obj.encode(self.encoding)
|
||||
return obj
|
||||
def _decode(self, obj, context):
|
||||
if self.encoding:
|
||||
obj = obj.decode(self.encoding)
|
||||
return obj
|
||||
|
||||
class PaddedStringAdapter(Adapter):
|
||||
r"""
|
||||
Adapter for padded strings.
|
||||
See String.
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to adapt
|
||||
* padchar - the padding character. default is b"\x00".
|
||||
* paddir - the direction where padding is placed ("right", "left", or
|
||||
"center"). the default is "right".
|
||||
* trimdir - the direction where trimming will take place ("right" or
|
||||
"left"). the default is "right". trimming is only meaningful for
|
||||
building, when the given string is too long.
|
||||
"""
|
||||
__slots__ = ["padchar", "paddir", "trimdir"]
|
||||
def __init__(self, subcon, padchar = b"\x00", paddir = "right",
|
||||
trimdir = "right"):
|
||||
if paddir not in ("right", "left", "center"):
|
||||
raise ValueError("paddir must be 'right', 'left' or 'center'",
|
||||
paddir)
|
||||
if trimdir not in ("right", "left"):
|
||||
raise ValueError("trimdir must be 'right' or 'left'", trimdir)
|
||||
Adapter.__init__(self, subcon)
|
||||
self.padchar = padchar
|
||||
self.paddir = paddir
|
||||
self.trimdir = trimdir
|
||||
def _decode(self, obj, context):
|
||||
if self.paddir == "right":
|
||||
obj = obj.rstrip(self.padchar)
|
||||
elif self.paddir == "left":
|
||||
obj = obj.lstrip(self.padchar)
|
||||
else:
|
||||
obj = obj.strip(self.padchar)
|
||||
return obj
|
||||
def _encode(self, obj, context):
|
||||
size = self._sizeof(context)
|
||||
if self.paddir == "right":
|
||||
obj = obj.ljust(size, self.padchar)
|
||||
elif self.paddir == "left":
|
||||
obj = obj.rjust(size, self.padchar)
|
||||
else:
|
||||
obj = obj.center(size, self.padchar)
|
||||
if len(obj) > size:
|
||||
if self.trimdir == "right":
|
||||
obj = obj[:size]
|
||||
else:
|
||||
obj = obj[-size:]
|
||||
return obj
|
||||
|
||||
class LengthValueAdapter(Adapter):
|
||||
"""
|
||||
Adapter for length-value pairs. It extracts only the value from the
|
||||
pair, and calculates the length based on the value.
|
||||
See PrefixedArray and PascalString.
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon returning a length-value pair
|
||||
"""
|
||||
__slots__ = []
|
||||
def _encode(self, obj, context):
|
||||
return (len(obj), obj)
|
||||
def _decode(self, obj, context):
|
||||
return obj[1]
|
||||
|
||||
class CStringAdapter(StringAdapter):
|
||||
r"""
|
||||
Adapter for C-style strings (strings terminated by a terminator char).
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to convert
|
||||
* terminators - a sequence of terminator chars. default is b"\x00".
|
||||
* encoding - the character encoding to use (e.g., "utf8"), or None to
|
||||
return raw-bytes. the terminator characters are not affected by the
|
||||
encoding.
|
||||
"""
|
||||
__slots__ = ["terminators"]
|
||||
def __init__(self, subcon, terminators = b"\x00", encoding = None):
|
||||
StringAdapter.__init__(self, subcon, encoding = encoding)
|
||||
self.terminators = terminators
|
||||
def _encode(self, obj, context):
|
||||
return StringAdapter._encode(self, obj, context) + self.terminators[0:1]
|
||||
def _decode(self, obj, context):
|
||||
return StringAdapter._decode(self, b''.join(obj[:-1]), context)
|
||||
|
||||
class TunnelAdapter(Adapter):
|
||||
"""
|
||||
Adapter for tunneling (as in protocol tunneling). A tunnel is construct
|
||||
nested upon another (layering). For parsing, the lower layer first parses
|
||||
the data (note: it must return a string!), then the upper layer is called
|
||||
to parse that data (bottom-up). For building it works in a top-down manner;
|
||||
first the upper layer builds the data, then the lower layer takes it and
|
||||
writes it to the stream.
|
||||
|
||||
Parameters:
|
||||
* subcon - the lower layer subcon
|
||||
* inner_subcon - the upper layer (tunneled/nested) subcon
|
||||
|
||||
Example:
|
||||
# a pascal string containing compressed data (zlib encoding), so first
|
||||
# the string is read, decompressed, and finally re-parsed as an array
|
||||
# of UBInt16
|
||||
TunnelAdapter(
|
||||
PascalString("data", encoding = "zlib"),
|
||||
GreedyRange(UBInt16("elements"))
|
||||
)
|
||||
"""
|
||||
__slots__ = ["inner_subcon"]
|
||||
def __init__(self, subcon, inner_subcon):
|
||||
Adapter.__init__(self, subcon)
|
||||
self.inner_subcon = inner_subcon
|
||||
def _decode(self, obj, context):
|
||||
return self.inner_subcon._parse(BytesIO(obj), context)
|
||||
def _encode(self, obj, context):
|
||||
stream = BytesIO()
|
||||
self.inner_subcon._build(obj, stream, context)
|
||||
return stream.getvalue()
|
||||
|
||||
class ExprAdapter(Adapter):
|
||||
"""
|
||||
A generic adapter that accepts 'encoder' and 'decoder' as parameters. You
|
||||
can use ExprAdapter instead of writing a full-blown class when only a
|
||||
simple expression is needed.
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to adapt
|
||||
* encoder - a function that takes (obj, context) and returns an encoded
|
||||
version of obj
|
||||
* decoder - a function that takes (obj, context) and returns a decoded
|
||||
version of obj
|
||||
|
||||
Example:
|
||||
ExprAdapter(UBInt8("foo"),
|
||||
encoder = lambda obj, ctx: obj / 4,
|
||||
decoder = lambda obj, ctx: obj * 4,
|
||||
)
|
||||
"""
|
||||
__slots__ = ["_encode", "_decode"]
|
||||
def __init__(self, subcon, encoder, decoder):
|
||||
Adapter.__init__(self, subcon)
|
||||
self._encode = encoder
|
||||
self._decode = decoder
|
||||
|
||||
class HexDumpAdapter(Adapter):
|
||||
"""
|
||||
Adapter for hex-dumping strings. It returns a HexString, which is a string
|
||||
"""
|
||||
__slots__ = ["linesize"]
|
||||
def __init__(self, subcon, linesize = 16):
|
||||
Adapter.__init__(self, subcon)
|
||||
self.linesize = linesize
|
||||
def _encode(self, obj, context):
|
||||
return obj
|
||||
def _decode(self, obj, context):
|
||||
return HexString(obj, linesize = self.linesize)
|
||||
|
||||
class ConstAdapter(Adapter):
|
||||
"""
|
||||
Adapter for enforcing a constant value ("magic numbers"). When decoding,
|
||||
the return value is checked; when building, the value is substituted in.
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to validate
|
||||
* value - the expected value
|
||||
|
||||
Example:
|
||||
Const(Field("signature", 2), "MZ")
|
||||
"""
|
||||
__slots__ = ["value"]
|
||||
def __init__(self, subcon, value):
|
||||
Adapter.__init__(self, subcon)
|
||||
self.value = value
|
||||
def _encode(self, obj, context):
|
||||
if obj is None or obj == self.value:
|
||||
return self.value
|
||||
else:
|
||||
raise ConstError("expected %r, found %r" % (self.value, obj))
|
||||
def _decode(self, obj, context):
|
||||
if obj != self.value:
|
||||
raise ConstError("expected %r, found %r" % (self.value, obj))
|
||||
return obj
|
||||
|
||||
class SlicingAdapter(Adapter):
|
||||
"""
|
||||
Adapter for slicing a list (getting a slice from that list)
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to slice
|
||||
* start - start index
|
||||
* stop - stop index (or None for up-to-end)
|
||||
* step - step (or None for every element)
|
||||
"""
|
||||
__slots__ = ["start", "stop", "step"]
|
||||
def __init__(self, subcon, start, stop = None):
|
||||
Adapter.__init__(self, subcon)
|
||||
self.start = start
|
||||
self.stop = stop
|
||||
def _encode(self, obj, context):
|
||||
if self.start is None:
|
||||
return obj
|
||||
return [None] * self.start + obj
|
||||
def _decode(self, obj, context):
|
||||
return obj[self.start:self.stop]
|
||||
|
||||
class IndexingAdapter(Adapter):
|
||||
"""
|
||||
Adapter for indexing a list (getting a single item from that list)
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to index
|
||||
* index - the index of the list to get
|
||||
"""
|
||||
__slots__ = ["index"]
|
||||
def __init__(self, subcon, index):
|
||||
Adapter.__init__(self, subcon)
|
||||
if type(index) is not int:
|
||||
raise TypeError("index must be an integer", type(index))
|
||||
self.index = index
|
||||
def _encode(self, obj, context):
|
||||
return [None] * self.index + [obj]
|
||||
def _decode(self, obj, context):
|
||||
return obj[self.index]
|
||||
|
||||
class PaddingAdapter(Adapter):
|
||||
r"""
|
||||
Adapter for padding.
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to pad
|
||||
* pattern - the padding pattern (character as byte). default is b"\x00"
|
||||
* strict - whether or not to verify, during parsing, that the given
|
||||
padding matches the padding pattern. default is False (unstrict)
|
||||
"""
|
||||
__slots__ = ["pattern", "strict"]
|
||||
def __init__(self, subcon, pattern = b"\x00", strict = False):
|
||||
Adapter.__init__(self, subcon)
|
||||
self.pattern = pattern
|
||||
self.strict = strict
|
||||
def _encode(self, obj, context):
|
||||
return self._sizeof(context) * self.pattern
|
||||
def _decode(self, obj, context):
|
||||
if self.strict:
|
||||
expected = self._sizeof(context) * self.pattern
|
||||
if obj != expected:
|
||||
raise PaddingError("expected %r, found %r" % (expected, obj))
|
||||
return obj
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# validators
|
||||
#===============================================================================
|
||||
class Validator(Adapter):
|
||||
"""
|
||||
Abstract class: validates a condition on the encoded/decoded object.
|
||||
Override _validate(obj, context) in deriving classes.
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to validate
|
||||
"""
|
||||
__slots__ = []
|
||||
def _decode(self, obj, context):
|
||||
if not self._validate(obj, context):
|
||||
raise ValidationError("invalid object", obj)
|
||||
return obj
|
||||
def _encode(self, obj, context):
|
||||
return self._decode(obj, context)
|
||||
def _validate(self, obj, context):
|
||||
raise NotImplementedError()
|
||||
|
||||
class OneOf(Validator):
|
||||
"""
|
||||
Validates that the object is one of the listed values.
|
||||
|
||||
:param ``Construct`` subcon: object to validate
|
||||
:param iterable valids: a set of valid values
|
||||
|
||||
>>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x05")
|
||||
5
|
||||
>>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08")
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
construct.core.ValidationError: ('invalid object', 8)
|
||||
>>>
|
||||
>>> OneOf(UBInt8("foo"), [4,5,6,7]).build(5)
|
||||
'\\x05'
|
||||
>>> OneOf(UBInt8("foo"), [4,5,6,7]).build(9)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
construct.core.ValidationError: ('invalid object', 9)
|
||||
"""
|
||||
__slots__ = ["valids"]
|
||||
def __init__(self, subcon, valids):
|
||||
Validator.__init__(self, subcon)
|
||||
self.valids = valids
|
||||
def _validate(self, obj, context):
|
||||
return obj in self.valids
|
||||
|
||||
class NoneOf(Validator):
|
||||
"""
|
||||
Validates that the object is none of the listed values.
|
||||
|
||||
:param ``Construct`` subcon: object to validate
|
||||
:param iterable invalids: a set of invalid values
|
||||
|
||||
>>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08")
|
||||
8
|
||||
>>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x06")
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
construct.core.ValidationError: ('invalid object', 6)
|
||||
"""
|
||||
__slots__ = ["invalids"]
|
||||
def __init__(self, subcon, invalids):
|
||||
Validator.__init__(self, subcon)
|
||||
self.invalids = invalids
|
||||
def _validate(self, obj, context):
|
||||
return obj not in self.invalids
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,133 +0,0 @@
|
||||
"""
|
||||
Debugging utilities for constructs
|
||||
"""
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import traceback
|
||||
import pdb
|
||||
import inspect
|
||||
from .core import Construct, Subconstruct
|
||||
from .lib import HexString, Container, ListContainer
|
||||
|
||||
|
||||
class Probe(Construct):
|
||||
"""
|
||||
A probe: dumps the context, stack frames, and stream content to the screen
|
||||
to aid the debugging process.
|
||||
See also Debugger.
|
||||
|
||||
Parameters:
|
||||
* name - the display name
|
||||
* show_stream - whether or not to show stream contents. default is True.
|
||||
the stream must be seekable.
|
||||
* show_context - whether or not to show the context. default is True.
|
||||
* show_stack - whether or not to show the upper stack frames. default
|
||||
is True.
|
||||
* stream_lookahead - the number of bytes to dump when show_stack is set.
|
||||
default is 100.
|
||||
|
||||
Example:
|
||||
Struct("foo",
|
||||
UBInt8("a"),
|
||||
Probe("between a and b"),
|
||||
UBInt8("b"),
|
||||
)
|
||||
"""
|
||||
__slots__ = [
|
||||
"printname", "show_stream", "show_context", "show_stack",
|
||||
"stream_lookahead"
|
||||
]
|
||||
counter = 0
|
||||
|
||||
def __init__(self, name = None, show_stream = True,
|
||||
show_context = True, show_stack = True,
|
||||
stream_lookahead = 100):
|
||||
Construct.__init__(self, None)
|
||||
if name is None:
|
||||
Probe.counter += 1
|
||||
name = "<unnamed %d>" % (Probe.counter,)
|
||||
self.printname = name
|
||||
self.show_stream = show_stream
|
||||
self.show_context = show_context
|
||||
self.show_stack = show_stack
|
||||
self.stream_lookahead = stream_lookahead
|
||||
def __repr__(self):
|
||||
return "%s(%r)" % (self.__class__.__name__, self.printname)
|
||||
def _parse(self, stream, context):
|
||||
self.printout(stream, context)
|
||||
def _build(self, obj, stream, context):
|
||||
self.printout(stream, context)
|
||||
def _sizeof(self, context):
|
||||
return 0
|
||||
|
||||
def printout(self, stream, context):
|
||||
obj = Container()
|
||||
if self.show_stream:
|
||||
obj.stream_position = stream.tell()
|
||||
follows = stream.read(self.stream_lookahead)
|
||||
if not follows:
|
||||
obj.following_stream_data = "EOF reached"
|
||||
else:
|
||||
stream.seek(-len(follows), 1)
|
||||
obj.following_stream_data = HexString(follows)
|
||||
print
|
||||
|
||||
if self.show_context:
|
||||
obj.context = context
|
||||
|
||||
if self.show_stack:
|
||||
obj.stack = ListContainer()
|
||||
frames = [s[0] for s in inspect.stack()][1:-1]
|
||||
frames.reverse()
|
||||
for f in frames:
|
||||
a = Container()
|
||||
a.__update__(f.f_locals)
|
||||
obj.stack.append(a)
|
||||
|
||||
print("=" * 80)
|
||||
print("Probe", self.printname)
|
||||
print(obj)
|
||||
print("=" * 80)
|
||||
|
||||
class Debugger(Subconstruct):
|
||||
"""
|
||||
A pdb-based debugger. When an exception occurs in the subcon, a debugger
|
||||
will appear and allow you to debug the error (and even fix on-the-fly).
|
||||
|
||||
Parameters:
|
||||
* subcon - the subcon to debug
|
||||
|
||||
Example:
|
||||
Debugger(
|
||||
Enum(UBInt8("foo"),
|
||||
a = 1,
|
||||
b = 2,
|
||||
c = 3
|
||||
)
|
||||
)
|
||||
"""
|
||||
__slots__ = ["retval"]
|
||||
def _parse(self, stream, context):
|
||||
try:
|
||||
return self.subcon._parse(stream, context)
|
||||
except Exception:
|
||||
self.retval = NotImplemented
|
||||
self.handle_exc("(you can set the value of 'self.retval', "
|
||||
"which will be returned)")
|
||||
if self.retval is NotImplemented:
|
||||
raise
|
||||
else:
|
||||
return self.retval
|
||||
def _build(self, obj, stream, context):
|
||||
try:
|
||||
self.subcon._build(obj, stream, context)
|
||||
except Exception:
|
||||
self.handle_exc()
|
||||
def handle_exc(self, msg = None):
|
||||
print("=" * 80)
|
||||
print("Debugging exception of %s:" % (self.subcon,))
|
||||
print("".join(traceback.format_exception(*sys.exc_info())[1:]))
|
||||
if msg:
|
||||
print(msg)
|
||||
pdb.post_mortem(sys.exc_info()[2])
|
||||
print("=" * 80)
|
||||
@@ -1,7 +0,0 @@
|
||||
from .binary import (
|
||||
int_to_bin, bin_to_int, swap_bytes, encode_bin, decode_bin)
|
||||
from .bitstream import BitStreamReader, BitStreamWriter
|
||||
from .container import (Container, FlagsContainer, ListContainer,
|
||||
LazyContainer)
|
||||
from .hex import HexString, hexdump
|
||||
|
||||
@@ -1,117 +0,0 @@
|
||||
from .py3compat import int2byte
|
||||
|
||||
|
||||
def int_to_bin(number, width=32):
    r"""
    Convert an integer into its binary representation in a bytes object.
    Width is the amount of bits to generate. If width is larger than the actual
    amount of bits required to represent number in binary, sign-extension is
    used. If it's smaller, the representation is trimmed to width bits.
    Each "bit" is either '\x00' or '\x01'. The MSBit is first.

    Examples:

    >>> int_to_bin(19, 5)
    b'\x01\x00\x00\x01\x01'
    >>> int_to_bin(19, 8)
    b'\x00\x00\x00\x01\x00\x00\x01\x01'
    """
    if number < 0:
        # two's-complement wrap for negative values
        number += 1 << width
    out = bytearray(width)
    # fill from the least-significant end; stop early once number is exhausted
    for pos in range(width - 1, -1, -1):
        if not number:
            break
        out[pos] = number & 1
        number >>= 1
    return bytes(out)
|
||||
|
||||
|
||||
# Accepted representations of a single bit: raw 0/1 integers, the ASCII
# digit codes, and (for Python 2) one-character strings.
_bit_values = {
    0: 0,
    1: 1,
    48: 0,  # '0'
    49: 1,  # '1'

    # The following are for Python 2, in which iteration over a bytes object
    # yields single-character bytes and not integers.
    '\x00': 0,
    '\x01': 1,
    '0': 0,
    '1': 1,
}
|
||||
|
||||
def bin_to_int(bits, signed=False):
    r"""
    Logical opposite of int_to_bin. Both '0' and '\x00' are considered zero,
    and both '1' and '\x01' are considered one. Set signed to True to
    interpret the number as a two's-complement signed integer.
    """
    number = 0
    bias = 0
    # (removed an unused local `ptr` that the original carried)
    if signed and _bit_values[bits[0]] == 1:
        # MSB is the sign bit: drop it and subtract its weight at the end
        bits = bits[1:]
        bias = 1 << len(bits)
    for b in bits:
        number = (number << 1) | _bit_values[b]
    return number - bias
|
||||
|
||||
|
||||
def swap_bytes(bits, bytesize=8):
    r"""
    Bits is a b'' object containing a binary representation. Assuming each
    bytesize bits constitute a bytes, perform a endianness byte swap. Example:

    >>> swap_bytes(b'00011011', 2)
    b'11100100'
    """
    # split into bytesize-sized chunks, then emit them in reverse order
    total = len(bits)
    chunks = []
    pos = 0
    while pos < total:
        chunks.append(bits[pos : pos + bytesize])
        pos += bytesize
    chunks.reverse()
    return b"".join(chunks)
|
||||
|
||||
|
||||
# Lookup tables between single byte values and their 8-bit expanded form
# (one b'\x00'/b'\x01' byte per bit, MSBit first).
_char_to_bin = {}
_bin_to_char = {}
for i in range(256):
    ch = int2byte(i)
    bin = int_to_bin(i, 8)  # NOTE: shadows the `bin` builtin inside this loop
    # Populate with for both keys i and ch, to support Python 2 & 3
    _char_to_bin[ch] = bin
    _char_to_bin[i] = bin
    _bin_to_char[bin] = ch
|
||||
|
||||
|
||||
def encode_bin(data):
    """
    Create a binary representation of the given b'' object: each 8-bit byte
    is expanded into eight b'\\x00'/b'\\x01' bytes, MSBit first. Assumes
    8-bit ASCII.
    """
    # _char_to_bin is keyed by both ints and single-byte strings, so this
    # works for iteration over bytes on both Python 2 and 3
    return b"".join(map(_char_to_bin.__getitem__, data))
|
||||
|
||||
|
||||
def decode_bin(data):
    """
    Logical opposite of encode_bin: collapse a representation of one
    b'\\x00'/b'\\x01' byte per bit back into the packed bytes it encodes.
    (The original docstring incorrectly described this as the opposite of
    decode_bin itself.)

    Raises ValueError if len(data) is not a multiple of 8.
    """
    if len(data) & 7:
        raise ValueError("Data length must be a multiple of 8")
    # look up each 8-bit chunk in the precomputed reverse table
    return b"".join(_bin_to_char[data[i:i + 8]]
                    for i in range(0, len(data), 8))
|
||||
@@ -1,77 +0,0 @@
|
||||
from .binary import encode_bin, decode_bin
|
||||
|
||||
class BitStreamReader(object):
    """
    Wraps a byte substream and exposes it as a stream of bits: read(n)
    returns n bytes, each of which is b'\\x00' or b'\\x01' (via encode_bin).
    """

    __slots__ = ["substream", "buffer", "total_size"]

    def __init__(self, substream):
        self.substream = substream
        self.total_size = 0
        # Holds already-expanded, not-yet-consumed bits. This must be bytes:
        # the original initialized it to a str "", which raises TypeError on
        # Python 3 as soon as a partial byte is buffered and concatenated
        # with encode_bin() output.
        self.buffer = b""

    def close(self):
        # reads must consume whole underlying bytes by the time we close
        if self.total_size % 8 != 0:
            raise ValueError("total size of read data must be a multiple of 8",
                self.total_size)

    def tell(self):
        return self.substream.tell()

    def seek(self, pos, whence = 0):
        # seeking invalidates any buffered bits
        self.buffer = b""
        self.total_size = 0
        self.substream.seek(pos, whence)

    def read(self, count):
        """Read `count` bits; returns a bytes object of length `count`."""
        if count < 0:
            raise ValueError("count cannot be negative")

        l = len(self.buffer)
        if count == 0:
            data = b""
        elif count <= l:
            # served entirely from the buffer
            data = self.buffer[:count]
            self.buffer = self.buffer[count:]
        else:
            # drain the buffer, then expand just enough underlying bytes
            data = self.buffer
            count -= l
            nbytes = count // 8  # renamed from `bytes` (shadowed the builtin)
            if count & 7:
                nbytes += 1
            buf = encode_bin(self.substream.read(nbytes))
            data += buf[:count]
            self.buffer = buf[count:]
        self.total_size += len(data)
        return data
|
||||
|
||||
class BitStreamWriter(object):
    # Wraps a byte substream and accepts writes of bit strings (one
    # character per bit), flushing them to the substream as packed bytes.
    #
    # NOTE(review): this class still assumes Python 2 str semantics — see
    # the notes in flush() and write(); confirm before using on Python 3.

    __slots__ = ["substream", "buffer", "pos"]

    def __init__(self, substream):
        self.substream = substream
        self.buffer = []
        self.pos = 0

    def close(self):
        self.flush()

    def flush(self):
        # NOTE(review): "".join / decode_bin here expect str chunks
        # (Python 2); on Python 3 the companion reader produces bytes, so
        # this path would need the b"" equivalents — confirm before use.
        # Also: `bytes` shadows the builtin for this statement.
        bytes = decode_bin("".join(self.buffer))
        self.substream.write(bytes)
        self.buffer = []
        self.pos = 0

    def tell(self):
        # NOTE(review): write() never advances self.pos, so tell() only
        # reflects flushed data — confirm this is intended.
        return self.substream.tell() + self.pos // 8

    def seek(self, pos, whence = 0):
        # flush buffered bits before moving the underlying stream
        self.flush()
        self.substream.seek(pos, whence)

    def write(self, data):
        if not data:
            return
        if type(data) is not str:
            raise TypeError("data must be a string, not %r" % (type(data),))
        self.buffer.append(data)
|
||||
@@ -1,161 +0,0 @@
|
||||
"""
|
||||
Various containers.
|
||||
"""
|
||||
|
||||
from pprint import pformat
|
||||
from .py3compat import MutableMapping
|
||||
|
||||
def recursion_lock(retval, lock_name = "__recursion_lock__"):
    """
    Decorator factory guarding a method against re-entry on the same object:
    if the decorated method is called again while already executing on the
    same instance, `retval` is returned instead of recursing forever.
    """
    def decorator(func):
        def guarded(self, *args, **kwargs):
            if getattr(self, lock_name, False):
                # already inside this method on this object: short-circuit
                return retval
            setattr(self, lock_name, True)
            try:
                return func(self, *args, **kwargs)
            finally:
                # always release, even if func raised
                setattr(self, lock_name, False)
        guarded.__name__ = func.__name__
        return guarded
    return decorator
|
||||
|
||||
class Container(MutableMapping):
    """
    A generic container of attributes.

    Containers are the common way to express parsed data.
    """

    def __init__(self, **kw):
        # Attribute storage and mapping storage are the same dict, so
        # c.foo and c["foo"] are aliases of each other.
        self.__dict__ = kw

    # The core dictionary interface.

    def __getitem__(self, name):
        return self.__dict__[name]

    def __delitem__(self, name):
        del self.__dict__[name]

    def __setitem__(self, name, value):
        self.__dict__[name] = value

    def keys(self):
        return self.__dict__.keys()

    def __len__(self):
        return len(self.__dict__.keys())

    # Extended dictionary interface.

    def update(self, other):
        self.__dict__.update(other)

    __update__ = update  # alias kept for callers that use the dunder name

    def __contains__(self, value):
        return value in self.__dict__

    # Rich comparisons.

    def __eq__(self, other):
        # equal iff the other object exposes an equal attribute dict
        try:
            return self.__dict__ == other.__dict__
        except AttributeError:
            return False

    def __ne__(self, other):
        return not self == other

    # Copy interface.

    def copy(self):
        # shallow copy: attribute values are shared with the original
        return self.__class__(**self.__dict__)

    __copy__ = copy

    # Iterator interface.

    def __iter__(self):
        return iter(self.__dict__)

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.__dict__))

    def __str__(self):
        return "%s(%s)" % (self.__class__.__name__, str(self.__dict__))
|
||||
|
||||
class FlagsContainer(Container):
    """
    A container providing pretty-printing for flags.

    Only set flags are displayed.
    """

    @recursion_lock("<...>")
    def __str__(self):
        # hide unset flags and private (underscore-prefixed) entries
        shown = {name: self[name] for name in self
                 if self[name] and not name.startswith("_")}
        return "%s(%s)" % (self.__class__.__name__, pformat(shown))
|
||||
|
||||
class ListContainer(list):
    """
    A container for lists.
    """

    # Slot for the re-entrancy flag used by @recursion_lock, so __str__
    # cannot infinitely recurse on self-referential lists.
    __slots__ = ["__recursion_lock__"]

    @recursion_lock("[...]")
    def __str__(self):
        return pformat(self)
|
||||
|
||||
class LazyContainer(object):
    """
    A placeholder that defers parsing of its subcon until the `value`
    property is first read; the result is then cached in `_value`.
    """

    __slots__ = ["subcon", "stream", "pos", "context", "_value"]

    def __init__(self, subcon, stream, pos, context):
        self.subcon = subcon
        self.stream = stream
        self.pos = pos
        self.context = context
        # NotImplemented doubles as the "not yet read" sentinel
        self._value = NotImplemented

    def __eq__(self, other):
        # compare cached values; anything without a _value is unequal
        try:
            return self._value == other._value
        except AttributeError:
            return False

    def __ne__(self, other):
        return not (self == other)

    def __str__(self):
        return self.__pretty_str__()

    def __pretty_str__(self, nesting = 1, indentation = "    "):
        if self._value is NotImplemented:
            rendered = "<unread>"
        elif hasattr(self._value, "__pretty_str__"):
            rendered = self._value.__pretty_str__(nesting, indentation)
        else:
            rendered = str(self._value)
        return "%s: %s" % (self.__class__.__name__, rendered)

    def read(self):
        # re-seek every time: the stream may have moved since construction
        self.stream.seek(self.pos)
        return self.subcon._parse(self.stream, self.context)

    def dispose(self):
        # drop all references so the stream/subcon can be collected
        self.subcon = None
        self.stream = None
        self.context = None
        self.pos = None

    def _get_value(self):
        if self._value is NotImplemented:
            self._value = self.read()
        return self._value

    value = property(_get_value)

    has_value = property(lambda self: self._value is not NotImplemented)
|
||||
@@ -1,43 +0,0 @@
|
||||
from .py3compat import byte2int, int2byte, bytes2str
|
||||
|
||||
|
||||
# Map an integer in the inclusive range 0-255 to its string byte representation
|
||||
_printable = dict((i, ".") for i in range(256))
|
||||
_printable.update((i, bytes2str(int2byte(i))) for i in range(32, 128))
|
||||
|
||||
|
||||
def hexdump(data, linesize):
    """
    data is a bytes object. Returns a list of formatted lines (one string
    per `linesize` bytes of input) — note: a list, not a single joined
    string; the caller joins them.
    """
    prettylines = []
    # 4 hex digits of offset suffice below 64 KiB; wider inputs get 8
    if len(data) < 65536:
        fmt = "%%04X %%-%ds %%s"
    else:
        fmt = "%%08X %%-%ds %%s"
    # pad the hex column to the full width of a line (3 chars per byte)
    fmt = fmt % (3 * linesize - 1,)
    for i in range(0, len(data), linesize):
        line = data[i : i + linesize]
        hextext = " ".join('%02x' % byte2int(b) for b in line)
        rawtext = "".join(_printable[byte2int(b)] for b in line)
        prettylines.append(fmt % (i, str(hextext), str(rawtext)))
    return prettylines
|
||||
|
||||
|
||||
class HexString(bytes):
    """
    Represents bytes that will be hex-dumped to a string when its string
    representation is requested.
    """

    def __new__(cls, data, *args, **kwargs):
        # bytes is immutable, so the payload must be set in __new__
        return bytes.__new__(cls, data)

    def __init__(self, data, linesize = 16):
        self.linesize = linesize

    def __str__(self):
        if not self:
            return "''"
        # leading newline so the dump starts on its own line
        return "\n" + "\n".join(hexdump(self, self.linesize))
|
||||
@@ -1,74 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
# py3compat.py
#
# Some Python2&3 compatibility code
#-------------------------------------------------------------------------------
import sys
# True on Python 3; selects which set of shims below is defined.
PY3 = sys.version_info[0] == 3

try:
    from collections.abc import MutableMapping  # python >= 3.3
except ImportError:
    from collections import MutableMapping  # python < 3.3


if PY3:
    import io
    StringIO = io.StringIO
    BytesIO = io.BytesIO

    def bchr(i):
        """ When iterating over b'...' in Python 2 you get single b'_' chars
        and in Python 3 you get integers. Call bchr to always turn this
        to single b'_' chars.
        """
        return bytes((i,))

    def u(s):
        # text literals are already unicode on Python 3
        return s

    def int2byte(i):
        # int in 0..255 -> single-byte bytes object
        return bytes((i,))

    def byte2int(b):
        # iterating bytes already yields ints on Python 3: identity
        return b

    def str2bytes(s):
        return s.encode("latin-1")

    def str2unicode(s):
        return s

    def bytes2str(b):
        return b.decode('latin-1')

    def decodebytes(b, encoding):
        # NOTE(review): despite the name, on Python 3 this *encodes* a str
        # into bytes (bytes(str, encoding)) — confirm against callers.
        return bytes(b, encoding)

    advance_iterator = next

else:
    import cStringIO
    StringIO = BytesIO = cStringIO.StringIO

    int2byte = chr
    byte2int = ord
    bchr = lambda i: i  # Python 2 iteration already yields 1-char strs

    def u(s):
        return unicode(s, "unicode_escape")

    def str2bytes(s):
        return s

    def str2unicode(s):
        return unicode(s, "unicode_escape")

    def bytes2str(b):
        return b

    def decodebytes(b, encoding):
        return b.decode(encoding)

    def advance_iterator(it):
        return it.next()
|
||||
@@ -1,634 +0,0 @@
|
||||
from .lib.py3compat import int2byte
|
||||
from .lib import (BitStreamReader, BitStreamWriter, encode_bin,
|
||||
decode_bin)
|
||||
from .core import (Struct, MetaField, StaticField, FormatField,
|
||||
OnDemand, Pointer, Switch, Value, RepeatUntil, MetaArray, Sequence, Range,
|
||||
Select, Pass, SizeofError, Buffered, Restream, Reconfig)
|
||||
from .adapters import (BitIntegerAdapter, PaddingAdapter,
|
||||
ConstAdapter, CStringAdapter, LengthValueAdapter, IndexingAdapter,
|
||||
PaddedStringAdapter, FlagsAdapter, StringAdapter, MappingAdapter)
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# fields
|
||||
#===============================================================================
|
||||
def Field(name, length):
    """
    A field consisting of a specified number of bytes.

    :param str name: the name of the field
    :param length: either an integer (produces a StaticField) or a function
        taking the context and returning the length (produces a MetaField)
    """
    return MetaField(name, length) if callable(length) else StaticField(name, length)
|
||||
|
||||
def BitField(name, length, swapped = False, signed = False, bytesize = 8):
    """
    A field operating on raw, unaligned bits; it must therefore be enclosed
    in a BitStruct. Usage mirrors ordinary fields: a name and a length
    (in bits).

    :param str name: name of the field
    :param int length: number of bits in the field, or a function taking the
        context and returning the length
    :param bool swapped: whether the value is byte-swapped
    :param bool signed: whether the value is signed
    :param int bytesize: number of bits per byte, for byte-swapping
    """
    raw = Field(name, length)
    return BitIntegerAdapter(raw,
                             length,
                             swapped=swapped,
                             signed=signed,
                             bytesize=bytesize)
|
||||
|
||||
def Padding(length, pattern = b"\x00", strict = False):
    r"""A padding field whose parsed value is discarded.

    * length - the length of the field: an integer, or a function taking
      the context and returning the length
    * pattern - the padding pattern (character/byte) to use. default is b"\x00"
    * strict - whether to raise an exception if the actual padding pattern
      mismatches the desired pattern. default is False.
    """
    return PaddingAdapter(Field(None, length), pattern = pattern, strict = strict)


def Flag(name, truth = 1, falsehood = 0, default = False):
    """
    A flag: maps raw values onto the ``bool`` type.

    .. note:: This construct works with both bit and byte contexts.

    .. warning:: Flags default to False, not True. This is different from the
       C and Python way of thinking about truth, and may be subject to change
       in the future.

    :param str name: field name
    :param int truth: value of truth (default 1)
    :param int falsehood: value of falsehood (default 0)
    :param bool default: default value (default False)
    """
    mapping = {True: int2byte(truth), False: int2byte(falsehood)}
    return SymmetricMapping(Field(name, 1), mapping, default = default)
|
||||
|
||||
#===============================================================================
|
||||
# field shortcuts
|
||||
#===============================================================================
|
||||
# Bit-granularity shortcuts; all of these must be enclosed in a BitStruct.
def Bit(name):
    """a 1-bit BitField; must be enclosed in a BitStruct"""
    return BitField(name, 1)
def Nibble(name):
    """a 4-bit BitField; must be enclosed in a BitStruct"""
    return BitField(name, 4)
def Octet(name):
    """an 8-bit BitField; must be enclosed in a BitStruct"""
    return BitField(name, 8)
|
||||
|
||||
# struct-format shortcuts: U/S = unsigned/signed, B/L/N = big/little/native
# endian; the trailing number is the width in bits.

# Big-endian unsigned integers.
def UBInt8(name):
    """unsigned, big endian 8-bit integer"""
    return FormatField(name, ">", "B")
def UBInt16(name):
    """unsigned, big endian 16-bit integer"""
    return FormatField(name, ">", "H")
def UBInt32(name):
    """unsigned, big endian 32-bit integer"""
    return FormatField(name, ">", "L")
def UBInt64(name):
    """unsigned, big endian 64-bit integer"""
    return FormatField(name, ">", "Q")

# Big-endian signed integers.
def SBInt8(name):
    """signed, big endian 8-bit integer"""
    return FormatField(name, ">", "b")
def SBInt16(name):
    """signed, big endian 16-bit integer"""
    return FormatField(name, ">", "h")
def SBInt32(name):
    """signed, big endian 32-bit integer"""
    return FormatField(name, ">", "l")
def SBInt64(name):
    """signed, big endian 64-bit integer"""
    return FormatField(name, ">", "q")

# Little-endian unsigned integers.
def ULInt8(name):
    """unsigned, little endian 8-bit integer"""
    return FormatField(name, "<", "B")
def ULInt16(name):
    """unsigned, little endian 16-bit integer"""
    return FormatField(name, "<", "H")
def ULInt32(name):
    """unsigned, little endian 32-bit integer"""
    return FormatField(name, "<", "L")
def ULInt64(name):
    """unsigned, little endian 64-bit integer"""
    return FormatField(name, "<", "Q")

# Little-endian signed integers.
def SLInt8(name):
    """signed, little endian 8-bit integer"""
    return FormatField(name, "<", "b")
def SLInt16(name):
    """signed, little endian 16-bit integer"""
    return FormatField(name, "<", "h")
def SLInt32(name):
    """signed, little endian 32-bit integer"""
    return FormatField(name, "<", "l")
def SLInt64(name):
    """signed, little endian 64-bit integer"""
    return FormatField(name, "<", "q")

# Native-endianness unsigned integers.
def UNInt8(name):
    """unsigned, native endianity 8-bit integer"""
    return FormatField(name, "=", "B")
def UNInt16(name):
    """unsigned, native endianity 16-bit integer"""
    return FormatField(name, "=", "H")
def UNInt32(name):
    """unsigned, native endianity 32-bit integer"""
    return FormatField(name, "=", "L")
def UNInt64(name):
    """unsigned, native endianity 64-bit integer"""
    return FormatField(name, "=", "Q")

# Native-endianness signed integers.
def SNInt8(name):
    """signed, native endianity 8-bit integer"""
    return FormatField(name, "=", "b")
def SNInt16(name):
    """signed, native endianity 16-bit integer"""
    return FormatField(name, "=", "h")
def SNInt32(name):
    """signed, native endianity 32-bit integer"""
    return FormatField(name, "=", "l")
def SNInt64(name):
    """signed, native endianity 64-bit integer"""
    return FormatField(name, "=", "q")

# IEEE floating point numbers.
def BFloat32(name):
    """big endian, 32-bit IEEE floating point number"""
    return FormatField(name, ">", "f")
def LFloat32(name):
    """little endian, 32-bit IEEE floating point number"""
    return FormatField(name, "<", "f")
def NFloat32(name):
    """native endianity, 32-bit IEEE floating point number"""
    return FormatField(name, "=", "f")

def BFloat64(name):
    """big endian, 64-bit IEEE floating point number"""
    return FormatField(name, ">", "d")
def LFloat64(name):
    """little endian, 64-bit IEEE floating point number"""
    return FormatField(name, "<", "d")
def NFloat64(name):
    """native endianity, 64-bit IEEE floating point number"""
    return FormatField(name, "=", "d")
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# arrays
|
||||
#===============================================================================
|
||||
def Array(count, subcon):
    """
    Repeats the given unit a fixed number of times.

    :param int count: number of times to repeat, or a function taking the
        context and returning the count
    :param ``Construct`` subcon: construct to repeat

    >>> c = Array(4, UBInt8("foo"))
    >>> c.parse("\\x01\\x02\\x03\\x04")
    [1, 2, 3, 4]
    >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06")
    [1, 2, 3, 4]
    >>> c.build([5,6,7,8])
    '\\x05\\x06\\x07\\x08'
    >>> c.build([5,6,7,8,9])
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 4..4, found 5
    """
    if callable(count):
        con = MetaArray(count, subcon)
    else:
        con = MetaArray(lambda ctx: count, subcon)
        # a literal count makes the array statically sized, so the dynamic
        # flag (set by MetaArray) can be cleared
        con._clear_flag(con.FLAG_DYNAMIC)
    return con
|
||||
|
||||
def PrefixedArray(subcon, length_field = UBInt8("length")):
    """An array prefixed by a length field.

    * subcon - the subcon to be repeated
    * length_field - a construct returning an integer
    """
    body = Sequence(subcon.name,
        length_field,
        Array(lambda ctx: ctx[length_field.name], subcon),
        nested = False,
    )
    return LengthValueAdapter(body)
|
||||
|
||||
def OpenRange(mincount, subcon):
    # An open-ended Range: from mincount up to the platform's maxsize.
    from sys import maxsize
    return Range(mincount, maxsize, subcon)


def GreedyRange(subcon):
    """
    Repeats the given unit one or more times (parsing consumes as many
    repetitions as possible; at least one is required).

    :param ``Construct`` subcon: construct to repeat
    """
    return OpenRange(1, subcon)


def OptionalGreedyRange(subcon):
    """
    Repeats the given unit zero or more times. This repeater can't fail,
    as it accepts lists of any length.

    :param ``Construct`` subcon: construct to repeat
    """
    return OpenRange(0, subcon)
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# subconstructs
|
||||
#===============================================================================
|
||||
def Optional(subcon):
    """An optional construct: if parsing fails, returns None.

    * subcon - the subcon to optionally parse or build
    """
    # Select tries subcon first and falls back to Pass (i.e. None)
    return Select(subcon.name, subcon, Pass)
|
||||
|
||||
def Bitwise(subcon):
    """converts the stream to bits, and passes the bitstream to subcon
    * subcon - a bitwise construct (usually BitField)
    """
    # subcons larger than MAX_BUFFER will be wrapped by Restream instead
    # of Buffered. implementation details, don't stick your nose in :)
    MAX_BUFFER = 1024 * 8
    def resizer(length):
        # converts a bit count into a byte count; the bit count must be
        # byte-aligned
        if length & 7:
            raise SizeofError("size must be a multiple of 8", length)
        return length >> 3
    if not subcon._is_flag(subcon.FLAG_DYNAMIC) and subcon.sizeof() < MAX_BUFFER:
        # statically-sized, small subcon: expand the whole region in memory
        con = Buffered(subcon,
            encoder = decode_bin,
            decoder = encode_bin,
            resizer = resizer
        )
    else:
        # dynamically-sized or large subcon: wrap the stream itself
        con = Restream(subcon,
            stream_reader = BitStreamReader,
            stream_writer = BitStreamWriter,
            resizer = resizer)
    return con
|
||||
|
||||
def Aligned(subcon, modulus = 4, pattern = b"\x00"):
    r"""aligns subcon to modulus boundary using padding pattern
    * subcon - the subcon to align
    * modulus - the modulus boundary (default is 4)
    * pattern - the padding pattern (default is \x00)
    """
    if modulus < 2:
        raise ValueError("modulus must be >= 2", modulus)
    def padlength(ctx):
        # bytes needed to round subcon's size up to the next modulus boundary
        return (modulus - (subcon._sizeof(ctx) % modulus)) % modulus
    return SeqOfOne(subcon.name,
        subcon,
        # the padding is parsed/built after the subcon and then discarded:
        # SeqOfOne keeps only the first element of the sequence
        Padding(padlength, pattern = pattern),
        nested = False,
    )
|
||||
|
||||
def SeqOfOne(name, *args, **kw):
    """A sequence of one meaningful element: only the first element is kept,
    the rest are parsed/built but discarded.

    * name - the name of the sequence
    * args - subconstructs
    * kw - any keyword arguments to Sequence
    """
    seq = Sequence(name, *args, **kw)
    return IndexingAdapter(seq, index = 0)
|
||||
|
||||
def Embedded(subcon):
    """Embeds a struct into the enclosing struct, flattening its fields.

    * subcon - the struct to embed
    """
    return Reconfig(subcon.name, subcon, subcon.FLAG_EMBED)


def Rename(newname, subcon):
    """Renames an existing construct.

    * newname - the new name
    * subcon - the subcon to rename
    """
    return Reconfig(newname, subcon)


def Alias(newname, oldname):
    """Creates an alias for an existing element in a struct.

    * newname - the new name
    * oldname - the name of an existing element
    """
    return Value(newname, lambda ctx: ctx[oldname])
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# mapping
|
||||
#===============================================================================
|
||||
def SymmetricMapping(subcon, mapping, default = NotImplemented):
    """Defines a symmetrical mapping: a->b, b->a.

    * subcon - the subcon to map
    * mapping - the encoding mapping (a dict); the decoding mapping is its
      reverse
    * default - the value to use when no mapping is found. if no default is
      given, an exception is raised. setting to Pass returns the value
      "as is" (unmapped)
    """
    decoding = {value: key for key, value in mapping.items()}
    return MappingAdapter(subcon,
        encoding = mapping,
        decoding = decoding,
        encdefault = default,
        decdefault = default,
    )


def Enum(subcon, **kw):
    """A set of named values mapping.

    * subcon - the subcon to map
    * kw - keyword arguments which serve as the encoding mapping
    * _default_ - an optional, keyword-only argument giving the default
      value when the mapping is undefined. if not given, an exception is
      raised in that case. use `Pass` to pass the unmapped value as-is
    """
    default = kw.pop("_default_", NotImplemented)
    return SymmetricMapping(subcon, kw, default)


def FlagsEnum(subcon, **kw):
    """A set of flag values mapping.

    * subcon - the subcon to map
    * kw - keyword arguments which serve as the encoding mapping
    """
    return FlagsAdapter(subcon, kw)
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# structs
|
||||
#===============================================================================
|
||||
def AlignedStruct(name, *subcons, **kw):
    """A struct of aligned fields.

    * name - the name of the struct
    * subcons - the subcons that make up this structure
    * kw - keyword arguments to pass to Aligned: 'modulus' and 'pattern'
    """
    aligned = [Aligned(sc, **kw) for sc in subcons]
    return Struct(name, *aligned)


def BitStruct(name, *subcons):
    """A struct of bitwise fields.

    * name - the name of the struct
    * subcons - the subcons that make up this structure
    """
    return Bitwise(Struct(name, *subcons))


def EmbeddedBitStruct(*subcons):
    """An embedded BitStruct; no name is necessary.

    * subcons - the subcons that make up this structure
    """
    return Bitwise(Embedded(Struct(None, *subcons)))
|
||||
|
||||
#===============================================================================
|
||||
# strings
|
||||
#===============================================================================
|
||||
def String(name, length, encoding=None, padchar=None, paddir="right",
           trimdir="right"):
    """
    A configurable, fixed-length string field.

    The padding character must be specified for padding and trimming to work.

    :param str name: name
    :param int length: length, in bytes
    :param str encoding: encoding (e.g. "utf8") or None for no encoding
    :param str padchar: optional character to pad out strings
    :param str paddir: direction to pad out strings; one of "right", "left",
        or "both"
    :param str trimdir: direction to trim strings; one of "right", "left"
    """
    adapted = StringAdapter(Field(name, length), encoding=encoding)
    if padchar is None:
        return adapted
    # only wrap in the padding adapter when a pad character was requested
    return PaddedStringAdapter(adapted, padchar=padchar, paddir=paddir,
                               trimdir=trimdir)
|
||||
|
||||
def PascalString(name, length_field=UBInt8("length"), encoding=None):
|
||||
"""
|
||||
A length-prefixed string.
|
||||
|
||||
``PascalString`` is named after the string types of Pascal, which are
|
||||
length-prefixed. Lisp strings also follow this convention.
|
||||
|
||||
The length field will appear in the same ``Container`` as the
|
||||
``PascalString``, with the given name.
|
||||
|
||||
:param str name: name
|
||||
:param ``Construct`` length_field: a field which will store the length of
|
||||
the string
|
||||
:param str encoding: encoding (e.g. "utf8") or None for no encoding
|
||||
|
||||
>>> foo = PascalString("foo")
|
||||
>>> foo.parse("\\x05hello")
|
||||
'hello'
|
||||
>>> foo.build("hello world")
|
||||
'\\x0bhello world'
|
||||
>>>
|
||||
>>> foo = PascalString("foo", length_field = UBInt16("length"))
|
||||
>>> foo.parse("\\x00\\x05hello")
|
||||
'hello'
|
||||
>>> foo.build("hello")
|
||||
'\\x00\\x05hello'
|
||||
"""
|
||||
|
||||
return StringAdapter(
|
||||
LengthValueAdapter(
|
||||
Sequence(name,
|
||||
length_field,
|
||||
Field("data", lambda ctx: ctx[length_field.name]),
|
||||
)
|
||||
),
|
||||
encoding=encoding,
|
||||
)
|
||||
|
||||
def CString(name, terminators=b"\x00", encoding=None,
|
||||
char_field=Field(None, 1)):
|
||||
"""
|
||||
A string ending in a terminator.
|
||||
|
||||
``CString`` is similar to the strings of C, C++, and other related
|
||||
programming languages.
|
||||
|
||||
By default, the terminator is the NULL byte (b``0x00``).
|
||||
|
||||
:param str name: name
|
||||
:param iterable terminators: sequence of valid terminators, in order of
|
||||
preference
|
||||
:param str encoding: encoding (e.g. "utf8") or None for no encoding
|
||||
:param ``Construct`` char_field: construct representing a single character
|
||||
|
||||
>>> foo = CString("foo")
|
||||
>>> foo.parse(b"hello\\x00")
|
||||
b'hello'
|
||||
>>> foo.build(b"hello")
|
||||
b'hello\\x00'
|
||||
>>> foo = CString("foo", terminators = b"XYZ")
|
||||
>>> foo.parse(b"helloX")
|
||||
b'hello'
|
||||
>>> foo.parse(b"helloY")
|
||||
b'hello'
|
||||
>>> foo.parse(b"helloZ")
|
||||
b'hello'
|
||||
>>> foo.build(b"hello")
|
||||
b'helloX'
|
||||
"""
|
||||
|
||||
return Rename(name,
|
||||
CStringAdapter(
|
||||
RepeatUntil(lambda obj, ctx: obj in terminators, char_field),
|
||||
terminators=terminators,
|
||||
encoding=encoding,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# conditional
|
||||
#===============================================================================
|
||||
def IfThenElse(name, predicate, then_subcon, else_subcon):
|
||||
"""an if-then-else conditional construct: if the predicate indicates True,
|
||||
`then_subcon` will be used; otherwise `else_subcon`
|
||||
* name - the name of the construct
|
||||
* predicate - a function taking the context as an argument and returning
|
||||
True or False
|
||||
* then_subcon - the subcon that will be used if the predicate returns True
|
||||
* else_subcon - the subcon that will be used if the predicate returns False
|
||||
"""
|
||||
return Switch(name, lambda ctx: bool(predicate(ctx)),
|
||||
{
|
||||
True : then_subcon,
|
||||
False : else_subcon,
|
||||
}
|
||||
)
|
||||
|
||||
def If(predicate, subcon, elsevalue = None):
|
||||
"""an if-then conditional construct: if the predicate indicates True,
|
||||
subcon will be used; otherwise, `elsevalue` will be returned instead.
|
||||
* predicate - a function taking the context as an argument and returning
|
||||
True or False
|
||||
* subcon - the subcon that will be used if the predicate returns True
|
||||
* elsevalue - the value that will be used should the predicate return False.
|
||||
by default this value is None.
|
||||
"""
|
||||
return IfThenElse(subcon.name,
|
||||
predicate,
|
||||
subcon,
|
||||
Value("elsevalue", lambda ctx: elsevalue)
|
||||
)
|
||||
|
||||
|
||||
#===============================================================================
|
||||
# misc
|
||||
#===============================================================================
|
||||
def OnDemandPointer(offsetfunc, subcon, force_build = True):
|
||||
"""an on-demand pointer.
|
||||
* offsetfunc - a function taking the context as an argument and returning
|
||||
the absolute stream position
|
||||
* subcon - the subcon that will be parsed from the `offsetfunc()` stream
|
||||
position on demand
|
||||
* force_build - see OnDemand. by default True.
|
||||
"""
|
||||
return OnDemand(Pointer(offsetfunc, subcon),
|
||||
advance_stream = False,
|
||||
force_build = force_build
|
||||
)
|
||||
|
||||
def Magic(data):
|
||||
return ConstAdapter(Field(None, len(data)), data)
|
||||
@@ -1,79 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/abbrevtable.py
|
||||
#
|
||||
# DWARF abbreviation table
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from ..common.utils import struct_parse, dwarf_assert
|
||||
|
||||
|
||||
class AbbrevTable(object):
|
||||
""" Represents a DWARF abbreviation table.
|
||||
"""
|
||||
def __init__(self, structs, stream, offset):
|
||||
""" Create new abbreviation table. Parses the actual table from the
|
||||
stream and stores it internally.
|
||||
|
||||
structs:
|
||||
A DWARFStructs instance for parsing the data
|
||||
|
||||
stream, offset:
|
||||
The stream and offset into the stream where this abbreviation
|
||||
table lives.
|
||||
"""
|
||||
self.structs = structs
|
||||
self.stream = stream
|
||||
self.offset = offset
|
||||
|
||||
self._abbrev_map = self._parse_abbrev_table()
|
||||
|
||||
def get_abbrev(self, code):
|
||||
""" Get the AbbrevDecl for a given code. Raise KeyError if no
|
||||
declaration for this code exists.
|
||||
"""
|
||||
return self._abbrev_map[code]
|
||||
|
||||
def _parse_abbrev_table(self):
|
||||
""" Parse the abbrev table from the stream
|
||||
"""
|
||||
map = {}
|
||||
self.stream.seek(self.offset)
|
||||
while True:
|
||||
decl_code = struct_parse(
|
||||
struct=self.structs.Dwarf_uleb128(''),
|
||||
stream=self.stream)
|
||||
if decl_code == 0:
|
||||
break
|
||||
declaration = struct_parse(
|
||||
struct=self.structs.Dwarf_abbrev_declaration,
|
||||
stream=self.stream)
|
||||
map[decl_code] = AbbrevDecl(decl_code, declaration)
|
||||
return map
|
||||
|
||||
|
||||
class AbbrevDecl(object):
|
||||
""" Wraps a parsed abbreviation declaration, exposing its fields with
|
||||
dict-like access, and adding some convenience methods.
|
||||
|
||||
The abbreviation declaration represents an "entry" that points to it.
|
||||
"""
|
||||
def __init__(self, code, decl):
|
||||
self.code = code
|
||||
self.decl = decl
|
||||
|
||||
def has_children(self):
|
||||
""" Does the entry have children?
|
||||
"""
|
||||
return self['children_flag'] == 'DW_CHILDREN_yes'
|
||||
|
||||
def iter_attr_specs(self):
|
||||
""" Iterate over the attribute specifications for the entry. Yield
|
||||
(name, form) pairs.
|
||||
"""
|
||||
for attr_spec in self['attr_spec']:
|
||||
yield attr_spec.name, attr_spec.form
|
||||
|
||||
def __getitem__(self, entry):
|
||||
return self.decl[entry]
|
||||
@@ -1,113 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/aranges.py
|
||||
#
|
||||
# DWARF aranges section decoding (.debug_aranges)
|
||||
#
|
||||
# Dorothy Chen (dorothchen@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
import os
|
||||
from collections import namedtuple
|
||||
from ..common.utils import struct_parse
|
||||
from bisect import bisect_right
|
||||
import math
|
||||
|
||||
# An entry in the aranges table;
|
||||
# begin_addr: The beginning address in the CU
|
||||
# length: The length of the address range in this entry
|
||||
# info_offset: The CU's offset into .debug_info
|
||||
# see 6.1.2 in DWARF4 docs for explanation of the remaining fields
|
||||
ARangeEntry = namedtuple('ARangeEntry',
|
||||
'begin_addr length info_offset unit_length version address_size segment_size')
|
||||
|
||||
class ARanges(object):
|
||||
""" ARanges table in DWARF
|
||||
|
||||
stream, size:
|
||||
A stream holding the .debug_aranges section, and its size
|
||||
|
||||
structs:
|
||||
A DWARFStructs instance for parsing the data
|
||||
"""
|
||||
def __init__(self, stream, size, structs):
|
||||
self.stream = stream
|
||||
self.size = size
|
||||
self.structs = structs
|
||||
|
||||
# Get entries of aranges table in the form of ARangeEntry tuples
|
||||
self.entries = self._get_entries()
|
||||
|
||||
# Sort entries by the beginning address
|
||||
self.entries.sort(key=lambda entry: entry.begin_addr)
|
||||
|
||||
# Create list of keys (first addresses) for better searching
|
||||
self.keys = [entry.begin_addr for entry in self.entries]
|
||||
|
||||
|
||||
def cu_offset_at_addr(self, addr):
|
||||
""" Given an address, get the offset of the CU it belongs to, where
|
||||
'offset' refers to the offset in the .debug_info section.
|
||||
"""
|
||||
tup = self.entries[bisect_right(self.keys, addr) - 1]
|
||||
if tup.begin_addr <= addr < tup.begin_addr + tup.length:
|
||||
return tup.info_offset
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
#------ PRIVATE ------#
|
||||
def _get_entries(self):
|
||||
""" Populate self.entries with ARangeEntry tuples for each range of addresses
|
||||
"""
|
||||
self.stream.seek(0)
|
||||
entries = []
|
||||
offset = 0
|
||||
|
||||
# one loop == one "set" == one CU
|
||||
while offset < self.size :
|
||||
aranges_header = struct_parse(self.structs.Dwarf_aranges_header,
|
||||
self.stream, offset)
|
||||
addr_size = self._get_addr_size_struct(aranges_header["address_size"])
|
||||
|
||||
# No segmentation
|
||||
if aranges_header["segment_size"] == 0:
|
||||
# pad to nearest multiple of tuple size
|
||||
tuple_size = aranges_header["address_size"] * 2
|
||||
fp = self.stream.tell()
|
||||
seek_to = int(math.ceil(fp/float(tuple_size)) * tuple_size)
|
||||
self.stream.seek(seek_to)
|
||||
|
||||
# entries in this set/CU
|
||||
addr = struct_parse(addr_size('addr'), self.stream)
|
||||
length = struct_parse(addr_size('length'), self.stream)
|
||||
while addr != 0 or length != 0:
|
||||
# 'begin_addr length info_offset version address_size segment_size'
|
||||
entries.append(
|
||||
ARangeEntry(begin_addr=addr,
|
||||
length=length,
|
||||
info_offset=aranges_header["debug_info_offset"],
|
||||
unit_length=aranges_header["unit_length"],
|
||||
version=aranges_header["version"],
|
||||
address_size=aranges_header["address_size"],
|
||||
segment_size=aranges_header["segment_size"]))
|
||||
addr = struct_parse(addr_size('addr'), self.stream)
|
||||
length = struct_parse(addr_size('length'), self.stream)
|
||||
# Segmentation exists in executable
|
||||
elif aranges_header["segment_size"] != 0:
|
||||
raise NotImplementedError("Segmentation not implemented")
|
||||
|
||||
offset = (offset
|
||||
+ aranges_header.unit_length
|
||||
+ self.structs.initial_length_field_size())
|
||||
|
||||
return entries
|
||||
|
||||
def _get_addr_size_struct(self, addr_header_value):
|
||||
""" Given this set's header value (int) for the address size,
|
||||
get the Construct representation of that size
|
||||
"""
|
||||
if addr_header_value == 4:
|
||||
return self.structs.Dwarf_uint32
|
||||
else:
|
||||
assert addr_header_value == 8
|
||||
return self.structs.Dwarf_uint64
|
||||
@@ -1,724 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/callframe.py
|
||||
#
|
||||
# DWARF call frame information
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
import copy
|
||||
from collections import namedtuple
|
||||
from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos)
|
||||
from ..common.py3compat import iterbytes, iterkeys
|
||||
from ..construct import Struct, Switch
|
||||
from .enums import DW_EH_encoding_flags
|
||||
from .structs import DWARFStructs
|
||||
from .constants import *
|
||||
|
||||
|
||||
class CallFrameInfo(object):
|
||||
""" DWARF CFI (Call Frame Info)
|
||||
|
||||
Note that this also supports unwinding information as found in .eh_frame
|
||||
sections: its format differs slightly from the one in .debug_frame. See
|
||||
<http://www.airs.com/blog/archives/460>.
|
||||
|
||||
stream, size:
|
||||
A stream holding the .debug_frame section, and the size of the
|
||||
section in it.
|
||||
|
||||
address:
|
||||
Virtual address for this section. This is used to decode relative
|
||||
addresses.
|
||||
|
||||
base_structs:
|
||||
The structs to be used as the base for parsing this section.
|
||||
Eventually, each entry gets its own structs based on the initial
|
||||
length field it starts with. The address_size, however, is taken
|
||||
from base_structs. This appears to be a limitation of the DWARFv3
|
||||
standard, fixed in v4.
|
||||
A discussion I had on dwarf-discuss confirms this.
|
||||
So for DWARFv4 we'll take the address size from the CIE header,
|
||||
but for earlier versions will use the elfclass of the containing
|
||||
file; more sophisticated methods are used by libdwarf and others,
|
||||
such as guessing which CU contains which FDEs (based on their
|
||||
address ranges) and taking the address_size from those CUs.
|
||||
"""
|
||||
def __init__(self, stream, size, address, base_structs,
|
||||
for_eh_frame=False):
|
||||
self.stream = stream
|
||||
self.size = size
|
||||
self.address = address
|
||||
self.base_structs = base_structs
|
||||
self.entries = None
|
||||
|
||||
# Map between an offset in the stream and the entry object found at this
|
||||
# offset. Useful for assigning CIE to FDEs according to the CIE_pointer
|
||||
# header field which contains a stream offset.
|
||||
self._entry_cache = {}
|
||||
|
||||
# The .eh_frame and .debug_frame section use almost the same CFI
|
||||
# encoding, but there are tiny variations we need to handle during
|
||||
# parsing.
|
||||
self.for_eh_frame = for_eh_frame
|
||||
|
||||
def get_entries(self):
|
||||
""" Get a list of entries that constitute this CFI. The list consists
|
||||
of CIE or FDE objects, in the order of their appearance in the
|
||||
section.
|
||||
"""
|
||||
if self.entries is None:
|
||||
self.entries = self._parse_entries()
|
||||
return self.entries
|
||||
|
||||
#-------------------------
|
||||
|
||||
def _parse_entries(self):
|
||||
entries = []
|
||||
offset = 0
|
||||
while offset < self.size:
|
||||
entries.append(self._parse_entry_at(offset))
|
||||
offset = self.stream.tell()
|
||||
return entries
|
||||
|
||||
def _parse_entry_at(self, offset):
|
||||
""" Parse an entry from self.stream starting with the given offset.
|
||||
Return the entry object. self.stream will point right after the
|
||||
entry.
|
||||
"""
|
||||
if offset in self._entry_cache:
|
||||
return self._entry_cache[offset]
|
||||
|
||||
entry_length = struct_parse(
|
||||
self.base_structs.Dwarf_uint32(''), self.stream, offset)
|
||||
|
||||
if self.for_eh_frame and entry_length == 0:
|
||||
return ZERO(offset)
|
||||
|
||||
dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32
|
||||
|
||||
entry_structs = DWARFStructs(
|
||||
little_endian=self.base_structs.little_endian,
|
||||
dwarf_format=dwarf_format,
|
||||
address_size=self.base_structs.address_size)
|
||||
|
||||
# Read the next field to see whether this is a CIE or FDE
|
||||
CIE_id = struct_parse(
|
||||
entry_structs.Dwarf_offset(''), self.stream)
|
||||
|
||||
if self.for_eh_frame:
|
||||
is_CIE = CIE_id == 0
|
||||
else:
|
||||
is_CIE = (
|
||||
(dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or
|
||||
CIE_id == 0xFFFFFFFFFFFFFFFF)
|
||||
|
||||
# Parse the header, which goes up to and excluding the sequence of
|
||||
# instructions.
|
||||
if is_CIE:
|
||||
header_struct = (entry_structs.EH_CIE_header
|
||||
if self.for_eh_frame else
|
||||
entry_structs.Dwarf_CIE_header)
|
||||
header = struct_parse(
|
||||
header_struct, self.stream, offset)
|
||||
else:
|
||||
header = self._parse_fde_header(entry_structs, offset)
|
||||
|
||||
|
||||
# If this is DWARF version 4 or later, we can have a more precise
|
||||
# address size, read from the CIE header.
|
||||
if not self.for_eh_frame and entry_structs.dwarf_version >= 4:
|
||||
entry_structs = DWARFStructs(
|
||||
little_endian=entry_structs.little_endian,
|
||||
dwarf_format=entry_structs.dwarf_format,
|
||||
address_size=header.address_size)
|
||||
|
||||
# If the augmentation string is not empty, hope to find a length field
|
||||
# in order to skip the data specified augmentation.
|
||||
if is_CIE:
|
||||
aug_bytes, aug_dict = self._parse_cie_augmentation(
|
||||
header, entry_structs)
|
||||
else:
|
||||
cie = self._parse_cie_for_fde(offset, header, entry_structs)
|
||||
aug_bytes = self._read_augmentation_data(entry_structs)
|
||||
lsda_encoding = cie.augmentation_dict.get('LSDA_encoding', DW_EH_encoding_flags['DW_EH_PE_omit'])
|
||||
if lsda_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']:
|
||||
# parse LSDA pointer
|
||||
lsda_pointer = self._parse_lsda_pointer(entry_structs,
|
||||
self.stream.tell() - len(aug_bytes),
|
||||
lsda_encoding)
|
||||
else:
|
||||
lsda_pointer = None
|
||||
|
||||
# For convenience, compute the end offset for this entry
|
||||
end_offset = (
|
||||
offset + header.length +
|
||||
entry_structs.initial_length_field_size())
|
||||
|
||||
# At this point self.stream is at the start of the instruction list
|
||||
# for this entry
|
||||
instructions = self._parse_instructions(
|
||||
entry_structs, self.stream.tell(), end_offset)
|
||||
|
||||
if is_CIE:
|
||||
self._entry_cache[offset] = CIE(
|
||||
header=header, instructions=instructions, offset=offset,
|
||||
augmentation_dict=aug_dict,
|
||||
augmentation_bytes=aug_bytes,
|
||||
structs=entry_structs)
|
||||
|
||||
else: # FDE
|
||||
cie = self._parse_cie_for_fde(offset, header, entry_structs)
|
||||
self._entry_cache[offset] = FDE(
|
||||
header=header, instructions=instructions, offset=offset,
|
||||
structs=entry_structs, cie=cie,
|
||||
augmentation_bytes=aug_bytes,
|
||||
lsda_pointer=lsda_pointer,
|
||||
)
|
||||
return self._entry_cache[offset]
|
||||
|
||||
def _parse_instructions(self, structs, offset, end_offset):
|
||||
""" Parse a list of CFI instructions from self.stream, starting with
|
||||
the offset and until (not including) end_offset.
|
||||
Return a list of CallFrameInstruction objects.
|
||||
"""
|
||||
instructions = []
|
||||
while offset < end_offset:
|
||||
opcode = struct_parse(structs.Dwarf_uint8(''), self.stream, offset)
|
||||
args = []
|
||||
|
||||
primary = opcode & _PRIMARY_MASK
|
||||
primary_arg = opcode & _PRIMARY_ARG_MASK
|
||||
if primary == DW_CFA_advance_loc:
|
||||
args = [primary_arg]
|
||||
elif primary == DW_CFA_offset:
|
||||
args = [
|
||||
primary_arg,
|
||||
struct_parse(structs.Dwarf_uleb128(''), self.stream)]
|
||||
elif primary == DW_CFA_restore:
|
||||
args = [primary_arg]
|
||||
# primary == 0 and real opcode is extended
|
||||
elif opcode in (DW_CFA_nop, DW_CFA_remember_state,
|
||||
DW_CFA_restore_state):
|
||||
args = []
|
||||
elif opcode == DW_CFA_set_loc:
|
||||
args = [
|
||||
struct_parse(structs.Dwarf_target_addr(''), self.stream)]
|
||||
elif opcode == DW_CFA_advance_loc1:
|
||||
args = [struct_parse(structs.Dwarf_uint8(''), self.stream)]
|
||||
elif opcode == DW_CFA_advance_loc2:
|
||||
args = [struct_parse(structs.Dwarf_uint16(''), self.stream)]
|
||||
elif opcode == DW_CFA_advance_loc4:
|
||||
args = [struct_parse(structs.Dwarf_uint32(''), self.stream)]
|
||||
elif opcode in (DW_CFA_offset_extended, DW_CFA_register,
|
||||
DW_CFA_def_cfa, DW_CFA_val_offset):
|
||||
args = [
|
||||
struct_parse(structs.Dwarf_uleb128(''), self.stream),
|
||||
struct_parse(structs.Dwarf_uleb128(''), self.stream)]
|
||||
elif opcode in (DW_CFA_restore_extended, DW_CFA_undefined,
|
||||
DW_CFA_same_value, DW_CFA_def_cfa_register,
|
||||
DW_CFA_def_cfa_offset):
|
||||
args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)]
|
||||
elif opcode == DW_CFA_def_cfa_offset_sf:
|
||||
args = [struct_parse(structs.Dwarf_sleb128(''), self.stream)]
|
||||
elif opcode == DW_CFA_def_cfa_expression:
|
||||
args = [struct_parse(
|
||||
structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
|
||||
elif opcode in (DW_CFA_expression, DW_CFA_val_expression):
|
||||
args = [
|
||||
struct_parse(structs.Dwarf_uleb128(''), self.stream),
|
||||
struct_parse(
|
||||
structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
|
||||
elif opcode in (DW_CFA_offset_extended_sf,
|
||||
DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf):
|
||||
args = [
|
||||
struct_parse(structs.Dwarf_uleb128(''), self.stream),
|
||||
struct_parse(structs.Dwarf_sleb128(''), self.stream)]
|
||||
elif opcode == DW_CFA_GNU_args_size:
|
||||
args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)]
|
||||
else:
|
||||
dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode)
|
||||
|
||||
instructions.append(CallFrameInstruction(opcode=opcode, args=args))
|
||||
offset = self.stream.tell()
|
||||
return instructions
|
||||
|
||||
def _parse_cie_for_fde(self, fde_offset, fde_header, entry_structs):
|
||||
""" Parse the CIE that corresponds to an FDE.
|
||||
"""
|
||||
# Determine the offset of the CIE that corresponds to this FDE
|
||||
if self.for_eh_frame:
|
||||
# CIE_pointer contains the offset for a reverse displacement from
|
||||
# the section offset of the CIE_pointer field itself (not from the
|
||||
# FDE header offset).
|
||||
cie_displacement = fde_header['CIE_pointer']
|
||||
cie_offset = (fde_offset + entry_structs.dwarf_format // 8
|
||||
- cie_displacement)
|
||||
else:
|
||||
cie_offset = fde_header['CIE_pointer']
|
||||
|
||||
# Then read it
|
||||
with preserve_stream_pos(self.stream):
|
||||
return self._parse_entry_at(cie_offset)
|
||||
|
||||
def _parse_cie_augmentation(self, header, entry_structs):
|
||||
""" Parse CIE augmentation data from the annotation string in `header`.
|
||||
|
||||
Return a tuple that contains 1) the augmentation data as a string
|
||||
(without the length field) and 2) the augmentation data as a dict.
|
||||
"""
|
||||
augmentation = header.get('augmentation')
|
||||
if not augmentation:
|
||||
return ('', {})
|
||||
|
||||
# Augmentation parsing works in minimal mode here: we need the length
|
||||
# field to be able to skip unhandled augmentation fields.
|
||||
assert augmentation.startswith(b'z'), (
|
||||
'Unhandled augmentation string: {}'.format(repr(augmentation)))
|
||||
|
||||
available_fields = {
|
||||
b'z': entry_structs.Dwarf_uleb128('length'),
|
||||
b'L': entry_structs.Dwarf_uint8('LSDA_encoding'),
|
||||
b'R': entry_structs.Dwarf_uint8('FDE_encoding'),
|
||||
b'S': True,
|
||||
b'P': Struct(
|
||||
'personality',
|
||||
entry_structs.Dwarf_uint8('encoding'),
|
||||
Switch('function', lambda ctx: ctx.encoding & 0x0f, {
|
||||
enc: fld_cons('function')
|
||||
for enc, fld_cons
|
||||
in self._eh_encoding_to_field(entry_structs).items()})),
|
||||
}
|
||||
|
||||
# Build the Struct we will be using to parse the augmentation data.
|
||||
# Stop as soon as we are not able to match the augmentation string.
|
||||
fields = []
|
||||
aug_dict = {}
|
||||
|
||||
for b in iterbytes(augmentation):
|
||||
try:
|
||||
fld = available_fields[b]
|
||||
except KeyError:
|
||||
break
|
||||
|
||||
if fld is True:
|
||||
aug_dict[fld] = True
|
||||
else:
|
||||
fields.append(fld)
|
||||
|
||||
# Read the augmentation twice: once with the Struct, once for the raw
|
||||
# bytes. Read the raw bytes last so we are sure we leave the stream
|
||||
# pointing right after the augmentation: the Struct may be incomplete
|
||||
# (missing trailing fields) due to an unknown char: see the KeyError
|
||||
# above.
|
||||
offset = self.stream.tell()
|
||||
struct = Struct('Augmentation_Data', *fields)
|
||||
aug_dict.update(struct_parse(struct, self.stream, offset))
|
||||
self.stream.seek(offset)
|
||||
aug_bytes = self._read_augmentation_data(entry_structs)
|
||||
return (aug_bytes, aug_dict)
|
||||
|
||||
def _read_augmentation_data(self, entry_structs):
|
||||
""" Read augmentation data.
|
||||
|
||||
This assumes that the augmentation string starts with 'z', i.e. that
|
||||
augmentation data is prefixed by a length field, which is not returned.
|
||||
"""
|
||||
if not self.for_eh_frame:
|
||||
return b''
|
||||
|
||||
augmentation_data_length = struct_parse(
|
||||
Struct('Dummy_Augmentation_Data',
|
||||
entry_structs.Dwarf_uleb128('length')),
|
||||
self.stream)['length']
|
||||
return self.stream.read(augmentation_data_length)
|
||||
|
||||
def _parse_lsda_pointer(self, structs, stream_offset, encoding):
|
||||
""" Parse bytes to get an LSDA pointer.
|
||||
|
||||
The basic encoding (lower four bits of the encoding) describes how the values are encoded in a CIE or an FDE.
|
||||
The modifier (upper four bits of the encoding) describes how the raw values, after decoded using a basic
|
||||
encoding, should be modified before using.
|
||||
|
||||
Ref: https://www.airs.com/blog/archives/460
|
||||
"""
|
||||
assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit']
|
||||
basic_encoding = encoding & 0x0f
|
||||
modifier = encoding & 0xf0
|
||||
|
||||
formats = self._eh_encoding_to_field(structs)
|
||||
|
||||
ptr = struct_parse(
|
||||
Struct('Augmentation_Data',
|
||||
formats[basic_encoding]('LSDA_pointer')),
|
||||
self.stream, stream_pos=stream_offset)['LSDA_pointer']
|
||||
|
||||
if modifier == DW_EH_encoding_flags['DW_EH_PE_absptr']:
|
||||
pass
|
||||
|
||||
elif modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
|
||||
ptr += self.address + stream_offset
|
||||
|
||||
else:
|
||||
assert False, 'Unsupported encoding modifier for LSDA pointer: {:#x}'.format(modifier)
|
||||
|
||||
return ptr
|
||||
|
||||
def _parse_fde_header(self, entry_structs, offset):
|
||||
""" Compute a struct to parse the header of the current FDE.
|
||||
"""
|
||||
if not self.for_eh_frame:
|
||||
return struct_parse(entry_structs.Dwarf_FDE_header, self.stream,
|
||||
offset)
|
||||
|
||||
fields = [entry_structs.Dwarf_initial_length('length'),
|
||||
entry_structs.Dwarf_offset('CIE_pointer')]
|
||||
|
||||
# Parse the couple of header fields that are always here so we can
|
||||
# fetch the corresponding CIE.
|
||||
minimal_header = struct_parse(Struct('eh_frame_minimal_header',
|
||||
*fields), self.stream, offset)
|
||||
cie = self._parse_cie_for_fde(offset, minimal_header, entry_structs)
|
||||
initial_location_offset = self.stream.tell()
|
||||
|
||||
# Try to parse the initial location. We need the initial location in
|
||||
# order to create a meaningful FDE, so assume it's there. Omission does
|
||||
# not seem to happen in practice.
|
||||
encoding = cie.augmentation_dict['FDE_encoding']
|
||||
assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit']
|
||||
basic_encoding = encoding & 0x0f
|
||||
encoding_modifier = encoding & 0xf0
|
||||
|
||||
# Depending on the specified encoding, complete the header Struct
|
||||
formats = self._eh_encoding_to_field(entry_structs)
|
||||
fields.append(formats[basic_encoding]('initial_location'))
|
||||
fields.append(formats[basic_encoding]('address_range'))
|
||||
|
||||
result = struct_parse(Struct('Dwarf_FDE_header', *fields),
|
||||
self.stream, offset)
|
||||
|
||||
if encoding_modifier == 0:
|
||||
pass
|
||||
|
||||
elif encoding_modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
|
||||
# Start address is relative to the address of the
|
||||
# "initial_location" field.
|
||||
result['initial_location'] += (
|
||||
self.address + initial_location_offset)
|
||||
else:
|
||||
assert False, 'Unsupported encoding: {:#x}'.format(encoding)
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _eh_encoding_to_field(entry_structs):
|
||||
"""
|
||||
Return a mapping from basic encodings (DW_EH_encoding_flags) the
|
||||
corresponding field constructors (for instance
|
||||
entry_structs.Dwarf_uint32).
|
||||
"""
|
||||
return {
|
||||
DW_EH_encoding_flags['DW_EH_PE_absptr']:
|
||||
entry_structs.Dwarf_target_addr,
|
||||
DW_EH_encoding_flags['DW_EH_PE_uleb128']:
|
||||
entry_structs.Dwarf_uleb128,
|
||||
DW_EH_encoding_flags['DW_EH_PE_udata2']:
|
||||
entry_structs.Dwarf_uint16,
|
||||
DW_EH_encoding_flags['DW_EH_PE_udata4']:
|
||||
entry_structs.Dwarf_uint32,
|
||||
DW_EH_encoding_flags['DW_EH_PE_udata8']:
|
||||
entry_structs.Dwarf_uint64,
|
||||
|
||||
DW_EH_encoding_flags['DW_EH_PE_sleb128']:
|
||||
entry_structs.Dwarf_sleb128,
|
||||
DW_EH_encoding_flags['DW_EH_PE_sdata2']:
|
||||
entry_structs.Dwarf_int16,
|
||||
DW_EH_encoding_flags['DW_EH_PE_sdata4']:
|
||||
entry_structs.Dwarf_int32,
|
||||
DW_EH_encoding_flags['DW_EH_PE_sdata8']:
|
||||
entry_structs.Dwarf_int64,
|
||||
}
|
||||
|
||||
|
||||
def instruction_name(opcode):
|
||||
""" Given an opcode, return the instruction name.
|
||||
"""
|
||||
primary = opcode & _PRIMARY_MASK
|
||||
if primary == 0:
|
||||
return _OPCODE_NAME_MAP[opcode]
|
||||
else:
|
||||
return _OPCODE_NAME_MAP[primary]
|
||||
|
||||
|
||||
class CallFrameInstruction(object):
|
||||
""" An instruction in the CFI section. opcode is the instruction
|
||||
opcode, numeric - as it appears in the section. args is a list of
|
||||
arguments (including arguments embedded in the low bits of some
|
||||
instructions, when applicable), decoded from the stream.
|
||||
"""
|
||||
def __init__(self, opcode, args):
|
||||
self.opcode = opcode
|
||||
self.args = args
|
||||
|
||||
def __repr__(self):
|
||||
return '%s (0x%x): %s' % (
|
||||
instruction_name(self.opcode), self.opcode, self.args)
|
||||
|
||||
|
||||
class CFIEntry(object):
    """ A common base class for CFI entries.
        Contains a header and a list of instructions (CallFrameInstruction).

        offset: the offset of this entry from the beginning of the section

        cie: for FDEs, a CIE pointer is required

        augmentation_dict: Augmentation data as a parsed struct (dict): see
        CallFrameInfo._parse_cie_augmentation and
        http://www.airs.com/blog/archives/460.

        augmentation_bytes: Augmentation data as a chain of bytes: see
        CallFrameInfo._parse_cie_augmentation and
        http://www.airs.com/blog/archives/460.
    """
    def __init__(self, header, structs, instructions, offset,
                 augmentation_dict=None, augmentation_bytes=b'', cie=None):
        self.header = header
        self.structs = structs
        self.instructions = instructions
        self.offset = offset
        self.cie = cie
        # The decoded table is computed lazily by get_decoded().
        self._decoded_table = None
        self.augmentation_dict = augmentation_dict if augmentation_dict else {}
        self.augmentation_bytes = augmentation_bytes

    def get_decoded(self):
        """ Decode the CFI contained in this entry and return a
            DecodedCallFrameTable object representing it. See the documentation
            of that class to understand how to interpret the decoded table.
        """
        if self._decoded_table is None:
            self._decoded_table = self._decode_CFI_table()
        return self._decoded_table

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _decode_CFI_table(self):
        """ Decode the instructions contained in the given CFI entry and return
            a DecodedCallFrameTable.
        """
        if isinstance(self, CIE):
            # For a CIE, initialize cur_line to an "empty" line
            cie = self
            cur_line = dict(pc=0, cfa=CFARule(reg=None, offset=0))
            reg_order = []
        else: # FDE
            # For a FDE, we need to decode the attached CIE first, because its
            # decoded table is needed. Its "initial instructions" describe a
            # line that serves as the base (first) line in the FDE's table.
            cie = self.cie
            cie_decoded_table = cie.get_decoded()
            if len(cie_decoded_table.table) > 0:
                last_line_in_CIE = copy.copy(cie_decoded_table.table[-1])
                cur_line = copy.copy(last_line_in_CIE)
            else:
                cur_line = dict(cfa=CFARule(reg=None, offset=0))
            cur_line['pc'] = self['initial_location']
            reg_order = copy.copy(cie_decoded_table.reg_order)

        table = []

        # Keeps a stack for the use of DW_CFA_{remember|restore}_state
        # instructions.
        line_stack = []

        def _add_to_order(regnum):
            # DW_CFA_restore and others remove registers from cur_line,
            # but they stay in reg_order. Avoid duplicates.
            if regnum not in reg_order:
                reg_order.append(regnum)

        for instr in self.instructions:
            # Throughout this loop, cur_line is the current line. Some
            # instructions add it to the table, but most instructions just
            # update it without adding it to the table.

            name = instruction_name(instr.opcode)

            if name == 'DW_CFA_set_loc':
                table.append(copy.copy(cur_line))
                cur_line['pc'] = instr.args[0]
            elif name in (  'DW_CFA_advance_loc1', 'DW_CFA_advance_loc2',
                            'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
                table.append(copy.copy(cur_line))
                cur_line['pc'] += instr.args[0] * cie['code_alignment_factor']
            elif name == 'DW_CFA_def_cfa':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1])
            elif name == 'DW_CFA_def_cfa_sf':
                # Bugfix: per the DWARF spec (v5 section 6.4.2.2), the signed
                # factored operand of DW_CFA_def_cfa_sf is multiplied by
                # data_alignment_factor, not code_alignment_factor. This also
                # matches the textual rendering in descriptions.py.
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_def_cfa_register':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=cur_line['cfa'].offset)
            elif name == 'DW_CFA_def_cfa_offset':
                cur_line['cfa'] = CFARule(
                    reg=cur_line['cfa'].reg,
                    offset=instr.args[0])
            elif name == 'DW_CFA_def_cfa_expression':
                cur_line['cfa'] = CFARule(expr=instr.args[0])
            elif name == 'DW_CFA_undefined':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.UNDEFINED)
            elif name == 'DW_CFA_same_value':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.SAME_VALUE)
            elif name in (  'DW_CFA_offset', 'DW_CFA_offset_extended',
                            'DW_CFA_offset_extended_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name in ('DW_CFA_val_offset', 'DW_CFA_val_offset_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_register':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.REGISTER,
                    instr.args[1])
            elif name == 'DW_CFA_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.EXPRESSION,
                    instr.args[1])
            elif name == 'DW_CFA_val_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_EXPRESSION,
                    instr.args[1])
            elif name in ('DW_CFA_restore', 'DW_CFA_restore_extended'):
                _add_to_order(instr.args[0])
                dwarf_assert(
                    isinstance(self, FDE),
                    '%s instruction must be in a FDE' % name)
                # NOTE(review): `last_line_in_CIE` is only bound when the CIE
                # decoded to a non-empty table; a restore against an empty CIE
                # table would raise NameError here — confirm whether such
                # input can occur in practice.
                if instr.args[0] in last_line_in_CIE:
                    cur_line[instr.args[0]] = last_line_in_CIE[instr.args[0]]
                else:
                    cur_line.pop(instr.args[0], None)
            elif name == 'DW_CFA_remember_state':
                line_stack.append(copy.deepcopy(cur_line))
            elif name == 'DW_CFA_restore_state':
                # The restored line keeps the current program counter.
                pc = cur_line['pc']
                cur_line = line_stack.pop()
                cur_line['pc'] = pc

        # The current line is appended to the table after all instructions
        # have ended, if there were instructions.
        if cur_line['cfa'].reg is not None or len(cur_line) > 2:
            table.append(cur_line)

        return DecodedCallFrameTable(table=table, reg_order=reg_order)
|
||||
|
||||
|
||||
# A CIE and FDE have exactly the same functionality, except that a FDE has
|
||||
# a pointer to its CIE. The functionality was wholly encapsulated in CFIEntry,
|
||||
# so the CIE and FDE classes exists separately for identification (instead
|
||||
# of having an explicit "entry_type" field in CFIEntry).
|
||||
#
|
||||
class CIE(CFIEntry):
    # Exists only so entries can be identified via isinstance(); all
    # behavior lives in CFIEntry (see the comment above this class).
    pass
|
||||
|
||||
|
||||
class FDE(CFIEntry):
    """ A Frame Description Entry: a CFIEntry that additionally carries a
        pointer to its language-specific data area (LSDA), when present.
    """
    def __init__(self, header, structs, instructions, offset, augmentation_bytes=None, cie=None, lsda_pointer=None):
        super(FDE, self).__init__(
            header, structs, instructions, offset,
            augmentation_bytes=augmentation_bytes, cie=cie)
        # Address of the LSDA associated with this FDE, or None.
        self.lsda_pointer = lsda_pointer
|
||||
|
||||
|
||||
class ZERO(object):
    """ End marker for the sequence of CIE/FDE.

        This is specific to `.eh_frame` sections: this kind of entry does not
        exist in pure DWARF. `readelf` displays these as "ZERO terminator",
        hence the class name.
    """
    def __init__(self, offset):
        # Offset of this terminator from the beginning of the section.
        self.offset = offset
|
||||
|
||||
|
||||
class RegisterRule(object):
    """ Register rules are used to find registers in call frames. Each rule
        consists of a type (enumeration following DWARFv3 section 6.4.1)
        and an optional argument to augment the type.
    """
    # The full set of rule types from the DWARF call-frame table model.
    UNDEFINED = 'UNDEFINED'
    SAME_VALUE = 'SAME_VALUE'
    OFFSET = 'OFFSET'
    VAL_OFFSET = 'VAL_OFFSET'
    REGISTER = 'REGISTER'
    EXPRESSION = 'EXPRESSION'
    VAL_EXPRESSION = 'VAL_EXPRESSION'
    ARCHITECTURAL = 'ARCHITECTURAL'

    def __init__(self, type, arg=None):
        # `type` is one of the class-level constants above; `arg` augments the
        # rule (e.g. the factored offset for OFFSET/VAL_OFFSET rules).
        self.type = type
        self.arg = arg

    def __repr__(self):
        return 'RegisterRule(%s, %s)' % (self.type, self.arg)
|
||||
|
||||
|
||||
class CFARule(object):
    """ A CFA rule is used to compute the CFA for each location. It either
        consists of a register+offset, or a DWARF expression.
    """
    def __init__(self, reg=None, offset=None, expr=None):
        # Exactly one of the two forms is normally populated: (reg, offset)
        # for register-relative CFAs, or expr for expression-based ones.
        self.reg = reg
        self.offset = offset
        self.expr = expr

    def __repr__(self):
        return 'CFARule(reg=%s, offset=%s, expr=%s)' % (
            self.reg, self.offset, self.expr)
|
||||
|
||||
|
||||
# Represents the decoded CFI for an entry, which is just a large table,
|
||||
# according to DWARFv3 section 6.4.1
|
||||
#
|
||||
# DecodedCallFrameTable is a simple named tuple to group together the table
|
||||
# and the register appearance order.
|
||||
#
|
||||
# table:
|
||||
#
|
||||
# A list of dicts that represent "lines" in the decoded table. Each line has
|
||||
# some special dict entries: 'pc' for the location/program counter (LOC),
|
||||
# and 'cfa' for the CFARule to locate the CFA on that line.
|
||||
# The other entries are keyed by register numbers with RegisterRule values,
|
||||
# and describe the rules for these registers.
|
||||
#
|
||||
# reg_order:
|
||||
#
|
||||
# A list of register numbers that are described in the table by the order of
|
||||
# their appearance.
|
||||
#
|
||||
# Simple named tuple grouping the decoded table (list of line dicts keyed by
# 'pc', 'cfa' and register numbers) with the register appearance order; see
# the comment block above for the full description.
DecodedCallFrameTable = namedtuple('DecodedCallFrameTable',
                                   'table reg_order')
|
||||
|
||||
|
||||
#---------------- PRIVATE ----------------#

# Masks for the call-frame "primary" opcodes: the high 2 bits select the
# instruction, and the low 6 bits carry an embedded operand.
_PRIMARY_MASK = 0b11000000
_PRIMARY_ARG_MASK = 0b00111111

# This dictionary is filled by automatically scanning the constants module
# for DW_CFA_* instructions, and mapping their values to names. Since all
# names were imported from constants with `import *`, we look in globals()
_OPCODE_NAME_MAP = {}
for name in list(iterkeys(globals())):
    if name.startswith('DW_CFA'):
        _OPCODE_NAME_MAP[globals()[name]] = name
|
||||
@@ -1,226 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/compileunit.py
|
||||
#
|
||||
# DWARF compile unit
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from bisect import bisect_right
|
||||
from .die import DIE
|
||||
from ..common.utils import dwarf_assert
|
||||
|
||||
|
||||
class CompileUnit(object):
    """ A DWARF compilation unit (CU).

        A normal compilation unit typically represents the text and data
        contributed to an executable by a single relocatable object file.
        It may be derived from several source files,
        including pre-processed "include files"

        Serves as a container and context to DIEs that describe objects and code
        belonging to a compilation unit.

        CU header entries can be accessed as dict keys from this object, i.e.
           cu = CompileUnit(...)
           cu['version']  # version field of the CU header

        To get the top-level DIE describing the compilation unit, call the
        get_top_DIE method.
    """
    def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset):
        """ header:
                CU header for this compile unit

            dwarfinfo:
                The DWARFInfo context object which created this one

            structs:
                A DWARFStructs instance suitable for this compile unit

            cu_offset:
                Offset in the stream to the beginning of this CU (its header)

            cu_die_offset:
                Offset in the stream of the top DIE of this CU
        """
        self.dwarfinfo = dwarfinfo
        self.header = header
        self.structs = structs
        self.cu_offset = cu_offset
        self.cu_die_offset = cu_die_offset

        # The abbreviation table for this CU. Filled lazily when DIEs are
        # requested.
        self._abbrev_table = None

        # A list of DIEs belonging to this CU.
        # This list is lazily constructed as DIEs are iterated over.
        self._dielist = []
        # A list of file offsets, corresponding (by index) to the DIEs
        # in `self._dielist`. This list exists separately from
        # `self._dielist` to make it binary searchable, enabling the
        # DIE population strategy used in `iter_DIE_children`.
        # Like `self._dielist`, this list is lazily constructed
        # as DIEs are iterated over.
        self._diemap = []

    def dwarf_format(self):
        """ Get the DWARF format (32 or 64) for this CU
        """
        return self.structs.dwarf_format

    def get_abbrev_table(self):
        """ Get the abbreviation table (AbbrevTable object) for this CU
        """
        # Fetched once from the owning DWARFInfo and cached thereafter.
        if self._abbrev_table is None:
            self._abbrev_table = self.dwarfinfo.get_abbrev_table(
                self['debug_abbrev_offset'])
        return self._abbrev_table

    def get_top_DIE(self):
        """ Get the top DIE (which is either a DW_TAG_compile_unit or
            DW_TAG_partial_unit) of this CU
        """

        # Note that a top DIE always has minimal offset and is therefore
        # at the beginning of our lists, so no bisect is required.
        if len(self._diemap) > 0:
            return self._dielist[0]

        top = DIE(
                cu=self,
                stream=self.dwarfinfo.debug_info_sec.stream,
                offset=self.cu_die_offset)

        # Seed both caches with the top DIE at index 0.
        self._dielist.insert(0, top)
        self._diemap.insert(0, self.cu_die_offset)

        return top

    @property
    def size(self):
        # Total size of the CU in the stream: the recorded unit_length plus
        # the size of the initial-length field itself (4 or 12 bytes).
        return self['unit_length'] + self.structs.initial_length_field_size()

    def get_DIE_from_refaddr(self, refaddr):
        """ Obtain a DIE contained in this CU from a reference.

            refaddr:
                The offset into the .debug_info section, which must be
                contained in this CU or a DWARFError will be raised.

            When using a reference class attribute with a form that is
            relative to the compile unit, add the compile unit's
            .cu_offset before calling this function.
        """
        # All DIEs are after the cu header and within the unit
        dwarf_assert(
            self.cu_die_offset <= refaddr < self.cu_offset + self.size,
            'refaddr %s not in DIE range of CU %s' % (refaddr, self.cu_offset))

        return self._get_cached_DIE(refaddr)

    def iter_DIEs(self):
        """ Iterate over all the DIEs in the CU, in order of their appearance.
            Note that null DIEs will also be returned.
        """
        return self._iter_DIE_subtree(self.get_top_DIE())

    def iter_DIE_children(self, die):
        """ Given a DIE, yields either its children, without null DIE list
            terminator, or nothing, if that DIE has no children.

            The null DIE terminator is saved in that DIE when iteration ended.
        """
        if not die.has_children:
            return

        # `cur_offset` tracks the stream offset of the next DIE to yield
        # as we iterate over our children,
        cur_offset = die.offset + die.size

        while True:
            child = self._get_cached_DIE(cur_offset)

            child.set_parent(die)

            # A null DIE marks the end of this DIE's child list.
            if child.is_null():
                die._terminator = child
                return

            yield child

            if not child.has_children:
                cur_offset += child.size
            elif "DW_AT_sibling" in child.attributes:
                # DW_AT_sibling lets us skip directly past the child's
                # subtree (value is CU-relative).
                sibling = child.attributes["DW_AT_sibling"]
                cur_offset = sibling.value + self.cu_offset
            else:
                # If no DW_AT_sibling attribute is provided by the producer
                # then the whole child subtree must be parsed to find its next
                # sibling. There is one zero byte representing null DIE
                # terminating children list. It is used to locate child subtree
                # bounds.

                # If children are not parsed yet, this instruction will manage
                # to recursive call of this function which will result in
                # setting of `_terminator` attribute of the `child`.
                if child._terminator is None:
                    for _ in self.iter_DIE_children(child):
                        pass

                cur_offset = child._terminator.offset + child._terminator.size

    #------ PRIVATE ------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _iter_DIE_subtree(self, die):
        """ Given a DIE, this yields it with its subtree including null DIEs
            (child list terminators).
        """
        yield die
        if die.has_children:
            for c in die.iter_children():
                for d in self._iter_DIE_subtree(c):
                    yield d
            yield die._terminator

    def _get_cached_DIE(self, offset):
        """ Given a DIE offset, look it up in the cache. If not present,
            parse the DIE and insert it into the cache.

            offset:
                The offset of the DIE in the debug_info section to retrieve.

            The stream reference is copied from the top DIE. The top die will
            also be parsed and cached if needed.

            See also get_DIE_from_refaddr(self, refaddr).
        """
        # The top die must be in the cache if any DIE is in the cache.
        # The stream is the same for all DIEs in this CU, so populate
        # the top DIE and obtain a reference to its stream.
        top_die_stream = self.get_top_DIE().stream

        # `offset` is the offset in the stream of the DIE we want to return.
        # The map is maintained as a parallel array to the list. We call
        # bisect each time to ensure new DIEs are inserted in the correct
        # order within both `self._dielist` and `self._diemap`.
        i = bisect_right(self._diemap, offset)

        # Note that `self._diemap` cannot be empty because the top DIE
        # was inserted by the call to .get_top_DIE(). Also it has the minimal
        # offset, so the bisect_right insert point will always be at least 1.
        if offset == self._diemap[i - 1]:
            die = self._dielist[i - 1]
        else:
            die = DIE(cu=self, stream=top_die_stream, offset=offset)
            self._dielist.insert(i, die)
            self._diemap.insert(i, offset)

        return die
|
||||
@@ -1,224 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/constants.py
|
||||
#
|
||||
# Constants and flags
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# Inline codes (DW_AT_inline attribute values)
#
DW_INL_not_inlined = 0
DW_INL_inlined = 1
DW_INL_declared_not_inlined = 2
DW_INL_declared_inlined = 3


# Source languages (DW_AT_language attribute values); values >= 0x8000 are
# vendor extensions.
#
DW_LANG_C89 = 0x0001
DW_LANG_C = 0x0002
DW_LANG_Ada83 = 0x0003
DW_LANG_C_plus_plus = 0x0004
DW_LANG_Cobol74 = 0x0005
DW_LANG_Cobol85 = 0x0006
DW_LANG_Fortran77 = 0x0007
DW_LANG_Fortran90 = 0x0008
DW_LANG_Pascal83 = 0x0009
DW_LANG_Modula2 = 0x000a
DW_LANG_Java = 0x000b
DW_LANG_C99 = 0x000c
DW_LANG_Ada95 = 0x000d
DW_LANG_Fortran95 = 0x000e
DW_LANG_PLI = 0x000f
DW_LANG_ObjC = 0x0010
DW_LANG_ObjC_plus_plus = 0x0011
DW_LANG_UPC = 0x0012
DW_LANG_D = 0x0013
DW_LANG_Python = 0x0014
DW_LANG_OpenCL = 0x0015
DW_LANG_Go = 0x0016
DW_LANG_Modula3 = 0x0017
DW_LANG_Haskell = 0x0018
DW_LANG_C_plus_plus_03 = 0x0019
DW_LANG_C_plus_plus_11 = 0x001a
DW_LANG_OCaml = 0x001b
DW_LANG_Rust = 0x001c
DW_LANG_C11 = 0x001d
DW_LANG_Swift = 0x001e
DW_LANG_Julia = 0x001f
DW_LANG_Dylan = 0x0020
DW_LANG_C_plus_plus_14 = 0x0021
DW_LANG_Fortran03 = 0x0022
DW_LANG_Fortran08 = 0x0023
DW_LANG_RenderScript = 0x0024
DW_LANG_BLISS = 0x0025
DW_LANG_Mips_Assembler = 0x8001
DW_LANG_Upc = 0x8765
DW_LANG_HP_Bliss = 0x8003
DW_LANG_HP_Basic91 = 0x8004
DW_LANG_HP_Pascal91 = 0x8005
DW_LANG_HP_IMacro = 0x8006
DW_LANG_HP_Assembler = 0x8007
DW_LANG_GOOGLE_RenderScript = 0x8e57
DW_LANG_BORLAND_Delphi = 0xb000


# Encoding (DW_AT_encoding attribute values); 0x80-0xff is the user range,
# within which the HP extensions below fall.
#
DW_ATE_void = 0x0
DW_ATE_address = 0x1
DW_ATE_boolean = 0x2
DW_ATE_complex_float = 0x3
DW_ATE_float = 0x4
DW_ATE_signed = 0x5
DW_ATE_signed_char = 0x6
DW_ATE_unsigned = 0x7
DW_ATE_unsigned_char = 0x8
DW_ATE_imaginary_float = 0x9
DW_ATE_packed_decimal = 0xa
DW_ATE_numeric_string = 0xb
DW_ATE_edited = 0xc
DW_ATE_signed_fixed = 0xd
DW_ATE_unsigned_fixed = 0xe
DW_ATE_decimal_float = 0xf
DW_ATE_UTF = 0x10
DW_ATE_UCS = 0x11
DW_ATE_ASCII = 0x12
DW_ATE_lo_user = 0x80
DW_ATE_hi_user = 0xff
DW_ATE_HP_float80 = 0x80
DW_ATE_HP_complex_float80 = 0x81
DW_ATE_HP_float128 = 0x82
DW_ATE_HP_complex_float128 = 0x83
DW_ATE_HP_floathpintel = 0x84
DW_ATE_HP_imaginary_float80 = 0x85
DW_ATE_HP_imaginary_float128 = 0x86


# Access (DW_AT_accessibility attribute values)
#
DW_ACCESS_public = 1
DW_ACCESS_protected = 2
DW_ACCESS_private = 3


# Visibility (DW_AT_visibility attribute values)
#
DW_VIS_local = 1
DW_VIS_exported = 2
DW_VIS_qualified = 3


# Virtuality (DW_AT_virtuality attribute values)
#
DW_VIRTUALITY_none = 0
DW_VIRTUALITY_virtual = 1
DW_VIRTUALITY_pure_virtual = 2


# ID case (DW_AT_identifier_case attribute values)
#
DW_ID_case_sensitive = 0
DW_ID_up_case = 1
DW_ID_down_case = 2
DW_ID_case_insensitive = 3


# Calling convention (DW_AT_calling_convention attribute values)
#
DW_CC_normal = 0x1
DW_CC_program = 0x2
DW_CC_nocall = 0x3


# Ordering (DW_AT_ordering attribute values for arrays)
#
DW_ORD_row_major = 0
DW_ORD_col_major = 1


# Line program opcodes: DW_LNS_* are standard opcodes, DW_LNE_* are
# extended opcodes.
#
DW_LNS_copy = 0x01
DW_LNS_advance_pc = 0x02
DW_LNS_advance_line = 0x03
DW_LNS_set_file = 0x04
DW_LNS_set_column = 0x05
DW_LNS_negate_stmt = 0x06
DW_LNS_set_basic_block = 0x07
DW_LNS_const_add_pc = 0x08
DW_LNS_fixed_advance_pc = 0x09
DW_LNS_set_prologue_end = 0x0a
DW_LNS_set_epilogue_begin = 0x0b
DW_LNS_set_isa = 0x0c
DW_LNE_end_sequence = 0x01
DW_LNE_set_address = 0x02
DW_LNE_define_file = 0x03
DW_LNE_set_discriminator = 0x04
DW_LNE_lo_user = 0x80
DW_LNE_hi_user = 0xff

# Line program header content types (DWARFv5 directory/file entry formats)
#
DW_LNCT_path = 0x01
DW_LNCT_directory_index = 0x02
DW_LNCT_timestamp = 0x03
DW_LNCT_size = 0x04
DW_LNCT_MD5 = 0x05
DW_LNCT_lo_user = 0x2000
DW_LNCT_hi_user = 0x3fff

# Call frame instructions
#
# Note that the first 3 instructions have the so-called "primary opcode"
# (as described in DWARFv3 7.23), so only their highest 2 bits take part
# in the opcode decoding. They are kept as constants with the low bits masked
# out, and the callframe module knows how to handle this.
# The other instructions use an "extended opcode" encoded just in the low 6
# bits, with the high 2 bits, so these constants are exactly as they would
# appear in an actual file.
#
DW_CFA_advance_loc = 0b01000000
DW_CFA_offset = 0b10000000
DW_CFA_restore = 0b11000000
DW_CFA_nop = 0x00
DW_CFA_set_loc = 0x01
DW_CFA_advance_loc1 = 0x02
DW_CFA_advance_loc2 = 0x03
DW_CFA_advance_loc4 = 0x04
DW_CFA_offset_extended = 0x05
DW_CFA_restore_extended = 0x06
DW_CFA_undefined = 0x07
DW_CFA_same_value = 0x08
DW_CFA_register = 0x09
DW_CFA_remember_state = 0x0a
DW_CFA_restore_state = 0x0b
DW_CFA_def_cfa = 0x0c
DW_CFA_def_cfa_register = 0x0d
DW_CFA_def_cfa_offset = 0x0e
DW_CFA_def_cfa_expression = 0x0f
DW_CFA_expression = 0x10
DW_CFA_offset_extended_sf = 0x11
DW_CFA_def_cfa_sf = 0x12
DW_CFA_def_cfa_offset_sf = 0x13
DW_CFA_val_offset = 0x14
DW_CFA_val_offset_sf = 0x15
DW_CFA_val_expression = 0x16
DW_CFA_GNU_args_size = 0x2e


# Compilation unit types
#
# DWARFv5 introduces the "unit_type" field to each CU header, allowing
# individual CUs to indicate whether they're complete, partial, and so forth.
# See DWARFv5 3.1 ("Unit Entries") and 7.5.1 ("Unit Headers").
DW_UT_compile = 0x01
DW_UT_type = 0x02
DW_UT_partial = 0x03
DW_UT_skeleton = 0x04
DW_UT_split_compile = 0x05
DW_UT_split_type = 0x06
DW_UT_lo_user = 0x80
DW_UT_hi_user = 0xff
|
||||
@@ -1,649 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/descriptions.py
|
||||
#
|
||||
# Textual descriptions of the various values and enums of DWARF
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from collections import defaultdict
|
||||
|
||||
from .constants import *
|
||||
from .dwarf_expr import DWARFExprParser
|
||||
from .die import DIE
|
||||
from ..common.utils import preserve_stream_pos, dwarf_assert
|
||||
from ..common.py3compat import bytes2str
|
||||
from .callframe import instruction_name, CIE, FDE
|
||||
|
||||
|
||||
def set_global_machine_arch(machine_arch):
    """ Set the architecture used by default for register-name lookups
        (see describe_reg_name). Affects module-global state.
    """
    global _MACHINE_ARCH
    _MACHINE_ARCH = machine_arch
|
||||
|
||||
|
||||
def describe_attr_value(attr, die, section_offset):
    """ Given an attribute attr, return the textual representation of its
        value, suitable for tools like readelf.

        To cover all cases, this function needs some extra arguments:

        die: the DIE this attribute was extracted from
        section_offset: offset in the stream of the section the DIE belongs to
    """
    # Form-specific rendering of the value itself.
    val_description = _ATTR_DESCRIPTION_MAP[attr.form](
        attr, die, section_offset)

    # For some attributes we can display further information
    extra_info = _EXTRA_INFO_DESCRIPTION_MAP[attr.name](
        attr, die, section_offset)

    return str(val_description) + '\t' + extra_info
|
||||
|
||||
|
||||
def describe_CFI_instructions(entry):
    """ Given a CFI entry (CIE or FDE), return the textual description of its
        instructions, one per line.
    """
    def _assert_FDE_instruction(instr):
        # Instructions that advance the location only make sense relative to
        # a FDE's initial_location, so reject them inside a CIE.
        dwarf_assert(
            isinstance(entry, FDE),
            'Unexpected instruction "%s" for a CIE' % instr)

    def _full_reg_name(regnum):
        # "r<num> (<name>)" when the register name is known for the current
        # machine arch, plain "r<num>" otherwise.
        regname = describe_reg_name(regnum, _MACHINE_ARCH, False)
        if regname:
            return 'r%s (%s)' % (regnum, regname)
        else:
            return 'r%s' % regnum

    if isinstance(entry, CIE):
        cie = entry
    else: # FDE
        cie = entry.cie
        pc = entry['initial_location']

    s = ''
    for instr in entry.instructions:
        name = instruction_name(instr.opcode)

        if name in ('DW_CFA_offset',
                    'DW_CFA_offset_extended', 'DW_CFA_offset_extended_sf',
                    'DW_CFA_val_offset', 'DW_CFA_val_offset_sf'):
            s += ' %s: %s at cfa%+d\n' % (
                name, _full_reg_name(instr.args[0]),
                instr.args[1] * cie['data_alignment_factor'])
        elif name in (  'DW_CFA_restore', 'DW_CFA_restore_extended',
                        'DW_CFA_undefined', 'DW_CFA_same_value',
                        'DW_CFA_def_cfa_register'):
            s += ' %s: %s\n' % (name, _full_reg_name(instr.args[0]))
        elif name == 'DW_CFA_register':
            # Bugfix: this was the only branch missing the trailing newline,
            # which glued the next instruction's text onto the same line.
            s += ' %s: %s in %s\n' % (
                name, _full_reg_name(instr.args[0]),
                _full_reg_name(instr.args[1]))
        elif name == 'DW_CFA_set_loc':
            pc = instr.args[0]
            s += ' %s: %08x\n' % (name, pc)
        elif name in (  'DW_CFA_advance_loc1', 'DW_CFA_advance_loc2',
                        'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
            _assert_FDE_instruction(instr)
            factored_offset = instr.args[0] * cie['code_alignment_factor']
            s += ' %s: %s to %08x\n' % (
                name, factored_offset, factored_offset + pc)
            pc += factored_offset
        elif name in (  'DW_CFA_remember_state', 'DW_CFA_restore_state',
                        'DW_CFA_nop'):
            s += ' %s\n' % name
        elif name == 'DW_CFA_def_cfa':
            s += ' %s: %s ofs %s\n' % (
                name, _full_reg_name(instr.args[0]), instr.args[1])
        elif name == 'DW_CFA_def_cfa_sf':
            s += ' %s: %s ofs %s\n' % (
                name, _full_reg_name(instr.args[0]),
                instr.args[1] * cie['data_alignment_factor'])
        elif name in ('DW_CFA_def_cfa_offset', 'DW_CFA_GNU_args_size'):
            s += ' %s: %s\n' % (name, instr.args[0])
        elif name == 'DW_CFA_def_cfa_expression':
            expr_dumper = ExprDumper(entry.structs)
            # readelf output is missing a colon for DW_CFA_def_cfa_expression
            s += ' %s (%s)\n' % (name, expr_dumper.dump_expr(instr.args[0]))
        elif name == 'DW_CFA_expression':
            expr_dumper = ExprDumper(entry.structs)
            s += ' %s: %s (%s)\n' % (
                name, _full_reg_name(instr.args[0]),
                expr_dumper.dump_expr(instr.args[1]))
        else:
            # Unknown/unhandled instruction: make it visible rather than
            # silently skipping it.
            s += ' %s: <??>\n' % name

    return s
|
||||
|
||||
|
||||
def describe_CFI_register_rule(rule):
    """ Textual description of a RegisterRule from a decoded CFI table:
        the rule-type prefix plus an optional offset or register suffix.
    """
    suffix = ''
    if rule.type in ('OFFSET', 'VAL_OFFSET'):
        suffix = '%+d' % rule.arg
    elif rule.type == 'REGISTER':
        suffix = describe_reg_name(rule.arg)
    return _DESCR_CFI_REGISTER_RULE_TYPE[rule.type] + suffix
|
||||
|
||||
|
||||
def describe_CFI_CFA_rule(rule):
    """ Textual description of a CFARule: 'exp' for an expression-based
        rule, '<reg><signed offset>' otherwise.
    """
    if rule.expr:
        return 'exp'
    return '%s%+d' % (describe_reg_name(rule.reg), rule.offset)
|
||||
|
||||
|
||||
def describe_DWARF_expr(expr, structs, cu_offset=None):
    """ Textual description of a DWARF expression encoded in 'expr'.
        structs should come from the entity encompassing the expression - it's
        needed to be able to parse it correctly.
    """
    # Since this function can be called a lot, initializing a fresh new
    # ExprDumper per call is expensive. So a rudimentary caching scheme is in
    # place to create only one such dumper per instance of structs.
    cache_key = id(structs)
    dumper = _DWARF_EXPR_DUMPER_CACHE.get(cache_key)
    if dumper is None:
        dumper = _DWARF_EXPR_DUMPER_CACHE[cache_key] = ExprDumper(structs)
    return '(' + dumper.dump_expr(expr, cu_offset) + ')'
|
||||
|
||||
|
||||
def describe_reg_name(regnum, machine_arch=None, default=True):
    """ Provide a textual description for a register name, given its serial
        number. The number is expected to be valid.
    """
    # Fall back to the module-global architecture (set_global_machine_arch).
    if machine_arch is None:
        machine_arch = _MACHINE_ARCH

    if machine_arch == 'x86':
        return _REG_NAMES_x86[regnum]
    if machine_arch == 'x64':
        return _REG_NAMES_x64[regnum]
    if machine_arch == 'AArch64':
        return _REG_NAMES_AArch64[regnum]
    # Unknown architecture: generic "r<num>" name, or None when the caller
    # asked for no default.
    return 'r%s' % regnum if default else None
|
||||
|
||||
def describe_form_class(form):
    """For a given form name, determine its value class.

    For example, given 'DW_FORM_data1' returns 'constant'.

    For some forms, like DW_FORM_indirect and DW_FORM_sec_offset, the class is
    not hard-coded and extra information is required. For these, None is
    returned.
    """
    # Plain dict lookup: an unknown form name raises KeyError.
    return _FORM_CLASS[form]
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# The machine architecture. Set globally via set_global_machine_arch;
# consumed by describe_reg_name and describe_CFI_instructions.
#
_MACHINE_ARCH = None
|
||||
|
||||
|
||||
def _describe_attr_ref(attr, die, section_offset):
    # CU-relative reference: displayed as the absolute .debug_info offset.
    return '<0x%x>' % (attr.value + die.cu.cu_offset)
|
||||
|
||||
def _describe_attr_value_passthrough(attr, die, section_offset):
    # The raw attribute value is already presentable; return it unchanged.
    return attr.value
|
||||
|
||||
def _describe_attr_hex(attr, die, section_offset):
    # Plain lowercase hex rendering.
    return '0x%x' % attr.value
|
||||
|
||||
def _describe_attr_hex_addr(attr, die, section_offset):
    # Hex rendering wrapped in angle brackets (address-style display).
    return '<0x%x>' % attr.value
|
||||
|
||||
def _describe_attr_split_64bit(attr, die, section_offset):
    # Render a 64-bit value as two 32-bit hex words, low word first
    # (matches readelf's display of 8-byte references).
    low_word = attr.value & 0xFFFFFFFF
    high_word = (attr.value >> 32) & 0xFFFFFFFF
    return '0x%x 0x%x' % (low_word, high_word)
|
||||
|
||||
def _describe_attr_strp(attr, die, section_offset):
    # raw_value is the .debug_str offset; value is the resolved string.
    return '(indirect string, offset: 0x%x): %s' % (
        attr.raw_value, bytes2str(attr.value))
|
||||
|
||||
def _describe_attr_string(attr, die, section_offset):
    # Inline string value, decoded to text.
    return bytes2str(attr.value)
|
||||
|
||||
def _describe_attr_debool(attr, die, section_offset):
    """ To be consistent with readelf, generate 1 for True flags, 0 for False
        flags.
    """
    return '1' if attr.value else '0'
|
||||
|
||||
def _describe_attr_present(attr, die, section_offset):
|
||||
""" Some forms may simply mean that an attribute is present,
|
||||
without providing any value.
|
||||
"""
|
||||
return '1'
|
||||
|
||||
def _describe_attr_block(attr, die, section_offset):
|
||||
s = '%s byte block: ' % len(attr.value)
|
||||
s += ' '.join('%x' % item for item in attr.value) + ' '
|
||||
return s
|
||||
|
||||
|
||||
# Maps a DW_FORM_* name to the renderer used to display an attribute value of
# that form. Unlisted forms fall back to showing the parsed value unchanged.
_ATTR_DESCRIPTION_MAP = defaultdict(
    lambda: _describe_attr_value_passthrough, # default_factory

    DW_FORM_ref1=_describe_attr_ref,
    DW_FORM_ref2=_describe_attr_ref,
    DW_FORM_ref4=_describe_attr_ref,
    DW_FORM_ref8=_describe_attr_split_64bit,
    DW_FORM_ref_udata=_describe_attr_ref,
    DW_FORM_ref_addr=_describe_attr_hex_addr,
    DW_FORM_data4=_describe_attr_hex,
    DW_FORM_data8=_describe_attr_hex,
    DW_FORM_addr=_describe_attr_hex,
    DW_FORM_sec_offset=_describe_attr_hex,
    DW_FORM_flag=_describe_attr_debool,
    DW_FORM_data1=_describe_attr_value_passthrough,
    DW_FORM_data2=_describe_attr_value_passthrough,
    DW_FORM_sdata=_describe_attr_value_passthrough,
    DW_FORM_udata=_describe_attr_value_passthrough,
    DW_FORM_string=_describe_attr_string,
    DW_FORM_strp=_describe_attr_strp,
    DW_FORM_block1=_describe_attr_block,
    DW_FORM_block2=_describe_attr_block,
    DW_FORM_block4=_describe_attr_block,
    DW_FORM_block=_describe_attr_block,
    DW_FORM_flag_present=_describe_attr_present,
    DW_FORM_exprloc=_describe_attr_block,
    DW_FORM_ref_sig8=_describe_attr_ref,
)

# DW_FORM_* name -> DWARF value class. Forms whose class cannot be determined
# without context (DW_FORM_indirect, DW_FORM_sec_offset) map to None; see
# describe_form_class.
_FORM_CLASS = dict(
    DW_FORM_addr='address',
    DW_FORM_block2='block',
    DW_FORM_block4='block',
    DW_FORM_data2='constant',
    DW_FORM_data4='constant',
    DW_FORM_data8='constant',
    DW_FORM_string='string',
    DW_FORM_block='block',
    DW_FORM_block1='block',
    DW_FORM_data1='constant',
    DW_FORM_flag='flag',
    DW_FORM_sdata='constant',
    DW_FORM_strp='string',
    DW_FORM_udata='constant',
    DW_FORM_ref_addr='reference',
    DW_FORM_ref1='reference',
    DW_FORM_ref2='reference',
    DW_FORM_ref4='reference',
    DW_FORM_ref8='reference',
    DW_FORM_ref_udata='reference',
    DW_FORM_indirect=None,
    DW_FORM_sec_offset=None,
    DW_FORM_exprloc='exprloc',
    DW_FORM_flag_present='flag',
    DW_FORM_ref_sig8='reference',
)

# Human-readable renderings for DW_AT_inline values.
_DESCR_DW_INL = {
    DW_INL_not_inlined: '(not inlined)',
    DW_INL_inlined: '(inlined)',
    DW_INL_declared_not_inlined: '(declared as inline but ignored)',
    DW_INL_declared_inlined: '(declared as inline and inlined)',
}

# Human-readable renderings for DW_AT_language values.
_DESCR_DW_LANG = {
    DW_LANG_C89: '(ANSI C)',
    DW_LANG_C: '(non-ANSI C)',
    DW_LANG_Ada83: '(Ada)',
    DW_LANG_C_plus_plus: '(C++)',
    DW_LANG_Cobol74: '(Cobol 74)',
    DW_LANG_Cobol85: '(Cobol 85)',
    DW_LANG_Fortran77: '(FORTRAN 77)',
    DW_LANG_Fortran90: '(Fortran 90)',
    DW_LANG_Pascal83: '(ANSI Pascal)',
    DW_LANG_Modula2: '(Modula 2)',
    DW_LANG_Java: '(Java)',
    DW_LANG_C99: '(ANSI C99)',
    DW_LANG_Ada95: '(ADA 95)',
    DW_LANG_Fortran95: '(Fortran 95)',
    DW_LANG_PLI: '(PLI)',
    DW_LANG_ObjC: '(Objective C)',
    DW_LANG_ObjC_plus_plus: '(Objective C++)',
    DW_LANG_UPC: '(Unified Parallel C)',
    DW_LANG_D: '(D)',
    DW_LANG_Python: '(Python)',
    DW_LANG_Mips_Assembler: '(MIPS assembler)',
    DW_LANG_HP_Bliss: '(HP Bliss)',
    DW_LANG_HP_Basic91: '(HP Basic 91)',
    DW_LANG_HP_Pascal91: '(HP Pascal 91)',
    DW_LANG_HP_IMacro: '(HP IMacro)',
    DW_LANG_HP_Assembler: '(HP assembler)',
}

# Human-readable renderings for DW_AT_encoding (base type encoding) values.
_DESCR_DW_ATE = {
    DW_ATE_void: '(void)',
    DW_ATE_address: '(machine address)',
    DW_ATE_boolean: '(boolean)',
    DW_ATE_complex_float: '(complex float)',
    DW_ATE_float: '(float)',
    DW_ATE_signed: '(signed)',
    DW_ATE_signed_char: '(signed char)',
    DW_ATE_unsigned: '(unsigned)',
    DW_ATE_unsigned_char: '(unsigned char)',
    DW_ATE_imaginary_float: '(imaginary float)',
    DW_ATE_decimal_float: '(decimal float)',
    DW_ATE_packed_decimal: '(packed_decimal)',
    DW_ATE_numeric_string: '(numeric_string)',
    DW_ATE_edited: '(edited)',
    DW_ATE_signed_fixed: '(signed_fixed)',
    DW_ATE_unsigned_fixed: '(unsigned_fixed)',
    DW_ATE_UTF: '(unicode string)',
    DW_ATE_HP_float80: '(HP_float80)',
    DW_ATE_HP_complex_float80: '(HP_complex_float80)',
    DW_ATE_HP_float128: '(HP_float128)',
    DW_ATE_HP_complex_float128: '(HP_complex_float128)',
    DW_ATE_HP_floathpintel: '(HP_floathpintel)',
    DW_ATE_HP_imaginary_float80: '(HP_imaginary_float80)',
    DW_ATE_HP_imaginary_float128: '(HP_imaginary_float128)',
}

# Human-readable renderings for DW_AT_accessibility values.
_DESCR_DW_ACCESS = {
    DW_ACCESS_public: '(public)',
    DW_ACCESS_protected: '(protected)',
    DW_ACCESS_private: '(private)',
}

# Human-readable renderings for DW_AT_visibility values.
_DESCR_DW_VIS = {
    DW_VIS_local: '(local)',
    DW_VIS_exported: '(exported)',
    DW_VIS_qualified: '(qualified)',
}

# Human-readable renderings for DW_AT_virtuality values.
_DESCR_DW_VIRTUALITY = {
    DW_VIRTUALITY_none: '(none)',
    DW_VIRTUALITY_virtual: '(virtual)',
    DW_VIRTUALITY_pure_virtual: '(pure virtual)',
}

# Human-readable renderings for DW_AT_identifier_case values.
_DESCR_DW_ID_CASE = {
    DW_ID_case_sensitive: '(case_sensitive)',
    DW_ID_up_case: '(up_case)',
    DW_ID_down_case: '(down_case)',
    DW_ID_case_insensitive: '(case_insensitive)',
}

# Human-readable renderings for DW_AT_calling_convention values.
_DESCR_DW_CC = {
    DW_CC_normal: '(normal)',
    DW_CC_program: '(program)',
    DW_CC_nocall: '(nocall)',
}

# Human-readable renderings for DW_AT_ordering (array ordering) values.
_DESCR_DW_ORD = {
    DW_ORD_row_major: '(row major)',
    DW_ORD_col_major: '(column major)',
}

# Short codes for CFI register rule types (as shown in CFI table dumps).
_DESCR_CFI_REGISTER_RULE_TYPE = dict(
    UNDEFINED='u',
    SAME_VALUE='s',
    OFFSET='c',
    VAL_OFFSET='v',
    REGISTER='',
    EXPRESSION='exp',
    VAL_EXPRESSION='vexp',
    ARCHITECTURAL='a',
)
|
||||
|
||||
def _make_extra_mapper(mapping, default, default_interpolate_value=False):
|
||||
""" Create a mapping function from attribute parameters to an extra
|
||||
value that should be displayed.
|
||||
"""
|
||||
def mapper(attr, die, section_offset):
|
||||
if default_interpolate_value:
|
||||
d = default % attr.value
|
||||
else:
|
||||
d = default
|
||||
return mapping.get(attr.value, d)
|
||||
return mapper
|
||||
|
||||
|
||||
def _make_extra_string(s=''):
|
||||
""" Create an extra function that just returns a constant string.
|
||||
"""
|
||||
def extra(attr, die, section_offset):
|
||||
return s
|
||||
return extra
|
||||
|
||||
|
||||
# Cache of ExprDumper instances so expression dumps don't rebuild the dumper
# each time. NOTE(review): populated by code outside this chunk - presumably
# keyed by the structs the dumper was created with; confirm at the call site.
_DWARF_EXPR_DUMPER_CACHE = {}
|
||||
|
||||
def _location_list_extra(attr, die, section_offset):
|
||||
# According to section 2.6 of the DWARF spec v3, class loclistptr means
|
||||
# a location list, and class block means a location expression.
|
||||
# DW_FORM_sec_offset is new in DWARFv4 as a section offset.
|
||||
if attr.form in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'):
|
||||
return '(location list)'
|
||||
else:
|
||||
return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset)
|
||||
|
||||
|
||||
def _data_member_location_extra(attr, die, section_offset):
|
||||
# According to section 5.5.6 of the DWARF spec v4, a data member location
|
||||
# can be an integer offset, or a location description.
|
||||
#
|
||||
if attr.form in ('DW_FORM_data1', 'DW_FORM_data2',
|
||||
'DW_FORM_data4', 'DW_FORM_data8'):
|
||||
return '' # No extra description needed
|
||||
elif attr.form == 'DW_FORM_sdata':
|
||||
return str(attr.value)
|
||||
else:
|
||||
return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset)
|
||||
|
||||
|
||||
def _import_extra(attr, die, section_offset):
    """ Extra description for DW_AT_import: the abbreviation number and tag of
        the DIE the attribute points to, or '[unknown]' if no CU spans the
        referred offset.
    """
    # For DW_AT_import the value points to a DIE (that can be either in the
    # current DIE's CU or in another CU, depending on the FORM). The extra
    # information for it is the abbreviation number in this DIE and its tag.
    if attr.form == 'DW_FORM_ref_addr':
        # Absolute offset value
        ref_die_offset = section_offset + attr.value
    else:
        # Relative offset to the current DIE's CU
        ref_die_offset = attr.value + die.cu.cu_offset

    # Now find the CU this DIE belongs to (since we have to find its abbrev
    # table). This is done by linearly scanning through all CUs, looking for
    # one spanning an address space containing the referred DIE's offset.
    for cu in die.dwarfinfo.iter_CUs():
        if cu['unit_length'] + cu.cu_offset > ref_die_offset >= cu.cu_offset:
            # Once we have the CU, we can actually parse this DIE from the
            # stream. The stream position is saved/restored so parsing the
            # referred DIE doesn't disturb the caller's read position.
            with preserve_stream_pos(die.stream):
                ref_die = DIE(cu, die.stream, ref_die_offset)
            return '[Abbrev Number: %s (%s)]' % (
                ref_die.abbrev_code, ref_die.tag)

    return '[unknown]'
|
||||
|
||||
|
||||
# Maps a DW_AT_* attribute name to a function producing the "extra"
# descriptive text appended after the raw value (readelf style). Attributes
# not listed here get no extra text.
_EXTRA_INFO_DESCRIPTION_MAP = defaultdict(
    lambda: _make_extra_string(''), # default_factory

    # NOTE(review): the fallback string below lacks a closing ')' -
    # presumably to mirror readelf's own output; confirm before changing.
    DW_AT_inline=_make_extra_mapper(
        _DESCR_DW_INL, '(Unknown inline attribute value: %x',
        default_interpolate_value=True),
    DW_AT_language=_make_extra_mapper(
        _DESCR_DW_LANG, '(Unknown: %x)', default_interpolate_value=True),
    DW_AT_encoding=_make_extra_mapper(_DESCR_DW_ATE, '(unknown type)'),
    DW_AT_accessibility=_make_extra_mapper(
        _DESCR_DW_ACCESS, '(unknown accessibility)'),
    DW_AT_visibility=_make_extra_mapper(
        _DESCR_DW_VIS, '(unknown visibility)'),
    DW_AT_virtuality=_make_extra_mapper(
        _DESCR_DW_VIRTUALITY, '(unknown virtuality)'),
    DW_AT_identifier_case=_make_extra_mapper(
        _DESCR_DW_ID_CASE, '(unknown case)'),
    DW_AT_calling_convention=_make_extra_mapper(
        _DESCR_DW_CC, '(unknown convention)'),
    DW_AT_ordering=_make_extra_mapper(
        _DESCR_DW_ORD, '(undefined)'),
    # Location-expression / location-list attributes all share one renderer.
    DW_AT_frame_base=_location_list_extra,
    DW_AT_location=_location_list_extra,
    DW_AT_string_length=_location_list_extra,
    DW_AT_return_addr=_location_list_extra,
    DW_AT_data_member_location=_data_member_location_extra,
    DW_AT_vtable_elem_location=_location_list_extra,
    DW_AT_segment=_location_list_extra,
    DW_AT_static_link=_location_list_extra,
    DW_AT_use_location=_location_list_extra,
    DW_AT_allocated=_location_list_extra,
    DW_AT_associated=_location_list_extra,
    DW_AT_data_location=_location_list_extra,
    DW_AT_stride=_location_list_extra,
    DW_AT_import=_import_extra,
    DW_AT_GNU_call_site_value=_location_list_extra,
    DW_AT_GNU_call_site_data_value=_location_list_extra,
    DW_AT_GNU_call_site_target=_location_list_extra,
    DW_AT_GNU_call_site_target_clobbered=_location_list_extra,
)
|
||||
|
||||
# DWARF register number -> register name tables, indexed by regnum.
# '<none>' marks numbers with no named register in the ABI mapping.
# 8 in a line, for easier counting
_REG_NAMES_x86 = [
    'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi',
    'eip', 'eflags', '<none>', 'st0', 'st1', 'st2', 'st3', 'st4',
    'st5', 'st6', 'st7', '<none>', '<none>', 'xmm0', 'xmm1', 'xmm2',
    'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'mm0', 'mm1', 'mm2',
    'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'fcw', 'fsw', 'mxcsr',
    'es', 'cs', 'ss', 'ds', 'fs', 'gs', '<none>', '<none>', 'tr', 'ldtr'
]

_REG_NAMES_x64 = [
    'rax', 'rdx', 'rcx', 'rbx', 'rsi', 'rdi', 'rbp', 'rsp',
    'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
    'rip', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6',
    'xmm7', 'xmm8', 'xmm9', 'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14',
    'xmm15', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6',
    'st7', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
    'mm7', 'rflags', 'es', 'cs', 'ss', 'ds', 'fs', 'gs',
    '<none>', '<none>', 'fs.base', 'gs.base', '<none>', '<none>', 'tr', 'ldtr',
    'mxcsr', 'fcw', 'fsw'
]

# https://developer.arm.com/documentation/ihi0057/e/?lang=en#dwarf-register-names
_REG_NAMES_AArch64 = [
    'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7',
    'x8', 'x9', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15',
    'x16', 'x17', 'x18', 'x19', 'x20', 'x21', 'x22', 'x23',
    'x24', 'x25', 'x26', 'x27', 'x28', 'x29', 'x30', 'sp',
    '<none>', 'ELR_mode', 'RA_SIGN_STATE', '<none>', '<none>', '<none>', '<none>', '<none>',
    '<none>', '<none>', '<none>', '<none>', '<none>', '<none>', 'VG', 'FFR',
    'p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7',
    'p8', 'p9', 'p10', 'p11', 'p12', 'p13', 'p14', 'p15',
    'v0', 'v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7',
    'v8', 'v9', 'v10', 'v11', 'v12', 'v13', 'v14', 'v15',
    'v16', 'v17', 'v18', 'v19', 'v20', 'v21', 'v22', 'v23',
    'v24', 'v25', 'v26', 'v27', 'v28', 'v29', 'v30', 'v31',
    'z0', 'z1', 'z2', 'z3', 'z4', 'z5', 'z6', 'z7',
    'z8', 'z9', 'z10', 'z11', 'z12', 'z13', 'z14', 'z15',
    'z16', 'z17', 'z18', 'z19', 'z20', 'z21', 'z22', 'z23',
    'z24', 'z25', 'z26', 'z27', 'z28', 'z29', 'z30', 'z31'
]
|
||||
|
||||
|
||||
class ExprDumper(object):
    """ A dumper for DWARF expressions that dumps a textual
        representation of the complete expression.

        Usage: after creation, call dump_expr repeatedly - it's stateless.
    """
    def __init__(self, structs):
        # structs: the DWARF structs object used to decode opcode arguments;
        # handed to the expression parser.
        self.structs = structs
        self.expr_parser = DWARFExprParser(self.structs)
        self._init_lookups()

    def dump_expr(self, expr, cu_offset=None):
        """ Parse and dump a DWARF expression. expr should be a list of
            (integer) byte values. cu_offset is the cu_offset
            value from the CU object where the expression resides.
            Only affects a handful of GNU opcodes, if None is provided,
            that's not a crash condition, only the expression dump will
            not be consistent of that of readelf.

            Returns a string representing the expression.
        """
        parsed = self.expr_parser.parse_expr(expr)
        s = []
        for deo in parsed:
            s.append(self._dump_to_string(deo.op, deo.op_name, deo.args, cu_offset))
        return '; '.join(s)

    def _init_lookups(self):
        # Group opcode names by the argument-formatting style they share;
        # _dump_to_string dispatches on these sets.
        self._ops_with_decimal_arg = set([
            'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s',
            'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_const8u', 'DW_OP_const8s',
            'DW_OP_constu', 'DW_OP_consts', 'DW_OP_pick', 'DW_OP_plus_uconst',
            'DW_OP_bra', 'DW_OP_skip', 'DW_OP_fbreg', 'DW_OP_piece',
            'DW_OP_deref_size', 'DW_OP_xderef_size', 'DW_OP_regx',])

        # DW_OP_breg0..DW_OP_breg31 each take a single decimal offset too.
        for n in range(0, 32):
            self._ops_with_decimal_arg.add('DW_OP_breg%s' % n)

        self._ops_with_two_decimal_args = set(['DW_OP_bregx', 'DW_OP_bit_piece'])

        self._ops_with_hex_arg = set(
            ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref'])

    def _dump_to_string(self, opcode, opcode_name, args, cu_offset=None):
        """ Format a single parsed operation (numeric opcode, opcode name and
            parsed args) as readelf would print it; returns the string.
        """
        # Some GNU ops contain an offset from the current CU as an argument,
        # but readelf emits those ops with offset from the info section
        # so we need the base offset of the parent CU.
        # If omitted, arguments on some GNU opcodes will be off.
        if cu_offset is None:
            cu_offset = 0

        if len(args) == 0:
            if opcode_name.startswith('DW_OP_reg'):
                # Register ops encode the register number in the name itself
                # ('DW_OP_reg' is 9 characters long).
                regnum = int(opcode_name[9:])
                return '%s (%s)' % (
                    opcode_name,
                    describe_reg_name(regnum, _MACHINE_ARCH))
            else:
                return opcode_name
        elif opcode_name in self._ops_with_decimal_arg:
            if opcode_name.startswith('DW_OP_breg'):
                # 'DW_OP_breg' is 10 characters long; the suffix is the regnum.
                regnum = int(opcode_name[10:])
                return '%s (%s): %s' % (
                    opcode_name,
                    describe_reg_name(regnum, _MACHINE_ARCH),
                    args[0])
            elif opcode_name.endswith('regx'):
                # applies to both regx and bregx
                return '%s: %s (%s)' % (
                    opcode_name,
                    args[0],
                    describe_reg_name(args[0], _MACHINE_ARCH))
            else:
                return '%s: %s' % (opcode_name, args[0])
        elif opcode_name in self._ops_with_hex_arg:
            return '%s: %x' % (opcode_name, args[0])
        elif opcode_name in self._ops_with_two_decimal_args:
            return '%s: %s %s' % (opcode_name, args[0], args[1])
        elif opcode_name == 'DW_OP_GNU_entry_value':
            # The single argument is itself a list of parsed sub-operations;
            # recurse (without cu_offset, matching readelf here).
            return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args) for deo in args[0]]))
        elif opcode_name == 'DW_OP_implicit_value':
            return "%s %s byte block: %s" % (opcode_name, len(args[0]), ''.join(["%x " % b for b in args[0]]))
        elif opcode_name == 'DW_OP_GNU_parameter_ref':
            # CU-relative reference; rebase to an info-section offset.
            return "%s: <0x%x>" % (opcode_name, args[0] + cu_offset)
        elif opcode_name == 'DW_OP_GNU_implicit_pointer':
            return "%s: <0x%x> %d" % (opcode_name, args[0], args[1])
        elif opcode_name == 'DW_OP_GNU_convert':
            return "%s <0x%x>" % (opcode_name, args[0] + cu_offset)
        elif opcode_name == 'DW_OP_GNU_deref_type':
            return "%s: %d <0x%x>" % (opcode_name, args[0], args[1] + cu_offset)
        elif opcode_name == 'DW_OP_GNU_const_type':
            return "%s: <0x%x> %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1]))
        elif opcode_name == 'DW_OP_GNU_regval_type':
            return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset)
        else:
            return '<unknown %s>' % opcode_name
|
||||
@@ -1,279 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/die.py
|
||||
#
|
||||
# DWARF Debugging Information Entry
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from collections import namedtuple, OrderedDict
|
||||
import os
|
||||
|
||||
from ..common.exceptions import DWARFError
|
||||
from ..common.py3compat import bytes2str, iteritems
|
||||
from ..common.utils import struct_parse, preserve_stream_pos
|
||||
from .enums import DW_FORM_raw2name
|
||||
|
||||
|
||||
# AttributeValue - describes an attribute value in the DIE:
|
||||
#
|
||||
# name:
|
||||
# The name (DW_AT_*) of this attribute
|
||||
#
|
||||
# form:
|
||||
# The DW_FORM_* name of this attribute
|
||||
#
|
||||
# value:
|
||||
# The value parsed from the section and translated accordingly to the form
|
||||
# (e.g. for a DW_FORM_strp it's the actual string taken from the string table)
|
||||
#
|
||||
# raw_value:
|
||||
# Raw value as parsed from the section - used for debugging and presentation
|
||||
# (e.g. for a DW_FORM_strp it's the raw string offset into the table)
|
||||
#
|
||||
# offset:
|
||||
# Offset of this attribute's value in the stream (absolute offset, relative
|
||||
# the beginning of the whole stream)
|
||||
#
|
||||
# Lightweight record for one parsed DIE attribute; field semantics are
# described in the comment block above.
AttributeValue = namedtuple(
    'AttributeValue', 'name form value raw_value offset')
|
||||
|
||||
|
||||
class DIE(object):
    """ A DWARF debugging information entry. On creation, parses itself from
        the stream. Each DIE is held by a CU.

        Accessible attributes:

            tag:
                The DIE tag

            size:
                The size this DIE occupies in the section

            offset:
                The offset of this DIE in the stream

            attributes:
                An ordered dictionary mapping attribute names to values. It's
                ordered to preserve the order of attributes in the section

            has_children:
                Specifies whether this DIE has children

            abbrev_code:
                The abbreviation code pointing to an abbreviation entry (note
                that this is for informational purposes only - this object
                interacts with its abbreviation table transparently).

        See also the public methods.
    """
    def __init__(self, cu, stream, offset):
        """ cu:
                CompileUnit object this DIE belongs to. Used to obtain context
                information (structs, abbrev table, etc.)

            stream, offset:
                The stream and offset into it where this DIE's data is located
        """
        self.cu = cu
        self.dwarfinfo = self.cu.dwarfinfo # get DWARFInfo context
        self.stream = stream
        self.offset = offset

        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        self._parse_DIE()

    def is_null(self):
        """ Is this a null entry?
        """
        return self.tag is None

    def get_DIE_from_attribute(self, name):
        """ Return the DIE referenced by the named attribute of this DIE.
            The attribute must be in the reference attribute class.

            name:
                The name of the attribute in the reference class.
        """
        attr = self.attributes[name]
        if attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
                         'DW_FORM_ref8', 'DW_FORM_ref'):
            refaddr = self.cu.cu_offset + attr.raw_value
            return self.cu.get_DIE_from_refaddr(refaddr)
        elif attr.form == 'DW_FORM_ref_addr':
            # BUGFIX: was `attr.form in ('DW_FORM_ref_addr')` - a substring
            # test against a plain string (the intended one-element tuple was
            # missing its trailing comma). Equality is what was meant.
            return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
        elif attr.form == 'DW_FORM_ref_sig8':
            # BUGFIX: same missing-comma substring test as above.
            # Implement search type units for matching signature
            raise NotImplementedError('%s (type unit by signature)' % attr.form)
        elif attr.form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8'):
            raise NotImplementedError('%s to dwo' % attr.form)
        else:
            raise DWARFError('%s is not a reference class form attribute' % attr)

    def get_parent(self):
        """ Return the parent DIE of this DIE, or None if the DIE has no
            parent (i.e. is a top-level DIE).
        """
        if self._parent is None:
            self._search_ancestor_offspring()
        return self._parent

    def get_full_path(self):
        """ Return the full path filename for the DIE.

            The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name',
            either of which may be missing in practice. Note that its value is
            usually a string taken from the .debug_string section and the
            returned value will be a string.
        """
        comp_dir_attr = self.attributes.get('DW_AT_comp_dir', None)
        comp_dir = bytes2str(comp_dir_attr.value) if comp_dir_attr else ''
        fname_attr = self.attributes.get('DW_AT_name', None)
        fname = bytes2str(fname_attr.value) if fname_attr else ''
        return os.path.join(comp_dir, fname)

    def iter_children(self):
        """ Iterates all children of this DIE
        """
        return self.cu.iter_DIE_children(self)

    def iter_siblings(self):
        """ Yield all siblings of this DIE
        """
        parent = self.get_parent()
        if parent:
            for sibling in parent.iter_children():
                if sibling is not self:
                    yield sibling
        # BUGFIX: no parent means no siblings - simply stop yielding.
        # The original raised StopIteration() here, which inside a generator
        # becomes a RuntimeError under PEP 479 (Python 3.7+).

    # The following methods are used while creating the DIE and should not be
    # interesting to consumers
    #

    def set_parent(self, die):
        self._parent = die

    #------ PRIVATE ------#

    def _search_ancestor_offspring(self):
        """ Search our ancestors identifying their offspring to find our parent.

            DIEs are stored as a flattened tree. The top DIE is the ancestor
            of all DIEs in the unit. Each parent is guaranteed to be at
            an offset less than their children. In each generation of children
            the sibling with the closest offset not greater than our offset is
            our ancestor.
        """
        # This code is called when get_parent notices that the _parent has
        # not been identified. To avoid execution for each sibling record all
        # the children of any parent iterated. Assuming get_parent will also be
        # called for siblings, it is more efficient if siblings references are
        # provided and no worse than a single walk if they are missing, while
        # stopping iteration early could result in O(n^2) walks.
        search = self.cu.get_top_DIE()
        while search.offset < self.offset:
            prev = search
            for child in search.iter_children():
                child.set_parent(search)
                if child.offset <= self.offset:
                    prev = child

            # We also need to check the offset of the terminator DIE
            if search.has_children and search._terminator.offset <= self.offset:
                prev = search._terminator

            # If we didn't find a closer parent, give up, don't loop.
            # Either we mis-parsed an ancestor or someone created a DIE
            # by an offset that was not actually the start of a DIE.
            if prev is search:
                raise ValueError("offset %s not in CU %s DIE tree" %
                                 (self.offset, self.cu.cu_offset))

            search = prev

    def __repr__(self):
        s = 'DIE %s, size=%s, has_children=%s\n' % (
            self.tag, self.size, self.has_children)
        for attrname, attrval in iteritems(self.attributes):
            s += ' |%-18s: %s\n' % (attrname, attrval)
        return s

    def __str__(self):
        return self.__repr__()

    def _parse_DIE(self):
        """ Parses the DIE info from the section, based on the abbreviation
            table of the CU
        """
        structs = self.cu.structs

        # A DIE begins with the abbreviation code. Read it and use it to
        # obtain the abbrev declaration for this DIE.
        # Note: here and elsewhere, preserve_stream_pos is used on operations
        # that manipulate the stream by reading data from it.
        self.abbrev_code = struct_parse(
            structs.Dwarf_uleb128(''), self.stream, self.offset)

        # This may be a null entry
        if self.abbrev_code == 0:
            self.size = self.stream.tell() - self.offset
            return

        abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code)
        self.tag = abbrev_decl['tag']
        self.has_children = abbrev_decl.has_children()

        # Guided by the attributes listed in the abbreviation declaration, parse
        # values from the stream.
        for name, form in abbrev_decl.iter_attr_specs():
            attr_offset = self.stream.tell()
            raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)

            value = self._translate_attr_value(form, raw_value)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=value,
                raw_value=raw_value,
                offset=attr_offset)

        self.size = self.stream.tell() - self.offset

    def _translate_attr_value(self, form, raw_value):
        """ Translate a raw attr value according to the form
        """
        value = None
        if form == 'DW_FORM_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_table(raw_value)
        elif form == 'DW_FORM_flag':
            value = not raw_value == 0
        elif form == 'DW_FORM_flag_present':
            value = True
        elif form == 'DW_FORM_indirect':
            try:
                form = DW_FORM_raw2name[raw_value]
            except KeyError as err:
                raise DWARFError(
                    'Found DW_FORM_indirect with unknown raw_value=' +
                    str(raw_value))

            raw_value = struct_parse(
                self.cu.structs.Dwarf_dw_form[form], self.stream)
            # Let's hope this doesn't get too deep :-)
            return self._translate_attr_value(form, raw_value)
        else:
            value = raw_value
        return value
|
||||
@@ -1,257 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/dwarf_expr.py
|
||||
#
|
||||
# Decoding DWARF expressions
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from collections import namedtuple
|
||||
|
||||
from ..common.py3compat import BytesIO, iteritems
|
||||
from ..common.utils import struct_parse, bytelist2string, read_blob
|
||||
|
||||
|
||||
# DWARF expression opcodes. name -> opcode mapping.
# The ranged families (DW_OP_lit0..31, DW_OP_reg0..31, DW_OP_breg0..31) are
# filled in dynamically by _generate_dynamic_values below.
DW_OP_name2opcode = dict(
    DW_OP_addr=0x03,
    DW_OP_deref=0x06,
    DW_OP_const1u=0x08,
    DW_OP_const1s=0x09,
    DW_OP_const2u=0x0a,
    DW_OP_const2s=0x0b,
    DW_OP_const4u=0x0c,
    DW_OP_const4s=0x0d,
    DW_OP_const8u=0x0e,
    DW_OP_const8s=0x0f,
    DW_OP_constu=0x10,
    DW_OP_consts=0x11,
    DW_OP_dup=0x12,
    DW_OP_drop=0x13,
    DW_OP_over=0x14,
    DW_OP_pick=0x15,
    DW_OP_swap=0x16,
    DW_OP_rot=0x17,
    DW_OP_xderef=0x18,
    DW_OP_abs=0x19,
    DW_OP_and=0x1a,
    DW_OP_div=0x1b,
    DW_OP_minus=0x1c,
    DW_OP_mod=0x1d,
    DW_OP_mul=0x1e,
    DW_OP_neg=0x1f,
    DW_OP_not=0x20,
    DW_OP_or=0x21,
    DW_OP_plus=0x22,
    DW_OP_plus_uconst=0x23,
    DW_OP_shl=0x24,
    DW_OP_shr=0x25,
    DW_OP_shra=0x26,
    DW_OP_xor=0x27,
    DW_OP_bra=0x28,
    DW_OP_eq=0x29,
    DW_OP_ge=0x2a,
    DW_OP_gt=0x2b,
    DW_OP_le=0x2c,
    DW_OP_lt=0x2d,
    DW_OP_ne=0x2e,
    DW_OP_skip=0x2f,
    DW_OP_regx=0x90,
    DW_OP_fbreg=0x91,
    DW_OP_bregx=0x92,
    DW_OP_piece=0x93,
    DW_OP_deref_size=0x94,
    DW_OP_xderef_size=0x95,
    DW_OP_nop=0x96,
    DW_OP_push_object_address=0x97,
    DW_OP_call2=0x98,
    DW_OP_call4=0x99,
    DW_OP_call_ref=0x9a,
    DW_OP_form_tls_address=0x9b,
    DW_OP_call_frame_cfa=0x9c,
    DW_OP_bit_piece=0x9d,
    DW_OP_implicit_value=0x9e,
    DW_OP_stack_value=0x9f,
    DW_OP_implicit_pointer=0xa0,
    DW_OP_addrx=0xa1,
    DW_OP_constx=0xa2,
    DW_OP_entry_value=0xa3,
    DW_OP_const_type=0xa4,
    DW_OP_regval_type=0xa5,
    DW_OP_deref_type=0xa6,
    DW_OP_xderef_type=0xa7,
    DW_OP_convert=0xa8,
    DW_OP_reinterpret=0xa9,
    # Vendor extension range; note DW_OP_lo_user and
    # DW_OP_GNU_push_tls_address deliberately share opcode 0xe0.
    DW_OP_lo_user=0xe0,
    DW_OP_GNU_push_tls_address=0xe0,
    DW_OP_GNU_implicit_pointer=0xf2,
    DW_OP_GNU_entry_value=0xf3,
    DW_OP_GNU_const_type=0xf4,
    DW_OP_GNU_regval_type=0xf5,
    DW_OP_GNU_deref_type=0xf6,
    DW_OP_GNU_convert=0xf7,
    DW_OP_GNU_parameter_ref=0xfa,
    DW_OP_hi_user=0xff,
)
|
||||
|
||||
def _generate_dynamic_values(map, prefix, index_start, index_end, value_start):
|
||||
""" Generate values in a map (dict) dynamically. Each key starts with
|
||||
a (string) prefix, followed by an index in the inclusive range
|
||||
[index_start, index_end]. The values start at value_start.
|
||||
"""
|
||||
for index in range(index_start, index_end + 1):
|
||||
name = '%s%s' % (prefix, index)
|
||||
value = value_start + index - index_start
|
||||
map[name] = value
|
||||
|
||||
# Fill in the ranged opcode families: DW_OP_lit0..31, DW_OP_reg0..31 and
# DW_OP_breg0..31 occupy consecutive opcode values from the given bases.
_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_lit', 0, 31, 0x30)
_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_reg', 0, 31, 0x50)
_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_breg', 0, 31, 0x70)

# opcode -> name mapping
# (Where two names share an opcode, e.g. DW_OP_lo_user and
# DW_OP_GNU_push_tls_address at 0xe0, only one name survives the inversion.)
DW_OP_opcode2name = dict((v, k) for k, v in iteritems(DW_OP_name2opcode))


# Each parsed DWARF expression is returned as this type with its numeric opcode,
# op name (as a string) and a list of arguments.
DWARFExprOp = namedtuple('DWARFExprOp', 'op op_name args')
|
||||
|
||||
|
||||
class DWARFExprParser(object):
    """DWARF expression parser.

    Initialization requires structs so a dispatch table can be cached;
    after that, parse_expr may be invoked any number of times - it keeps
    no state between calls.
    """

    def __init__(self, structs):
        # Cache the opcode -> argument-parser mapping once per structs.
        self._dispatch_table = _init_dispatch_table(structs)

    def parse_expr(self, expr):
        """ Parses expr (a list of integers) into a list of DWARFExprOp.

            The list can potentially be nested.
        """
        stream = BytesIO(bytelist2string(expr))
        result = []

        # Consume one opcode byte at a time; iter() with an empty-bytes
        # sentinel stops the loop when the stream is exhausted.
        for raw in iter(lambda: stream.read(1), b''):
            opcode = ord(raw)
            opcode_name = DW_OP_opcode2name.get(opcode, 'OP:0x%x' % opcode)

            # The dispatch table entry consumes this opcode's arguments
            # from the stream (advancing it as needed).
            args = self._dispatch_table[opcode](stream)
            result.append(DWARFExprOp(op=opcode, op_name=opcode_name,
                                      args=args))

        return result
|
||||
|
||||
|
||||
def _init_dispatch_table(structs):
    """Creates a dispatch table for parsing args of an op.

    Returns a dict mapping opcode to a function. The function accepts a stream
    and returns a list of parsed arguments for the opcode from the stream;
    the stream is advanced by the function as needed.
    """
    table = {}

    def add(opcode_name, func):
        # Register a parser function under the numeric opcode.
        table[DW_OP_name2opcode[opcode_name]] = func

    # No arguments follow the opcode.
    def parse_noargs():
        return lambda stream: []

    # A single target-address argument.
    def parse_op_addr():
        return lambda stream: [struct_parse(structs.Dwarf_target_addr(''),
                                            stream)]

    # A single argument described by arg_struct.
    def parse_arg_struct(arg_struct):
        return lambda stream: [struct_parse(arg_struct, stream)]

    # Two consecutive arguments described by arg1_struct and arg2_struct.
    def parse_arg_struct2(arg1_struct, arg2_struct):
        return lambda stream: [struct_parse(arg1_struct, stream),
                               struct_parse(arg2_struct, stream)]

    # ULEB128, then an expression of that length (parsed recursively)
    def parse_nestedexpr():
        def parse(stream):
            size = struct_parse(structs.Dwarf_uleb128(''), stream)
            nested_expr_blob = read_blob(stream, size)
            return [DWARFExprParser(structs).parse_expr(nested_expr_blob)]
        return parse

    # ULEB128, then a blob of that size
    def parse_blob():
        return lambda stream: [read_blob(stream, struct_parse(structs.Dwarf_uleb128(''), stream))]

    # ULEB128 with datatype DIE offset, then byte, then a blob of that size
    def parse_typedblob():
        return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))]

    add('DW_OP_addr', parse_op_addr())
    add('DW_OP_addrx', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8('')))
    add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8('')))
    add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16('')))
    add('DW_OP_const2s', parse_arg_struct(structs.Dwarf_int16('')))
    add('DW_OP_const4u', parse_arg_struct(structs.Dwarf_uint32('')))
    add('DW_OP_const4s', parse_arg_struct(structs.Dwarf_int32('')))
    add('DW_OP_const8u', parse_arg_struct(structs.Dwarf_uint64('')))
    add('DW_OP_const8s', parse_arg_struct(structs.Dwarf_int64('')))
    add('DW_OP_constu', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_consts', parse_arg_struct(structs.Dwarf_sleb128('')))
    add('DW_OP_pick', parse_arg_struct(structs.Dwarf_uint8('')))
    add('DW_OP_plus_uconst', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_bra', parse_arg_struct(structs.Dwarf_int16('')))
    add('DW_OP_skip', parse_arg_struct(structs.Dwarf_int16('')))

    # Stack / arithmetic / comparison ops that take no operands.
    # (Fix: 'DW_OP_swap' was listed twice here.)
    for opname in [ 'DW_OP_deref', 'DW_OP_dup', 'DW_OP_drop', 'DW_OP_over',
                    'DW_OP_swap', 'DW_OP_rot', 'DW_OP_xderef',
                    'DW_OP_abs', 'DW_OP_and', 'DW_OP_div', 'DW_OP_minus',
                    'DW_OP_mod', 'DW_OP_mul', 'DW_OP_neg', 'DW_OP_not',
                    'DW_OP_or', 'DW_OP_plus', 'DW_OP_shl', 'DW_OP_shr',
                    'DW_OP_shra', 'DW_OP_xor', 'DW_OP_eq', 'DW_OP_ge',
                    'DW_OP_gt', 'DW_OP_le', 'DW_OP_lt', 'DW_OP_ne', 'DW_OP_nop',
                    'DW_OP_push_object_address', 'DW_OP_form_tls_address',
                    'DW_OP_call_frame_cfa', 'DW_OP_stack_value',
                    'DW_OP_GNU_push_tls_address']:
        add(opname, parse_noargs())

    # Literal, register and base-register op families.
    for n in range(0, 32):
        add('DW_OP_lit%s' % n, parse_noargs())
        add('DW_OP_reg%s' % n, parse_noargs())
        add('DW_OP_breg%s' % n, parse_arg_struct(structs.Dwarf_sleb128('')))

    add('DW_OP_fbreg', parse_arg_struct(structs.Dwarf_sleb128('')))
    add('DW_OP_regx', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_bregx', parse_arg_struct2(structs.Dwarf_uleb128(''),
                                         structs.Dwarf_sleb128('')))
    add('DW_OP_piece', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_bit_piece', parse_arg_struct2(structs.Dwarf_uleb128(''),
                                             structs.Dwarf_uleb128('')))
    add('DW_OP_deref_size', parse_arg_struct(structs.Dwarf_int8('')))
    add('DW_OP_xderef_size', parse_arg_struct(structs.Dwarf_int8('')))
    add('DW_OP_call2', parse_arg_struct(structs.Dwarf_uint16('')))
    add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32('')))
    add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset('')))
    add('DW_OP_implicit_value', parse_blob())
    add('DW_OP_GNU_entry_value', parse_nestedexpr())
    add('DW_OP_GNU_const_type', parse_typedblob())
    add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''),
                                                   structs.Dwarf_uleb128('')))
    add('DW_OP_GNU_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''),
                                                  structs.Dwarf_uleb128('')))
    add('DW_OP_GNU_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''),
                                                        structs.Dwarf_sleb128('')))
    add('DW_OP_GNU_parameter_ref', parse_arg_struct(structs.Dwarf_offset('')))
    add('DW_OP_GNU_convert', parse_arg_struct(structs.Dwarf_uleb128('')))

    # DWARF v5 standardized counterparts of the GNU extensions above; they
    # use the same operand encodings (DWARF v5, section 7.7.1). These names
    # are in DW_OP_name2opcode but previously had no parser, so hitting one
    # raised a KeyError in DWARFExprParser.parse_expr.
    add('DW_OP_entry_value', parse_nestedexpr())
    add('DW_OP_const_type', parse_typedblob())
    add('DW_OP_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''),
                                               structs.Dwarf_uleb128('')))
    add('DW_OP_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''),
                                              structs.Dwarf_uleb128('')))
    add('DW_OP_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''),
                                                    structs.Dwarf_sleb128('')))
    add('DW_OP_convert', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_constx', parse_arg_struct(structs.Dwarf_uleb128('')))

    return table
|
||||
@@ -1,460 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/dwarfinfo.py
|
||||
#
|
||||
# DWARFInfo - Main class for accessing DWARF debug information
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from collections import namedtuple
|
||||
from bisect import bisect_right
|
||||
|
||||
from ..common.exceptions import DWARFError
|
||||
from ..common.utils import (struct_parse, dwarf_assert,
|
||||
parse_cstring_from_stream)
|
||||
from .structs import DWARFStructs
|
||||
from .compileunit import CompileUnit
|
||||
from .abbrevtable import AbbrevTable
|
||||
from .lineprogram import LineProgram
|
||||
from .callframe import CallFrameInfo
|
||||
from .locationlists import LocationLists
|
||||
from .ranges import RangeLists
|
||||
from .aranges import ARanges
|
||||
from .namelut import NameLUT
|
||||
|
||||
|
||||
# Describes a debug section
#
# stream: a stream object containing the data of this section
# name: section name in the container file
# global_offset: the global offset of the section in its container file
# size: the size of the section's data, in bytes
# address: the virtual address for the section's data
#
# 'name' and 'global_offset' are for descriptional purposes only and
# aren't strictly required for the DWARF parsing to work. 'address' is required
# to properly decode the special '.eh_frame' format.
#
DebugSectionDescriptor = namedtuple('DebugSectionDescriptor',
    'stream name global_offset size address')
|
||||
|
||||
|
||||
# Some configuration parameters for the DWARF reader. This exists to allow
# DWARFInfo to be independent from any specific file format/container.
#
# little_endian:
#   boolean flag specifying whether the data in the file is little endian
#
# machine_arch:
#   Machine architecture as a string. For example 'x86' or 'x64'
#
# default_address_size:
#   The default address size for the container file (sizeof pointer, in bytes)
#
DwarfConfig = namedtuple('DwarfConfig',
    'little_endian machine_arch default_address_size')
|
||||
|
||||
|
||||
class DWARFInfo(object):
    """ Acts also as a "context" to other major objects, bridging between
        various parts of the debug information.
    """
    def __init__(self,
            config,
            debug_info_sec,
            debug_aranges_sec,
            debug_abbrev_sec,
            debug_frame_sec,
            eh_frame_sec,
            debug_str_sec,
            debug_loc_sec,
            debug_ranges_sec,
            debug_line_sec,
            debug_pubtypes_sec,
            debug_pubnames_sec,
            debug_addr_sec,
            debug_str_offsets_sec):
        """ config:
                A DwarfConfig object

            debug_*_sec:
                DebugSectionDescriptor for a section. Pass None for sections
                that don't exist. These arguments are best given with
                keyword syntax.
        """
        self.config = config
        self.debug_info_sec = debug_info_sec
        self.debug_aranges_sec = debug_aranges_sec
        self.debug_abbrev_sec = debug_abbrev_sec
        self.debug_frame_sec = debug_frame_sec
        self.eh_frame_sec = eh_frame_sec
        self.debug_str_sec = debug_str_sec
        self.debug_loc_sec = debug_loc_sec
        self.debug_ranges_sec = debug_ranges_sec
        self.debug_line_sec = debug_line_sec
        self.debug_pubtypes_sec = debug_pubtypes_sec
        self.debug_pubnames_sec = debug_pubnames_sec
        # Fix: these two sections were accepted by __init__ but silently
        # dropped; store them like every other section descriptor.
        self.debug_addr_sec = debug_addr_sec
        self.debug_str_offsets_sec = debug_str_offsets_sec

        # This is the DWARFStructs the context uses, so it doesn't depend on
        # DWARF format and address_size (these are determined per CU) - set them
        # to default values.
        self.structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=32,
            address_size=self.config.default_address_size)

        # Cache for abbrev tables: a dict keyed by offset
        self._abbrevtable_cache = {}

        # Cache of compile units and map of their offsets for bisect lookup.
        # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at().
        self._cu_cache = []
        self._cu_offsets_map = []

    @property
    def has_debug_info(self):
        """ Return whether this contains debug information.

            It can be not the case when the ELF only contains .eh_frame, which
            is encoded DWARF but not actually for debugging.
        """
        return bool(self.debug_info_sec)

    def get_DIE_from_lut_entry(self, lut_entry):
        """ Get the DIE from the pubnames or pubtypes lookup table entry.

            lut_entry:
                A NameLUTEntry object from a NameLUT instance (see
                .get_pubnames and .get_pubtypes methods).
        """
        cu = self.get_CU_at(lut_entry.cu_ofs)
        return self.get_DIE_from_refaddr(lut_entry.die_ofs, cu)

    def get_DIE_from_refaddr(self, refaddr, cu=None):
        """ Given a .debug_info section offset of a DIE, return the DIE.

            refaddr:
                The refaddr may come from a DW_FORM_ref_addr attribute.

            cu:
                The compile unit object, if known. If None a search
                from the closest offset less than refaddr will be performed.
        """
        if cu is None:
            cu = self.get_CU_containing(refaddr)
        return cu.get_DIE_from_refaddr(refaddr)

    def get_CU_containing(self, refaddr):
        """ Find the CU that includes the given reference address in the
            .debug_info section.

            refaddr:
                Either a refaddr of a DIE (possibly from a DW_FORM_ref_addr
                attribute) or the section offset of a CU (possibly from an
                aranges table).

            This function will parse and cache CUs until the search criteria
            is met, starting from the closest known offset less than or equal
            to the given address.
        """
        dwarf_assert(
            self.has_debug_info,
            'CU lookup but no debug info section')
        dwarf_assert(
            0 <= refaddr < self.debug_info_sec.size,
            "refaddr %s beyond .debug_info size" % refaddr)

        # The CU containing the DIE we desire will be to the right of the
        # DIE insert point. If we have a CU address, then it will be a
        # match but the right insert minus one will still be the item.
        # The first CU starts at offset 0, so start there if cache is empty.
        i = bisect_right(self._cu_offsets_map, refaddr)
        start = self._cu_offsets_map[i - 1] if i > 0 else 0

        # parse CUs until we find one containing the desired address
        for cu in self._parse_CUs_iter(start):
            if cu.cu_offset <= refaddr < cu.cu_offset + cu.size:
                return cu

        raise ValueError("CU for reference address %s not found" % refaddr)

    def get_CU_at(self, offset):
        """ Given a CU header offset, return the parsed CU.

            offset:
                The offset may be from an accelerated access table such as
                the public names, public types, address range table, or
                prior use.

            This function will directly parse the CU doing no validation of
            the offset beyond checking the size of the .debug_info section.
        """
        dwarf_assert(
            self.has_debug_info,
            'CU lookup but no debug info section')
        dwarf_assert(
            0 <= offset < self.debug_info_sec.size,
            "offset %s beyond .debug_info size" % offset)

        return self._cached_CU_at_offset(offset)

    def iter_CUs(self):
        """ Yield all the compile units (CompileUnit objects) in the debug info
        """
        return self._parse_CUs_iter()

    def get_abbrev_table(self, offset):
        """ Get an AbbrevTable from the given offset in the debug_abbrev
            section.

            The only verification done on the offset is that it's within the
            bounds of the section (if not, an exception is raised).
            It is the caller's responsibility to make sure the offset actually
            points to a valid abbreviation table.

            AbbrevTable objects are cached internally (two calls for the same
            offset will return the same object).
        """
        dwarf_assert(
            offset < self.debug_abbrev_sec.size,
            "Offset '0x%x' to abbrev table out of section bounds" % offset)
        if offset not in self._abbrevtable_cache:
            self._abbrevtable_cache[offset] = AbbrevTable(
                structs=self.structs,
                stream=self.debug_abbrev_sec.stream,
                offset=offset)
        return self._abbrevtable_cache[offset]

    def get_string_from_table(self, offset):
        """ Obtain a string from the string table section, given an offset
            relative to the section.
        """
        return parse_cstring_from_stream(self.debug_str_sec.stream, offset)

    def line_program_for_CU(self, CU):
        """ Given a CU object, fetch the line program it points to from the
            .debug_line section.
            If the CU doesn't point to a line program, return None.
        """
        # The line program is pointed to by the DW_AT_stmt_list attribute of
        # the top DIE of a CU.
        top_DIE = CU.get_top_DIE()
        if 'DW_AT_stmt_list' in top_DIE.attributes:
            return self._parse_line_program_at_offset(
                top_DIE.attributes['DW_AT_stmt_list'].value, CU.structs)
        else:
            return None

    def has_CFI(self):
        """ Does this dwarf info have a dwarf_frame CFI section?
        """
        return self.debug_frame_sec is not None

    def CFI_entries(self):
        """ Get a list of dwarf_frame CFI entries from the .debug_frame section.
        """
        cfi = CallFrameInfo(
            stream=self.debug_frame_sec.stream,
            size=self.debug_frame_sec.size,
            address=self.debug_frame_sec.address,
            base_structs=self.structs)
        return cfi.get_entries()

    def has_EH_CFI(self):
        """ Does this dwarf info have a eh_frame CFI section?
        """
        return self.eh_frame_sec is not None

    def EH_CFI_entries(self):
        """ Get a list of eh_frame CFI entries from the .eh_frame section.
        """
        cfi = CallFrameInfo(
            stream=self.eh_frame_sec.stream,
            size=self.eh_frame_sec.size,
            address=self.eh_frame_sec.address,
            base_structs=self.structs,
            for_eh_frame=True)
        return cfi.get_entries()

    def get_pubtypes(self):
        """
        Returns a NameLUT object that contains information read from the
        .debug_pubtypes section in the ELF file.

        NameLUT is essentially a dictionary containing the CU/DIE offsets of
        each symbol. See the NameLUT doc string for more details.
        """

        if self.debug_pubtypes_sec:
            return NameLUT(self.debug_pubtypes_sec.stream,
                           self.debug_pubtypes_sec.size,
                           self.structs)
        else:
            return None

    def get_pubnames(self):
        """
        Returns a NameLUT object that contains information read from the
        .debug_pubnames section in the ELF file.

        NameLUT is essentially a dictionary containing the CU/DIE offsets of
        each symbol. See the NameLUT doc string for more details.
        """

        if self.debug_pubnames_sec:
            return NameLUT(self.debug_pubnames_sec.stream,
                           self.debug_pubnames_sec.size,
                           self.structs)
        else:
            return None

    def get_aranges(self):
        """ Get an ARanges object representing the .debug_aranges section of
            the DWARF data, or None if the section doesn't exist
        """
        if self.debug_aranges_sec:
            return ARanges(self.debug_aranges_sec.stream,
                           self.debug_aranges_sec.size,
                           self.structs)
        else:
            return None

    def location_lists(self):
        """ Get a LocationLists object representing the .debug_loc section of
            the DWARF data, or None if this section doesn't exist.
        """
        if self.debug_loc_sec:
            return LocationLists(self.debug_loc_sec.stream, self.structs)
        else:
            return None

    def range_lists(self):
        """ Get a RangeLists object representing the .debug_ranges section of
            the DWARF data, or None if this section doesn't exist.
        """
        if self.debug_ranges_sec:
            return RangeLists(self.debug_ranges_sec.stream, self.structs)
        else:
            return None

    #------ PRIVATE ------#

    def _parse_CUs_iter(self, offset=0):
        """ Iterate CU objects in order of appearance in the debug_info section.

            offset:
                The offset of the first CU to yield. Additional iterations
                will return the sequential unit objects.

            See .iter_CUs(), .get_CU_containing(), and .get_CU_at().
        """
        if self.debug_info_sec is None:
            return

        while offset < self.debug_info_sec.size:
            cu = self._cached_CU_at_offset(offset)
            # Compute the offset of the next CU in the section. The unit_length
            # field of the CU header contains its size not including the length
            # field itself.
            offset = ( offset +
                       cu['unit_length'] +
                       cu.structs.initial_length_field_size())
            yield cu

    def _cached_CU_at_offset(self, offset):
        """ Return the CU with unit header at the given offset into the
            debug_info section from the cache. If not present, the unit
            header is parsed and the object is installed in the cache.

            offset:
                The offset of the unit header in the .debug_info section
                of the unit to fetch from the cache.

            See get_CU_at().
        """
        # Find the insert point for the requested offset. With bisect_right,
        # if this entry is present in the cache it will be the prior entry.
        i = bisect_right(self._cu_offsets_map, offset)
        if i >= 1 and offset == self._cu_offsets_map[i - 1]:
            return self._cu_cache[i - 1]

        # Parse the CU and insert the offset and object into the cache.
        # The ._cu_offsets_map[] contains just the numeric offsets for the
        # bisect_right search while the parallel indexed ._cu_cache[] holds
        # the object references.
        cu = self._parse_CU_at_offset(offset)
        self._cu_offsets_map.insert(i, offset)
        self._cu_cache.insert(i, cu)
        return cu

    def _parse_CU_at_offset(self, offset):
        """ Parse and return a CU at the given offset in the debug_info stream.
        """
        # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3
        # states that the first 32-bit word of the CU header determines
        # whether the CU is represented with 32-bit or 64-bit DWARF format.
        #
        # So we peek at the first word in the CU header to determine its
        # dwarf format. Based on it, we then create a new DWARFStructs
        # instance suitable for this CU and use it to parse the rest.
        #
        initial_length = struct_parse(
            self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset)
        dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32


        # Temporary structs for parsing the header
        # The structs for the rest of the CU depend on the header data.
        #
        cu_structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=dwarf_format,
            address_size=4,
            dwarf_version=2)

        cu_header = struct_parse(
            cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset)

        # structs for the rest of the CU, taking into account bitness and
        # DWARF version
        cu_structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=dwarf_format,
            address_size=cu_header['address_size'],
            dwarf_version=cu_header['version'])

        cu_die_offset = self.debug_info_sec.stream.tell()
        dwarf_assert(
            self._is_supported_version(cu_header['version']),
            "Expected supported DWARF version. Got '%s'" % cu_header['version'])
        return CompileUnit(
            header=cu_header,
            dwarfinfo=self,
            structs=cu_structs,
            cu_offset=offset,
            cu_die_offset=cu_die_offset)

    def _is_supported_version(self, version):
        """ DWARF version supported by this parser
        """
        return 2 <= version <= 5

    def _parse_line_program_at_offset(self, debug_line_offset, structs):
        """ Given an offset to the .debug_line section, parse the line program
            starting at this offset in the section and return it.
            structs is the DWARFStructs object used to do this parsing.
        """
        lineprog_header = struct_parse(
            structs.Dwarf_lineprog_header,
            self.debug_line_sec.stream,
            debug_line_offset)

        # Calculate the offset to the next line program (see DWARF 6.2.4)
        end_offset = ( debug_line_offset + lineprog_header['unit_length'] +
                       structs.initial_length_field_size())

        return LineProgram(
            header=lineprog_header,
            stream=self.debug_line_sec.stream,
            structs=structs,
            program_start_offset=self.debug_line_sec.stream.tell(),
            program_end_offset=end_offset)
|
||||
@@ -1,396 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/enums.py
|
||||
#
|
||||
# Mappings of enum names to values
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from ..construct import Pass
|
||||
from ..common.py3compat import iteritems
|
||||
|
||||
|
||||
# DW_TAG_* name -> numeric encoding (DWARF spec section 7.5.3, plus the
# vendor ranges between DW_TAG_lo_user and DW_TAG_hi_user). Unknown values
# fall through to the construct Pass default.
ENUM_DW_TAG = dict(
    DW_TAG_null                     = 0x00,
    DW_TAG_array_type               = 0x01,
    DW_TAG_class_type               = 0x02,
    DW_TAG_entry_point              = 0x03,
    DW_TAG_enumeration_type         = 0x04,
    DW_TAG_formal_parameter         = 0x05,
    DW_TAG_global_subroutine        = 0x06,
    DW_TAG_global_variable          = 0x07,
    DW_TAG_imported_declaration     = 0x08,
    DW_TAG_label                    = 0x0a,
    DW_TAG_lexical_block            = 0x0b,
    DW_TAG_local_variable           = 0x0c,
    DW_TAG_member                   = 0x0d,
    DW_TAG_pointer_type             = 0x0f,
    DW_TAG_reference_type           = 0x10,
    DW_TAG_compile_unit             = 0x11,
    DW_TAG_string_type              = 0x12,
    DW_TAG_structure_type           = 0x13,
    DW_TAG_subroutine               = 0x14,
    DW_TAG_subroutine_type          = 0x15,
    DW_TAG_typedef                  = 0x16,
    DW_TAG_union_type               = 0x17,
    DW_TAG_unspecified_parameters   = 0x18,
    DW_TAG_variant                  = 0x19,
    DW_TAG_common_block             = 0x1a,
    DW_TAG_common_inclusion         = 0x1b,
    DW_TAG_inheritance              = 0x1c,
    DW_TAG_inlined_subroutine       = 0x1d,
    DW_TAG_module                   = 0x1e,
    DW_TAG_ptr_to_member_type       = 0x1f,
    DW_TAG_set_type                 = 0x20,
    DW_TAG_subrange_type            = 0x21,
    DW_TAG_with_stmt                = 0x22,
    DW_TAG_access_declaration       = 0x23,
    DW_TAG_base_type                = 0x24,
    DW_TAG_catch_block              = 0x25,
    DW_TAG_const_type               = 0x26,
    DW_TAG_constant                 = 0x27,
    DW_TAG_enumerator               = 0x28,
    DW_TAG_file_type                = 0x29,
    DW_TAG_friend                   = 0x2a,
    DW_TAG_namelist                 = 0x2b,
    # namelist_item and namelist_items are deliberate aliases for 0x2c.
    DW_TAG_namelist_item            = 0x2c,
    DW_TAG_namelist_items           = 0x2c,
    DW_TAG_packed_type              = 0x2d,
    DW_TAG_subprogram               = 0x2e,

    # The DWARF standard defines these as _parameter, not _param, but we
    # maintain compatibility with readelf.
    DW_TAG_template_type_param      = 0x2f,
    DW_TAG_template_value_param     = 0x30,

    DW_TAG_thrown_type              = 0x31,
    DW_TAG_try_block                = 0x32,
    DW_TAG_variant_part             = 0x33,
    DW_TAG_variable                 = 0x34,
    DW_TAG_volatile_type            = 0x35,
    DW_TAG_dwarf_procedure          = 0x36,
    DW_TAG_restrict_type            = 0x37,
    DW_TAG_interface_type           = 0x38,
    DW_TAG_namespace                = 0x39,
    DW_TAG_imported_module          = 0x3a,
    DW_TAG_unspecified_type         = 0x3b,
    DW_TAG_partial_unit             = 0x3c,
    DW_TAG_imported_unit            = 0x3d,
    DW_TAG_mutable_type             = 0x3e,
    DW_TAG_condition                = 0x3f,
    DW_TAG_shared_type              = 0x40,
    DW_TAG_type_unit                = 0x41,
    DW_TAG_rvalue_reference_type    = 0x42,
    # DWARF v5 additions.
    DW_TAG_atomic_type              = 0x47,
    DW_TAG_call_site                = 0x48,
    DW_TAG_call_site_parameter      = 0x49,
    DW_TAG_skeleton_unit            = 0x4a,
    DW_TAG_immutable_type           = 0x4b,

    # Vendor extensions (GNU and Apple) within the lo_user..hi_user range.
    DW_TAG_lo_user                  = 0x4080,
    DW_TAG_GNU_template_template_param  = 0x4106,
    DW_TAG_GNU_template_parameter_pack  = 0x4107,
    DW_TAG_GNU_formal_parameter_pack    = 0x4108,
    DW_TAG_GNU_call_site                = 0x4109,
    DW_TAG_GNU_call_site_parameter      = 0x410a,

    DW_TAG_APPLE_property           = 0x4200,

    DW_TAG_hi_user                  = 0xffff,

    _default_ = Pass,
)
|
||||
|
||||
|
||||
# Abbreviation "has children" flag values (DWARF spec section 7.5.3).
ENUM_DW_CHILDREN = {
    'DW_CHILDREN_no':  0x00,
    'DW_CHILDREN_yes': 0x01,
}
|
||||
|
||||
|
||||
ENUM_DW_AT = dict(
|
||||
DW_AT_null = 0x00,
|
||||
DW_AT_sibling = 0x01,
|
||||
DW_AT_location = 0x02,
|
||||
DW_AT_name = 0x03,
|
||||
DW_AT_fund_type = 0x05,
|
||||
DW_AT_mod_fund_type = 0x06,
|
||||
DW_AT_user_def_type = 0x07,
|
||||
DW_AT_mod_u_d_type = 0x08,
|
||||
DW_AT_ordering = 0x09,
|
||||
DW_AT_subscr_data = 0x0a,
|
||||
DW_AT_byte_size = 0x0b,
|
||||
DW_AT_bit_offset = 0x0c,
|
||||
DW_AT_bit_size = 0x0d,
|
||||
DW_AT_element_list = 0x0f,
|
||||
DW_AT_stmt_list = 0x10,
|
||||
DW_AT_low_pc = 0x11,
|
||||
DW_AT_high_pc = 0x12,
|
||||
DW_AT_language = 0x13,
|
||||
DW_AT_member = 0x14,
|
||||
DW_AT_discr = 0x15,
|
||||
DW_AT_discr_value = 0x16,
|
||||
DW_AT_visibility = 0x17,
|
||||
DW_AT_import = 0x18,
|
||||
DW_AT_string_length = 0x19,
|
||||
DW_AT_common_reference = 0x1a,
|
||||
DW_AT_comp_dir = 0x1b,
|
||||
DW_AT_const_value = 0x1c,
|
||||
DW_AT_containing_type = 0x1d,
|
||||
DW_AT_default_value = 0x1e,
|
||||
DW_AT_inline = 0x20,
|
||||
DW_AT_is_optional = 0x21,
|
||||
DW_AT_lower_bound = 0x22,
|
||||
DW_AT_program = 0x23,
|
||||
DW_AT_private = 0x24,
|
||||
DW_AT_producer = 0x25,
|
||||
DW_AT_protected = 0x26,
|
||||
DW_AT_prototyped = 0x27,
|
||||
DW_AT_public = 0x28,
|
||||
DW_AT_return_addr = 0x2a,
|
||||
DW_AT_start_scope = 0x2c,
|
||||
DW_AT_bit_stride = 0x2e,
|
||||
DW_AT_stride_size = 0x2e,
|
||||
DW_AT_upper_bound = 0x2f,
|
||||
DW_AT_virtual = 0x30,
|
||||
DW_AT_abstract_origin = 0x31,
|
||||
DW_AT_accessibility = 0x32,
|
||||
DW_AT_address_class = 0x33,
|
||||
DW_AT_artificial = 0x34,
|
||||
DW_AT_base_types = 0x35,
|
||||
DW_AT_calling_convention = 0x36,
|
||||
DW_AT_count = 0x37,
|
||||
DW_AT_data_member_location = 0x38,
|
||||
DW_AT_decl_column = 0x39,
|
||||
DW_AT_decl_file = 0x3a,
|
||||
DW_AT_decl_line = 0x3b,
|
||||
DW_AT_declaration = 0x3c,
|
||||
DW_AT_discr_list = 0x3d,
|
||||
DW_AT_encoding = 0x3e,
|
||||
DW_AT_external = 0x3f,
|
||||
DW_AT_frame_base = 0x40,
|
||||
DW_AT_friend = 0x41,
|
||||
DW_AT_identifier_case = 0x42,
|
||||
DW_AT_macro_info = 0x43,
|
||||
DW_AT_namelist_item = 0x44,
|
||||
DW_AT_priority = 0x45,
|
||||
DW_AT_segment = 0x46,
|
||||
DW_AT_specification = 0x47,
|
||||
DW_AT_static_link = 0x48,
|
||||
DW_AT_type = 0x49,
|
||||
DW_AT_use_location = 0x4a,
|
||||
DW_AT_variable_parameter = 0x4b,
|
||||
DW_AT_virtuality = 0x4c,
|
||||
DW_AT_vtable_elem_location = 0x4d,
|
||||
DW_AT_allocated = 0x4e,
|
||||
DW_AT_associated = 0x4f,
|
||||
DW_AT_data_location = 0x50,
|
||||
DW_AT_byte_stride = 0x51,
|
||||
DW_AT_stride = 0x51,
|
||||
DW_AT_entry_pc = 0x52,
|
||||
DW_AT_use_UTF8 = 0x53,
|
||||
DW_AT_extension = 0x54,
|
||||
DW_AT_ranges = 0x55,
|
||||
DW_AT_trampoline = 0x56,
|
||||
DW_AT_call_column = 0x57,
|
||||
DW_AT_call_file = 0x58,
|
||||
DW_AT_call_line = 0x59,
|
||||
DW_AT_description = 0x5a,
|
||||
DW_AT_binary_scale = 0x5b,
|
||||
DW_AT_decimal_scale = 0x5c,
|
||||
DW_AT_small = 0x5d,
|
||||
DW_AT_decimal_sign = 0x5e,
|
||||
DW_AT_digit_count = 0x5f,
|
||||
DW_AT_picture_string = 0x60,
|
||||
DW_AT_mutable = 0x61,
|
||||
DW_AT_threads_scaled = 0x62,
|
||||
DW_AT_explicit = 0x63,
|
||||
DW_AT_object_pointer = 0x64,
|
||||
DW_AT_endianity = 0x65,
|
||||
DW_AT_elemental = 0x66,
|
||||
DW_AT_pure = 0x67,
|
||||
DW_AT_recursive = 0x68,
|
||||
DW_AT_signature = 0x69,
|
||||
DW_AT_main_subprogram = 0x6a,
|
||||
DW_AT_data_bit_offset = 0x6b,
|
||||
DW_AT_const_expr = 0x6c,
|
||||
DW_AT_enum_class = 0x6d,
|
||||
DW_AT_linkage_name = 0x6e,
|
||||
DW_AT_string_length_bit_size = 0x6f,
|
||||
DW_AT_string_length_byte_size = 0x70,
|
||||
DW_AT_rank = 0x71,
|
||||
DW_AT_str_offsets_base = 0x72,
|
||||
DW_AT_addr_base = 0x73,
|
||||
DW_AT_rnglists_base = 0x74,
|
||||
DW_AT_dwo_name = 0x76,
|
||||
DW_AT_reference = 0x77,
|
||||
DW_AT_rvalue_reference = 0x78,
|
||||
DW_AT_macros = 0x79,
|
||||
DW_AT_call_all_calls = 0x7a,
|
||||
DW_AT_call_all_source_calls = 0x7b,
|
||||
DW_AT_call_all_tail_calls = 0x7c,
|
||||
DW_AT_call_return_pc = 0x7d,
|
||||
DW_AT_call_value = 0x7e,
|
||||
DW_AT_call_origin = 0x7f,
|
||||
DW_AT_call_parameter = 0x80,
|
||||
DW_AT_call_pc = 0x81,
|
||||
DW_AT_call_tail_call = 0x82,
|
||||
DW_AT_call_target = 0x83,
|
||||
DW_AT_call_target_clobbered = 0x84,
|
||||
DW_AT_call_data_location = 0x85,
|
||||
DW_AT_call_data_value = 0x86,
|
||||
DW_AT_noreturn = 0x87,
|
||||
DW_AT_alignment = 0x88,
|
||||
DW_AT_export_symbols = 0x89,
|
||||
DW_AT_deleted = 0x8a,
|
||||
DW_AT_defaulted = 0x8b,
|
||||
DW_AT_loclists_base = 0x8c,
|
||||
|
||||
DW_AT_MIPS_fde = 0x2001,
|
||||
DW_AT_MIPS_loop_begin = 0x2002,
|
||||
DW_AT_MIPS_tail_loop_begin = 0x2003,
|
||||
DW_AT_MIPS_epilog_begin = 0x2004,
|
||||
DW_AT_MIPS_loop_unroll_factor = 0x2005,
|
||||
DW_AT_MIPS_software_pipeline_depth = 0x2006,
|
||||
DW_AT_MIPS_linkage_name = 0x2007,
|
||||
DW_AT_MIPS_stride = 0x2008,
|
||||
DW_AT_MIPS_abstract_name = 0x2009,
|
||||
DW_AT_MIPS_clone_origin = 0x200a,
|
||||
DW_AT_MIPS_has_inlines = 0x200b,
|
||||
DW_AT_MIPS_stride_byte = 0x200c,
|
||||
DW_AT_MIPS_stride_elem = 0x200d,
|
||||
DW_AT_MIPS_ptr_dopetype = 0x200e,
|
||||
DW_AT_MIPS_allocatable_dopetype = 0x200f,
|
||||
DW_AT_MIPS_assumed_shape_dopetype = 0x2010,
|
||||
DW_AT_MIPS_assumed_size = 0x2011,
|
||||
|
||||
DW_AT_sf_names = 0x2101,
|
||||
DW_AT_src_info = 0x2102,
|
||||
DW_AT_mac_info = 0x2103,
|
||||
DW_AT_src_coords = 0x2104,
|
||||
DW_AT_body_begin = 0x2105,
|
||||
DW_AT_body_end = 0x2106,
|
||||
DW_AT_GNU_vector = 0x2107,
|
||||
DW_AT_GNU_template_name = 0x2110,
|
||||
DW_AT_GNU_odr_signature = 0x210f,
|
||||
|
||||
DW_AT_GNU_call_site_value = 0x2111,
|
||||
DW_AT_GNU_call_site_data_value = 0x2112,
|
||||
DW_AT_GNU_call_site_target = 0x2113,
|
||||
DW_AT_GNU_call_site_target_clobbered = 0x2114,
|
||||
DW_AT_GNU_tail_call = 0x2115,
|
||||
DW_AT_GNU_all_tail_call_sites = 0x2116,
|
||||
DW_AT_GNU_all_call_sites = 0x2117,
|
||||
DW_AT_GNU_all_source_call_sites = 0x2118,
|
||||
DW_AT_GNU_macros = 0x2119,
|
||||
DW_AT_GNU_deleted = 0x211a,
|
||||
DW_AT_GNU_dwo_id = 0x2131,
|
||||
DW_AT_GNU_pubnames = 0x2134,
|
||||
DW_AT_GNU_pubtypes = 0x2135,
|
||||
DW_AT_GNU_discriminator = 0x2136,
|
||||
|
||||
DW_AT_LLVM_include_path = 0x3e00,
|
||||
DW_AT_LLVM_config_macros = 0x3e01,
|
||||
DW_AT_LLVM_isysroot = 0x3e02,
|
||||
DW_AT_LLVM_tag_offset = 0x3e03,
|
||||
|
||||
DW_AT_APPLE_optimized = 0x3fe1,
|
||||
DW_AT_APPLE_flags = 0x3fe2,
|
||||
DW_AT_APPLE_isa = 0x3fe3,
|
||||
DW_AT_APPLE_block = 0x3fe4,
|
||||
DW_AT_APPLE_major_runtime_vers = 0x3fe5,
|
||||
DW_AT_APPLE_runtime_class = 0x3fe6,
|
||||
DW_AT_APPLE_omit_frame_ptr = 0x3fe7,
|
||||
DW_AT_APPLE_property_name = 0x3fe8,
|
||||
DW_AT_APPLE_property_getter = 0x3fe9,
|
||||
DW_AT_APPLE_property_setter = 0x3fea,
|
||||
DW_AT_APPLE_property_attribute = 0x3feb,
|
||||
DW_AT_APPLE_objc_complete_type = 0x3fec,
|
||||
DW_AT_APPLE_property = 0x3fed,
|
||||
|
||||
_default_ = Pass,
|
||||
)
|
||||
|
||||
|
||||
ENUM_DW_FORM = dict(
|
||||
DW_FORM_null = 0x00,
|
||||
DW_FORM_addr = 0x01,
|
||||
DW_FORM_ref = 0x02,
|
||||
DW_FORM_block2 = 0x03,
|
||||
DW_FORM_block4 = 0x04,
|
||||
DW_FORM_data2 = 0x05,
|
||||
DW_FORM_data4 = 0x06,
|
||||
DW_FORM_data8 = 0x07,
|
||||
DW_FORM_string = 0x08,
|
||||
DW_FORM_block = 0x09,
|
||||
DW_FORM_block1 = 0x0a,
|
||||
DW_FORM_data1 = 0x0b,
|
||||
DW_FORM_flag = 0x0c,
|
||||
DW_FORM_sdata = 0x0d,
|
||||
DW_FORM_strp = 0x0e,
|
||||
DW_FORM_udata = 0x0f,
|
||||
DW_FORM_ref_addr = 0x10,
|
||||
DW_FORM_ref1 = 0x11,
|
||||
DW_FORM_ref2 = 0x12,
|
||||
DW_FORM_ref4 = 0x13,
|
||||
DW_FORM_ref8 = 0x14,
|
||||
DW_FORM_ref_udata = 0x15,
|
||||
DW_FORM_indirect = 0x16,
|
||||
DW_FORM_sec_offset = 0x17,
|
||||
DW_FORM_exprloc = 0x18,
|
||||
DW_FORM_flag_present = 0x19,
|
||||
DW_FORM_strx = 0x1a,
|
||||
DW_FORM_addrx = 0x1b,
|
||||
DW_FORM_ref_sup4 = 0x1c,
|
||||
DW_FORM_strp_sup = 0x1d,
|
||||
DW_FORM_data16 = 0x1e,
|
||||
DW_FORM_line_strp = 0x1f,
|
||||
DW_FORM_ref_sig8 = 0x20,
|
||||
DW_FORM_implicit_const = 0x21,
|
||||
DW_FORM_loclistx = 0x22,
|
||||
DW_FORM_rnglistx = 0x23,
|
||||
DW_FORM_ref_sup8 = 0x24,
|
||||
DW_FORM_strx1 = 0x25,
|
||||
DW_FORM_strx2 = 0x26,
|
||||
DW_FORM_strx3 = 0x27,
|
||||
DW_FORM_strx4 = 0x28,
|
||||
DW_FORM_addrx1 = 0x29,
|
||||
DW_FORM_addrx2 = 0x2a,
|
||||
DW_FORM_addrx3 = 0x2b,
|
||||
DW_FORM_addrx4 = 0x2c,
|
||||
|
||||
DW_FORM_GNU_addr_index = 0x1f01,
|
||||
DW_FORM_GNU_str_index = 0x1f02,
|
||||
DW_FORM_GNU_ref_alt = 0x1f20,
|
||||
DW_FORM_GNU_strp_alt = 0x1f21,
|
||||
_default_ = Pass,
|
||||
)
|
||||
|
||||
# Inverse mapping for ENUM_DW_FORM
|
||||
DW_FORM_raw2name = dict((v, k) for k, v in iteritems(ENUM_DW_FORM))
|
||||
|
||||
# See http://www.airs.com/blog/archives/460
|
||||
DW_EH_encoding_flags = dict(
|
||||
DW_EH_PE_absptr = 0x00,
|
||||
DW_EH_PE_uleb128 = 0x01,
|
||||
DW_EH_PE_udata2 = 0x02,
|
||||
DW_EH_PE_udata4 = 0x03,
|
||||
DW_EH_PE_udata8 = 0x04,
|
||||
|
||||
DW_EH_PE_signed = 0x08,
|
||||
DW_EH_PE_sleb128 = 0x09,
|
||||
DW_EH_PE_sdata2 = 0x0a,
|
||||
DW_EH_PE_sdata4 = 0x0b,
|
||||
DW_EH_PE_sdata8 = 0x0c,
|
||||
|
||||
DW_EH_PE_pcrel = 0x10,
|
||||
DW_EH_PE_textrel = 0x20,
|
||||
DW_EH_PE_datarel = 0x30,
|
||||
DW_EH_PE_funcrel = 0x40,
|
||||
DW_EH_PE_aligned = 0x50,
|
||||
DW_EH_PE_indirect = 0x80,
|
||||
|
||||
DW_EH_PE_omit = 0xff,
|
||||
)
|
||||
@@ -1,262 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/lineprogram.py
|
||||
#
|
||||
# DWARF line number program
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
import os
|
||||
import copy
|
||||
from collections import namedtuple
|
||||
|
||||
from ..common.utils import struct_parse, dwarf_assert
|
||||
from .constants import *
|
||||
|
||||
|
||||
# LineProgramEntry - an entry in the line program.
|
||||
# A line program is a sequence of encoded entries. Some of these entries add a
|
||||
# new LineState (mapping between line and address), and some don't.
|
||||
#
|
||||
# command:
|
||||
# The command/opcode - always numeric. For standard commands - it's the opcode
|
||||
# that can be matched with one of the DW_LNS_* constants. For extended commands
|
||||
# it's the extended opcode that can be matched with one of the DW_LNE_*
|
||||
# constants. For special commands, it's the opcode itself.
|
||||
#
|
||||
# args:
|
||||
# A list of decoded arguments of the command.
|
||||
#
|
||||
# is_extended:
|
||||
# Since extended commands are encoded by a zero followed by an extended
|
||||
# opcode, and these extended opcodes overlap with other opcodes, this
|
||||
# flag is needed to mark that the command has an extended opcode.
|
||||
#
|
||||
# state:
|
||||
# For commands that add a new state, it's the relevant LineState object.
|
||||
# For commands that don't add a new state, it's None.
|
||||
#
|
||||
LineProgramEntry = namedtuple(
|
||||
'LineProgramEntry', 'command is_extended args state')
|
||||
|
||||
|
||||
class LineState(object):
|
||||
""" Represents a line program state (or a "row" in the matrix
|
||||
describing debug location information for addresses).
|
||||
The instance variables of this class are the "state machine registers"
|
||||
described in section 6.2.2 of DWARFv3
|
||||
"""
|
||||
def __init__(self, default_is_stmt):
|
||||
self.address = 0
|
||||
self.file = 1
|
||||
self.line = 1
|
||||
self.column = 0
|
||||
self.op_index = 0
|
||||
self.is_stmt = default_is_stmt
|
||||
self.basic_block = False
|
||||
self.end_sequence = False
|
||||
self.prologue_end = False
|
||||
self.epilogue_begin = False
|
||||
self.isa = 0
|
||||
self.discriminator = 0
|
||||
|
||||
def __repr__(self):
|
||||
a = ['<LineState %x:' % id(self)]
|
||||
a.append(' address = 0x%x' % self.address)
|
||||
for attr in ('file', 'line', 'column', 'is_stmt', 'basic_block',
|
||||
'end_sequence', 'prologue_end', 'epilogue_begin', 'isa',
|
||||
'discriminator'):
|
||||
a.append(' %s = %s' % (attr, getattr(self, attr)))
|
||||
return '\n'.join(a) + '>\n'
|
||||
|
||||
|
||||
class LineProgram(object):
|
||||
""" Builds a "line table", which is essentially the matrix described
|
||||
in section 6.2 of DWARFv3. It's a list of LineState objects,
|
||||
sorted by increasing address, so it can be used to obtain the
|
||||
state information for each address.
|
||||
"""
|
||||
def __init__(self, header, stream, structs,
|
||||
program_start_offset, program_end_offset):
|
||||
"""
|
||||
header:
|
||||
The header of this line program. Note: LineProgram may modify
|
||||
its header by appending file entries if DW_LNE_define_file
|
||||
instructions are encountered.
|
||||
|
||||
stream:
|
||||
The stream this program can be read from.
|
||||
|
||||
structs:
|
||||
A DWARFStructs instance suitable for this line program
|
||||
|
||||
program_{start|end}_offset:
|
||||
Offset in the debug_line section stream where this program
|
||||
starts (the actual program, after the header), and where it
|
||||
ends.
|
||||
The actual range includes start but not end: [start, end - 1]
|
||||
"""
|
||||
self.stream = stream
|
||||
self.header = header
|
||||
self.structs = structs
|
||||
self.program_start_offset = program_start_offset
|
||||
self.program_end_offset = program_end_offset
|
||||
self._decoded_entries = None
|
||||
|
||||
def get_entries(self):
|
||||
""" Get the decoded entries for this line program. Return a list of
|
||||
LineProgramEntry objects.
|
||||
Note that this contains more information than absolutely required
|
||||
for the line table. The line table can be easily extracted from
|
||||
the list of entries by looking only at entries with non-None
|
||||
state. The extra information is mainly for the purposes of display
|
||||
with readelf and debugging.
|
||||
"""
|
||||
if self._decoded_entries is None:
|
||||
self._decoded_entries = self._decode_line_program()
|
||||
return self._decoded_entries
|
||||
|
||||
#------ PRIVATE ------#
|
||||
|
||||
def __getitem__(self, name):
|
||||
""" Implement dict-like access to header entries
|
||||
"""
|
||||
return self.header[name]
|
||||
|
||||
def _decode_line_program(self):
|
||||
entries = []
|
||||
state = LineState(self.header['default_is_stmt'])
|
||||
|
||||
def add_entry_new_state(cmd, args, is_extended=False):
|
||||
# Add an entry that sets a new state.
|
||||
# After adding, clear some state registers.
|
||||
entries.append(LineProgramEntry(
|
||||
cmd, is_extended, args, copy.copy(state)))
|
||||
state.discriminator = 0
|
||||
state.basic_block = False
|
||||
state.prologue_end = False
|
||||
state.epilogue_begin = False
|
||||
|
||||
def add_entry_old_state(cmd, args, is_extended=False):
|
||||
# Add an entry that doesn't visibly set a new state
|
||||
entries.append(LineProgramEntry(cmd, is_extended, args, None))
|
||||
|
||||
offset = self.program_start_offset
|
||||
while offset < self.program_end_offset:
|
||||
opcode = struct_parse(
|
||||
self.structs.Dwarf_uint8(''),
|
||||
self.stream,
|
||||
offset)
|
||||
|
||||
# As an exercise in avoiding premature optimization, if...elif
|
||||
# chains are used here for standard and extended opcodes instead
|
||||
# of dispatch tables. This keeps the code much cleaner. Besides,
|
||||
# the majority of instructions in a typical program are special
|
||||
# opcodes anyway.
|
||||
if opcode >= self.header['opcode_base']:
|
||||
# Special opcode (follow the recipe in 6.2.5.1)
|
||||
maximum_operations_per_instruction = self['maximum_operations_per_instruction']
|
||||
adjusted_opcode = opcode - self['opcode_base']
|
||||
operation_advance = adjusted_opcode // self['line_range']
|
||||
address_addend = (
|
||||
self['minimum_instruction_length'] *
|
||||
((state.op_index + operation_advance) //
|
||||
maximum_operations_per_instruction))
|
||||
state.address += address_addend
|
||||
state.op_index = (state.op_index + operation_advance) % maximum_operations_per_instruction
|
||||
line_addend = self['line_base'] + (adjusted_opcode % self['line_range'])
|
||||
state.line += line_addend
|
||||
add_entry_new_state(
|
||||
opcode, [line_addend, address_addend, state.op_index])
|
||||
elif opcode == 0:
|
||||
# Extended opcode: start with a zero byte, followed by
|
||||
# instruction size and the instruction itself.
|
||||
inst_len = struct_parse(self.structs.Dwarf_uleb128(''),
|
||||
self.stream)
|
||||
ex_opcode = struct_parse(self.structs.Dwarf_uint8(''),
|
||||
self.stream)
|
||||
|
||||
if ex_opcode == DW_LNE_end_sequence:
|
||||
state.end_sequence = True
|
||||
state.is_stmt = 0
|
||||
add_entry_new_state(ex_opcode, [], is_extended=True)
|
||||
# reset state
|
||||
state = LineState(self.header['default_is_stmt'])
|
||||
elif ex_opcode == DW_LNE_set_address:
|
||||
operand = struct_parse(self.structs.Dwarf_target_addr(''),
|
||||
self.stream)
|
||||
state.address = operand
|
||||
add_entry_old_state(ex_opcode, [operand], is_extended=True)
|
||||
elif ex_opcode == DW_LNE_define_file:
|
||||
operand = struct_parse(
|
||||
self.structs.Dwarf_lineprog_file_entry, self.stream)
|
||||
self['file_entry'].append(operand)
|
||||
add_entry_old_state(ex_opcode, [operand], is_extended=True)
|
||||
elif ex_opcode == DW_LNE_set_discriminator:
|
||||
operand = struct_parse(self.structs.Dwarf_uleb128(''),
|
||||
self.stream)
|
||||
state.discriminator = operand
|
||||
else:
|
||||
# Unknown, but need to roll forward the stream because the
|
||||
# length is specified. Seek forward inst_len - 1 because
|
||||
# we've already read the extended opcode, which takes part
|
||||
# in the length.
|
||||
self.stream.seek(inst_len - 1, os.SEEK_CUR)
|
||||
else: # 0 < opcode < opcode_base
|
||||
# Standard opcode
|
||||
if opcode == DW_LNS_copy:
|
||||
add_entry_new_state(opcode, [])
|
||||
elif opcode == DW_LNS_advance_pc:
|
||||
operand = struct_parse(self.structs.Dwarf_uleb128(''),
|
||||
self.stream)
|
||||
address_addend = (
|
||||
operand * self.header['minimum_instruction_length'])
|
||||
state.address += address_addend
|
||||
add_entry_old_state(opcode, [address_addend])
|
||||
elif opcode == DW_LNS_advance_line:
|
||||
operand = struct_parse(self.structs.Dwarf_sleb128(''),
|
||||
self.stream)
|
||||
state.line += operand
|
||||
elif opcode == DW_LNS_set_file:
|
||||
operand = struct_parse(self.structs.Dwarf_uleb128(''),
|
||||
self.stream)
|
||||
state.file = operand
|
||||
add_entry_old_state(opcode, [operand])
|
||||
elif opcode == DW_LNS_set_column:
|
||||
operand = struct_parse(self.structs.Dwarf_uleb128(''),
|
||||
self.stream)
|
||||
state.column = operand
|
||||
add_entry_old_state(opcode, [operand])
|
||||
elif opcode == DW_LNS_negate_stmt:
|
||||
state.is_stmt = not state.is_stmt
|
||||
add_entry_old_state(opcode, [])
|
||||
elif opcode == DW_LNS_set_basic_block:
|
||||
state.basic_block = True
|
||||
add_entry_old_state(opcode, [])
|
||||
elif opcode == DW_LNS_const_add_pc:
|
||||
adjusted_opcode = 255 - self['opcode_base']
|
||||
address_addend = ((adjusted_opcode // self['line_range']) *
|
||||
self['minimum_instruction_length'])
|
||||
state.address += address_addend
|
||||
add_entry_old_state(opcode, [address_addend])
|
||||
elif opcode == DW_LNS_fixed_advance_pc:
|
||||
operand = struct_parse(self.structs.Dwarf_uint16(''),
|
||||
self.stream)
|
||||
state.address += operand
|
||||
add_entry_old_state(opcode, [operand])
|
||||
elif opcode == DW_LNS_set_prologue_end:
|
||||
state.prologue_end = True
|
||||
add_entry_old_state(opcode, [])
|
||||
elif opcode == DW_LNS_set_epilogue_begin:
|
||||
state.epilogue_begin = True
|
||||
add_entry_old_state(opcode, [])
|
||||
elif opcode == DW_LNS_set_isa:
|
||||
operand = struct_parse(self.structs.Dwarf_uleb128(''),
|
||||
self.stream)
|
||||
state.isa = operand
|
||||
add_entry_old_state(opcode, [operand])
|
||||
else:
|
||||
dwarf_assert(False, 'Invalid standard line program opcode: %s' % (
|
||||
opcode,))
|
||||
offset = self.stream.tell()
|
||||
return entries
|
||||
@@ -1,130 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/locationlists.py
|
||||
#
|
||||
# DWARF location lists section decoding (.debug_loc)
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
import os
|
||||
from collections import namedtuple
|
||||
|
||||
from ..common.utils import struct_parse
|
||||
|
||||
LocationExpr = namedtuple('LocationExpr', 'loc_expr')
|
||||
LocationEntry = namedtuple('LocationEntry', 'entry_offset begin_offset end_offset loc_expr')
|
||||
BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address')
|
||||
|
||||
class LocationLists(object):
|
||||
""" A single location list is a Python list consisting of LocationEntry or
|
||||
BaseAddressEntry objects.
|
||||
"""
|
||||
def __init__(self, stream, structs):
|
||||
self.stream = stream
|
||||
self.structs = structs
|
||||
self._max_addr = 2 ** (self.structs.address_size * 8) - 1
|
||||
|
||||
def get_location_list_at_offset(self, offset):
|
||||
""" Get a location list at the given offset in the section.
|
||||
"""
|
||||
self.stream.seek(offset, os.SEEK_SET)
|
||||
return self._parse_location_list_from_stream()
|
||||
|
||||
def iter_location_lists(self):
|
||||
""" Yield all location lists found in the section.
|
||||
"""
|
||||
# Just call _parse_location_list_from_stream until the stream ends
|
||||
self.stream.seek(0, os.SEEK_END)
|
||||
endpos = self.stream.tell()
|
||||
|
||||
self.stream.seek(0, os.SEEK_SET)
|
||||
while self.stream.tell() < endpos:
|
||||
yield self._parse_location_list_from_stream()
|
||||
|
||||
#------ PRIVATE ------#
|
||||
|
||||
def _parse_location_list_from_stream(self):
|
||||
lst = []
|
||||
while True:
|
||||
entry_offset = self.stream.tell()
|
||||
begin_offset = struct_parse(
|
||||
self.structs.Dwarf_target_addr(''), self.stream)
|
||||
end_offset = struct_parse(
|
||||
self.structs.Dwarf_target_addr(''), self.stream)
|
||||
if begin_offset == 0 and end_offset == 0:
|
||||
# End of list - we're done.
|
||||
break
|
||||
elif begin_offset == self._max_addr:
|
||||
# Base address selection entry
|
||||
lst.append(BaseAddressEntry(entry_offset=entry_offset, base_address=end_offset))
|
||||
else:
|
||||
# Location list entry
|
||||
expr_len = struct_parse(
|
||||
self.structs.Dwarf_uint16(''), self.stream)
|
||||
loc_expr = [struct_parse(self.structs.Dwarf_uint8(''),
|
||||
self.stream)
|
||||
for i in range(expr_len)]
|
||||
lst.append(LocationEntry(
|
||||
entry_offset=entry_offset,
|
||||
begin_offset=begin_offset,
|
||||
end_offset=end_offset,
|
||||
loc_expr=loc_expr))
|
||||
return lst
|
||||
|
||||
class LocationParser(object):
|
||||
""" A parser for location information in DIEs.
|
||||
Handles both location information contained within the attribute
|
||||
itself (represented as a LocationExpr object) and references to
|
||||
location lists in the .debug_loc section (represented as a
|
||||
list).
|
||||
"""
|
||||
def __init__(self, location_lists):
|
||||
self.location_lists = location_lists
|
||||
|
||||
@staticmethod
|
||||
def attribute_has_location(attr, dwarf_version):
|
||||
""" Checks if a DIE attribute contains location information.
|
||||
"""
|
||||
return (LocationParser._attribute_is_loclistptr_class(attr) and
|
||||
(LocationParser._attribute_has_loc_expr(attr, dwarf_version) or
|
||||
LocationParser._attribute_has_loc_list(attr, dwarf_version)))
|
||||
|
||||
def parse_from_attribute(self, attr, dwarf_version):
|
||||
""" Parses a DIE attribute and returns either a LocationExpr or
|
||||
a list.
|
||||
"""
|
||||
if self.attribute_has_location(attr, dwarf_version):
|
||||
if self._attribute_has_loc_expr(attr, dwarf_version):
|
||||
return LocationExpr(attr.value)
|
||||
elif self._attribute_has_loc_list(attr, dwarf_version):
|
||||
return self.location_lists.get_location_list_at_offset(
|
||||
attr.value)
|
||||
else:
|
||||
raise ValueError("Attribute does not have location information")
|
||||
|
||||
#------ PRIVATE ------#
|
||||
|
||||
@staticmethod
|
||||
def _attribute_has_loc_expr(attr, dwarf_version):
|
||||
return ((dwarf_version < 4 and attr.form.startswith('DW_FORM_block') and
|
||||
not attr.name == 'DW_AT_const_value') or
|
||||
attr.form == 'DW_FORM_exprloc')
|
||||
|
||||
@staticmethod
|
||||
def _attribute_has_loc_list(attr, dwarf_version):
|
||||
return ((dwarf_version < 4 and
|
||||
attr.form in ('DW_FORM_data4', 'DW_FORM_data8') and
|
||||
not attr.name == 'DW_AT_const_value') or
|
||||
attr.form == 'DW_FORM_sec_offset')
|
||||
|
||||
@staticmethod
|
||||
def _attribute_is_loclistptr_class(attr):
|
||||
return (attr.name in ( 'DW_AT_location', 'DW_AT_string_length',
|
||||
'DW_AT_const_value', 'DW_AT_return_addr',
|
||||
'DW_AT_data_member_location',
|
||||
'DW_AT_frame_base', 'DW_AT_segment',
|
||||
'DW_AT_static_link', 'DW_AT_use_location',
|
||||
'DW_AT_vtable_elem_location',
|
||||
'DW_AT_GNU_call_site_value',
|
||||
'DW_AT_GNU_call_site_target',
|
||||
'DW_AT_GNU_call_site_data_value'))
|
||||
@@ -1,198 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/namelut.py
|
||||
#
|
||||
# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames)
|
||||
#
|
||||
# Vijay Ramasami (rvijayc@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
import os
|
||||
import collections
|
||||
from collections import OrderedDict
|
||||
from ..common.utils import struct_parse
|
||||
from ..common.py3compat import Mapping
|
||||
from bisect import bisect_right
|
||||
import math
|
||||
from ..construct import CString, Struct, If
|
||||
|
||||
NameLUTEntry = collections.namedtuple('NameLUTEntry', 'cu_ofs die_ofs')
|
||||
|
||||
class NameLUT(Mapping):
|
||||
"""
|
||||
A "Name LUT" holds any of the tables specified by .debug_pubtypes or
|
||||
.debug_pubnames sections. This is basically a dictionary where the key is
|
||||
the symbol name (either a public variable, function or a type), and the
|
||||
value is the tuple (cu_offset, die_offset) corresponding to the variable.
|
||||
The die_offset is an absolute offset (meaning, it can be used to search the
|
||||
CU by iterating until a match is obtained).
|
||||
|
||||
An ordered dictionary is used to preserve the CU order (i.e, items are
|
||||
stored on a per-CU basis (as it was originally in the .debug_* section).
|
||||
|
||||
Usage:
|
||||
|
||||
The NameLUT walks and talks like a dictionary and hence it can be used as
|
||||
such. Some examples below:
|
||||
|
||||
# get the pubnames (a NameLUT from DWARF info).
|
||||
pubnames = dwarf_info.get_pubnames()
|
||||
|
||||
# lookup a variable.
|
||||
entry1 = pubnames["var_name1"]
|
||||
entry2 = pubnames.get("var_name2", default=<default_var>)
|
||||
print(entry2.cu_ofs)
|
||||
...
|
||||
|
||||
# iterate over items.
|
||||
for (name, entry) in pubnames.items():
|
||||
# do stuff with name, entry.cu_ofs, entry.die_ofs
|
||||
|
||||
# iterate over items on a per-CU basis.
|
||||
import itertools
|
||||
for cu_ofs, item_list in itertools.groupby(pubnames.items(),
|
||||
key = lambda x: x[1].cu_ofs):
|
||||
# items are now grouped by cu_ofs.
|
||||
# item_list is an iterator yeilding NameLUTEntry'ies belonging
|
||||
# to cu_ofs.
|
||||
# We can parse the CU at cu_offset and use the parsed CU results
|
||||
# to parse the pubname DIEs in the CU listed by item_list.
|
||||
for item in item_list:
|
||||
# work with item which is part of the CU with cu_ofs.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, stream, size, structs):
|
||||
|
||||
self._stream = stream
|
||||
self._size = size
|
||||
self._structs = structs
|
||||
# entries are lazily loaded on demand.
|
||||
self._entries = None
|
||||
# CU headers (for readelf).
|
||||
self._cu_headers = None
|
||||
|
||||
def get_entries(self):
|
||||
"""
|
||||
Returns the parsed NameLUT entries. The returned object is a dictionary
|
||||
with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as
|
||||
the value.
|
||||
|
||||
This is useful when dealing with very large ELF files with millions of
|
||||
entries. The returned entries can be pickled to a file and restored by
|
||||
calling set_entries on subsequent loads.
|
||||
"""
|
||||
if self._entries is None:
|
||||
self._entries, self._cu_headers = self._get_entries()
|
||||
return self._entries
|
||||
|
||||
def set_entries(self, entries, cu_headers):
|
||||
"""
|
||||
Set the NameLUT entries from an external source. The input is a
|
||||
dictionary with the symbol name as the key and NameLUTEntry(cu_ofs,
|
||||
die_ofs) as the value.
|
||||
|
||||
This option is useful when dealing with very large ELF files with
|
||||
millions of entries. The entries can be parsed once and pickled to a
|
||||
file and can be restored via this function on subsequent loads.
|
||||
"""
|
||||
self._entries = entries
|
||||
self._cu_headers = cu_headers
|
||||
|
||||
def __len__(self):
|
||||
"""
|
||||
Returns the number of entries in the NameLUT.
|
||||
"""
|
||||
if self._entries is None:
|
||||
self._entries, self._cu_headers = self._get_entries()
|
||||
return len(self._entries)
|
||||
|
||||
def __getitem__(self, name):
|
||||
"""
|
||||
Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds
|
||||
to the given symbol name.
|
||||
"""
|
||||
if self._entries is None:
|
||||
self._entries, self._cu_headers = self._get_entries()
|
||||
return self._entries.get(name)
|
||||
|
||||
def __iter__(self):
|
||||
"""
|
||||
Returns an iterator to the NameLUT dictionary.
|
||||
"""
|
||||
if self._entries is None:
|
||||
self._entries, self._cu_headers = self._get_entries()
|
||||
return iter(self._entries)
|
||||
|
||||
def items(self):
|
||||
"""
|
||||
Returns the NameLUT dictionary items.
|
||||
"""
|
||||
if self._entries is None:
|
||||
self._entries, self._cu_headers = self._get_entries()
|
||||
return self._entries.items()
|
||||
|
||||
def get(self, name, default=None):
|
||||
"""
|
||||
Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or
|
||||
None if the symbol does not exist in the corresponding section.
|
||||
"""
|
||||
if self._entries is None:
|
||||
self._entries, self._cu_headers = self._get_entries()
|
||||
return self._entries.get(name, default)
|
||||
|
||||
def get_cu_headers(self):
|
||||
"""
|
||||
Returns all CU headers. Mainly required for readelf.
|
||||
"""
|
||||
if self._cu_headers is None:
|
||||
self._entries, self._cu_headers = self._get_entries()
|
||||
|
||||
return self._cu_headers
|
||||
|
||||
def _get_entries(self):
|
||||
"""
|
||||
Parse the (name, cu_ofs, die_ofs) information from this section and
|
||||
store as a dictionary.
|
||||
"""
|
||||
|
||||
self._stream.seek(0)
|
||||
entries = OrderedDict()
|
||||
cu_headers = []
|
||||
offset = 0
|
||||
# According to 6.1.1. of DWARFv4, each set of names is terminated by
|
||||
# an offset field containing zero (and no following string). Because
|
||||
# of sequential parsing, every next entry may be that terminator.
|
||||
# So, field "name" is conditional.
|
||||
entry_struct = Struct("Dwarf_offset_name_pair",
|
||||
self._structs.Dwarf_offset('die_ofs'),
|
||||
If(lambda ctx: ctx['die_ofs'], CString('name')))
|
||||
|
||||
# each run of this loop will fetch one CU worth of entries.
|
||||
while offset < self._size:
|
||||
|
||||
# read the header for this CU.
|
||||
namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header,
|
||||
self._stream, offset)
|
||||
cu_headers.append(namelut_hdr)
|
||||
# compute the next offset.
|
||||
offset = (offset + namelut_hdr.unit_length +
|
||||
self._structs.initial_length_field_size())
|
||||
|
||||
# before inner loop, latch data that will be used in the inner
|
||||
# loop to avoid attribute access and other computation.
|
||||
hdr_cu_ofs = namelut_hdr.debug_info_offset
|
||||
|
||||
# while die_ofs of the entry is non-zero (which indicates the end) ...
|
||||
while True:
|
||||
entry = struct_parse(entry_struct, self._stream)
|
||||
|
||||
# if it is zero, this is the terminating record.
|
||||
if entry.die_ofs == 0:
|
||||
break
|
||||
# add this entry to the look-up dictionary.
|
||||
entries[entry.name.decode('utf-8')] = NameLUTEntry(
|
||||
cu_ofs = hdr_cu_ofs,
|
||||
die_ofs = hdr_cu_ofs + entry.die_ofs)
|
||||
|
||||
# return the entries parsed so far.
|
||||
return (entries, cu_headers)
|
||||
@@ -1,65 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/ranges.py
|
||||
#
|
||||
# DWARF ranges section decoding (.debug_ranges)
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
import os
|
||||
from collections import namedtuple
|
||||
|
||||
from ..common.utils import struct_parse
|
||||
|
||||
|
||||
RangeEntry = namedtuple('RangeEntry', 'begin_offset end_offset')
|
||||
BaseAddressEntry = namedtuple('BaseAddressEntry', 'base_address')
|
||||
|
||||
|
||||
class RangeLists(object):
|
||||
""" A single range list is a Python list consisting of RangeEntry or
|
||||
BaseAddressEntry objects.
|
||||
"""
|
||||
def __init__(self, stream, structs):
|
||||
self.stream = stream
|
||||
self.structs = structs
|
||||
self._max_addr = 2 ** (self.structs.address_size * 8) - 1
|
||||
|
||||
def get_range_list_at_offset(self, offset):
|
||||
""" Get a range list at the given offset in the section.
|
||||
"""
|
||||
self.stream.seek(offset, os.SEEK_SET)
|
||||
return self._parse_range_list_from_stream()
|
||||
|
||||
def iter_range_lists(self):
|
||||
""" Yield all range lists found in the section.
|
||||
"""
|
||||
# Just call _parse_range_list_from_stream until the stream ends
|
||||
self.stream.seek(0, os.SEEK_END)
|
||||
endpos = self.stream.tell()
|
||||
|
||||
self.stream.seek(0, os.SEEK_SET)
|
||||
while self.stream.tell() < endpos:
|
||||
yield self._parse_range_list_from_stream()
|
||||
|
||||
#------ PRIVATE ------#
|
||||
|
||||
def _parse_range_list_from_stream(self):
|
||||
lst = []
|
||||
while True:
|
||||
begin_offset = struct_parse(
|
||||
self.structs.Dwarf_target_addr(''), self.stream)
|
||||
end_offset = struct_parse(
|
||||
self.structs.Dwarf_target_addr(''), self.stream)
|
||||
if begin_offset == 0 and end_offset == 0:
|
||||
# End of list - we're done.
|
||||
break
|
||||
elif begin_offset == self._max_addr:
|
||||
# Base address selection entry
|
||||
lst.append(BaseAddressEntry(base_address=end_offset))
|
||||
else:
|
||||
# Range entry
|
||||
lst.append(RangeEntry(
|
||||
begin_offset=begin_offset,
|
||||
end_offset=end_offset))
|
||||
return lst
|
||||
@@ -1,354 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: dwarf/structs.py
|
||||
#
|
||||
# Encapsulation of Construct structs for parsing DWARF, adjusted for correct
|
||||
# endianness and word-size.
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from ..construct import (
|
||||
UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
|
||||
SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
|
||||
Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
|
||||
CString, Embed, StaticField, IfThenElse
|
||||
)
|
||||
from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
|
||||
from .enums import *
|
||||
|
||||
|
||||
class DWARFStructs(object):
|
||||
""" Exposes Construct structs suitable for parsing information from DWARF
|
||||
sections. Each compile unit in DWARF info can have its own structs
|
||||
object. Keep in mind that these structs have to be given a name (by
|
||||
calling them with a name) before being used for parsing (like other
|
||||
Construct structs). Those that should be used without a name are marked
|
||||
by (+).
|
||||
|
||||
Accessible attributes (mostly as described in chapter 7 of the DWARF
|
||||
spec v3):
|
||||
|
||||
Dwarf_[u]int{8,16,32,64):
|
||||
Data chunks of the common sizes
|
||||
|
||||
Dwarf_offset:
|
||||
32-bit or 64-bit word, depending on dwarf_format
|
||||
|
||||
Dwarf_length:
|
||||
32-bit or 64-bit word, depending on dwarf_format
|
||||
|
||||
Dwarf_target_addr:
|
||||
32-bit or 64-bit word, depending on address size
|
||||
|
||||
Dwarf_initial_length:
|
||||
"Initial length field" encoding
|
||||
section 7.4
|
||||
|
||||
Dwarf_{u,s}leb128:
|
||||
ULEB128 and SLEB128 variable-length encoding
|
||||
|
||||
Dwarf_CU_header (+):
|
||||
Compilation unit header
|
||||
|
||||
Dwarf_abbrev_declaration (+):
|
||||
Abbreviation table declaration - doesn't include the initial
|
||||
code, only the contents.
|
||||
|
||||
Dwarf_dw_form (+):
|
||||
A dictionary mapping 'DW_FORM_*' keys into construct Structs
|
||||
that parse such forms. These Structs have already been given
|
||||
dummy names.
|
||||
|
||||
Dwarf_lineprog_header (+):
|
||||
Line program header
|
||||
|
||||
Dwarf_lineprog_file_entry (+):
|
||||
A single file entry in a line program header or instruction
|
||||
|
||||
Dwarf_CIE_header (+):
|
||||
A call-frame CIE
|
||||
|
||||
Dwarf_FDE_header (+):
|
||||
A call-frame FDE
|
||||
|
||||
See also the documentation of public methods.
|
||||
"""
|
||||
def __init__(self,
|
||||
little_endian, dwarf_format, address_size, dwarf_version=2):
|
||||
""" dwarf_version:
|
||||
Numeric DWARF version
|
||||
|
||||
little_endian:
|
||||
True if the file is little endian, False if big
|
||||
|
||||
dwarf_format:
|
||||
DWARF Format: 32 or 64-bit (see spec section 7.4)
|
||||
|
||||
address_size:
|
||||
Target machine address size, in bytes (4 or 8). (See spec
|
||||
section 7.5.1)
|
||||
"""
|
||||
assert dwarf_format == 32 or dwarf_format == 64
|
||||
assert address_size == 8 or address_size == 4, str(address_size)
|
||||
self.little_endian = little_endian
|
||||
self.dwarf_format = dwarf_format
|
||||
self.address_size = address_size
|
||||
self.dwarf_version = dwarf_version
|
||||
self._create_structs()
|
||||
|
||||
def initial_length_field_size(self):
|
||||
""" Size of an initial length field.
|
||||
"""
|
||||
return 4 if self.dwarf_format == 32 else 12
|
||||
|
||||
def _create_structs(self):
|
||||
if self.little_endian:
|
||||
self.Dwarf_uint8 = ULInt8
|
||||
self.Dwarf_uint16 = ULInt16
|
||||
self.Dwarf_uint32 = ULInt32
|
||||
self.Dwarf_uint64 = ULInt64
|
||||
self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
|
||||
self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64
|
||||
self.Dwarf_target_addr = (
|
||||
ULInt32 if self.address_size == 4 else ULInt64)
|
||||
self.Dwarf_int8 = SLInt8
|
||||
self.Dwarf_int16 = SLInt16
|
||||
self.Dwarf_int32 = SLInt32
|
||||
self.Dwarf_int64 = SLInt64
|
||||
else:
|
||||
self.Dwarf_uint8 = UBInt8
|
||||
self.Dwarf_uint16 = UBInt16
|
||||
self.Dwarf_uint32 = UBInt32
|
||||
self.Dwarf_uint64 = UBInt64
|
||||
self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
|
||||
self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64
|
||||
self.Dwarf_target_addr = (
|
||||
UBInt32 if self.address_size == 4 else UBInt64)
|
||||
self.Dwarf_int8 = SBInt8
|
||||
self.Dwarf_int16 = SBInt16
|
||||
self.Dwarf_int32 = SBInt32
|
||||
self.Dwarf_int64 = SBInt64
|
||||
|
||||
self._create_initial_length()
|
||||
self._create_leb128()
|
||||
self._create_cu_header()
|
||||
self._create_abbrev_declaration()
|
||||
self._create_dw_form()
|
||||
self._create_lineprog_header()
|
||||
self._create_callframe_entry_headers()
|
||||
self._create_aranges_header()
|
||||
self._create_nameLUT_header()
|
||||
self._create_string_offsets_table_header()
|
||||
self._create_address_table_header()
|
||||
|
||||
def _create_initial_length(self):
|
||||
def _InitialLength(name):
|
||||
# Adapts a Struct that parses forward a full initial length field.
|
||||
# Only if the first word is the continuation value, the second
|
||||
# word is parsed from the stream.
|
||||
return _InitialLengthAdapter(
|
||||
Struct(name,
|
||||
self.Dwarf_uint32('first'),
|
||||
If(lambda ctx: ctx.first == 0xFFFFFFFF,
|
||||
self.Dwarf_uint64('second'),
|
||||
elsevalue=None)))
|
||||
self.Dwarf_initial_length = _InitialLength
|
||||
|
||||
def _create_leb128(self):
|
||||
self.Dwarf_uleb128 = ULEB128
|
||||
self.Dwarf_sleb128 = SLEB128
|
||||
|
||||
def _create_cu_header(self):
|
||||
self.Dwarf_CU_header = Struct('Dwarf_CU_header',
|
||||
self.Dwarf_initial_length('unit_length'),
|
||||
self.Dwarf_uint16('version'),
|
||||
# DWARFv5 reverses the order of address_size and debug_abbrev_offset.
|
||||
IfThenElse('', lambda ctx: ctx['version'] >= 5,
|
||||
Embed(Struct('',
|
||||
self.Dwarf_uint8('unit_type'),
|
||||
self.Dwarf_uint8('address_size'),
|
||||
self.Dwarf_offset('debug_abbrev_offset'))),
|
||||
Embed(Struct('',
|
||||
self.Dwarf_offset('debug_abbrev_offset'),
|
||||
self.Dwarf_uint8('address_size'))),
|
||||
))
|
||||
|
||||
def _create_abbrev_declaration(self):
|
||||
self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
|
||||
Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
|
||||
Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
|
||||
RepeatUntilExcluding(
|
||||
lambda obj, ctx:
|
||||
obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
|
||||
Struct('attr_spec',
|
||||
Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
|
||||
Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM))))
|
||||
|
||||
def _create_dw_form(self):
|
||||
self.Dwarf_dw_form = dict(
|
||||
DW_FORM_addr=self.Dwarf_target_addr(''),
|
||||
DW_FORM_addrx=self.Dwarf_uleb128(''),
|
||||
DW_FORM_addrx1=self.Dwarf_uint8(''),
|
||||
DW_FORM_addrx2=self.Dwarf_uint16(''),
|
||||
# DW_FORM_addrx3=self.Dwarf_uint24(''), # TODO
|
||||
DW_FORM_addrx4=self.Dwarf_uint32(''),
|
||||
|
||||
DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
|
||||
DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
|
||||
DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
|
||||
DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),
|
||||
|
||||
# All DW_FORM_data<n> forms are assumed to be unsigned
|
||||
DW_FORM_data1=self.Dwarf_uint8(''),
|
||||
DW_FORM_data2=self.Dwarf_uint16(''),
|
||||
DW_FORM_data4=self.Dwarf_uint32(''),
|
||||
DW_FORM_data8=self.Dwarf_uint64(''),
|
||||
DW_FORM_sdata=self.Dwarf_sleb128(''),
|
||||
DW_FORM_udata=self.Dwarf_uleb128(''),
|
||||
|
||||
DW_FORM_string=CString(''),
|
||||
DW_FORM_strp=self.Dwarf_offset(''),
|
||||
DW_FORM_strx1=self.Dwarf_uint8(''),
|
||||
DW_FORM_strx2=self.Dwarf_uint16(''),
|
||||
# DW_FORM_strx3=self.Dwarf_uint24(''), # TODO
|
||||
DW_FORM_strx4=self.Dwarf_uint64(''),
|
||||
DW_FORM_flag=self.Dwarf_uint8(''),
|
||||
|
||||
DW_FORM_ref=self.Dwarf_uint32(''),
|
||||
DW_FORM_ref1=self.Dwarf_uint8(''),
|
||||
DW_FORM_ref2=self.Dwarf_uint16(''),
|
||||
DW_FORM_ref4=self.Dwarf_uint32(''),
|
||||
DW_FORM_ref8=self.Dwarf_uint64(''),
|
||||
DW_FORM_ref_udata=self.Dwarf_uleb128(''),
|
||||
DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''),
|
||||
|
||||
DW_FORM_indirect=self.Dwarf_uleb128(''),
|
||||
|
||||
# New forms in DWARFv4
|
||||
DW_FORM_flag_present = StaticField('', 0),
|
||||
DW_FORM_sec_offset = self.Dwarf_offset(''),
|
||||
DW_FORM_exprloc = self._make_block_struct(self.Dwarf_uleb128),
|
||||
DW_FORM_ref_sig8 = self.Dwarf_uint64(''),
|
||||
|
||||
DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
|
||||
DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
|
||||
DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
|
||||
)
|
||||
|
||||
def _create_aranges_header(self):
|
||||
self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
|
||||
self.Dwarf_initial_length('unit_length'),
|
||||
self.Dwarf_uint16('version'),
|
||||
self.Dwarf_offset('debug_info_offset'), # a little tbd
|
||||
self.Dwarf_uint8('address_size'),
|
||||
self.Dwarf_uint8('segment_size')
|
||||
)
|
||||
|
||||
def _create_nameLUT_header(self):
|
||||
self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
|
||||
self.Dwarf_initial_length('unit_length'),
|
||||
self.Dwarf_uint16('version'),
|
||||
self.Dwarf_offset('debug_info_offset'),
|
||||
self.Dwarf_length('debug_info_length')
|
||||
)
|
||||
|
||||
def _create_string_offsets_table_header(self):
|
||||
self.Dwarf_string_offsets_table_header = Struct(
|
||||
"Dwarf_string_offets_table_header",
|
||||
self.Dwarf_initial_length('unit_length'),
|
||||
self.Dwarf_uint16('version'),
|
||||
self.Dwarf_uint16('padding'),
|
||||
)
|
||||
|
||||
def _create_address_table_header(self):
|
||||
self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
|
||||
self.Dwarf_initial_length('unit_length'),
|
||||
self.Dwarf_uint16('version'),
|
||||
self.Dwarf_uint8('address_size'),
|
||||
self.Dwarf_uint8('segment_selector_size'),
|
||||
)
|
||||
|
||||
def _create_lineprog_header(self):
|
||||
# A file entry is terminated by a NULL byte, so we don't want to parse
|
||||
# past it. Therefore an If is used.
|
||||
self.Dwarf_lineprog_file_entry = Struct('file_entry',
|
||||
CString('name'),
|
||||
If(lambda ctx: len(ctx.name) != 0,
|
||||
Embed(Struct('',
|
||||
self.Dwarf_uleb128('dir_index'),
|
||||
self.Dwarf_uleb128('mtime'),
|
||||
self.Dwarf_uleb128('length')))))
|
||||
|
||||
self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
|
||||
self.Dwarf_initial_length('unit_length'),
|
||||
self.Dwarf_uint16('version'),
|
||||
self.Dwarf_offset('header_length'),
|
||||
self.Dwarf_uint8('minimum_instruction_length'),
|
||||
If(lambda ctx: ctx['version'] >= 4,
|
||||
self.Dwarf_uint8("maximum_operations_per_instruction"),
|
||||
1),
|
||||
self.Dwarf_uint8('default_is_stmt'),
|
||||
self.Dwarf_int8('line_base'),
|
||||
self.Dwarf_uint8('line_range'),
|
||||
self.Dwarf_uint8('opcode_base'),
|
||||
Array(lambda ctx: ctx['opcode_base'] - 1,
|
||||
self.Dwarf_uint8('standard_opcode_lengths')),
|
||||
RepeatUntilExcluding(
|
||||
lambda obj, ctx: obj == b'',
|
||||
CString('include_directory')),
|
||||
RepeatUntilExcluding(
|
||||
lambda obj, ctx: len(obj.name) == 0,
|
||||
self.Dwarf_lineprog_file_entry),
|
||||
)
|
||||
|
||||
def _create_callframe_entry_headers(self):
|
||||
self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
|
||||
self.Dwarf_initial_length('length'),
|
||||
self.Dwarf_offset('CIE_id'),
|
||||
self.Dwarf_uint8('version'),
|
||||
CString('augmentation'),
|
||||
self.Dwarf_uleb128('code_alignment_factor'),
|
||||
self.Dwarf_sleb128('data_alignment_factor'),
|
||||
self.Dwarf_uleb128('return_address_register'))
|
||||
self.EH_CIE_header = self.Dwarf_CIE_header
|
||||
|
||||
# The CIE header was modified in DWARFv4.
|
||||
if self.dwarf_version == 4:
|
||||
self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
|
||||
self.Dwarf_initial_length('length'),
|
||||
self.Dwarf_offset('CIE_id'),
|
||||
self.Dwarf_uint8('version'),
|
||||
CString('augmentation'),
|
||||
self.Dwarf_uint8('address_size'),
|
||||
self.Dwarf_uint8('segment_size'),
|
||||
self.Dwarf_uleb128('code_alignment_factor'),
|
||||
self.Dwarf_sleb128('data_alignment_factor'),
|
||||
self.Dwarf_uleb128('return_address_register'))
|
||||
|
||||
self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
|
||||
self.Dwarf_initial_length('length'),
|
||||
self.Dwarf_offset('CIE_pointer'),
|
||||
self.Dwarf_target_addr('initial_location'),
|
||||
self.Dwarf_target_addr('address_range'))
|
||||
|
||||
def _make_block_struct(self, length_field):
|
||||
""" Create a struct for DW_FORM_block<size>
|
||||
"""
|
||||
return PrefixedArray(
|
||||
subcon=self.Dwarf_uint8('elem'),
|
||||
length_field=length_field(''))
|
||||
|
||||
|
||||
class _InitialLengthAdapter(Adapter):
|
||||
""" A standard Construct adapter that expects a sub-construct
|
||||
as a struct with one or two values (first, second).
|
||||
"""
|
||||
def _decode(self, obj, context):
|
||||
if obj.first < 0xFFFFFF00:
|
||||
return obj.first
|
||||
else:
|
||||
if obj.first == 0xFFFFFFFF:
|
||||
return obj.second
|
||||
else:
|
||||
raise ConstructError("Failed decoding initial length for %X" % (
|
||||
obj.first))
|
||||
@@ -1 +0,0 @@
|
||||
EHABI_INDEX_ENTRY_SIZE = 8
|
||||
@@ -1,284 +0,0 @@
|
||||
# -------------------------------------------------------------------------------
|
||||
# elftools: ehabi/decoder.py
|
||||
#
|
||||
# Decode ARM exception handler bytecode.
|
||||
#
|
||||
# LeadroyaL (leadroyal@qq.com)
|
||||
# This code is in the public domain
|
||||
# -------------------------------------------------------------------------------
|
||||
from collections import namedtuple
|
||||
|
||||
|
||||
class EHABIBytecodeDecoder(object):
|
||||
""" Decoder of a sequence of ARM exception handler abi bytecode.
|
||||
|
||||
Reference:
|
||||
https://github.com/llvm/llvm-project/blob/master/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
|
||||
https://developer.arm.com/documentation/ihi0038/b/
|
||||
|
||||
Accessible attributes:
|
||||
|
||||
mnemonic_array:
|
||||
MnemonicItem array.
|
||||
|
||||
Parameters:
|
||||
|
||||
bytecode_array:
|
||||
Integer array, raw data of bytecode.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, bytecode_array):
|
||||
self._bytecode_array = bytecode_array
|
||||
self._index = None
|
||||
self.mnemonic_array = None
|
||||
self._decode()
|
||||
|
||||
def _decode(self):
|
||||
""" Decode bytecode array, put result into mnemonic_array.
|
||||
"""
|
||||
self._index = 0
|
||||
self.mnemonic_array = []
|
||||
while self._index < len(self._bytecode_array):
|
||||
for mask, value, handler in self.ring:
|
||||
if (self._bytecode_array[self._index] & mask) == value:
|
||||
start_idx = self._index
|
||||
mnemonic = handler(self)
|
||||
end_idx = self._index
|
||||
self.mnemonic_array.append(
|
||||
MnemonicItem(self._bytecode_array[start_idx: end_idx], mnemonic))
|
||||
break
|
||||
|
||||
def _decode_00xxxxxx(self):
|
||||
# SW.startLine() << format("0x%02X ; vsp = vsp + %u\n", Opcode,
|
||||
# ((Opcode & 0x3f) << 2) + 4);
|
||||
opcode = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
return 'vsp = vsp + %u' % (((opcode & 0x3f) << 2) + 4)
|
||||
|
||||
def _decode_01xxxxxx(self):
|
||||
# SW.startLine() << format("0x%02X ; vsp = vsp - %u\n", Opcode,
|
||||
# ((Opcode & 0x3f) << 2) + 4);
|
||||
opcode = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
return 'vsp = vsp - %u' % (((opcode & 0x3f) << 2) + 4)
|
||||
|
||||
gpr_register_names = ("r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
|
||||
"r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc")
|
||||
|
||||
def _calculate_range(self, start, count):
|
||||
return ((1 << (count + 1)) - 1) << start
|
||||
|
||||
def _printGPR(self, gpr_mask):
|
||||
hits = [self.gpr_register_names[i] for i in range(32) if gpr_mask & (1 << i) != 0]
|
||||
return '{%s}' % ', '.join(hits)
|
||||
|
||||
def _print_registers(self, vfp_mask, prefix):
|
||||
hits = [prefix + str(i) for i in range(32) if vfp_mask & (1 << i) != 0]
|
||||
return '{%s}' % ', '.join(hits)
|
||||
|
||||
def _decode_1000iiii_iiiiiiii(self):
|
||||
op0 = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
op1 = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
# uint16_t GPRMask = (Opcode1 << 4) | ((Opcode0 & 0x0f) << 12);
|
||||
# SW.startLine()
|
||||
# << format("0x%02X 0x%02X ; %s",
|
||||
# Opcode0, Opcode1, GPRMask ? "pop " : "refuse to unwind");
|
||||
# if (GPRMask)
|
||||
# PrintGPR(GPRMask);
|
||||
gpr_mask = (op1 << 4) | ((op0 & 0x0f) << 12)
|
||||
if gpr_mask == 0:
|
||||
return 'refuse to unwind'
|
||||
else:
|
||||
return 'pop %s' % self._printGPR(gpr_mask)
|
||||
|
||||
def _decode_10011101(self):
|
||||
self._index += 1
|
||||
return 'reserved (ARM MOVrr)'
|
||||
|
||||
def _decode_10011111(self):
|
||||
self._index += 1
|
||||
return 'reserved (WiMMX MOVrr)'
|
||||
|
||||
def _decode_1001nnnn(self):
|
||||
# SW.startLine() << format("0x%02X ; vsp = r%u\n", Opcode, (Opcode & 0x0f));
|
||||
opcode = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
return 'vsp = r%u' % (opcode & 0x0f)
|
||||
|
||||
def _decode_10100nnn(self):
|
||||
# SW.startLine() << format("0x%02X ; pop ", Opcode);
|
||||
# PrintGPR((((1 << ((Opcode & 0x7) + 1)) - 1) << 4));
|
||||
opcode = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07))
|
||||
|
||||
def _decode_10101nnn(self):
|
||||
# SW.startLine() << format("0x%02X ; pop ", Opcode);
|
||||
# PrintGPR((((1 << ((Opcode & 0x7) + 1)) - 1) << 4) | (1 << 14));
|
||||
opcode = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07) | (1 << 14))
|
||||
|
||||
def _decode_10110000(self):
|
||||
# SW.startLine() << format("0x%02X ; finish\n", Opcode);
|
||||
self._index += 1
|
||||
return 'finish'
|
||||
|
||||
def _decode_10110001_0000iiii(self):
|
||||
# SW.startLine()
|
||||
# << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1,
|
||||
# ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? "spare" : "pop ");
|
||||
# if (((Opcode1 & 0xf0) == 0x00) && Opcode1)
|
||||
# PrintGPR((Opcode1 & 0x0f));
|
||||
self._index += 1 # skip constant byte
|
||||
op1 = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
if (op1 & 0xf0) != 0 or op1 == 0x00:
|
||||
return 'spare'
|
||||
else:
|
||||
return 'pop %s' % self._printGPR((op1 & 0x0f))
|
||||
|
||||
def _decode_10110010_uleb128(self):
|
||||
# SmallVector<uint8_t, 4> ULEB;
|
||||
# do { ULEB.push_back(Opcodes[OI ^ 3]); } while (Opcodes[OI++ ^ 3] & 0x80);
|
||||
# uint64_t Value = 0;
|
||||
# for (unsigned BI = 0, BE = ULEB.size(); BI != BE; ++BI)
|
||||
# Value = Value | ((ULEB[BI] & 0x7f) << (7 * BI));
|
||||
# OS << format("; vsp = vsp + %" PRIu64 "\n", 0x204 + (Value << 2));
|
||||
self._index += 1 # skip constant byte
|
||||
uleb_buffer = [self._bytecode_array[self._index]]
|
||||
self._index += 1
|
||||
while self._bytecode_array[self._index] & 0x80 == 0:
|
||||
uleb_buffer.append(self._bytecode_array[self._index])
|
||||
self._index += 1
|
||||
value = 0
|
||||
for b in reversed(uleb_buffer):
|
||||
value = (value << 7) + (b & 0x7F)
|
||||
return 'vsp = vsp + %u' % (0x204 + (value << 2))
|
||||
|
||||
def _decode_10110011_sssscccc(self):
|
||||
# these two decoders are equal
|
||||
return self._decode_11001001_sssscccc()
|
||||
|
||||
def _decode_101101nn(self):
|
||||
return self._spare()
|
||||
|
||||
def _decode_10111nnn(self):
|
||||
# SW.startLine() << format("0x%02X ; pop ", Opcode);
|
||||
# PrintRegisters((((1 << ((Opcode & 0x07) + 1)) - 1) << 8), "d");
|
||||
opcode = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
return 'pop %s' % self._print_registers(self._calculate_range(8, opcode & 0x07), "d")
|
||||
|
||||
def _decode_11000110_sssscccc(self):
|
||||
# SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1);
|
||||
# uint8_t Start = ((Opcode1 & 0xf0) >> 4);
|
||||
# uint8_t Count = ((Opcode1 & 0x0f) >> 0);
|
||||
# PrintRegisters((((1 << (Count + 1)) - 1) << Start), "wR");
|
||||
self._index += 1 # skip constant byte
|
||||
op1 = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
start = ((op1 & 0xf0) >> 4)
|
||||
count = ((op1 & 0x0f) >> 0)
|
||||
return 'pop %s' % self._print_registers(self._calculate_range(start, count), "wR")
|
||||
|
||||
def _decode_11000111_0000iiii(self):
|
||||
# SW.startLine()
|
||||
# << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1,
|
||||
# ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? "spare" : "pop ");
|
||||
# if ((Opcode1 & 0xf0) == 0x00 && Opcode1)
|
||||
# PrintRegisters(Opcode1 & 0x0f, "wCGR");
|
||||
self._index += 1 # skip constant byte
|
||||
op1 = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
if (op1 & 0xf0) != 0 or op1 == 0x00:
|
||||
return 'spare'
|
||||
else:
|
||||
return 'pop %s' % self._print_registers(op1 & 0x0f, "wCGR")
|
||||
|
||||
def _decode_11001000_sssscccc(self):
|
||||
# SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1);
|
||||
# uint8_t Start = 16 + ((Opcode1 & 0xf0) >> 4);
|
||||
# uint8_t Count = ((Opcode1 & 0x0f) >> 0);
|
||||
# PrintRegisters((((1 << (Count + 1)) - 1) << Start), "d");
|
||||
self._index += 1 # skip constant byte
|
||||
op1 = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
start = 16 + ((op1 & 0xf0) >> 4)
|
||||
count = ((op1 & 0x0f) >> 0)
|
||||
return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")
|
||||
|
||||
def _decode_11001001_sssscccc(self):
|
||||
# SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1);
|
||||
# uint8_t Start = ((Opcode1 & 0xf0) >> 4);
|
||||
# uint8_t Count = ((Opcode1 & 0x0f) >> 0);
|
||||
# PrintRegisters((((1 << (Count + 1)) - 1) << Start), "d");
|
||||
self._index += 1 # skip constant byte
|
||||
op1 = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
start = ((op1 & 0xf0) >> 4)
|
||||
count = ((op1 & 0x0f) >> 0)
|
||||
return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")
|
||||
|
||||
def _decode_11001yyy(self):
|
||||
return self._spare()
|
||||
|
||||
def _decode_11000nnn(self):
|
||||
# SW.startLine() << format("0x%02X ; pop ", Opcode);
|
||||
# PrintRegisters((((1 << ((Opcode & 0x07) + 1)) - 1) << 10), "wR");
|
||||
opcode = self._bytecode_array[self._index]
|
||||
self._index += 1
|
||||
return 'pop %s' % self._print_registers(self._calculate_range(10, opcode & 0x07), "wR")
|
||||
|
||||
def _decode_11010nnn(self):
|
||||
# these two decoders are equal
|
||||
return self._decode_10111nnn()
|
||||
|
||||
def _decode_11xxxyyy(self):
|
||||
return self._spare()
|
||||
|
||||
def _spare(self):
|
||||
self._index += 1
|
||||
return 'spare'
|
||||
|
||||
_DECODE_RECIPE_TYPE = namedtuple('_DECODE_RECIPE_TYPE', 'mask value handler')
|
||||
|
||||
ring = (
|
||||
_DECODE_RECIPE_TYPE(mask=0xc0, value=0x00, handler=_decode_00xxxxxx),
|
||||
_DECODE_RECIPE_TYPE(mask=0xc0, value=0x40, handler=_decode_01xxxxxx),
|
||||
_DECODE_RECIPE_TYPE(mask=0xf0, value=0x80, handler=_decode_1000iiii_iiiiiiii),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0x9d, handler=_decode_10011101),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0x9f, handler=_decode_10011111),
|
||||
_DECODE_RECIPE_TYPE(mask=0xf0, value=0x90, handler=_decode_1001nnnn),
|
||||
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xa0, handler=_decode_10100nnn),
|
||||
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xa8, handler=_decode_10101nnn),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0xb0, handler=_decode_10110000),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0xb1, handler=_decode_10110001_0000iiii),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0xb2, handler=_decode_10110010_uleb128),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0xb3, handler=_decode_10110011_sssscccc),
|
||||
_DECODE_RECIPE_TYPE(mask=0xfc, value=0xb4, handler=_decode_101101nn),
|
||||
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xb8, handler=_decode_10111nnn),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0xc6, handler=_decode_11000110_sssscccc),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0xc7, handler=_decode_11000111_0000iiii),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0xc8, handler=_decode_11001000_sssscccc),
|
||||
_DECODE_RECIPE_TYPE(mask=0xff, value=0xc9, handler=_decode_11001001_sssscccc),
|
||||
_DECODE_RECIPE_TYPE(mask=0xc8, value=0xc8, handler=_decode_11001yyy),
|
||||
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xc0, handler=_decode_11000nnn),
|
||||
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xd0, handler=_decode_11010nnn),
|
||||
_DECODE_RECIPE_TYPE(mask=0xc0, value=0xc0, handler=_decode_11xxxyyy),
|
||||
)
|
||||
|
||||
|
||||
class MnemonicItem(object):
|
||||
""" Single mnemonic item.
|
||||
"""
|
||||
|
||||
def __init__(self, bytecode, mnemonic):
|
||||
self.bytecode = bytecode
|
||||
self.mnemonic = mnemonic
|
||||
|
||||
def __repr__(self):
|
||||
return '%s ; %s' % (' '.join(['0x%02x' % x for x in self.bytecode]), self.mnemonic)
|
||||
@@ -1,209 +0,0 @@
|
||||
# -------------------------------------------------------------------------------
|
||||
# elftools: ehabi/ehabiinfo.py
|
||||
#
|
||||
# Decoder for ARM exception handler bytecode.
|
||||
#
|
||||
# LeadroyaL (leadroyal@qq.com)
|
||||
# This code is in the public domain
|
||||
# -------------------------------------------------------------------------------
|
||||
|
||||
from ..common.utils import struct_parse
|
||||
|
||||
from .decoder import EHABIBytecodeDecoder
|
||||
from .constants import EHABI_INDEX_ENTRY_SIZE
|
||||
from .structs import EHABIStructs
|
||||
|
||||
|
||||
class EHABIInfo(object):
|
||||
""" ARM exception handler abi information class.
|
||||
|
||||
Parameters:
|
||||
|
||||
arm_idx_section:
|
||||
elf.sections.Section object, section which type is SHT_ARM_EXIDX.
|
||||
|
||||
little_endian:
|
||||
bool, endianness of elf file.
|
||||
"""
|
||||
|
||||
def __init__(self, arm_idx_section, little_endian):
|
||||
self._arm_idx_section = arm_idx_section
|
||||
self._struct = EHABIStructs(little_endian)
|
||||
self._num_entry = None
|
||||
|
||||
def section_name(self):
|
||||
return self._arm_idx_section.name
|
||||
|
||||
def section_offset(self):
|
||||
return self._arm_idx_section['sh_offset']
|
||||
|
||||
def num_entry(self):
|
||||
""" Number of exception handler entry in the section.
|
||||
"""
|
||||
if self._num_entry is None:
|
||||
self._num_entry = self._arm_idx_section['sh_size'] // EHABI_INDEX_ENTRY_SIZE
|
||||
return self._num_entry
|
||||
|
||||
def get_entry(self, n):
|
||||
""" Get the exception handler entry at index #n. (EHABIEntry object or a subclass)
|
||||
"""
|
||||
if n >= self.num_entry():
|
||||
raise IndexError('Invalid entry %d/%d' % (n, self._num_entry))
|
||||
eh_index_entry_offset = self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE
|
||||
eh_index_data = struct_parse(self._struct.EH_index_struct, self._arm_idx_section.stream, eh_index_entry_offset)
|
||||
word0, word1 = eh_index_data['word0'], eh_index_data['word1']
|
||||
|
||||
if word0 & 0x80000000 != 0:
|
||||
return CorruptEHABIEntry('Corrupt ARM exception handler table entry: %x' % n)
|
||||
|
||||
function_offset = arm_expand_prel31(word0, self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE)
|
||||
|
||||
if word1 == 1:
|
||||
# 0x1 means cannot unwind
|
||||
return CannotUnwindEHABIEntry(function_offset)
|
||||
elif word1 & 0x80000000 == 0:
|
||||
# highest bit is zero, point to .ARM.extab data
|
||||
eh_table_offset = arm_expand_prel31(word1, self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE + 4)
|
||||
eh_index_data = struct_parse(self._struct.EH_table_struct, self._arm_idx_section.stream, eh_table_offset)
|
||||
word0 = eh_index_data['word0']
|
||||
if word0 & 0x80000000 == 0:
|
||||
# highest bit is one, generic model
|
||||
return GenericEHABIEntry(function_offset, arm_expand_prel31(word0, eh_table_offset))
|
||||
else:
|
||||
# highest bit is one, arm compact model
|
||||
# highest half must be 0b1000 for compact model
|
||||
if word0 & 0x70000000 != 0:
|
||||
return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n)
|
||||
per_index = (word0 >> 24) & 0x7f
|
||||
if per_index == 0:
|
||||
# arm compact model 0
|
||||
opcode = [(word0 & 0xFF0000) >> 16, (word0 & 0xFF00) >> 8, word0 & 0xFF]
|
||||
return EHABIEntry(function_offset, per_index, opcode)
|
||||
elif per_index == 1 or per_index == 2:
|
||||
# arm compact model 1/2
|
||||
more_word = (word0 >> 16) & 0xff
|
||||
opcode = [(word0 >> 8) & 0xff, (word0 >> 0) & 0xff]
|
||||
self._arm_idx_section.stream.seek(eh_table_offset + 4)
|
||||
for i in range(more_word):
|
||||
r = struct_parse(self._struct.EH_table_struct, self._arm_idx_section.stream)['word0']
|
||||
opcode.append((r >> 24) & 0xFF)
|
||||
opcode.append((r >> 16) & 0xFF)
|
||||
opcode.append((r >> 8) & 0xFF)
|
||||
opcode.append((r >> 0) & 0xFF)
|
||||
return EHABIEntry(function_offset, per_index, opcode, eh_table_offset=eh_table_offset)
|
||||
else:
|
||||
return CorruptEHABIEntry('Unknown ARM compact model %d at table entry: %x' % (per_index, n))
|
||||
else:
|
||||
# highest bit is one, compact model must be 0
|
||||
if word1 & 0x7f000000 != 0:
|
||||
return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n)
|
||||
opcode = [(word1 & 0xFF0000) >> 16, (word1 & 0xFF00) >> 8, word1 & 0xFF]
|
||||
return EHABIEntry(function_offset, 0, opcode)
|
||||
|
||||
|
||||
class EHABIEntry(object):
|
||||
""" Exception handler abi entry.
|
||||
|
||||
Accessible attributes:
|
||||
|
||||
function_offset:
|
||||
Integer.
|
||||
None if corrupt. (Reference: CorruptEHABIEntry)
|
||||
|
||||
personality:
|
||||
Integer.
|
||||
None if corrupt or unwindable. (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry)
|
||||
0/1/2 for ARM personality compact format.
|
||||
Others for generic personality.
|
||||
|
||||
bytecode_array:
|
||||
Integer array.
|
||||
None if corrupt or unwindable or generic personality.
|
||||
(Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry)
|
||||
|
||||
eh_table_offset:
|
||||
Integer.
|
||||
Only entries who point to .ARM.extab contains this field, otherwise return None.
|
||||
|
||||
unwindable:
|
||||
bool. Whether this function is unwindable.
|
||||
|
||||
corrupt:
|
||||
bool. Whether this entry is corrupt.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
function_offset,
|
||||
personality,
|
||||
bytecode_array,
|
||||
eh_table_offset=None,
|
||||
unwindable=True,
|
||||
corrupt=False):
|
||||
self.function_offset = function_offset
|
||||
self.personality = personality
|
||||
self.bytecode_array = bytecode_array
|
||||
self.eh_table_offset = eh_table_offset
|
||||
self.unwindable = unwindable
|
||||
self.corrupt = corrupt
|
||||
|
||||
def mnmemonic_array(self):
|
||||
if self.bytecode_array:
|
||||
return EHABIBytecodeDecoder(self.bytecode_array).mnemonic_array
|
||||
else:
|
||||
return None
|
||||
|
||||
def __repr__(self):
|
||||
return "<EHABIEntry function_offset=0x%x, personality=%d, %sbytecode=%s>" % (
|
||||
self.function_offset,
|
||||
self.personality,
|
||||
"eh_table_offset=0x%x, " % self.eh_table_offset if self.eh_table_offset else "",
|
||||
self.bytecode_array)
|
||||
|
||||
|
||||
class CorruptEHABIEntry(EHABIEntry):
|
||||
""" This entry is corrupt. Attribute #corrupt will be True.
|
||||
"""
|
||||
|
||||
def __init__(self, reason):
|
||||
super(CorruptEHABIEntry, self).__init__(function_offset=None, personality=None, bytecode_array=None,
|
||||
corrupt=True)
|
||||
self.reason = reason
|
||||
|
||||
def __repr__(self):
|
||||
return "<CorruptEHABIEntry reason=%s>" % self.reason
|
||||
|
||||
|
||||
class CannotUnwindEHABIEntry(EHABIEntry):
|
||||
""" This function cannot be unwind. Attribute #unwindable will be False.
|
||||
"""
|
||||
|
||||
def __init__(self, function_offset):
|
||||
super(CannotUnwindEHABIEntry, self).__init__(function_offset, personality=None, bytecode_array=None,
|
||||
unwindable=False)
|
||||
|
||||
def __repr__(self):
|
||||
return "<CannotUnwindEHABIEntry function_offset=0x%x>" % self.function_offset
|
||||
|
||||
|
||||
class GenericEHABIEntry(EHABIEntry):
    """An index entry using the generic personality model rather than the
    ARM compact model.  Attribute #bytecode_array will be None.
    """

    def __init__(self, function_offset, personality):
        super(GenericEHABIEntry, self).__init__(
            function_offset, personality, bytecode_array=None)

    def __repr__(self):
        return "<GenericEHABIEntry function_offset=0x%x, personality=0x%x>" % (
            self.function_offset, self.personality)
def arm_expand_prel31(address, place):
    """Expand a prel31 (31-bit place-relative) word to a 64-bit address.

    address: uint32 -- raw prel31 word; bit 31 is ignored.
    place: uint32 -- address at which the prel31 word resides.
    return: uint64 -- place plus the sign-extended 31-bit offset,
        truncated to 64 bits.
    """
    location = address & 0x7fffffff
    # The sign bit of a 31-bit two's-complement offset is bit 30.
    # (The previous code tested 0x04000000 -- bit 26 -- which wrongly
    # sign-extended positive offsets with bit 26 set and failed to
    # extend negative offsets without it.)
    if location & 0x40000000:
        location |= 0xffffffff80000000
    return location + place & 0xffffffffffffffff
@@ -1,47 +0,0 @@
|
||||
# -------------------------------------------------------------------------------
|
||||
# elftools: ehabi/structs.py
|
||||
#
|
||||
# Encapsulation of Construct structs for parsing an EHABI, adjusted for
|
||||
# correct endianness and word-size.
|
||||
#
|
||||
# LeadroyaL (leadroyal@qq.com)
|
||||
# This code is in the public domain
|
||||
# -------------------------------------------------------------------------------
|
||||
|
||||
from ..construct import UBInt32, ULInt32, Struct
|
||||
|
||||
|
||||
class EHABIStructs(object):
    """Construct structs for parsing EHABI sections.

    Accessible attributes:

        EH_index_struct:
            Struct of one item in section .ARM.exidx.

        EH_table_struct:
            Struct of one item in section .ARM.extab.
    """

    def __init__(self, little_endian):
        self._little_endian = little_endian
        self._create_structs()

    def _create_structs(self):
        # Pick the 32-bit word type matching the file's endianness, then
        # build both section structs from it.
        self.EHABI_uint32 = ULInt32 if self._little_endian else UBInt32
        self._create_exception_handler_index()
        self._create_exception_handler_table()

    def _create_exception_handler_index(self):
        # .ARM.exidx entries are pairs of 32-bit words.
        self.EH_index_struct = Struct(
            'EH_index',
            self.EHABI_uint32('word0'),
            self.EHABI_uint32('word1'),
        )

    def _create_exception_handler_table(self):
        # .ARM.extab entries start with a single 32-bit word.
        self.EH_table_struct = Struct(
            'EH_table',
            self.EHABI_uint32('word0'),
        )
@@ -1,151 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/constants.py
|
||||
#
|
||||
# Constants and flags, placed into classes for namespacing
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
class E_FLAGS(object):
    """Flag values for the e_flags field of the ELF header."""

    # --- ARM ---
    EF_ARM_EABIMASK = 0xFF000000
    EF_ARM_EABI_VER1 = 0x01000000
    EF_ARM_EABI_VER2 = 0x02000000
    EF_ARM_EABI_VER3 = 0x03000000
    EF_ARM_EABI_VER4 = 0x04000000
    EF_ARM_EABI_VER5 = 0x05000000
    EF_ARM_GCCMASK = 0x00400FFF
    EF_ARM_RELEXEC = 0x01
    EF_ARM_HASENTRY = 0x02
    EF_ARM_SYMSARESORTED = 0x04
    EF_ARM_DYNSYMSUSESEGIDX = 0x8
    EF_ARM_MAPSYMSFIRST = 0x10
    EF_ARM_LE8 = 0x00400000
    EF_ARM_BE8 = 0x00800000
    EF_ARM_ABI_FLOAT_SOFT = 0x00000200
    EF_ARM_ABI_FLOAT_HARD = 0x00000400

    # --- PPC64 ---
    EF_PPC64_ABI_V0 = 0
    EF_PPC64_ABI_V1 = 1
    EF_PPC64_ABI_V2 = 2

    # --- MIPS ---
    EF_MIPS_NOREORDER = 1
    EF_MIPS_PIC = 2
    EF_MIPS_CPIC = 4
    EF_MIPS_XGOT = 8
    EF_MIPS_64BIT_WHIRL = 16
    EF_MIPS_ABI2 = 32
    EF_MIPS_ABI_ON32 = 64
    EF_MIPS_32BITMODE = 256
    EF_MIPS_NAN2008 = 1024
    EF_MIPS_ARCH = 0xf0000000
    EF_MIPS_ARCH_1 = 0x00000000
    EF_MIPS_ARCH_2 = 0x10000000
    EF_MIPS_ARCH_3 = 0x20000000
    EF_MIPS_ARCH_4 = 0x30000000
    EF_MIPS_ARCH_5 = 0x40000000
    EF_MIPS_ARCH_32 = 0x50000000
    EF_MIPS_ARCH_64 = 0x60000000
    EF_MIPS_ARCH_32R2 = 0x70000000
    EF_MIPS_ARCH_64R2 = 0x80000000
class E_FLAGS_MASKS(object):
    """Convenience masks for working with E_FLAGS values.

    Simplified approach mirroring the one used by GNU binutils readelf.
    """

    EFM_MIPS_ABI = 0x0000F000
    EFM_MIPS_ABI_O32 = 0x00001000
    EFM_MIPS_ABI_O64 = 0x00002000
    EFM_MIPS_ABI_EABI32 = 0x00003000
    EFM_MIPS_ABI_EABI64 = 0x00004000
class SHN_INDICES(object):
    """Special (reserved) section header index values."""

    SHN_UNDEF = 0
    SHN_LORESERVE = 0xff00
    SHN_LOPROC = 0xff00
    SHN_HIPROC = 0xff1f
    SHN_ABS = 0xfff1
    SHN_COMMON = 0xfff2
    SHN_HIRESERVE = 0xffff
    SHN_XINDEX = 0xffff
class SH_FLAGS(object):
    """Flag values for the sh_flags field of section headers."""

    SHF_WRITE = 0x1
    SHF_ALLOC = 0x2
    SHF_EXECINSTR = 0x4
    SHF_MERGE = 0x10
    SHF_STRINGS = 0x20
    SHF_INFO_LINK = 0x40
    SHF_LINK_ORDER = 0x80
    SHF_OS_NONCONFORMING = 0x100
    SHF_GROUP = 0x200
    SHF_TLS = 0x400
    SHF_COMPRESSED = 0x800
    SHF_MASKOS = 0x0ff00000
    SHF_EXCLUDE = 0x80000000
    SHF_MASKPROC = 0xf0000000
class RH_FLAGS(object):
    """Flag values for DT_MIPS_FLAGS dynamic table entries."""

    RHF_NONE = 0x00000000
    RHF_QUICKSTART = 0x00000001
    RHF_NOTPOT = 0x00000002
    RHF_NO_LIBRARY_REPLACEMENT = 0x00000004
    RHF_NO_MOVE = 0x00000008
    RHF_SGI_ONLY = 0x00000010
    RHF_GUARANTEE_INIT = 0x00000020
    RHF_DELTA_C_PLUS_PLUS = 0x00000040
    RHF_GUARANTEE_START_INIT = 0x00000080
    RHF_PIXIE = 0x00000100
    RHF_DEFAULT_DELAY_LOAD = 0x00000200
    RHF_REQUICKSTART = 0x00000400
    RHF_REQUICKSTARTED = 0x00000800
    RHF_CORD = 0x00001000
    RHF_NO_UNRES_UNDEF = 0x00002000
    RHF_RLD_ORDER_SAFE = 0x00004000
class P_FLAGS(object):
    """Flag values for the p_flags field of program headers."""

    PF_X = 0x1
    PF_W = 0x2
    PF_R = 0x4
    PF_MASKOS = 0x00FF0000
    PF_MASKPROC = 0xFF000000
# Symbol info flags for entries in the .SUNW_syminfo section.
class SUNW_SYMINFO_FLAGS(object):
    """Flags for the si_flags field of entries in the .SUNW_syminfo
    section.
    """

    SYMINFO_FLG_DIRECT = 0x1
    SYMINFO_FLG_FILTER = 0x2
    SYMINFO_FLG_COPY = 0x4
    SYMINFO_FLG_LAZYLOAD = 0x8
    SYMINFO_FLG_DIRECTBIND = 0x10
    SYMINFO_FLG_NOEXTDIRECT = 0x20
    SYMINFO_FLG_AUXILIARY = 0x40
    SYMINFO_FLG_INTERPOSE = 0x80
    SYMINFO_FLG_CAP = 0x100
    SYMINFO_FLG_DEFERRED = 0x200
class VER_FLAGS(object):
    """Flag values for version definition/dependency entries."""

    VER_FLG_BASE = 0x1
    VER_FLG_WEAK = 0x2
    VER_FLG_INFO = 0x4
@@ -1,939 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/descriptions.py
|
||||
#
|
||||
# Textual descriptions of the various enums and flags of ELF
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from .enums import (
|
||||
ENUM_D_TAG, ENUM_E_VERSION, ENUM_P_TYPE_BASE, ENUM_SH_TYPE_BASE,
|
||||
ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64,
|
||||
ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64,
|
||||
ENUM_RELOC_TYPE_MIPS, ENUM_ATTR_TAG_ARM, ENUM_DT_FLAGS, ENUM_DT_FLAGS_1)
|
||||
from .constants import (
|
||||
P_FLAGS, RH_FLAGS, SH_FLAGS, SUNW_SYMINFO_FLAGS, VER_FLAGS)
|
||||
from ..common.py3compat import bytes2hex, iteritems
|
||||
|
||||
|
||||
def describe_ei_class(x):
    """Return a textual description of an EI_CLASS value."""
    try:
        return _DESCR_EI_CLASS[x]
    except KeyError:
        return _unknown
def describe_ei_data(x):
    """Return a textual description of an EI_DATA (endianness) value."""
    try:
        return _DESCR_EI_DATA[x]
    except KeyError:
        return _unknown
def describe_ei_version(x):
    """Return the numeric EI_VERSION, tagging the current version."""
    suffix = ' (current)' if x == 'EV_CURRENT' else ''
    return '%d' % ENUM_E_VERSION[x] + suffix
def describe_ei_osabi(x):
    """Return a textual description of an EI_OSABI value."""
    try:
        return _DESCR_EI_OSABI[x]
    except KeyError:
        return _unknown
def describe_e_type(x, elffile=None):
    """Describe the e_type header field.

    When an ELFFile is supplied, ET_DYN is refined: a DF_1_PIE bit in the
    dynamic section's DT_FLAGS_1 marks a PIE executable rather than a
    plain shared object.
    """
    if elffile is not None and x == 'ET_DYN':
        dynamic = elffile.get_section_by_name('.dynamic')
        for tag in dynamic.iter_tags('DT_FLAGS_1'):
            if tag.entry.d_val & ENUM_DT_FLAGS_1['DF_1_PIE']:
                return 'DYN (Position-Independent Executable file)'
    return _DESCR_E_TYPE.get(x, _unknown)
def describe_e_machine(x):
    """Return a textual description of an e_machine value."""
    try:
        return _DESCR_E_MACHINE[x]
    except KeyError:
        return _unknown
def describe_e_version_numeric(x):
    """Return the e_version value formatted as hex."""
    version = ENUM_E_VERSION[x]
    return '0x%x' % version
def describe_p_type(x):
    """Describe a program header type; OS-specific values show LOOS offset."""
    if x in _DESCR_P_TYPE:
        return _DESCR_P_TYPE[x]
    loos = ENUM_P_TYPE_BASE['PT_LOOS']
    if loos <= x <= ENUM_P_TYPE_BASE['PT_HIOS']:
        return 'LOOS+%lx' % (x - loos)
    return _unknown
def describe_p_flags(x):
    """Render p_flags as a fixed-width 'RWE' string (space = flag unset)."""
    return ''.join(
        _DESCR_P_FLAGS[flag] if (x & flag) else ' '
        for flag in (P_FLAGS.PF_R, P_FLAGS.PF_W, P_FLAGS.PF_X))
def describe_rh_flags(x):
    """Return a space-separated list of DT_MIPS_FLAGS names set in x."""
    # RHF_NONE (0) can never satisfy `x & flag`, so it is effectively
    # skipped -- same as the original generator expression.
    names = []
    for flag in (RH_FLAGS.RHF_NONE, RH_FLAGS.RHF_QUICKSTART,
                 RH_FLAGS.RHF_NOTPOT, RH_FLAGS.RHF_NO_LIBRARY_REPLACEMENT,
                 RH_FLAGS.RHF_NO_MOVE, RH_FLAGS.RHF_SGI_ONLY,
                 RH_FLAGS.RHF_GUARANTEE_INIT,
                 RH_FLAGS.RHF_DELTA_C_PLUS_PLUS,
                 RH_FLAGS.RHF_GUARANTEE_START_INIT, RH_FLAGS.RHF_PIXIE,
                 RH_FLAGS.RHF_DEFAULT_DELAY_LOAD,
                 RH_FLAGS.RHF_REQUICKSTART, RH_FLAGS.RHF_REQUICKSTARTED,
                 RH_FLAGS.RHF_CORD, RH_FLAGS.RHF_NO_UNRES_UNDEF,
                 RH_FLAGS.RHF_RLD_ORDER_SAFE):
        if x & flag:
            names.append(_DESCR_RH_FLAGS[flag])
    return ' '.join(names)
def describe_sh_type(x):
    """Describe a section header type; OS range rendered as loos offset."""
    if x in _DESCR_SH_TYPE:
        return _DESCR_SH_TYPE[x]
    loos = ENUM_SH_TYPE_BASE['SHT_LOOS']
    if loos <= x < ENUM_SH_TYPE_BASE['SHT_GNU_versym']:
        return 'loos+0x%lx' % (x - loos)
    return _unknown
def describe_sh_flags(x):
    """Render sh_flags as a compact letter string (readelf style)."""
    s = ''.join(
        _DESCR_SH_FLAGS[flag] for flag in (
            SH_FLAGS.SHF_WRITE, SH_FLAGS.SHF_ALLOC, SH_FLAGS.SHF_EXECINSTR,
            SH_FLAGS.SHF_MERGE, SH_FLAGS.SHF_STRINGS, SH_FLAGS.SHF_INFO_LINK,
            SH_FLAGS.SHF_LINK_ORDER, SH_FLAGS.SHF_OS_NONCONFORMING,
            SH_FLAGS.SHF_GROUP, SH_FLAGS.SHF_TLS, SH_FLAGS.SHF_MASKOS,
            SH_FLAGS.SHF_EXCLUDE)
        if x & flag)
    # Processor-specific bits are shown as 'p', but only when the section
    # is not excluded (SHF_EXCLUDE overlaps SHF_MASKPROC).
    if not x & SH_FLAGS.SHF_EXCLUDE:
        if x & SH_FLAGS.SHF_MASKPROC:
            s += 'p'
    return s
def describe_symbol_type(x):
    """Return a textual description of a symbol's st_info type."""
    try:
        return _DESCR_ST_INFO_TYPE[x]
    except KeyError:
        return _unknown
def describe_symbol_bind(x):
    """Return a textual description of a symbol's st_info binding."""
    try:
        return _DESCR_ST_INFO_BIND[x]
    except KeyError:
        return _unknown
def describe_symbol_visibility(x):
    """Return a textual description of a symbol's visibility."""
    try:
        return _DESCR_ST_VISIBILITY[x]
    except KeyError:
        return _unknown
def describe_symbol_local(x):
    """Render a PPC64 localentry value (stored as a power-of-two shift)."""
    entry = 1 << x
    return '[<localentry>: ' + str(entry) + ']'
def describe_symbol_other(x):
    """Describe st_other: visibility, plus localentry when in range 2..6."""
    vis = describe_symbol_visibility(x['visibility'])
    local = x['local']
    if 1 < local < 7:
        return vis + ' ' + describe_symbol_local(local)
    return vis
def describe_symbol_shndx(x):
    """Describe a symbol's section index; plain indices right-justified."""
    default = '%3s' % x
    return _DESCR_ST_SHNDX.get(x, default)
def describe_reloc_type(x, elffile):
    """Describe a relocation type number for the file's machine arch."""
    # Map machine architecture to its relocation-name table; unknown
    # architectures fall through to a raw hex rendering.
    table = {
        'x86': _DESCR_RELOC_TYPE_i386,
        'x64': _DESCR_RELOC_TYPE_x64,
        'ARM': _DESCR_RELOC_TYPE_ARM,
        'AArch64': _DESCR_RELOC_TYPE_AARCH64,
        '64-bit PowerPC': _DESCR_RELOC_TYPE_PPC64,
        'MIPS': _DESCR_RELOC_TYPE_MIPS,
    }.get(elffile.get_machine_arch())
    if table is None:
        return 'unrecognized: %-7x' % (x & 0xFFFFFFFF)
    return table.get(x, _unknown)
def describe_dyn_tag(x):
    """Return a textual description of a dynamic table tag."""
    try:
        return _DESCR_D_TAG[x]
    except KeyError:
        return _unknown
def describe_dt_flags(x):
    """List DT_FLAGS names set in x, ordered by flag value, 'DF_' stripped."""
    names = [key[3:]
             for key, val in sorted(ENUM_DT_FLAGS.items(), key=lambda t: t[1])
             if x & val]
    return ' '.join(names)
def describe_dt_flags_1(x):
    """List DT_FLAGS_1 names set in x, ordered by value, 'DF_1_' stripped."""
    names = [key[5:]
             for key, val in sorted(ENUM_DT_FLAGS_1.items(), key=lambda t: t[1])
             if x & val]
    return ' '.join(names)
def describe_syminfo_flags(x):
    """Render si_flags as a letter string in readelf's display order."""
    out = ''
    for flag in (SUNW_SYMINFO_FLAGS.SYMINFO_FLG_CAP,
                 SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DIRECT,
                 SUNW_SYMINFO_FLAGS.SYMINFO_FLG_FILTER,
                 SUNW_SYMINFO_FLAGS.SYMINFO_FLG_AUXILIARY,
                 SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DIRECTBIND,
                 SUNW_SYMINFO_FLAGS.SYMINFO_FLG_COPY,
                 SUNW_SYMINFO_FLAGS.SYMINFO_FLG_LAZYLOAD,
                 SUNW_SYMINFO_FLAGS.SYMINFO_FLG_NOEXTDIRECT,
                 SUNW_SYMINFO_FLAGS.SYMINFO_FLG_INTERPOSE,
                 SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DEFERRED):
        if x & flag:
            out += _DESCR_SYMINFO_FLAGS[flag]
    return out
def describe_symbol_boundto(x):
    """Describe a syminfo boundto value; plain values right-justified."""
    default = '%3s' % x
    return _DESCR_SYMINFO_BOUNDTO.get(x, default)
def describe_ver_flags(x):
    """Join the names of version flags set in x with ' | '."""
    names = [_DESCR_VER_FLAGS[flag]
             for flag in (VER_FLAGS.VER_FLG_WEAK,
                          VER_FLAGS.VER_FLG_BASE,
                          VER_FLAGS.VER_FLG_INFO)
             if x & flag]
    return ' | '.join(names)
def describe_note(x):
    """Render one PT_NOTE entry as 'TYPE (type description)<details>'."""
    n_desc = x['n_desc']
    n_type = x['n_type']

    # First build the per-type detail string.
    if n_type == 'NT_GNU_ABI_TAG':
        if x['n_name'] == 'Android':
            desc = '\n description data: %s ' % bytes2hex(x['n_descdata'])
        else:
            desc = '\n OS: %s, ABI: %d.%d.%d' % (
                _DESCR_NOTE_ABI_TAG_OS.get(n_desc['abi_os'], _unknown),
                n_desc['abi_major'], n_desc['abi_minor'], n_desc['abi_tiny'])
    elif n_type == 'NT_GNU_BUILD_ID':
        desc = '\n Build ID: %s' % (n_desc)
    elif n_type == 'NT_GNU_GOLD_VERSION':
        desc = '\n Version: %s' % (n_desc)
    elif n_type == 'NT_GNU_PROPERTY_TYPE_0':
        desc = '\n Properties: ' + describe_note_gnu_properties(x['n_desc'])
    else:
        desc = '\n description data: {}'.format(bytes2hex(n_desc))

    # Then pick the displayed type name. Android reuses NT_GNU_ABI_TAG's
    # number for NT_VERSION, so that combination is special-cased.
    if n_type == 'NT_GNU_ABI_TAG' and x['n_name'] == 'Android':
        note_type = 'NT_VERSION'
        note_type_desc = 'version'
    else:
        note_type = n_type if isinstance(n_type, str) else 'Unknown note type:'
        note_type_desc = ('0x%.8x' % n_type if isinstance(n_type, int)
                          else _DESCR_NOTE_N_TYPE.get(n_type, _unknown))
    return '%s (%s)%s' % (note_type, note_type_desc, desc)
def describe_attr_tag_arm(tag, val, extra):
    """Describe one ARM build-attribute tag/value pair."""
    idx = ENUM_ATTR_TAG_ARM[tag] - 1
    d_entry = _DESCR_ATTR_VAL_ARM[idx]

    # Tags with a value-description table are a straight lookup.
    if d_entry is not None:
        return _DESCR_ATTR_TAG_ARM[tag] + d_entry[val]

    # Remaining tags need per-tag formatting.
    if tag == 'TAG_COMPATIBILITY':
        return (_DESCR_ATTR_TAG_ARM[tag]
                + 'flag = %d, vendor = %s' % (val, extra))
    if tag == 'TAG_ALSO_COMPATIBLE_WITH':
        if val.tag == 'TAG_CPU_ARCH':
            # NOTE(review): d_entry is None on this path in the original
            # as well; kept as-is for behavioral parity.
            return _DESCR_ATTR_TAG_ARM[tag] + d_entry[val]
        return _DESCR_ATTR_TAG_ARM[tag] + '??? (%d)' % val.tag
    if tag == 'TAG_NODEFAULTS':
        return _DESCR_ATTR_TAG_ARM[tag] + 'True'
    s = _DESCR_ATTR_TAG_ARM[tag]
    s += '"%s"' % val if val else ''
    return s
def describe_note_gnu_property_x86_feature_1(value):
    """List the GNU_PROPERTY_X86_FEATURE_1 flags set in value."""
    names = [desc
             for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS
             if value & mask]
    return 'x86 feature: ' + ', '.join(names)
def describe_note_gnu_properties(properties):
    """Describe each property of an NT_GNU_PROPERTY_TYPE_0 note."""
    descriptions = []
    for prop in properties:
        ptype, data, size = prop.pr_type, prop.pr_data, prop.pr_datasz
        if ptype == 'GNU_PROPERTY_STACK_SIZE':
            # A non-int payload means the stated length was wrong.
            if type(data) is int:
                text = 'stack size: 0x%x' % data
            else:
                text = 'stack size: <corrupt length: 0x%x>' % size
        elif ptype == 'GNU_PROPERTY_NO_COPY_ON_PROTECTED':
            # This property carries no payload at all.
            if size != 0:
                text = ' <corrupt length: 0x%x>' % size
            else:
                text = 'no copy on protected'
        elif ptype == 'GNU_PROPERTY_X86_FEATURE_1_AND':
            # Payload must be exactly one 32-bit flag word.
            if size != 4:
                text = ' <corrupt length: 0x%x>' % size
            else:
                text = describe_note_gnu_property_x86_feature_1(data)
        elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= ptype <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC:
            text = '<processor-specific type 0x%x data: %s >' % (ptype, bytes2hex(data, sep=' '))
        elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= ptype <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER:
            text = '<application-specific type 0x%x data: %s >' % (ptype, bytes2hex(data, sep=' '))
        else:
            text = '<unknown type 0x%x data: %s >' % (ptype, bytes2hex(data, sep=' '))
        descriptions.append(text)
    return '\n '.join(descriptions)
#-------------------------------------------------------------------------------
|
||||
_unknown = '<unknown>'
|
||||
|
||||
|
||||
_DESCR_EI_CLASS = dict(
|
||||
ELFCLASSNONE='none',
|
||||
ELFCLASS32='ELF32',
|
||||
ELFCLASS64='ELF64',
|
||||
)
|
||||
|
||||
|
||||
_DESCR_EI_DATA = dict(
|
||||
ELFDATANONE='none',
|
||||
ELFDATA2LSB="2's complement, little endian",
|
||||
ELFDATA2MSB="2's complement, big endian",
|
||||
)
|
||||
|
||||
|
||||
_DESCR_EI_OSABI = dict(
|
||||
ELFOSABI_SYSV='UNIX - System V',
|
||||
ELFOSABI_HPUX='UNIX - HP-UX',
|
||||
ELFOSABI_NETBSD='UNIX - NetBSD',
|
||||
ELFOSABI_LINUX='UNIX - Linux',
|
||||
ELFOSABI_HURD='UNIX - GNU/Hurd',
|
||||
ELFOSABI_SOLARIS='UNIX - Solaris',
|
||||
ELFOSABI_AIX='UNIX - AIX',
|
||||
ELFOSABI_IRIX='UNIX - IRIX',
|
||||
ELFOSABI_FREEBSD='UNIX - FreeBSD',
|
||||
ELFOSABI_TRU64='UNIX - TRU64',
|
||||
ELFOSABI_MODESTO='Novell - Modesto',
|
||||
ELFOSABI_OPENBSD='UNIX - OpenBSD',
|
||||
ELFOSABI_OPENVMS='VMS - OpenVMS',
|
||||
ELFOSABI_NSK='HP - Non-Stop Kernel',
|
||||
ELFOSABI_AROS='AROS',
|
||||
ELFOSABI_FENIXOS='Fenix OS',
|
||||
ELFOSABI_CLOUD='Nuxi - CloudABI',
|
||||
ELFOSABI_SORTIX='Sortix',
|
||||
ELFOSABI_ARM_AEABI='ARM - EABI',
|
||||
ELFOSABI_ARM='ARM - ABI',
|
||||
ELFOSABI_CELL_LV2='CellOS Lv-2',
|
||||
ELFOSABI_STANDALONE='Standalone App',
|
||||
)
|
||||
|
||||
|
||||
_DESCR_E_TYPE = dict(
|
||||
ET_NONE='NONE (None)',
|
||||
ET_REL='REL (Relocatable file)',
|
||||
ET_EXEC='EXEC (Executable file)',
|
||||
ET_DYN='DYN (Shared object file)',
|
||||
ET_CORE='CORE (Core file)',
|
||||
PROC_SPECIFIC='Processor Specific',
|
||||
)
|
||||
|
||||
|
||||
_DESCR_E_MACHINE = dict(
|
||||
EM_NONE='None',
|
||||
EM_M32='WE32100',
|
||||
EM_SPARC='Sparc',
|
||||
EM_386='Intel 80386',
|
||||
EM_68K='MC68000',
|
||||
EM_88K='MC88000',
|
||||
EM_860='Intel 80860',
|
||||
EM_MIPS='MIPS R3000',
|
||||
EM_S370='IBM System/370',
|
||||
EM_MIPS_RS4_BE='MIPS 4000 big-endian',
|
||||
EM_IA_64='Intel IA-64',
|
||||
EM_X86_64='Advanced Micro Devices X86-64',
|
||||
EM_AVR='Atmel AVR 8-bit microcontroller',
|
||||
EM_ARM='ARM',
|
||||
EM_AARCH64='AArch64',
|
||||
EM_BLACKFIN='Analog Devices Blackfin',
|
||||
EM_PPC='PowerPC',
|
||||
EM_PPC64='PowerPC64',
|
||||
RESERVED='RESERVED',
|
||||
)
|
||||
|
||||
|
||||
_DESCR_P_TYPE = dict(
|
||||
PT_NULL='NULL',
|
||||
PT_LOAD='LOAD',
|
||||
PT_DYNAMIC='DYNAMIC',
|
||||
PT_INTERP='INTERP',
|
||||
PT_NOTE='NOTE',
|
||||
PT_SHLIB='SHLIB',
|
||||
PT_PHDR='PHDR',
|
||||
PT_GNU_EH_FRAME='GNU_EH_FRAME',
|
||||
PT_GNU_STACK='GNU_STACK',
|
||||
PT_GNU_RELRO='GNU_RELRO',
|
||||
PT_GNU_PROPERTY='GNU_PROPERTY',
|
||||
PT_ARM_ARCHEXT='ARM_ARCHEXT',
|
||||
PT_ARM_EXIDX='EXIDX', # binutils calls this EXIDX, not ARM_EXIDX
|
||||
PT_AARCH64_ARCHEXT='AARCH64_ARCHEXT',
|
||||
PT_AARCH64_UNWIND='AARCH64_UNWIND',
|
||||
PT_TLS='TLS',
|
||||
PT_MIPS_ABIFLAGS='ABIFLAGS'
|
||||
)
|
||||
|
||||
|
||||
# Program header permission letters (readelf style).
_DESCR_P_FLAGS = {
    P_FLAGS.PF_X: 'E',
    P_FLAGS.PF_R: 'R',
    P_FLAGS.PF_W: 'W',
}

_DESCR_SH_TYPE = dict(
    SHT_NULL='NULL',
    SHT_PROGBITS='PROGBITS',
    SHT_SYMTAB='SYMTAB',
    SHT_STRTAB='STRTAB',
    SHT_RELA='RELA',
    SHT_HASH='HASH',
    SHT_DYNAMIC='DYNAMIC',
    SHT_NOTE='NOTE',
    SHT_NOBITS='NOBITS',
    SHT_REL='REL',
    SHT_SHLIB='SHLIB',
    SHT_DYNSYM='DYNSYM',
    SHT_INIT_ARRAY='INIT_ARRAY',
    SHT_FINI_ARRAY='FINI_ARRAY',
    SHT_PREINIT_ARRAY='PREINIT_ARRAY',
    SHT_GNU_ATTRIBUTES='GNU_ATTRIBUTES',
    SHT_GNU_HASH='GNU_HASH',
    SHT_GROUP='GROUP',
    SHT_SYMTAB_SHNDX='SYMTAB SECTION INDICIES',
    SHT_GNU_verdef='VERDEF',
    SHT_GNU_verneed='VERNEED',
    SHT_GNU_versym='VERSYM',
    SHT_GNU_LIBLIST='GNU_LIBLIST',
    SHT_ARM_EXIDX='ARM_EXIDX',
    SHT_ARM_PREEMPTMAP='ARM_PREEMPTMAP',
    SHT_ARM_ATTRIBUTES='ARM_ATTRIBUTES',
    SHT_ARM_DEBUGOVERLAY='ARM_DEBUGOVERLAY',
    SHT_MIPS_LIBLIST='MIPS_LIBLIST',
    SHT_MIPS_DEBUG='MIPS_DEBUG',
    SHT_MIPS_REGINFO='MIPS_REGINFO',
    SHT_MIPS_PACKAGE='MIPS_PACKAGE',
    SHT_MIPS_PACKSYM='MIPS_PACKSYM',
    SHT_MIPS_RELD='MIPS_RELD',
    SHT_MIPS_IFACE='MIPS_IFACE',
    SHT_MIPS_CONTENT='MIPS_CONTENT',
    SHT_MIPS_OPTIONS='MIPS_OPTIONS',
    SHT_MIPS_SHDR='MIPS_SHDR',
    SHT_MIPS_FDESC='MIPS_FDESC',
    SHT_MIPS_EXTSYM='MIPS_EXTSYM',
    SHT_MIPS_DENSE='MIPS_DENSE',
    SHT_MIPS_PDESC='MIPS_PDESC',
    SHT_MIPS_LOCSYM='MIPS_LOCSYM',
    SHT_MIPS_AUXSYM='MIPS_AUXSYM',
    SHT_MIPS_OPTSYM='MIPS_OPTSYM',
    SHT_MIPS_LOCSTR='MIPS_LOCSTR',
    SHT_MIPS_LINE='MIPS_LINE',
    SHT_MIPS_RFDESC='MIPS_RFDESC',
    SHT_MIPS_DELTASYM='MIPS_DELTASYM',
    SHT_MIPS_DELTAINST='MIPS_DELTAINST',
    SHT_MIPS_DELTACLASS='MIPS_DELTACLASS',
    SHT_MIPS_DWARF='MIPS_DWARF',
    SHT_MIPS_DELTADECL='MIPS_DELTADECL',
    SHT_MIPS_SYMBOL_LIB='MIPS_SYMBOL_LIB',
    SHT_MIPS_EVENTS='MIPS_EVENTS',
    SHT_MIPS_TRANSLATE='MIPS_TRANSLATE',
    SHT_MIPS_PIXIE='MIPS_PIXIE',
    SHT_MIPS_XLATE='MIPS_XLATE',
    SHT_MIPS_XLATE_DEBUG='MIPS_XLATE_DEBUG',
    SHT_MIPS_WHIRL='MIPS_WHIRL',
    SHT_MIPS_EH_REGION='MIPS_EH_REGION',
    SHT_MIPS_XLATE_OLD='MIPS_XLATE_OLD',
    SHT_MIPS_PDR_EXCEPTION='MIPS_PDR_EXCEPTION',
    SHT_MIPS_ABIFLAGS='MIPS_ABIFLAGS',
)
# Section flag letters (readelf style).
_DESCR_SH_FLAGS = {
    SH_FLAGS.SHF_WRITE: 'W',
    SH_FLAGS.SHF_ALLOC: 'A',
    SH_FLAGS.SHF_EXECINSTR: 'X',
    SH_FLAGS.SHF_MERGE: 'M',
    SH_FLAGS.SHF_STRINGS: 'S',
    SH_FLAGS.SHF_INFO_LINK: 'I',
    SH_FLAGS.SHF_LINK_ORDER: 'L',
    SH_FLAGS.SHF_OS_NONCONFORMING: 'O',
    SH_FLAGS.SHF_GROUP: 'G',
    SH_FLAGS.SHF_TLS: 'T',
    SH_FLAGS.SHF_MASKOS: 'o',
    SH_FLAGS.SHF_EXCLUDE: 'E',
}

# DT_MIPS_FLAGS names, keyed by flag value.
_DESCR_RH_FLAGS = {
    RH_FLAGS.RHF_NONE: 'NONE',
    RH_FLAGS.RHF_QUICKSTART: 'QUICKSTART',
    RH_FLAGS.RHF_NOTPOT: 'NOTPOT',
    RH_FLAGS.RHF_NO_LIBRARY_REPLACEMENT: 'NO_LIBRARY_REPLACEMENT',
    RH_FLAGS.RHF_NO_MOVE: 'NO_MOVE',
    RH_FLAGS.RHF_SGI_ONLY: 'SGI_ONLY',
    RH_FLAGS.RHF_GUARANTEE_INIT: 'GUARANTEE_INIT',
    RH_FLAGS.RHF_DELTA_C_PLUS_PLUS: 'DELTA_C_PLUS_PLUS',
    RH_FLAGS.RHF_GUARANTEE_START_INIT: 'GUARANTEE_START_INIT',
    RH_FLAGS.RHF_PIXIE: 'PIXIE',
    RH_FLAGS.RHF_DEFAULT_DELAY_LOAD: 'DEFAULT_DELAY_LOAD',
    RH_FLAGS.RHF_REQUICKSTART: 'REQUICKSTART',
    RH_FLAGS.RHF_REQUICKSTARTED: 'REQUICKSTARTED',
    RH_FLAGS.RHF_CORD: 'CORD',
    RH_FLAGS.RHF_NO_UNRES_UNDEF: 'NO_UNRES_UNDEF',
    RH_FLAGS.RHF_RLD_ORDER_SAFE: 'RLD_ORDER_SAFE',
}
_DESCR_ST_INFO_TYPE = dict(
|
||||
STT_NOTYPE='NOTYPE',
|
||||
STT_OBJECT='OBJECT',
|
||||
STT_FUNC='FUNC',
|
||||
STT_SECTION='SECTION',
|
||||
STT_FILE='FILE',
|
||||
STT_COMMON='COMMON',
|
||||
STT_TLS='TLS',
|
||||
STT_NUM='NUM',
|
||||
STT_RELC='RELC',
|
||||
STT_SRELC='SRELC',
|
||||
)
|
||||
|
||||
|
||||
_DESCR_ST_INFO_BIND = dict(
|
||||
STB_LOCAL='LOCAL',
|
||||
STB_GLOBAL='GLOBAL',
|
||||
STB_WEAK='WEAK',
|
||||
)
|
||||
|
||||
|
||||
_DESCR_ST_VISIBILITY = dict(
|
||||
STV_DEFAULT='DEFAULT',
|
||||
STV_INTERNAL='INTERNAL',
|
||||
STV_HIDDEN='HIDDEN',
|
||||
STV_PROTECTED='PROTECTED',
|
||||
STV_EXPORTED='EXPORTED',
|
||||
STV_SINGLETON='SINGLETON',
|
||||
STV_ELIMINATE='ELIMINATE',
|
||||
)
|
||||
|
||||
|
||||
_DESCR_ST_SHNDX = dict(
|
||||
SHN_UNDEF='UND',
|
||||
SHN_ABS='ABS',
|
||||
SHN_COMMON='COM',
|
||||
)
|
||||
|
||||
|
||||
# si_flags display letters, keyed by flag value.
_DESCR_SYMINFO_FLAGS = {
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DIRECT: 'D',
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DIRECTBIND: 'B',
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_COPY: 'C',
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_LAZYLOAD: 'L',
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_NOEXTDIRECT: 'N',
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_AUXILIARY: 'A',
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_FILTER: 'F',
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_INTERPOSE: 'I',
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_CAP: 'S',
    SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DEFERRED: 'P',
}

_DESCR_SYMINFO_BOUNDTO = dict(
    SYMINFO_BT_SELF='<self>',
    SYMINFO_BT_PARENT='<parent>',
    SYMINFO_BT_NONE='',
    SYMINFO_BT_EXTERN='<extern>',
)

_DESCR_VER_FLAGS = {
    0: '',
    VER_FLAGS.VER_FLG_BASE: 'BASE',
    VER_FLAGS.VER_FLG_WEAK: 'WEAK',
    VER_FLAGS.VER_FLG_INFO: 'INFO',
}
# PT_NOTE section types
|
||||
_DESCR_NOTE_N_TYPE = dict(
|
||||
NT_GNU_ABI_TAG='ABI version tag',
|
||||
NT_GNU_HWCAP='DSO-supplied software HWCAP info',
|
||||
NT_GNU_BUILD_ID='unique build ID bitstring',
|
||||
NT_GNU_GOLD_VERSION='gold version',
|
||||
NT_GNU_PROPERTY_TYPE_0='program properties'
|
||||
)
|
||||
|
||||
|
||||
# Values in GNU .note.ABI-tag notes (n_type=='NT_GNU_ABI_TAG')
|
||||
_DESCR_NOTE_ABI_TAG_OS = dict(
|
||||
ELF_NOTE_OS_LINUX='Linux',
|
||||
ELF_NOTE_OS_GNU='GNU',
|
||||
ELF_NOTE_OS_SOLARIS2='Solaris 2',
|
||||
ELF_NOTE_OS_FREEBSD='FreeBSD',
|
||||
ELF_NOTE_OS_NETBSD='NetBSD',
|
||||
ELF_NOTE_OS_SYLLABLE='Syllable',
|
||||
)
|
||||
|
||||
|
||||
# Values in GNU .note.gnu.property notes (n_type=='NT_GNU_PROPERTY_TYPE_0') have
|
||||
# different formats which need to be parsed/described differently
|
||||
_DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC=0xc0000000
|
||||
_DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC=0xdfffffff
|
||||
_DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER=0xe0000000
|
||||
_DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER=0xffffffff
|
||||
|
||||
|
||||
# Bit masks for GNU_PROPERTY_X86_FEATURE_1_xxx flags in the form
|
||||
# (mask, flag_description) in the desired output order
|
||||
_DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS = (
|
||||
(1, 'IBT'),
|
||||
(2, 'SHSTK'),
|
||||
(4, 'LAM_U48'),
|
||||
(8, 'LAM_U57'),
|
||||
)
|
||||
|
||||
|
||||
def _reverse_dict(d, low_priority=()):
|
||||
"""
|
||||
This is a tiny helper function to "reverse" the keys/values of a dictionary
|
||||
provided in the first argument, i.e. {k: v} becomes {v: k}.
|
||||
|
||||
The second argument (optional) provides primitive control over what to do in
|
||||
the case of conflicting values - if a value is present in this list, it will
|
||||
not override any other entries of the same value.
|
||||
"""
|
||||
out = {}
|
||||
for k, v in iteritems(d):
|
||||
if v in out and k in low_priority:
|
||||
continue
|
||||
out[v] = k
|
||||
return out
|
||||
|
||||
# Reverse lookup tables: relocation type number -> enum name, per machine.
_DESCR_RELOC_TYPE_i386 = _reverse_dict(ENUM_RELOC_TYPE_i386)
_DESCR_RELOC_TYPE_x64 = _reverse_dict(ENUM_RELOC_TYPE_x64)
_DESCR_RELOC_TYPE_ARM = _reverse_dict(ENUM_RELOC_TYPE_ARM)
_DESCR_RELOC_TYPE_AARCH64 = _reverse_dict(ENUM_RELOC_TYPE_AARCH64)
_DESCR_RELOC_TYPE_PPC64 = _reverse_dict(ENUM_RELOC_TYPE_PPC64)
_DESCR_RELOC_TYPE_MIPS = _reverse_dict(ENUM_RELOC_TYPE_MIPS)

_low_priority_D_TAG = (
    # these are 'meta-tags' marking semantics of numeric ranges of the enum
    # they should not override other tags with the same numbers
    # see https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-42444.html
    'DT_LOOS',
    'DT_HIOS',
    'DT_LOPROC',
    'DT_HIPROC',
    'DT_ENCODING',
)
_DESCR_D_TAG = _reverse_dict(ENUM_D_TAG, low_priority=_low_priority_D_TAG)
|
||||
|
||||
_DESCR_ATTR_TAG_ARM = dict(
|
||||
TAG_FILE='File Attributes',
|
||||
TAG_SECTION='Section Attributes:',
|
||||
TAG_SYMBOL='Symbol Attributes:',
|
||||
TAG_CPU_RAW_NAME='Tag_CPU_raw_name: ',
|
||||
TAG_CPU_NAME='Tag_CPU_name: ',
|
||||
TAG_CPU_ARCH='Tag_CPU_arch: ',
|
||||
TAG_CPU_ARCH_PROFILE='Tag_CPU_arch_profile: ',
|
||||
TAG_ARM_ISA_USE='Tag_ARM_ISA_use: ',
|
||||
TAG_THUMB_ISA_USE='Tag_Thumb_ISA_use: ',
|
||||
TAG_FP_ARCH='Tag_FP_arch: ',
|
||||
TAG_WMMX_ARCH='Tag_WMMX_arch: ',
|
||||
TAG_ADVANCED_SIMD_ARCH='Tag_Advanced_SIMD_arch: ',
|
||||
TAG_PCS_CONFIG='Tag_PCS_config: ',
|
||||
TAG_ABI_PCS_R9_USE='Tag_ABI_PCS_R9_use: ',
|
||||
TAG_ABI_PCS_RW_DATA='Tag_ABI_PCS_RW_use: ',
|
||||
TAG_ABI_PCS_RO_DATA='Tag_ABI_PCS_RO_use: ',
|
||||
TAG_ABI_PCS_GOT_USE='Tag_ABI_PCS_GOT_use: ',
|
||||
TAG_ABI_PCS_WCHAR_T='Tag_ABI_PCS_wchar_t: ',
|
||||
TAG_ABI_FP_ROUNDING='Tag_ABI_FP_rounding: ',
|
||||
TAG_ABI_FP_DENORMAL='Tag_ABI_FP_denormal: ',
|
||||
TAG_ABI_FP_EXCEPTIONS='Tag_ABI_FP_exceptions: ',
|
||||
TAG_ABI_FP_USER_EXCEPTIONS='Tag_ABI_FP_user_exceptions: ',
|
||||
TAG_ABI_FP_NUMBER_MODEL='Tag_ABI_FP_number_model: ',
|
||||
TAG_ABI_ALIGN_NEEDED='Tag_ABI_align_needed: ',
|
||||
TAG_ABI_ALIGN_PRESERVED='Tag_ABI_align_preserved: ',
|
||||
TAG_ABI_ENUM_SIZE='Tag_ABI_enum_size: ',
|
||||
TAG_ABI_HARDFP_USE='Tag_ABI_HardFP_use: ',
|
||||
TAG_ABI_VFP_ARGS='Tag_ABI_VFP_args: ',
|
||||
TAG_ABI_WMMX_ARGS='Tag_ABI_WMMX_args: ',
|
||||
TAG_ABI_OPTIMIZATION_GOALS='Tag_ABI_optimization_goals: ',
|
||||
TAG_ABI_FP_OPTIMIZATION_GOALS='Tag_ABI_FP_optimization_goals: ',
|
||||
TAG_COMPATIBILITY='Tag_compatibility: ',
|
||||
TAG_CPU_UNALIGNED_ACCESS='Tag_CPU_unaligned_access: ',
|
||||
TAG_FP_HP_EXTENSION='Tag_FP_HP_extension: ',
|
||||
TAG_ABI_FP_16BIT_FORMAT='Tag_ABI_FP_16bit_format: ',
|
||||
TAG_MPEXTENSION_USE='Tag_MPextension_use: ',
|
||||
TAG_DIV_USE='Tag_DIV_use: ',
|
||||
TAG_NODEFAULTS='Tag_nodefaults: ',
|
||||
TAG_ALSO_COMPATIBLE_WITH='Tag_also_compatible_with: ',
|
||||
TAG_T2EE_USE='Tag_T2EE_use: ',
|
||||
TAG_CONFORMANCE='Tag_conformance: ',
|
||||
TAG_VIRTUALIZATION_USE='Tag_Virtualization_use: ',
|
||||
TAG_MPEXTENSION_USE_OLD='Tag_MPextension_use_old: ',
|
||||
)
|
||||
|
||||
|
||||
_DESCR_ATTR_VAL_ARM = [
|
||||
None, #1
|
||||
None, #2
|
||||
None, #3
|
||||
None, #4
|
||||
None, #5
|
||||
{ #6 TAG_CPU_ARCH
|
||||
0 : 'Pre-v4',
|
||||
1 : 'v4',
|
||||
2 : 'v4T',
|
||||
3 : 'v5T',
|
||||
4 : 'v5TE',
|
||||
5 : 'v5TEJ',
|
||||
6 : 'v6',
|
||||
7 : 'v6KZ',
|
||||
8 : 'v6T2',
|
||||
9 : 'v6K',
|
||||
10: 'v7',
|
||||
11: 'v6-M',
|
||||
12: 'v6S-M',
|
||||
13: 'v7E-M',
|
||||
14: 'v8',
|
||||
15: 'v8-R',
|
||||
16: 'v8-M.baseline',
|
||||
17: 'v8-M.mainline',
|
||||
},
|
||||
{ #7 TAG_CPU_ARCH_PROFILE
|
||||
0x00: 'None',
|
||||
0x41: 'Application',
|
||||
0x52: 'Realtime',
|
||||
0x4D: 'Microcontroller',
|
||||
0x53: 'Application or Realtime',
|
||||
},
|
||||
{ #8 TAG_ARM_ISA
|
||||
0: 'No',
|
||||
1: 'Yes',
|
||||
},
|
||||
{ #9 TAG_THUMB_ISA
|
||||
0: 'No',
|
||||
1: 'Thumb-1',
|
||||
2: 'Thumb-2',
|
||||
3: 'Yes',
|
||||
},
|
||||
{ #10 TAG_FP_ARCH
|
||||
0: 'No',
|
||||
1: 'VFPv1',
|
||||
2: 'VFPv2 ',
|
||||
3: 'VFPv3',
|
||||
4: 'VFPv3-D16',
|
||||
5: 'VFPv4',
|
||||
6: 'VFPv4-D16',
|
||||
7: 'FP ARM v8',
|
||||
8: 'FPv5/FP-D16 for ARMv8',
|
||||
},
|
||||
{ #11 TAG_WMMX_ARCH
|
||||
0: 'No',
|
||||
1: 'WMMXv1',
|
||||
2: 'WMMXv2',
|
||||
},
|
||||
{ #12 TAG_ADVANCED_SIMD_ARCH
|
||||
0: 'No',
|
||||
1: 'NEONv1',
|
||||
2: 'NEONv1 with Fused-MAC',
|
||||
3: 'NEON for ARMv8',
|
||||
4: 'NEON for ARMv8.1',
|
||||
},
|
||||
{ #13 TAG_PCS_CONFIG
|
||||
0: 'None',
|
||||
1: 'Bare platform',
|
||||
2: 'Linux application',
|
||||
3: 'Linux DSO',
|
||||
4: 'PalmOS 2004',
|
||||
5: 'PalmOS (reserved)',
|
||||
6: 'SymbianOS 2004',
|
||||
7: 'SymbianOS (reserved)',
|
||||
},
|
||||
{ #14 TAG_ABI_PCS_R9_USE
|
||||
0: 'v6',
|
||||
1: 'SB',
|
||||
2: 'TLS',
|
||||
3: 'Unused',
|
||||
},
|
||||
{ #15 TAG_ABI_PCS_RW_DATA
|
||||
0: 'Absolute',
|
||||
1: 'PC-relative',
|
||||
2: 'SB-relative',
|
||||
3: 'None',
|
||||
},
|
||||
{ #16 TAG_ABI_PCS_RO_DATA
|
||||
0: 'Absolute',
|
||||
1: 'PC-relative',
|
||||
2: 'None',
|
||||
},
|
||||
{ #17 TAG_ABI_PCS_GOT_USE
|
||||
0: 'None',
|
||||
1: 'direct',
|
||||
2: 'GOT-indirect',
|
||||
},
|
||||
{ #18 TAG_ABI_PCS_WCHAR_T
|
||||
0: 'None',
|
||||
1: '??? 1',
|
||||
2: '2',
|
||||
3: '??? 3',
|
||||
4: '4',
|
||||
},
|
||||
{ #19 TAG_ABI_FP_ROUNDING
|
||||
0: 'Unused',
|
||||
1: 'Needed',
|
||||
},
|
||||
{ #20 TAG_ABI_FP_DENORMAL
|
||||
0: 'Unused',
|
||||
1: 'Needed',
|
||||
2: 'Sign only',
|
||||
},
|
||||
{ #21 TAG_ABI_FP_EXCEPTIONS
|
||||
0: 'Unused',
|
||||
1: 'Needed',
|
||||
},
|
||||
{ #22 TAG_ABI_FP_USER_EXCEPTIONS
|
||||
0: 'Unused',
|
||||
1: 'Needed',
|
||||
},
|
||||
{ #23 TAG_ABI_FP_NUMBER_MODEL
|
||||
0: 'Unused',
|
||||
1: 'Finite',
|
||||
2: 'RTABI',
|
||||
3: 'IEEE 754',
|
||||
},
|
||||
{ #24 TAG_ABI_ALIGN_NEEDED
|
||||
0: 'None',
|
||||
1: '8-byte',
|
||||
2: '4-byte',
|
||||
3: '??? 3',
|
||||
},
|
||||
{ #25 TAG_ABI_ALIGN_PRESERVED
|
||||
0: 'None',
|
||||
1: '8-byte, except leaf SP',
|
||||
2: '8-byte',
|
||||
3: '??? 3',
|
||||
},
|
||||
{ #26 TAG_ABI_ENUM_SIZE
|
||||
0: 'Unused',
|
||||
1: 'small',
|
||||
2: 'int',
|
||||
3: 'forced to int',
|
||||
},
|
||||
{ #27 TAG_ABI_HARDFP_USE
|
||||
0: 'As Tag_FP_arch',
|
||||
1: 'SP only',
|
||||
2: 'Reserved',
|
||||
3: 'Deprecated',
|
||||
},
|
||||
{ #28 TAG_ABI_VFP_ARGS
|
||||
0: 'AAPCS',
|
||||
1: 'VFP registers',
|
||||
2: 'custom',
|
||||
3: 'compatible',
|
||||
},
|
||||
{ #29 TAG_ABI_WMMX_ARGS
|
||||
0: 'AAPCS',
|
||||
1: 'WMMX registers',
|
||||
2: 'custom',
|
||||
},
|
||||
{ #30 TAG_ABI_OPTIMIZATION_GOALS
|
||||
0: 'None',
|
||||
1: 'Prefer Speed',
|
||||
2: 'Aggressive Speed',
|
||||
3: 'Prefer Size',
|
||||
4: 'Aggressive Size',
|
||||
5: 'Prefer Debug',
|
||||
6: 'Aggressive Debug',
|
||||
},
|
||||
{ #31 TAG_ABI_FP_OPTIMIZATION_GOALS
|
||||
0: 'None',
|
||||
1: 'Prefer Speed',
|
||||
2: 'Aggressive Speed',
|
||||
3: 'Prefer Size',
|
||||
4: 'Aggressive Size',
|
||||
5: 'Prefer Accuracy',
|
||||
6: 'Aggressive Accuracy',
|
||||
},
|
||||
{ #32 TAG_COMPATIBILITY
|
||||
0: 'No',
|
||||
1: 'Yes',
|
||||
},
|
||||
None, #33
|
||||
{ #34 TAG_CPU_UNALIGNED_ACCESS
|
||||
0: 'None',
|
||||
1: 'v6',
|
||||
},
|
||||
None, #35
|
||||
{ #36 TAG_FP_HP_EXTENSION
|
||||
0: 'Not Allowed',
|
||||
1: 'Allowed',
|
||||
},
|
||||
None, #37
|
||||
{ #38 TAG_ABI_FP_16BIT_FORMAT
|
||||
0: 'None',
|
||||
1: 'IEEE 754',
|
||||
2: 'Alternative Format',
|
||||
},
|
||||
None, #39
|
||||
None, #40
|
||||
None, #41
|
||||
{ #42 TAG_MPEXTENSION_USE
|
||||
0: 'Not Allowed',
|
||||
1: 'Allowed',
|
||||
},
|
||||
None, #43
|
||||
{ #44 TAG_DIV_USE
|
||||
0: 'Allowed in Thumb-ISA, v7-R or v7-M',
|
||||
1: 'Not allowed',
|
||||
2: 'Allowed in v7-A with integer division extension',
|
||||
},
|
||||
None, #45
|
||||
None, #46
|
||||
None, #47
|
||||
None, #48
|
||||
None, #49
|
||||
None, #50
|
||||
None, #51
|
||||
None, #52
|
||||
None, #53
|
||||
None, #54
|
||||
None, #55
|
||||
None, #56
|
||||
None, #57
|
||||
None, #58
|
||||
None, #59
|
||||
None, #60
|
||||
None, #61
|
||||
None, #62
|
||||
None, #63
|
||||
None, #64
|
||||
None, #65
|
||||
{ #66 TAG_FP_HP_EXTENSION
|
||||
0: 'Not Allowed',
|
||||
1: 'Allowed',
|
||||
},
|
||||
None, #67
|
||||
{ #68 TAG_VIRTUALIZATION_USE
|
||||
0: 'Not Allowed',
|
||||
1: 'TrustZone',
|
||||
2: 'Virtualization Extensions',
|
||||
3: 'TrustZone and Virtualization Extensions',
|
||||
},
|
||||
None, #69
|
||||
{ #70 TAG_MPEXTENSION_USE_OLD
|
||||
0: 'Not Allowed',
|
||||
1: 'Allowed',
|
||||
},
|
||||
]
|
||||
@@ -1,352 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/dynamic.py
|
||||
#
|
||||
# ELF Dynamic Tags
|
||||
#
|
||||
# Mike Frysinger (vapier@gentoo.org)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
import itertools
|
||||
|
||||
from collections import defaultdict
|
||||
from .hash import ELFHashTable, GNUHashTable
|
||||
from .sections import Section, Symbol
|
||||
from .enums import ENUM_D_TAG
|
||||
from .segments import Segment
|
||||
from .relocation import RelocationTable
|
||||
from ..common.exceptions import ELFError
|
||||
from ..common.utils import elf_assert, struct_parse, parse_cstring_from_stream
|
||||
|
||||
|
||||
class _DynamicStringTable(object):
    """ Bare string table based on values found via ELF dynamic tags and
        loadable segments only. Good enough for get_string() only.
    """
    def __init__(self, stream, table_offset):
        # stream: file-like object holding the ELF data.
        # table_offset: absolute file offset of the string table.
        self._stream = stream
        self._table_offset = table_offset

    def get_string(self, offset):
        """ Get the string stored at the given offset in this string table.

            Returns a (decoded) str; an empty string if nothing is there.
        """
        s = parse_cstring_from_stream(self._stream, self._table_offset + offset)
        return s.decode('utf-8') if s else ''
|
||||
|
||||
|
||||
class DynamicTag(object):
    """ Dynamic Tag object - representing a single dynamic tag entry from a
        dynamic section.

        Allows dictionary-like access to the dynamic structure. For special
        tags (those listed in the _HANDLED_TAGS set below), creates additional
        attributes for convenience. For example, .soname will contain the actual
        value of DT_SONAME (fetched from the dynamic symbol table).
    """
    # Tags whose d_val is an index into the dynamic string table; for these
    # a convenience attribute (e.g. .needed, .soname) is populated.
    _HANDLED_TAGS = frozenset(
        ['DT_NEEDED', 'DT_RPATH', 'DT_RUNPATH', 'DT_SONAME',
         'DT_SUNW_FILTER'])

    def __init__(self, entry, stringtable):
        if stringtable is None:
            raise ELFError('Creating DynamicTag without string table')
        self.entry = entry
        if entry.d_tag in self._HANDLED_TAGS:
            # e.g. 'DT_SONAME' -> attribute name 'soname'
            setattr(self, entry.d_tag[3:].lower(),
                    stringtable.get_string(self.entry.d_val))

    def __getitem__(self, name):
        """ Implement dict-like access to entries
        """
        return self.entry[name]

    def __repr__(self):
        return '<DynamicTag (%s): %r>' % (self.entry.d_tag, self.entry)

    def __str__(self):
        if self.entry.d_tag in self._HANDLED_TAGS:
            s = '"%s"' % getattr(self, self.entry.d_tag[3:].lower())
        else:
            s = '%#x' % self.entry.d_ptr
        return '<DynamicTag (%s) %s>' % (self.entry.d_tag, s)
|
||||
|
||||
|
||||
class Dynamic(object):
    """ Shared functionality between dynamic sections and segments.
    """
    def __init__(self, stream, elffile, stringtable, position, empty):
        """
        stream:
            The file-like object from which to load data

        elffile:
            The parent elffile object

        stringtable:
            A stringtable reference to use for parsing string references in
            entries

        position:
            The file offset of the dynamic segment/section

        empty:
            Whether this is a degenerate case with zero entries. Normally, every
            dynamic table will have at least one entry, the DT_NULL terminator.
        """
        self.elffile = elffile
        self.elfstructs = elffile.structs
        self._stream = stream
        # -1 means "not yet counted"; 0 for the degenerate empty case.
        self._num_tags = -1 if not empty else 0
        self._offset = position
        self._tagsize = self.elfstructs.Elf_Dyn.sizeof()
        self._empty = empty

        # Do not access this directly yourself; use _get_stringtable() instead.
        self._stringtable = stringtable

    def get_table_offset(self, tag_name):
        """ Return the virtual address and file offset of a dynamic table.
        """
        ptr = None
        for tag in self._iter_tags(type=tag_name):
            ptr = tag['d_ptr']
            break

        # If we found a virtual address, locate the offset in the file
        # by using the program headers.
        offset = None
        if ptr:
            offset = next(self.elffile.address_offsets(ptr), None)

        return ptr, offset

    def _get_stringtable(self):
        """ Return a string table for looking up dynamic tag related strings.

            This won't be a "full" string table object, but will at least
            support the get_string() function.
        """
        if self._stringtable:
            return self._stringtable

        # If the ELF has stripped its section table (which is unusual, but
        # perfectly valid), we need to use the dynamic tags to locate the
        # dynamic string table.
        _, table_offset = self.get_table_offset('DT_STRTAB')
        if table_offset is not None:
            self._stringtable = _DynamicStringTable(self._stream, table_offset)
            return self._stringtable

        # That didn't work for some reason. Let's use the section header
        # even though this ELF is super weird.
        self._stringtable = self.elffile.get_section_by_name('.dynstr')
        return self._stringtable

    def _iter_tags(self, type=None):
        """ Yield all raw tags (limit to |type| if specified)
        """
        if self._empty:
            return
        for n in itertools.count():
            tag = self._get_tag(n)
            if type is None or tag['d_tag'] == type:
                yield tag
            # DT_NULL terminates the dynamic array.
            if tag['d_tag'] == 'DT_NULL':
                break

    def iter_tags(self, type=None):
        """ Yield all tags (limit to |type| if specified)
        """
        for tag in self._iter_tags(type=type):
            yield DynamicTag(tag, self._get_stringtable())

    def _get_tag(self, n):
        """ Get the raw tag at index #n from the file
        """
        if self._num_tags != -1 and n >= self._num_tags:
            raise IndexError(n)
        offset = self._offset + n * self._tagsize
        return struct_parse(
            self.elfstructs.Elf_Dyn,
            self._stream,
            stream_pos=offset)

    def get_tag(self, n):
        """ Get the tag at index #n from the file (DynamicTag object)
        """
        return DynamicTag(self._get_tag(n), self._get_stringtable())

    def num_tags(self):
        """ Number of dynamic tags in the file, including the DT_NULL tag
        """
        if self._num_tags != -1:
            return self._num_tags

        for n in itertools.count():
            tag = self.get_tag(n)
            if tag.entry.d_tag == 'DT_NULL':
                self._num_tags = n + 1
                return self._num_tags

    def get_relocation_tables(self):
        """ Load all available relocation tables from DYNAMIC tags.

            Returns a dictionary mapping found table types (REL, RELA,
            JMPREL) to RelocationTable objects.
        """

        result = {}

        if list(self.iter_tags('DT_REL')):
            result['REL'] = RelocationTable(self.elffile,
                self.get_table_offset('DT_REL')[1],
                next(self.iter_tags('DT_RELSZ'))['d_val'], False)

            relentsz = next(self.iter_tags('DT_RELENT'))['d_val']
            elf_assert(result['REL'].entry_size == relentsz,
                'Expected DT_RELENT to be %s' % relentsz)

        if list(self.iter_tags('DT_RELA')):
            result['RELA'] = RelocationTable(self.elffile,
                self.get_table_offset('DT_RELA')[1],
                next(self.iter_tags('DT_RELASZ'))['d_val'], True)

            relentsz = next(self.iter_tags('DT_RELAENT'))['d_val']
            elf_assert(result['RELA'].entry_size == relentsz,
                'Expected DT_RELAENT to be %s' % relentsz)

        if list(self.iter_tags('DT_JMPREL')):
            result['JMPREL'] = RelocationTable(self.elffile,
                self.get_table_offset('DT_JMPREL')[1],
                next(self.iter_tags('DT_PLTRELSZ'))['d_val'],
                next(self.iter_tags('DT_PLTREL'))['d_val'] == ENUM_D_TAG['DT_RELA'])

        return result
|
||||
|
||||
|
||||
class DynamicSection(Section, Dynamic):
    """ ELF dynamic table section. Knows how to process the list of tags.
    """
    def __init__(self, header, name, elffile):
        Section.__init__(self, header, name, elffile)
        # sh_link points at the associated dynamic string table section.
        stringtable = elffile.get_section(header['sh_link'])
        Dynamic.__init__(self, self.stream, self.elffile, stringtable,
            self['sh_offset'], self['sh_type'] == 'SHT_NOBITS')
|
||||
|
||||
|
||||
class DynamicSegment(Segment, Dynamic):
    """ ELF dynamic table segment. Knows how to process the list of tags.
    """
    def __init__(self, header, stream, elffile):
        # The string table section to be used to resolve string names in
        # the dynamic tag array is the one pointed at by the sh_link field
        # of the dynamic section header.
        # So we must look for the dynamic section contained in the dynamic
        # segment, we do so by searching for the dynamic section whose content
        # is located at the same offset as the dynamic segment
        stringtable = None
        for section in elffile.iter_sections():
            if (isinstance(section, DynamicSection) and
                    section['sh_offset'] == header['p_offset']):
                stringtable = elffile.get_section(section['sh_link'])
                break
        Segment.__init__(self, header, stream)
        Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'],
            self['p_filesz'] == 0)
        self._symbol_size = self.elfstructs.Elf_Sym.sizeof()
        self._num_symbols = None
        self._symbol_name_map = None

    def num_symbols(self):
        """ Number of symbols in the table recovered from DT_SYMTAB
        """
        if self._num_symbols is not None:
            return self._num_symbols

        # Check if a DT_GNU_HASH tag exists and recover the number of symbols
        # from the corresponding hash table
        _, gnu_hash_offset = self.get_table_offset('DT_GNU_HASH')
        if gnu_hash_offset is not None:
            hash_section = GNUHashTable(self.elffile, gnu_hash_offset, self)
            self._num_symbols = hash_section.get_number_of_symbols()

        # If DT_GNU_HASH did not exist, maybe we can use DT_HASH
        if self._num_symbols is None:
            _, hash_offset = self.get_table_offset('DT_HASH')
            if hash_offset is not None:
                # Get the hash table from the DT_HASH offset
                hash_section = ELFHashTable(self.elffile, hash_offset, self)
                self._num_symbols = hash_section.get_number_of_symbols()

        if self._num_symbols is None:
            # Find closest higher pointer than tab_ptr. We'll use that to mark
            # the end of the symbol table.
            tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB')
            if tab_ptr is None or tab_offset is None:
                raise ELFError('Segment does not contain DT_SYMTAB.')
            nearest_ptr = None
            for tag in self.iter_tags():
                tag_ptr = tag['d_ptr']
                if tag['d_tag'] == 'DT_SYMENT':
                    if self._symbol_size != tag['d_val']:
                        # DT_SYMENT is the size of one symbol entry. It must be
                        # the same as returned by Elf_Sym.sizeof.
                        raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' %
                                       (tag['d_val'], self._symbol_size))
                if (tag_ptr > tab_ptr and
                        (nearest_ptr is None or nearest_ptr > tag_ptr)):
                    nearest_ptr = tag_ptr

            if nearest_ptr is None:
                # Use the end of segment that contains DT_SYMTAB.
                for segment in self.elffile.iter_segments():
                    if (segment['p_vaddr'] <= tab_ptr and
                            tab_ptr <= (segment['p_vaddr'] + segment['p_filesz'])):
                        nearest_ptr = segment['p_vaddr'] + segment['p_filesz']

            end_ptr = nearest_ptr
            self._num_symbols = (end_ptr - tab_ptr) // self._symbol_size

        if self._num_symbols is None:
            raise ELFError('Cannot determine the end of DT_SYMTAB.')

        return self._num_symbols

    def get_symbol(self, index):
        """ Get the symbol at index #index from the table (Symbol object)
        """
        tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB')
        if tab_ptr is None or tab_offset is None:
            raise ELFError('Segment does not contain DT_SYMTAB.')

        symbol = struct_parse(
            self.elfstructs.Elf_Sym,
            self._stream,
            stream_pos=tab_offset + index * self._symbol_size)

        string_table = self._get_stringtable()
        symbol_name = string_table.get_string(symbol["st_name"])

        return Symbol(symbol, symbol_name)

    def get_symbol_by_name(self, name):
        """ Get a symbol(s) by name. Return None if no symbol by the given name
            exists.
        """
        # The first time this method is called, construct a name to number
        # mapping
        #
        if self._symbol_name_map is None:
            self._symbol_name_map = defaultdict(list)
            for i, sym in enumerate(self.iter_symbols()):
                self._symbol_name_map[sym.name].append(i)
        symnums = self._symbol_name_map.get(name)
        return [self.get_symbol(i) for i in symnums] if symnums else None

    def iter_symbols(self):
        """ Yield all symbols in this dynamic segment. The symbols are usually
            the same as returned by SymbolTableSection.iter_symbols. However,
            in stripped binaries, SymbolTableSection might have been removed.
            This method reads from the mandatory dynamic tag DT_SYMTAB.
        """
        for i in range(self.num_symbols()):
            # 'yield' is a statement, not a function call - no parentheses.
            yield self.get_symbol(i)
|
||||
@@ -1,757 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/elffile.py
|
||||
#
|
||||
# ELFFile - main class for accessing ELF files
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
import io
|
||||
import struct
|
||||
import zlib
|
||||
|
||||
try:
|
||||
import resource
|
||||
PAGESIZE = resource.getpagesize()
|
||||
except ImportError:
|
||||
try:
|
||||
# Windows system
|
||||
import mmap
|
||||
PAGESIZE = mmap.PAGESIZE
|
||||
except ImportError:
|
||||
# Jython
|
||||
PAGESIZE = 4096
|
||||
|
||||
from ..common.py3compat import BytesIO
|
||||
from ..common.exceptions import ELFError
|
||||
from ..common.utils import struct_parse, elf_assert
|
||||
from .structs import ELFStructs
|
||||
from .sections import (
|
||||
Section, StringTableSection, SymbolTableSection,
|
||||
SymbolTableIndexSection, SUNWSyminfoTableSection, NullSection,
|
||||
NoteSection, StabSection, ARMAttributesSection)
|
||||
from .dynamic import DynamicSection, DynamicSegment
|
||||
from .relocation import RelocationSection, RelocationHandler
|
||||
from .gnuversions import (
|
||||
GNUVerNeedSection, GNUVerDefSection,
|
||||
GNUVerSymSection)
|
||||
from .segments import Segment, InterpSegment, NoteSegment
|
||||
from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
|
||||
from ..ehabi.ehabiinfo import EHABIInfo
|
||||
from .hash import ELFHashSection, GNUHashSection
|
||||
from .constants import SHN_INDICES
|
||||
|
||||
class ELFFile(object):
|
||||
""" Creation: the constructor accepts a stream (file-like object) with the
|
||||
contents of an ELF file.
|
||||
|
||||
Accessible attributes:
|
||||
|
||||
stream:
|
||||
The stream holding the data of the file - must be a binary
|
||||
stream (bytes, not string).
|
||||
|
||||
elfclass:
|
||||
32 or 64 - specifies the word size of the target machine
|
||||
|
||||
little_endian:
|
||||
boolean - specifies the target machine's endianness
|
||||
|
||||
elftype:
|
||||
string or int, either known value of E_TYPE enum defining ELF
|
||||
type (e.g. executable, dynamic library or core dump) or integral
|
||||
unparsed value
|
||||
|
||||
header:
|
||||
the complete ELF file header
|
||||
|
||||
e_ident_raw:
|
||||
the raw e_ident field of the header
|
||||
"""
|
||||
def __init__(self, stream):
|
||||
self.stream = stream
|
||||
self._identify_file()
|
||||
self.structs = ELFStructs(
|
||||
little_endian=self.little_endian,
|
||||
elfclass=self.elfclass)
|
||||
|
||||
self.structs.create_basic_structs()
|
||||
self.header = self._parse_elf_header()
|
||||
self.structs.create_advanced_structs(
|
||||
self['e_type'],
|
||||
self['e_machine'],
|
||||
self['e_ident']['EI_OSABI'])
|
||||
self.stream.seek(0)
|
||||
self.e_ident_raw = self.stream.read(16)
|
||||
|
||||
self._section_header_stringtable = \
|
||||
self._get_section_header_stringtable()
|
||||
self._section_name_map = None
|
||||
|
||||
def num_sections(self):
|
||||
""" Number of sections in the file
|
||||
"""
|
||||
if self['e_shoff'] == 0:
|
||||
return 0
|
||||
# From the ELF ABI documentation at
|
||||
# https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html:
|
||||
# "e_shnum normally tells how many entries the section header table
|
||||
# contains. [...] If the number of sections is greater than or equal to
|
||||
# SHN_LORESERVE (0xff00), e_shnum has the value SHN_UNDEF (0) and the
|
||||
# actual number of section header table entries is contained in the
|
||||
# sh_size field of the section header at index 0 (otherwise, the sh_size
|
||||
# member of the initial entry contains 0)."
|
||||
if self['e_shnum'] == 0:
|
||||
return self._get_section_header(0)['sh_size']
|
||||
return self['e_shnum']
|
||||
|
||||
def get_section(self, n):
|
||||
""" Get the section at index #n from the file (Section object or a
|
||||
subclass)
|
||||
"""
|
||||
section_header = self._get_section_header(n)
|
||||
return self._make_section(section_header)
|
||||
|
||||
def get_section_by_name(self, name):
|
||||
""" Get a section from the file, by name. Return None if no such
|
||||
section exists.
|
||||
"""
|
||||
# The first time this method is called, construct a name to number
|
||||
# mapping
|
||||
#
|
||||
if self._section_name_map is None:
|
||||
self._make_section_name_map()
|
||||
secnum = self._section_name_map.get(name, None)
|
||||
return None if secnum is None else self.get_section(secnum)
|
||||
|
||||
def get_section_index(self, section_name):
|
||||
""" Gets the index of the section by name. Return None if no such
|
||||
section name exists.
|
||||
"""
|
||||
# The first time this method is called, construct a name to number
|
||||
# mapping
|
||||
#
|
||||
if self._section_name_map is None:
|
||||
self._make_section_name_map()
|
||||
return self._section_name_map.get(section_name, None)
|
||||
|
||||
def iter_sections(self, type=None):
|
||||
""" Yield all the sections in the file. If the optional |type|
|
||||
parameter is passed, this method will only yield sections of the
|
||||
given type. The parameter value must be a string containing the
|
||||
name of the type as defined in the ELF specification, e.g.
|
||||
'SHT_SYMTAB'.
|
||||
"""
|
||||
for i in range(self.num_sections()):
|
||||
section = self.get_section(i)
|
||||
if type is None or section['sh_type'] == type:
|
||||
yield section
|
||||
|
||||
def num_segments(self):
|
||||
""" Number of segments in the file
|
||||
"""
|
||||
# From: https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI
|
||||
# Section: 4.1.2 Number of Program Headers
|
||||
# If the number of program headers is greater than or equal to
|
||||
# PN_XNUM (0xffff), this member has the value PN_XNUM
|
||||
# (0xffff). The actual number of program header table entries
|
||||
# is contained in the sh_info field of the section header at
|
||||
# index 0.
|
||||
if self['e_phnum'] < 0xffff:
|
||||
return self['e_phnum']
|
||||
else:
|
||||
return self.get_section(0)['sh_info']
|
||||
|
||||
def get_segment(self, n):
|
||||
""" Get the segment at index #n from the file (Segment object)
|
||||
"""
|
||||
segment_header = self._get_segment_header(n)
|
||||
return self._make_segment(segment_header)
|
||||
|
||||
def iter_segments(self, type=None):
|
||||
""" Yield all the segments in the file. If the optional |type|
|
||||
parameter is passed, this method will only yield segments of the
|
||||
given type. The parameter value must be a string containing the
|
||||
name of the type as defined in the ELF specification, e.g.
|
||||
'PT_LOAD'.
|
||||
"""
|
||||
for i in range(self.num_segments()):
|
||||
segment = self.get_segment(i)
|
||||
if type is None or segment['p_type'] == type:
|
||||
yield segment
|
||||
|
||||
def address_offsets(self, start, size=1):
|
||||
""" Yield a file offset for each ELF segment containing a memory region.
|
||||
|
||||
A memory region is defined by the range [start...start+size). The
|
||||
offset of the region is yielded.
|
||||
"""
|
||||
end = start + size
|
||||
# consider LOAD only to prevent same address being yielded twice
|
||||
for seg in self.iter_segments(type='PT_LOAD'):
|
||||
if (start >= seg['p_vaddr'] and
|
||||
end <= seg['p_vaddr'] + seg['p_filesz']):
|
||||
yield start - seg['p_vaddr'] + seg['p_offset']
|
||||
|
||||
def has_dwarf_info(self):
|
||||
""" Check whether this file appears to have debugging information.
|
||||
We assume that if it has the .debug_info or .zdebug_info section, it
|
||||
has all the other required sections as well.
|
||||
"""
|
||||
return bool(self.get_section_by_name('.debug_info') or
|
||||
self.get_section_by_name('.zdebug_info') or
|
||||
self.get_section_by_name('.eh_frame'))
|
||||
|
||||
def get_dwarf_info(self, relocate_dwarf_sections=True):
|
||||
""" Return a DWARFInfo object representing the debugging information in
|
||||
this file.
|
||||
|
||||
If relocate_dwarf_sections is True, relocations for DWARF sections
|
||||
are looked up and applied.
|
||||
"""
|
||||
# Expect that has_dwarf_info was called, so at least .debug_info is
|
||||
# present.
|
||||
# Sections that aren't found will be passed as None to DWARFInfo.
|
||||
|
||||
section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev',
|
||||
'.debug_str', '.debug_line', '.debug_frame',
|
||||
'.debug_loc', '.debug_ranges', '.debug_pubtypes',
|
||||
'.debug_pubnames', '.debug_addr', '.debug_str_offsets')
|
||||
|
||||
compressed = bool(self.get_section_by_name('.zdebug_info'))
|
||||
if compressed:
|
||||
section_names = tuple(map(lambda x: '.z' + x[1:], section_names))
|
||||
|
||||
# As it is loaded in the process image, .eh_frame cannot be compressed
|
||||
section_names += ('.eh_frame', )
|
||||
|
||||
(debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name,
|
||||
debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name,
|
||||
debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name,
|
||||
debug_pubnames_name, debug_addr_name, debug_str_offsets_name,
|
||||
eh_frame_sec_name) = section_names
|
||||
|
||||
debug_sections = {}
|
||||
for secname in section_names:
|
||||
section = self.get_section_by_name(secname)
|
||||
if section is None:
|
||||
debug_sections[secname] = None
|
||||
else:
|
||||
dwarf_section = self._read_dwarf_section(
|
||||
section,
|
||||
relocate_dwarf_sections)
|
||||
if compressed and secname.startswith('.z'):
|
||||
dwarf_section = self._decompress_dwarf_section(dwarf_section)
|
||||
debug_sections[secname] = dwarf_section
|
||||
|
||||
return DWARFInfo(
|
||||
config=DwarfConfig(
|
||||
little_endian=self.little_endian,
|
||||
default_address_size=self.elfclass // 8,
|
||||
machine_arch=self.get_machine_arch()),
|
||||
debug_info_sec=debug_sections[debug_info_sec_name],
|
||||
debug_aranges_sec=debug_sections[debug_aranges_sec_name],
|
||||
debug_abbrev_sec=debug_sections[debug_abbrev_sec_name],
|
||||
debug_frame_sec=debug_sections[debug_frame_sec_name],
|
||||
eh_frame_sec=debug_sections[eh_frame_sec_name],
|
||||
debug_str_sec=debug_sections[debug_str_sec_name],
|
||||
debug_loc_sec=debug_sections[debug_loc_sec_name],
|
||||
debug_ranges_sec=debug_sections[debug_ranges_sec_name],
|
||||
debug_line_sec=debug_sections[debug_line_sec_name],
|
||||
debug_pubtypes_sec=debug_sections[debug_pubtypes_name],
|
||||
debug_pubnames_sec=debug_sections[debug_pubnames_name],
|
||||
debug_addr_sec=debug_sections[debug_addr_name],
|
||||
debug_str_offsets_sec=debug_sections[debug_str_offsets_name],
|
||||
)
|
||||
|
||||
def has_ehabi_info(self):
|
||||
""" Check whether this file appears to have arm exception handler index table.
|
||||
"""
|
||||
return any(self.iter_sections(type='SHT_ARM_EXIDX'))
|
||||
|
||||
def get_ehabi_infos(self):
|
||||
""" Generally, shared library and executable contain 1 .ARM.exidx section.
|
||||
Object file contains many .ARM.exidx sections.
|
||||
So we must traverse every section and filter sections whose type is SHT_ARM_EXIDX.
|
||||
"""
|
||||
_ret = []
|
||||
if self['e_type'] == 'ET_REL':
|
||||
# TODO: support relocatable file
|
||||
assert False, "Current version of pyelftools doesn't support relocatable file."
|
||||
for section in self.iter_sections(type='SHT_ARM_EXIDX'):
|
||||
_ret.append(EHABIInfo(section, self.little_endian))
|
||||
return _ret if len(_ret) > 0 else None
|
||||
|
||||
def get_machine_arch(self):
|
||||
""" Return the machine architecture, as detected from the ELF header.
|
||||
"""
|
||||
architectures = {
|
||||
'EM_M32' : 'AT&T WE 32100',
|
||||
'EM_SPARC' : 'SPARC',
|
||||
'EM_386' : 'x86',
|
||||
'EM_68K' : 'Motorola 68000',
|
||||
'EM_88K' : 'Motorola 88000',
|
||||
'EM_IAMCU' : 'Intel MCU',
|
||||
'EM_860' : 'Intel 80860',
|
||||
'EM_MIPS' : 'MIPS',
|
||||
'EM_S370' : 'IBM System/370',
|
||||
'EM_MIPS_RS3_LE' : 'MIPS RS3000 Little-endian',
|
||||
'EM_PARISC' : 'Hewlett-Packard PA-RISC',
|
||||
'EM_VPP500' : 'Fujitsu VPP500',
|
||||
'EM_SPARC32PLUS' : 'Enhanced SPARC',
|
||||
'EM_960' : 'Intel 80960',
|
||||
'EM_PPC' : 'PowerPC',
|
||||
'EM_PPC64' : '64-bit PowerPC',
|
||||
'EM_S390' : 'IBM System/390',
|
||||
'EM_SPU' : 'IBM SPU/SPC',
|
||||
'EM_V800' : 'NEC V800',
|
||||
'EM_FR20' : 'Fujitsu FR20',
|
||||
'EM_RH32' : 'TRW RH-32',
|
||||
'EM_RCE' : 'Motorola RCE',
|
||||
'EM_ARM' : 'ARM',
|
||||
'EM_ALPHA' : 'Digital Alpha',
|
||||
'EM_SH' : 'Hitachi SH',
|
||||
'EM_SPARCV9' : 'SPARC Version 9',
|
||||
'EM_TRICORE' : 'Siemens TriCore embedded processor',
|
||||
'EM_ARC' : 'Argonaut RISC Core, Argonaut Technologies Inc.',
|
||||
'EM_H8_300' : 'Hitachi H8/300',
|
||||
'EM_H8_300H' : 'Hitachi H8/300H',
|
||||
'EM_H8S' : 'Hitachi H8S',
|
||||
'EM_H8_500' : 'Hitachi H8/500',
|
||||
'EM_IA_64' : 'Intel IA-64',
|
||||
'EM_MIPS_X' : 'MIPS-X',
|
||||
'EM_COLDFIRE' : 'Motorola ColdFire',
|
||||
'EM_68HC12' : 'Motorola M68HC12',
|
||||
'EM_MMA' : 'Fujitsu MMA',
|
||||
'EM_PCP' : 'Siemens PCP',
|
||||
'EM_NCPU' : 'Sony nCPU',
|
||||
'EM_NDR1' : 'Denso NDR1',
|
||||
'EM_STARCORE' : 'Motorola Star*Core',
|
||||
'EM_ME16' : 'Toyota ME16',
|
||||
'EM_ST100' : 'STMicroelectronics ST100',
|
||||
'EM_TINYJ' : 'Advanced Logic TinyJ',
|
||||
'EM_X86_64' : 'x64',
|
||||
'EM_PDSP' : 'Sony DSP',
|
||||
'EM_PDP10' : 'Digital Equipment PDP-10',
|
||||
'EM_PDP11' : 'Digital Equipment PDP-11',
|
||||
'EM_FX66' : 'Siemens FX66',
|
||||
'EM_ST9PLUS' : 'STMicroelectronics ST9+ 8/16 bit',
|
||||
'EM_ST7' : 'STMicroelectronics ST7 8-bit',
|
||||
'EM_68HC16' : 'Motorola MC68HC16',
|
||||
'EM_68HC11' : 'Motorola MC68HC11',
|
||||
'EM_68HC08' : 'Motorola MC68HC08',
|
||||
'EM_68HC05' : 'Motorola MC68HC05',
|
||||
'EM_SVX' : 'Silicon Graphics SVx',
|
||||
'EM_ST19' : 'STMicroelectronics ST19 8-bit',
|
||||
'EM_VAX' : 'Digital VAX',
|
||||
'EM_CRIS' : 'Axis Communications 32-bit',
|
||||
'EM_JAVELIN' : 'Infineon Technologies 32-bit',
|
||||
'EM_FIREPATH' : 'Element 14 64-bit DSP',
|
||||
'EM_ZSP' : 'LSI Logic 16-bit DSP',
|
||||
'EM_MMIX' : 'Donald Knuth\'s educational 64-bit',
|
||||
'EM_HUANY' : 'Harvard University machine-independent object files',
|
||||
'EM_PRISM' : 'SiTera Prism',
|
||||
'EM_AVR' : 'Atmel AVR 8-bit',
|
||||
'EM_FR30' : 'Fujitsu FR30',
|
||||
'EM_D10V' : 'Mitsubishi D10V',
|
||||
'EM_D30V' : 'Mitsubishi D30V',
|
||||
'EM_V850' : 'NEC v850',
|
||||
'EM_M32R' : 'Mitsubishi M32R',
|
||||
'EM_MN10300' : 'Matsushita MN10300',
|
||||
'EM_MN10200' : 'Matsushita MN10200',
|
||||
'EM_PJ' : 'picoJava',
|
||||
'EM_OPENRISC' : 'OpenRISC 32-bit',
|
||||
'EM_ARC_COMPACT' : 'ARC International ARCompact',
|
||||
'EM_XTENSA' : 'Tensilica Xtensa',
|
||||
'EM_VIDEOCORE' : 'Alphamosaic VideoCore',
|
||||
'EM_TMM_GPP' : 'Thompson Multimedia',
|
||||
'EM_NS32K' : 'National Semiconductor 32000 series',
|
||||
'EM_TPC' : 'Tenor Network TPC',
|
||||
'EM_SNP1K' : 'Trebia SNP 1000',
|
||||
'EM_ST200' : 'STMicroelectronics ST200',
|
||||
'EM_IP2K' : 'Ubicom IP2xxx',
|
||||
'EM_MAX' : 'MAX',
|
||||
'EM_CR' : 'National Semiconductor CompactRISC',
|
||||
'EM_F2MC16' : 'Fujitsu F2MC16',
|
||||
'EM_MSP430' : 'Texas Instruments msp430',
|
||||
'EM_BLACKFIN' : 'Analog Devices Blackfin',
|
||||
'EM_SE_C33' : 'Seiko Epson S1C33',
|
||||
'EM_SEP' : 'Sharp',
|
||||
'EM_ARCA' : 'Arca RISC',
|
||||
'EM_UNICORE' : 'PKU-Unity MPRC',
|
||||
'EM_EXCESS' : 'eXcess',
|
||||
'EM_DXP' : 'Icera Semiconductor Deep Execution Processor',
|
||||
'EM_ALTERA_NIOS2' : 'Altera Nios II',
|
||||
'EM_CRX' : 'National Semiconductor CompactRISC CRX',
|
||||
'EM_XGATE' : 'Motorola XGATE',
|
||||
'EM_C166' : 'Infineon C16x/XC16x',
|
||||
'EM_M16C' : 'Renesas M16C',
|
||||
'EM_DSPIC30F' : 'Microchip Technology dsPIC30F',
|
||||
'EM_CE' : 'Freescale Communication Engine RISC core',
|
||||
'EM_M32C' : 'Renesas M32C',
|
||||
'EM_TSK3000' : 'Altium TSK3000',
|
||||
'EM_RS08' : 'Freescale RS08',
|
||||
'EM_SHARC' : 'Analog Devices SHARC',
|
||||
'EM_ECOG2' : 'Cyan Technology eCOG2',
|
||||
'EM_SCORE7' : 'Sunplus S+core7 RISC',
|
||||
'EM_DSP24' : 'New Japan Radio (NJR) 24-bit DSP',
|
||||
'EM_VIDEOCORE3' : 'Broadcom VideoCore III',
|
||||
'EM_LATTICEMICO32' : 'Lattice FPGA RISC',
|
||||
'EM_SE_C17' : 'Seiko Epson C17',
|
||||
'EM_TI_C6000' : 'TI TMS320C6000',
|
||||
'EM_TI_C2000' : 'TI TMS320C2000',
|
||||
'EM_TI_C5500' : 'TI TMS320C55x',
|
||||
'EM_TI_ARP32' : 'TI Application Specific RISC, 32bit',
|
||||
'EM_TI_PRU' : 'TI Programmable Realtime Unit',
|
||||
'EM_MMDSP_PLUS' : 'STMicroelectronics 64bit VLIW',
|
||||
'EM_CYPRESS_M8C' : 'Cypress M8C',
|
||||
'EM_R32C' : 'Renesas R32C',
|
||||
'EM_TRIMEDIA' : 'NXP Semiconductors TriMedia',
|
||||
'EM_QDSP6' : 'QUALCOMM DSP6',
|
||||
'EM_8051' : 'Intel 8051',
|
||||
'EM_STXP7X' : 'STMicroelectronics STxP7x',
|
||||
'EM_NDS32' : 'Andes Technology RISC',
|
||||
'EM_ECOG1' : 'Cyan Technology eCOG1X',
|
||||
'EM_ECOG1X' : 'Cyan Technology eCOG1X',
|
||||
'EM_MAXQ30' : 'Dallas Semiconductor MAXQ30',
|
||||
'EM_XIMO16' : 'New Japan Radio (NJR) 16-bit',
|
||||
'EM_MANIK' : 'M2000 Reconfigurable RISC',
|
||||
'EM_CRAYNV2' : 'Cray Inc. NV2',
|
||||
'EM_RX' : 'Renesas RX',
|
||||
'EM_METAG' : 'Imagination Technologies META',
|
||||
'EM_MCST_ELBRUS' : 'MCST Elbrus',
|
||||
'EM_ECOG16' : 'Cyan Technology eCOG16',
|
||||
'EM_CR16' : 'National Semiconductor CompactRISC CR16 16-bit',
|
||||
'EM_ETPU' : 'Freescale',
|
||||
'EM_SLE9X' : 'Infineon Technologies SLE9X',
|
||||
'EM_L10M' : 'Intel L10M',
|
||||
'EM_K10M' : 'Intel K10M',
|
||||
'EM_AARCH64' : 'AArch64',
|
||||
'EM_AVR32' : 'Atmel 32-bit',
|
||||
'EM_STM8' : 'STMicroeletronics STM8 8-bit',
|
||||
'EM_TILE64' : 'Tilera TILE64',
|
||||
'EM_TILEPRO' : 'Tilera TILEPro',
|
||||
'EM_MICROBLAZE' : 'Xilinx MicroBlaze 32-bit RISC',
|
||||
'EM_CUDA' : 'NVIDIA CUDA',
|
||||
'EM_TILEGX' : 'Tilera TILE-Gx',
|
||||
'EM_CLOUDSHIELD' : 'CloudShield',
|
||||
'EM_COREA_1ST' : 'KIPO-KAIST Core-A 1st generation',
|
||||
'EM_COREA_2ND' : 'KIPO-KAIST Core-A 2nd generation',
|
||||
'EM_ARC_COMPACT2' : 'Synopsys ARCompact V2',
|
||||
'EM_OPEN8' : 'Open8 8-bit RISC',
|
||||
'EM_RL78' : 'Renesas RL78',
|
||||
'EM_VIDEOCORE5' : 'Broadcom VideoCore V',
|
||||
'EM_78KOR' : 'Renesas 78KOR',
|
||||
'EM_56800EX' : 'Freescale 56800EX',
|
||||
'EM_BA1' : 'Beyond BA1',
|
||||
'EM_BA2' : 'Beyond BA2',
|
||||
'EM_XCORE' : 'XMOS xCORE',
|
||||
'EM_MCHP_PIC' : 'Microchip 8-bit PIC',
|
||||
'EM_INTEL205' : 'Reserved by Intel',
|
||||
'EM_INTEL206' : 'Reserved by Intel',
|
||||
'EM_INTEL207' : 'Reserved by Intel',
|
||||
'EM_INTEL208' : 'Reserved by Intel',
|
||||
'EM_INTEL209' : 'Reserved by Intel',
|
||||
'EM_KM32' : 'KM211 KM32 32-bit',
|
||||
'EM_KMX32' : 'KM211 KMX32 32-bit',
|
||||
'EM_KMX16' : 'KM211 KMX16 16-bit',
|
||||
'EM_KMX8' : 'KM211 KMX8 8-bit',
|
||||
'EM_KVARC' : 'KM211 KVARC',
|
||||
'EM_CDP' : 'Paneve CDP',
|
||||
'EM_COGE' : 'Cognitive',
|
||||
'EM_COOL' : 'Bluechip Systems CoolEngine',
|
||||
'EM_NORC' : 'Nanoradio Optimized RISC',
|
||||
'EM_CSR_KALIMBA' : 'CSR Kalimba',
|
||||
'EM_Z80' : 'Zilog Z80',
|
||||
'EM_VISIUM' : 'VISIUMcore',
|
||||
'EM_FT32' : 'FTDI Chip FT32 32-bit RISC',
|
||||
'EM_MOXIE' : 'Moxie',
|
||||
'EM_AMDGPU' : 'AMD GPU',
|
||||
'EM_RISCV' : 'RISC-V',
|
||||
'EM_BPF' : 'Linux BPF - in-kernel virtual machine',
|
||||
'EM_CSKY' : 'C-SKY',
|
||||
'EM_FRV' : 'Fujitsu FR-V'
|
||||
}
|
||||
|
||||
return architectures.get(self['e_machine'], '<unknown>')
|
||||
|
||||
def get_shstrndx(self):
|
||||
""" Find the string table section index for the section header table
|
||||
"""
|
||||
# From https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html:
|
||||
# If the section name string table section index is greater than or
|
||||
# equal to SHN_LORESERVE (0xff00), this member has the value SHN_XINDEX
|
||||
# (0xffff) and the actual index of the section name string table section
|
||||
# is contained in the sh_link field of the section header at index 0.
|
||||
if self['e_shstrndx'] != SHN_INDICES.SHN_XINDEX:
|
||||
return self['e_shstrndx']
|
||||
else:
|
||||
return self._get_section_header(0)['sh_link']
|
||||
|
||||
#-------------------------------- PRIVATE --------------------------------#
|
||||
|
||||
def __getitem__(self, name):
|
||||
""" Implement dict-like access to header entries
|
||||
"""
|
||||
return self.header[name]
|
||||
|
||||
def _identify_file(self):
|
||||
""" Verify the ELF file and identify its class and endianness.
|
||||
"""
|
||||
# Note: this code reads the stream directly, without using ELFStructs,
|
||||
# since we don't yet know its exact format. ELF was designed to be
|
||||
# read like this - its e_ident field is word-size and endian agnostic.
|
||||
self.stream.seek(0)
|
||||
magic = self.stream.read(4)
|
||||
elf_assert(magic == b'\x7fELF', 'Magic number does not match')
|
||||
|
||||
ei_class = self.stream.read(1)
|
||||
if ei_class == b'\x01':
|
||||
self.elfclass = 32
|
||||
elif ei_class == b'\x02':
|
||||
self.elfclass = 64
|
||||
else:
|
||||
raise ELFError('Invalid EI_CLASS %s' % repr(ei_class))
|
||||
|
||||
ei_data = self.stream.read(1)
|
||||
if ei_data == b'\x01':
|
||||
self.little_endian = True
|
||||
elif ei_data == b'\x02':
|
||||
self.little_endian = False
|
||||
else:
|
||||
raise ELFError('Invalid EI_DATA %s' % repr(ei_data))
|
||||
|
||||
def _section_offset(self, n):
|
||||
""" Compute the offset of section #n in the file
|
||||
"""
|
||||
return self['e_shoff'] + n * self['e_shentsize']
|
||||
|
||||
def _segment_offset(self, n):
|
||||
""" Compute the offset of segment #n in the file
|
||||
"""
|
||||
return self['e_phoff'] + n * self['e_phentsize']
|
||||
|
||||
def _make_segment(self, segment_header):
|
||||
""" Create a Segment object of the appropriate type
|
||||
"""
|
||||
segtype = segment_header['p_type']
|
||||
if segtype == 'PT_INTERP':
|
||||
return InterpSegment(segment_header, self.stream)
|
||||
elif segtype == 'PT_DYNAMIC':
|
||||
return DynamicSegment(segment_header, self.stream, self)
|
||||
elif segtype == 'PT_NOTE':
|
||||
return NoteSegment(segment_header, self.stream, self)
|
||||
else:
|
||||
return Segment(segment_header, self.stream)
|
||||
|
||||
def _get_section_header(self, n):
|
||||
""" Find the header of section #n, parse it and return the struct
|
||||
"""
|
||||
return struct_parse(
|
||||
self.structs.Elf_Shdr,
|
||||
self.stream,
|
||||
stream_pos=self._section_offset(n))
|
||||
|
||||
def _get_section_name(self, section_header):
|
||||
""" Given a section header, find this section's name in the file's
|
||||
string table
|
||||
"""
|
||||
name_offset = section_header['sh_name']
|
||||
return self._section_header_stringtable.get_string(name_offset)
|
||||
|
||||
def _make_section(self, section_header):
|
||||
""" Create a section object of the appropriate type
|
||||
"""
|
||||
name = self._get_section_name(section_header)
|
||||
sectype = section_header['sh_type']
|
||||
|
||||
if sectype == 'SHT_STRTAB':
|
||||
return StringTableSection(section_header, name, self)
|
||||
elif sectype == 'SHT_NULL':
|
||||
return NullSection(section_header, name, self)
|
||||
elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM', 'SHT_SUNW_LDYNSYM'):
|
||||
return self._make_symbol_table_section(section_header, name)
|
||||
elif sectype == 'SHT_SYMTAB_SHNDX':
|
||||
return self._make_symbol_table_index_section(section_header, name)
|
||||
elif sectype == 'SHT_SUNW_syminfo':
|
||||
return self._make_sunwsyminfo_table_section(section_header, name)
|
||||
elif sectype == 'SHT_GNU_verneed':
|
||||
return self._make_gnu_verneed_section(section_header, name)
|
||||
elif sectype == 'SHT_GNU_verdef':
|
||||
return self._make_gnu_verdef_section(section_header, name)
|
||||
elif sectype == 'SHT_GNU_versym':
|
||||
return self._make_gnu_versym_section(section_header, name)
|
||||
elif sectype in ('SHT_REL', 'SHT_RELA'):
|
||||
return RelocationSection(section_header, name, self)
|
||||
elif sectype == 'SHT_DYNAMIC':
|
||||
return DynamicSection(section_header, name, self)
|
||||
elif sectype == 'SHT_NOTE':
|
||||
return NoteSection(section_header, name, self)
|
||||
elif sectype == 'SHT_PROGBITS' and name == '.stab':
|
||||
return StabSection(section_header, name, self)
|
||||
elif sectype == 'SHT_ARM_ATTRIBUTES':
|
||||
return ARMAttributesSection(section_header, name, self)
|
||||
elif sectype == 'SHT_HASH':
|
||||
return self._make_elf_hash_section(section_header, name)
|
||||
elif sectype == 'SHT_GNU_HASH':
|
||||
return self._make_gnu_hash_section(section_header, name)
|
||||
else:
|
||||
return Section(section_header, name, self)
|
||||
|
||||
def _make_section_name_map(self):
|
||||
self._section_name_map = {}
|
||||
for i, sec in enumerate(self.iter_sections()):
|
||||
self._section_name_map[sec.name] = i
|
||||
|
||||
def _make_symbol_table_section(self, section_header, name):
|
||||
""" Create a SymbolTableSection
|
||||
"""
|
||||
linked_strtab_index = section_header['sh_link']
|
||||
strtab_section = self.get_section(linked_strtab_index)
|
||||
return SymbolTableSection(
|
||||
section_header, name,
|
||||
elffile=self,
|
||||
stringtable=strtab_section)
|
||||
|
||||
def _make_symbol_table_index_section(self, section_header, name):
|
||||
""" Create a SymbolTableIndexSection object
|
||||
"""
|
||||
linked_symtab_index = section_header['sh_link']
|
||||
return SymbolTableIndexSection(
|
||||
section_header, name, elffile=self,
|
||||
symboltable=linked_symtab_index)
|
||||
|
||||
def _make_sunwsyminfo_table_section(self, section_header, name):
|
||||
""" Create a SUNWSyminfoTableSection
|
||||
"""
|
||||
linked_strtab_index = section_header['sh_link']
|
||||
strtab_section = self.get_section(linked_strtab_index)
|
||||
return SUNWSyminfoTableSection(
|
||||
section_header, name,
|
||||
elffile=self,
|
||||
symboltable=strtab_section)
|
||||
|
||||
def _make_gnu_verneed_section(self, section_header, name):
|
||||
""" Create a GNUVerNeedSection
|
||||
"""
|
||||
linked_strtab_index = section_header['sh_link']
|
||||
strtab_section = self.get_section(linked_strtab_index)
|
||||
return GNUVerNeedSection(
|
||||
section_header, name,
|
||||
elffile=self,
|
||||
stringtable=strtab_section)
|
||||
|
||||
def _make_gnu_verdef_section(self, section_header, name):
|
||||
""" Create a GNUVerDefSection
|
||||
"""
|
||||
linked_strtab_index = section_header['sh_link']
|
||||
strtab_section = self.get_section(linked_strtab_index)
|
||||
return GNUVerDefSection(
|
||||
section_header, name,
|
||||
elffile=self,
|
||||
stringtable=strtab_section)
|
||||
|
||||
def _make_gnu_versym_section(self, section_header, name):
|
||||
""" Create a GNUVerSymSection
|
||||
"""
|
||||
linked_strtab_index = section_header['sh_link']
|
||||
strtab_section = self.get_section(linked_strtab_index)
|
||||
return GNUVerSymSection(
|
||||
section_header, name,
|
||||
elffile=self,
|
||||
symboltable=strtab_section)
|
||||
|
||||
def _make_elf_hash_section(self, section_header, name):
|
||||
linked_symtab_index = section_header['sh_link']
|
||||
symtab_section = self.get_section(linked_symtab_index)
|
||||
return ELFHashSection(
|
||||
section_header, name, self, symtab_section
|
||||
)
|
||||
|
||||
def _make_gnu_hash_section(self, section_header, name):
|
||||
linked_symtab_index = section_header['sh_link']
|
||||
symtab_section = self.get_section(linked_symtab_index)
|
||||
return GNUHashSection(
|
||||
section_header, name, self, symtab_section
|
||||
)
|
||||
|
||||
def _get_segment_header(self, n):
|
||||
""" Find the header of segment #n, parse it and return the struct
|
||||
"""
|
||||
return struct_parse(
|
||||
self.structs.Elf_Phdr,
|
||||
self.stream,
|
||||
stream_pos=self._segment_offset(n))
|
||||
|
||||
def _get_section_header_stringtable(self):
|
||||
""" Get the string table section corresponding to the section header
|
||||
table.
|
||||
"""
|
||||
stringtable_section_num = self.get_shstrndx()
|
||||
return StringTableSection(
|
||||
header=self._get_section_header(stringtable_section_num),
|
||||
name='',
|
||||
elffile=self)
|
||||
|
||||
def _parse_elf_header(self):
|
||||
""" Parses the ELF file header and assigns the result to attributes
|
||||
of this object.
|
||||
"""
|
||||
return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0)
|
||||
|
||||
def _read_dwarf_section(self, section, relocate_dwarf_sections):
|
||||
""" Read the contents of a DWARF section from the stream and return a
|
||||
DebugSectionDescriptor. Apply relocations if asked to.
|
||||
"""
|
||||
# The section data is read into a new stream, for processing
|
||||
section_stream = BytesIO()
|
||||
section_stream.write(section.data())
|
||||
|
||||
if relocate_dwarf_sections:
|
||||
reloc_handler = RelocationHandler(self)
|
||||
reloc_section = reloc_handler.find_relocations_for_section(section)
|
||||
if reloc_section is not None:
|
||||
reloc_handler.apply_section_relocations(
|
||||
section_stream, reloc_section)
|
||||
|
||||
return DebugSectionDescriptor(
|
||||
stream=section_stream,
|
||||
name=section.name,
|
||||
global_offset=section['sh_offset'],
|
||||
size=section.data_size,
|
||||
address=section['sh_addr'])
|
||||
|
||||
@staticmethod
|
||||
def _decompress_dwarf_section(section):
|
||||
""" Returns the uncompressed contents of the provided DWARF section.
|
||||
"""
|
||||
# TODO: support other compression formats from readelf.c
|
||||
assert section.size > 12, 'Unsupported compression format.'
|
||||
|
||||
section.stream.seek(0)
|
||||
# According to readelf.c the content should contain "ZLIB"
|
||||
# followed by the uncompressed section size - 8 bytes in
|
||||
# big-endian order
|
||||
compression_type = section.stream.read(4)
|
||||
assert compression_type == b'ZLIB', \
|
||||
'Invalid compression type: %r' % (compression_type)
|
||||
|
||||
uncompressed_size = struct.unpack('>Q', section.stream.read(8))[0]
|
||||
|
||||
decompressor = zlib.decompressobj()
|
||||
uncompressed_stream = BytesIO()
|
||||
while True:
|
||||
chunk = section.stream.read(PAGESIZE)
|
||||
if not chunk:
|
||||
break
|
||||
uncompressed_stream.write(decompressor.decompress(chunk))
|
||||
uncompressed_stream.write(decompressor.flush())
|
||||
|
||||
uncompressed_stream.seek(0, io.SEEK_END)
|
||||
size = uncompressed_stream.tell()
|
||||
assert uncompressed_size == size, \
|
||||
'Wrong uncompressed size: expected %r, but got %r' % (
|
||||
uncompressed_size, size,
|
||||
)
|
||||
|
||||
return section._replace(stream=uncompressed_stream, size=size)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,225 +0,0 @@
|
||||
#------------------------------------------------------------------------------
|
||||
# elftools: elf/gnuversions.py
|
||||
#
|
||||
# ELF sections
|
||||
#
|
||||
# Yann Rouillard (yann@pleiades.fr.eu.org)
|
||||
# This code is in the public domain
|
||||
#------------------------------------------------------------------------------
|
||||
from ..construct import CString
|
||||
from ..common.utils import struct_parse, elf_assert
|
||||
from .sections import Section, Symbol
|
||||
|
||||
|
||||
class Version(object):
|
||||
""" Version object - representing a version definition or dependency
|
||||
entry from a "Version Needed" or a "Version Dependency" table section.
|
||||
|
||||
This kind of entry contains a pointer to an array of auxiliary entries
|
||||
that store the information about version names or dependencies.
|
||||
These entries are not stored in this object and should be accessed
|
||||
through the appropriate method of a section object which will return
|
||||
an iterator of VersionAuxiliary objects.
|
||||
|
||||
Similarly to Section objects, allows dictionary-like access to
|
||||
verdef/verneed entry
|
||||
"""
|
||||
def __init__(self, entry, name=None):
|
||||
self.entry = entry
|
||||
self.name = name
|
||||
|
||||
def __getitem__(self, name):
|
||||
""" Implement dict-like access to entry
|
||||
"""
|
||||
return self.entry[name]
|
||||
|
||||
|
||||
class VersionAuxiliary(object):
|
||||
""" Version Auxiliary object - representing an auxiliary entry of a version
|
||||
definition or dependency entry
|
||||
|
||||
Similarly to Section objects, allows dictionary-like access to the
|
||||
verdaux/vernaux entry
|
||||
"""
|
||||
def __init__(self, entry, name):
|
||||
self.entry = entry
|
||||
self.name = name
|
||||
|
||||
def __getitem__(self, name):
|
||||
""" Implement dict-like access to entries
|
||||
"""
|
||||
return self.entry[name]
|
||||
|
||||
|
||||
class GNUVersionSection(Section):
|
||||
""" Common ancestor class for ELF SUNW|GNU Version Needed/Dependency
|
||||
sections class which contains shareable code
|
||||
"""
|
||||
|
||||
def __init__(self, header, name, elffile, stringtable,
|
||||
field_prefix, version_struct, version_auxiliaries_struct):
|
||||
super(GNUVersionSection, self).__init__(header, name, elffile)
|
||||
self.stringtable = stringtable
|
||||
self.field_prefix = field_prefix
|
||||
self.version_struct = version_struct
|
||||
self.version_auxiliaries_struct = version_auxiliaries_struct
|
||||
|
||||
def num_versions(self):
|
||||
""" Number of version entries in the section
|
||||
"""
|
||||
return self['sh_info']
|
||||
|
||||
def _field_name(self, name, auxiliary=False):
|
||||
""" Return the real field's name of version or a version auxiliary
|
||||
entry
|
||||
"""
|
||||
middle = 'a_' if auxiliary else '_'
|
||||
return self.field_prefix + middle + name
|
||||
|
||||
def _iter_version_auxiliaries(self, entry_offset, count):
|
||||
""" Yield all auxiliary entries of a version entry
|
||||
"""
|
||||
name_field = self._field_name('name', auxiliary=True)
|
||||
next_field = self._field_name('next', auxiliary=True)
|
||||
|
||||
for _ in range(count):
|
||||
entry = struct_parse(
|
||||
self.version_auxiliaries_struct,
|
||||
self.stream,
|
||||
stream_pos=entry_offset)
|
||||
|
||||
name = self.stringtable.get_string(entry[name_field])
|
||||
version_aux = VersionAuxiliary(entry, name)
|
||||
yield version_aux
|
||||
|
||||
entry_offset += entry[next_field]
|
||||
|
||||
def iter_versions(self):
|
||||
""" Yield all the version entries in the section
|
||||
Each time it returns the main version structure
|
||||
and an iterator to walk through its auxiliaries entries
|
||||
"""
|
||||
aux_field = self._field_name('aux')
|
||||
count_field = self._field_name('cnt')
|
||||
next_field = self._field_name('next')
|
||||
|
||||
entry_offset = self['sh_offset']
|
||||
for _ in range(self.num_versions()):
|
||||
entry = struct_parse(
|
||||
self.version_struct,
|
||||
self.stream,
|
||||
stream_pos=entry_offset)
|
||||
|
||||
elf_assert(entry[count_field] > 0,
|
||||
'Expected number of version auxiliary entries (%s) to be > 0'
|
||||
'for the following version entry: %s' % (
|
||||
count_field, str(entry)))
|
||||
|
||||
version = Version(entry)
|
||||
aux_entries_offset = entry_offset + entry[aux_field]
|
||||
version_auxiliaries_iter = self._iter_version_auxiliaries(
|
||||
aux_entries_offset, entry[count_field])
|
||||
|
||||
yield version, version_auxiliaries_iter
|
||||
|
||||
entry_offset += entry[next_field]
|
||||
|
||||
|
||||
class GNUVerNeedSection(GNUVersionSection):
|
||||
""" ELF SUNW or GNU Version Needed table section.
|
||||
Has an associated StringTableSection that's passed in the constructor.
|
||||
"""
|
||||
def __init__(self, header, name, elffile, stringtable):
|
||||
super(GNUVerNeedSection, self).__init__(
|
||||
header, name, elffile, stringtable, 'vn',
|
||||
elffile.structs.Elf_Verneed, elffile.structs.Elf_Vernaux)
|
||||
self._has_indexes = None
|
||||
|
||||
def has_indexes(self):
|
||||
""" Return True if at least one version definition entry has an index
|
||||
that is stored in the vna_other field.
|
||||
This information is used for symbol versioning
|
||||
"""
|
||||
if self._has_indexes is None:
|
||||
self._has_indexes = False
|
||||
for _, vernaux_iter in self.iter_versions():
|
||||
for vernaux in vernaux_iter:
|
||||
if vernaux['vna_other']:
|
||||
self._has_indexes = True
|
||||
break
|
||||
|
||||
return self._has_indexes
|
||||
|
||||
def iter_versions(self):
|
||||
for verneed, vernaux in super(GNUVerNeedSection, self).iter_versions():
|
||||
verneed.name = self.stringtable.get_string(verneed['vn_file'])
|
||||
yield verneed, vernaux
|
||||
|
||||
def get_version(self, index):
|
||||
""" Get the version information located at index #n in the table
|
||||
Return boths the verneed structure and the vernaux structure
|
||||
that contains the name of the version
|
||||
"""
|
||||
for verneed, vernaux_iter in self.iter_versions():
|
||||
for vernaux in vernaux_iter:
|
||||
if vernaux['vna_other'] == index:
|
||||
return verneed, vernaux
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class GNUVerDefSection(GNUVersionSection):
|
||||
""" ELF SUNW or GNU Version Definition table section.
|
||||
Has an associated StringTableSection that's passed in the constructor.
|
||||
"""
|
||||
def __init__(self, header, name, elffile, stringtable):
|
||||
super(GNUVerDefSection, self).__init__(
|
||||
header, name, elffile, stringtable, 'vd',
|
||||
elffile.structs.Elf_Verdef, elffile.structs.Elf_Verdaux)
|
||||
|
||||
def get_version(self, index):
|
||||
""" Get the version information located at index #n in the table
|
||||
Return boths the verdef structure and an iterator to retrieve
|
||||
both the version names and dependencies in the form of
|
||||
verdaux entries
|
||||
"""
|
||||
for verdef, verdaux_iter in self.iter_versions():
|
||||
if verdef['vd_ndx'] == index:
|
||||
return verdef, verdaux_iter
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class GNUVerSymSection(Section):
|
||||
""" ELF SUNW or GNU Versym table section.
|
||||
Has an associated SymbolTableSection that's passed in the constructor.
|
||||
"""
|
||||
def __init__(self, header, name, elffile, symboltable):
|
||||
super(GNUVerSymSection, self).__init__(header, name, elffile)
|
||||
self.symboltable = symboltable
|
||||
|
||||
def num_symbols(self):
|
||||
""" Number of symbols in the table
|
||||
"""
|
||||
return self['sh_size'] // self['sh_entsize']
|
||||
|
||||
def get_symbol(self, n):
|
||||
""" Get the symbol at index #n from the table (Symbol object)
|
||||
It begins at 1 and not 0 since the first entry is used to
|
||||
store the current version of the syminfo table
|
||||
"""
|
||||
# Grab the symbol's entry from the stream
|
||||
entry_offset = self['sh_offset'] + n * self['sh_entsize']
|
||||
entry = struct_parse(
|
||||
self.structs.Elf_Versym,
|
||||
self.stream,
|
||||
stream_pos=entry_offset)
|
||||
# Find the symbol name in the associated symbol table
|
||||
name = self.symboltable.get_symbol(n).name
|
||||
return Symbol(entry, name)
|
||||
|
||||
def iter_symbols(self):
|
||||
""" Yield all the symbols in the table
|
||||
"""
|
||||
for i in range(self.num_symbols()):
|
||||
yield self.get_symbol(i)
|
||||
@@ -1,186 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/hash.py
|
||||
#
|
||||
# ELF hash table sections
|
||||
#
|
||||
# Andreas Ziegler (andreas.ziegler@fau.de)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
import struct
|
||||
|
||||
from ..common.utils import struct_parse
|
||||
from .sections import Section
|
||||
|
||||
|
||||
class ELFHashTable(object):
    """ SysV-style ELF hash table (SHT_HASH / DT_HASH), used to look up
        symbols by name. Useful for super-stripped binaries without
        section headers, where only the start of the symbol table is
        known from the dynamic segment. Layout reference:
        https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/.

        The symboltable argument must provide a get_symbol() method; in
        a regular ELF file that is the linked symbol table section
        (sh_link), while for super-stripped binaries a DynamicSegment
        object can be used instead.
    """

    def __init__(self, elffile, start_offset, symboltable):
        self.elffile = elffile
        self._symboltable = symboltable
        # Parse the table header plus the bucket and chain arrays.
        self.params = struct_parse(
            self.elffile.structs.Elf_Hash,
            self.elffile.stream,
            start_offset)

    def get_number_of_symbols(self):
        """ Number of symbols, as recorded by the table's nchains field.
        """
        return self.params['nchains']

    def get_symbol(self, name):
        """ Look up a symbol by name; return it, or None if not found.
        """
        nbuckets = self.params['nbuckets']
        if not nbuckets:
            return None
        # Walk the chain that starts at this name's bucket. Index 0
        # (STN_UNDEF) terminates the chain.
        idx = self.params['buckets'][self.elf_hash(name) % nbuckets]
        while idx:
            candidate = self._symboltable.get_symbol(idx)
            if candidate.name == name:
                return candidate
            idx = self.params['chains'][idx]
        return None

    @staticmethod
    def elf_hash(name):
        """ Classic SysV ELF hash of a symbol name (str or bytes).
        """
        if not isinstance(name, bytes):
            name = name.encode('utf-8')
        result = 0
        for byte in bytearray(name):
            result = (result << 4) + byte
            top = result & 0xF0000000
            if top:
                result ^= (top >> 24)
            # Clearing the top nibble keeps the value within 32 bits.
            result &= ~top
        return result
|
||||
|
||||
|
||||
class ELFHashSection(Section, ELFHashTable):
    """ A SysV hash table exposed as a section object, so the usual
        Section helpers are available alongside the hash-lookup API.
    """
    def __init__(self, header, name, elffile, symboltable):
        # Section.__init__ must run first: it populates the header used
        # by self['sh_offset'] below.
        Section.__init__(self, header, name, elffile)
        ELFHashTable.__init__(self, elffile, self['sh_offset'], symboltable)
|
||||
|
||||
|
||||
class GNUHashTable(object):
|
||||
""" Representation of a GNU hash table to find symbols in the
|
||||
symbol table - useful for super-stripped binaries without section
|
||||
headers where only the start of the symbol table is known from the
|
||||
dynamic segment. The layout and contents are nicely described at
|
||||
https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/.
|
||||
|
||||
The symboltable argument needs to implement a get_symbol() method -
|
||||
in a regular ELF file, this will be the linked symbol table section
|
||||
as indicated by the sh_link attribute. For super-stripped binaries,
|
||||
one should use the DynamicSegment object as the symboltable as it
|
||||
supports symbol lookup without access to a symbol table section.
|
||||
"""
|
||||
def __init__(self, elffile, start_offset, symboltable):
|
||||
self.elffile = elffile
|
||||
self._symboltable = symboltable
|
||||
self.params = struct_parse(self.elffile.structs.Gnu_Hash,
|
||||
self.elffile.stream,
|
||||
start_offset)
|
||||
# Element sizes in the hash table
|
||||
self._wordsize = self.elffile.structs.Elf_word('').sizeof()
|
||||
self._xwordsize = self.elffile.structs.Elf_xword('').sizeof()
|
||||
self._chain_pos = start_offset + 4 * self._wordsize + \
|
||||
self.params['bloom_size'] * self._xwordsize + \
|
||||
self.params['nbuckets'] * self._wordsize
|
||||
|
||||
def get_number_of_symbols(self):
|
||||
""" Get the number of symbols in the hash table by finding the bucket
|
||||
with the highest symbol index and walking to the end of its chain.
|
||||
"""
|
||||
# Find highest index in buckets array
|
||||
max_idx = max(self.params['buckets'])
|
||||
if max_idx < self.params['symoffset']:
|
||||
return self.params['symoffset']
|
||||
|
||||
# Position the stream at the start of the corresponding chain
|
||||
max_chain_pos = self._chain_pos + \
|
||||
(max_idx - self.params['symoffset']) * self._wordsize
|
||||
self.elffile.stream.seek(max_chain_pos)
|
||||
hash_format = '<I' if self.elffile.little_endian else '>I'
|
||||
|
||||
# Walk the chain to its end (lowest bit is set)
|
||||
while True:
|
||||
cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
|
||||
if cur_hash & 1:
|
||||
return max_idx + 1
|
||||
|
||||
max_idx += 1
|
||||
|
||||
def _matches_bloom(self, H1):
|
||||
""" Helper function to check if the given hash could be in the hash
|
||||
table by testing it against the bloom filter.
|
||||
"""
|
||||
arch_bits = self.elffile.elfclass
|
||||
H2 = H1 >> self.params['bloom_shift']
|
||||
word_idx = int(H1 / arch_bits) % self.params['bloom_size']
|
||||
BITMASK = (1 << (H1 % arch_bits)) | (1 << (H2 % arch_bits))
|
||||
return (self.params['bloom'][word_idx] & BITMASK) == BITMASK
|
||||
|
||||
def get_symbol(self, name):
|
||||
""" Look up a symbol from this hash table with the given name.
|
||||
"""
|
||||
namehash = self.gnu_hash(name)
|
||||
if not self._matches_bloom(namehash):
|
||||
return None
|
||||
|
||||
symidx = self.params['buckets'][namehash % self.params['nbuckets']]
|
||||
if symidx < self.params['symoffset']:
|
||||
return None
|
||||
|
||||
self.elffile.stream.seek(self._chain_pos + (symidx - self.params['symoffset']) * self._wordsize)
|
||||
hash_format = '<I' if self.elffile.little_endian else '>I'
|
||||
while True:
|
||||
cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
|
||||
if cur_hash | 1 == namehash | 1:
|
||||
symbol = self._symboltable.get_symbol(symidx)
|
||||
if name == symbol.name:
|
||||
return symbol
|
||||
|
||||
if cur_hash & 1:
|
||||
break
|
||||
symidx += 1
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def gnu_hash(key):
|
||||
""" Compute the GNU-style hash value for a given symbol name.
|
||||
"""
|
||||
if not isinstance(key, bytes):
|
||||
key = key.encode('utf-8')
|
||||
h = 5381
|
||||
for c in bytearray(key):
|
||||
h = h * 33 + c
|
||||
return h & 0xFFFFFFFF
|
||||
|
||||
|
||||
class GNUHashSection(Section, GNUHashTable):
    """ A GNU hash table exposed as a section object, so the usual
        Section helpers are available alongside the hash-lookup API.
    """
    def __init__(self, header, name, elffile, symboltable):
        # Section.__init__ must run first: it populates the header used
        # by self['sh_offset'] below.
        Section.__init__(self, header, name, elffile)
        GNUHashTable.__init__(self, elffile, self['sh_offset'], symboltable)
|
||||
@@ -1,62 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/notes.py
|
||||
#
|
||||
# ELF notes
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from ..common.py3compat import bytes2hex, bytes2str
|
||||
from ..common.utils import struct_parse, roundup
|
||||
from ..construct import CString
|
||||
|
||||
|
||||
def iter_notes(elffile, offset, size):
    """ Yield all the notes in a section or segment.

        Each yielded note is a dict-like object with at least n_name,
        n_type, n_desc, n_descdata, n_offset and n_size fields.
    """
    end = offset + size
    while offset < end:
        # Parse the fixed-size note header (namesz / descsz / type).
        note = struct_parse(
            elffile.structs.Elf_Nhdr,
            elffile.stream,
            stream_pos=offset)
        note['n_offset'] = offset
        offset += elffile.structs.Elf_Nhdr.sizeof()
        elffile.stream.seek(offset)
        # n_namesz is 4-byte aligned. (roundup's second argument is a
        # bit count: 2 bits -> 4-byte alignment.)
        disk_namesz = roundup(note['n_namesz'], 2)
        note['n_name'] = bytes2str(
            CString('').parse(elffile.stream.read(disk_namesz)))
        offset += disk_namesz

        # Keep the raw descriptor bytes regardless of how (or whether)
        # they are decoded below.
        desc_data = elffile.stream.read(note['n_descsz'])
        note['n_descdata'] = desc_data
        if note['n_type'] == 'NT_GNU_ABI_TAG':
            note['n_desc'] = struct_parse(elffile.structs.Elf_abi,
                                          elffile.stream,
                                          offset)
        elif note['n_type'] == 'NT_GNU_BUILD_ID':
            # Build IDs are conventionally rendered as a hex string.
            note['n_desc'] = bytes2hex(desc_data)
        elif note['n_type'] == 'NT_GNU_GOLD_VERSION':
            note['n_desc'] = bytes2str(desc_data)
        elif note['n_type'] == 'NT_PRPSINFO':
            note['n_desc'] = struct_parse(elffile.structs.Elf_Prpsinfo,
                                          elffile.stream,
                                          offset)
        elif note['n_type'] == 'NT_FILE':
            note['n_desc'] = struct_parse(elffile.structs.Elf_Nt_File,
                                          elffile.stream,
                                          offset)
        elif note['n_type'] == 'NT_GNU_PROPERTY_TYPE_0':
            # The descriptor is a list of properties; each entry is
            # padded to 4 bytes on ELF32 and 8 bytes on ELF64
            # (2 vs 3 alignment bits).
            off = offset
            props = []
            while off < end:
                p = struct_parse(elffile.structs.Elf_Prop, elffile.stream, off)
                off += roundup(p.pr_datasz + 8, 2 if elffile.elfclass == 32 else 3)
                props.append(p)
            note['n_desc'] = props
        else:
            # Unknown note types keep the raw descriptor bytes.
            note['n_desc'] = desc_data
        # Advance past the (4-byte aligned) descriptor to the next note.
        offset += roundup(note['n_descsz'], 2)
        note['n_size'] = offset - note['n_offset']
        yield note
|
||||
@@ -1,309 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/relocation.py
|
||||
#
|
||||
# ELF relocations
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from collections import namedtuple
|
||||
|
||||
from ..common.exceptions import ELFRelocationError
|
||||
from ..common.utils import elf_assert, struct_parse
|
||||
from .sections import Section
|
||||
from .enums import (
|
||||
ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS,
|
||||
ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64,
|
||||
ENUM_D_TAG)
|
||||
|
||||
|
||||
class Relocation(object):
    """ A single relocation entry, with dictionary-like access to its
        fields.

        The entry may be of either REL or RELA flavor; RELA entries
        additionally carry an explicit addend.
    """
    def __init__(self, entry, elffile):
        self.entry = entry
        self.elffile = elffile

    def is_RELA(self):
        """ True for RELA entries (those carrying an r_addend field);
            False for plain REL entries.
        """
        return 'r_addend' in self.entry

    def __getitem__(self, name):
        """ Dict-like field access, e.g. reloc['r_offset']. """
        return self.entry[name]

    def __repr__(self):
        flavor = 'RELA' if self.is_RELA() else 'REL'
        return '<Relocation (%s): %s>' % (flavor, self.entry)

    def __str__(self):
        return self.__repr__()
|
||||
|
||||
|
||||
class RelocationTable(object):
    """ Common machinery shared by relocation sections and bare
        relocation tables: entry sizing, counting and per-index parsing.
    """

    def __init__(self, elffile, offset, size, is_rela):
        self._elffile = elffile
        self._stream = elffile.stream
        self._elfstructs = elffile.structs
        self._offset = offset
        self._size = size
        self._is_rela = is_rela

        # RELA entries carry an explicit addend and therefore use a
        # different (larger) struct than REL entries.
        if is_rela:
            self.entry_struct = self._elfstructs.Elf_Rela
        else:
            self.entry_struct = self._elfstructs.Elf_Rel
        self.entry_size = self.entry_struct.sizeof()

    def is_RELA(self):
        """ True when this table holds RELA entries; False for REL. """
        return self._is_rela

    def num_relocations(self):
        """ Number of relocation entries in the table. """
        return self._size // self.entry_size

    def get_relocation(self, n):
        """ Parse and return the Relocation at index n. """
        position = self._offset + n * self.entry_size
        parsed = struct_parse(
            self.entry_struct,
            self._stream,
            stream_pos=position)
        return Relocation(parsed, self._elffile)

    def iter_relocations(self):
        """ Yield every relocation in table order. """
        for idx in range(self.num_relocations()):
            yield self.get_relocation(idx)
|
||||
|
||||
|
||||
class RelocationSection(Section, RelocationTable):
    """ A relocation section (SHT_REL or SHT_RELA): a Section that is
        also a RelocationTable over its own contents.
    """
    def __init__(self, header, name, elffile):
        Section.__init__(self, header, name, elffile)
        # RelocationTable.__init__ sets self.entry_size, which the
        # second sanity check below depends on.
        RelocationTable.__init__(
            self, self.elffile, self['sh_offset'], self['sh_size'],
            header['sh_type'] == 'SHT_RELA')

        elf_assert(header['sh_type'] in ('SHT_REL', 'SHT_RELA'),
                   'Unknown relocation type section')
        elf_assert(header['sh_entsize'] == self.entry_size,
                   'Expected sh_entsize of %s section to be %s' % (
                       header['sh_type'], self.entry_size))
|
||||
|
||||
|
||||
class RelocationHandler(object):
    """ Handles the logic of relocations in ELF files: locating the
        relocation section for a given section and applying relocation
        "recipes" to section data.
    """
    def __init__(self, elffile):
        self.elffile = elffile

    def find_relocations_for_section(self, section):
        """ Given a section, find the relocation section for it in the ELF
            file. Return a RelocationSection object, or None if none was
            found.
        """
        reloc_section_names = (
            '.rel' + section.name,
            '.rela' + section.name)
        # Find the relocation section aimed at this one. Currently assume
        # that either .rel or .rela section exists for this section, but
        # not both.
        for relsection in self.elffile.iter_sections():
            if (isinstance(relsection, RelocationSection) and
                    relsection.name in reloc_section_names):
                return relsection
        return None

    def apply_section_relocations(self, stream, reloc_section):
        """ Apply all relocations in reloc_section (a RelocationSection
            object) to the given stream, that contains the data of the
            section that is being relocated. The stream is modified as a
            result.
        """
        # The symbol table associated with this relocation section
        # (sh_link points at it).
        symtab = self.elffile.get_section(reloc_section['sh_link'])
        for reloc in reloc_section.iter_relocations():
            self._do_apply_relocation(stream, reloc, symtab)

    def _do_apply_relocation(self, stream, reloc, symtab):
        """ Apply a single relocation entry to `stream` in place. """
        # Preparations for performing the relocation: obtain the value of
        # the symbol mentioned in the relocation, as well as the relocation
        # recipe which tells us how to actually perform it.
        # All peppered with some sanity checking.
        if reloc['r_info_sym'] >= symtab.num_symbols():
            raise ELFRelocationError(
                'Invalid symbol reference in relocation: index %s' % (
                    reloc['r_info_sym']))
        sym_value = symtab.get_symbol(reloc['r_info_sym'])['st_value']

        reloc_type = reloc['r_info_type']
        recipe = None

        # Select the per-architecture recipe table. x86/MIPS/ARM only
        # use REL entries, x64 only RELA; a mismatch is an error.
        if self.elffile.get_machine_arch() == 'x86':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for x86: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X86.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == 'x64':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for x64: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X64.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == 'MIPS':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for MIPS: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_MIPS.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == 'ARM':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for ARM: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_ARM.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == 'AArch64':
            recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == '64-bit PowerPC':
            recipe = self._RELOCATION_RECIPES_PPC64.get(reloc_type, None)

        if recipe is None:
            raise ELFRelocationError(
                'Unsupported relocation type: %s' % reloc_type)

        # So now we have everything we need to actually perform the relocation.
        # Let's get to it:

        # 0. Find out which struct we're going to be using to read this value
        #    from the stream and write it back.
        if recipe.bytesize == 4:
            value_struct = self.elffile.structs.Elf_word('')
        elif recipe.bytesize == 8:
            value_struct = self.elffile.structs.Elf_word64('')
        else:
            raise ELFRelocationError('Invalid bytesize %s for relocation' %
                                     recipe.bytesize)

        # 1. Read the value from the stream (with correct size and endianness)
        original_value = struct_parse(
            value_struct,
            stream,
            stream_pos=reloc['r_offset'])
        # 2. Apply the relocation to the value, acting according to the recipe
        relocated_value = recipe.calc_func(
            value=original_value,
            sym_value=sym_value,
            offset=reloc['r_offset'],
            addend=reloc['r_addend'] if recipe.has_addend else 0)
        # 3. Write the relocated value back into the stream
        stream.seek(reloc['r_offset'])

        # Make sure the relocated value fits back by wrapping it around. This
        # looks like a problem, but it seems to be the way this is done in
        # binutils too.
        relocated_value = relocated_value % (2 ** (recipe.bytesize * 8))
        value_struct.build_stream(relocated_value, stream)

    # Relocations are represented by "recipes". Each recipe specifies:
    #  bytesize: The number of bytes to read (and write back) to the section.
    #            This is the unit of data on which relocation is performed.
    #  has_addend: Does this relocation have an extra addend?
    #  calc_func: A function that performs the relocation on an extracted
    #             value, and returns the updated value.
    #
    _RELOCATION_RECIPE_TYPE = namedtuple('_RELOCATION_RECIPE_TYPE',
        'bytesize has_addend calc_func')

    # The calc_func implementations below share one keyword-argument
    # signature so recipes can call them interchangeably.
    def _reloc_calc_identity(value, sym_value, offset, addend=0):
        # Leave the stored value unchanged (R_*_NONE).
        return value

    def _reloc_calc_sym_plus_value(value, sym_value, offset, addend=0):
        # Absolute relocation using the stored value as implicit addend.
        return sym_value + value

    def _reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        # PC-relative relocation with implicit addend.
        return sym_value + value - offset

    def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        # Absolute relocation with explicit (RELA) addend.
        return sym_value + addend

    def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0):
        # PC-relative relocation with explicit (RELA) addend.
        return sym_value + addend - offset

    def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        # ARM branch relocations operate on instruction-word (4-byte)
        # granularity, hence the division by 4.
        return sym_value // 4 + value - offset // 4

    _RELOCATION_RECIPES_ARM = {
        ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_ARM['R_ARM_CALL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_arm_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_AARCH64 = {
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_PREL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }

    # https://dmz-portal.mips.com/wiki/MIPS_relocation_types
    _RELOCATION_RECIPES_MIPS = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
    }

    _RELOCATION_RECIPES_PPC64 = {
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_REL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }

    _RELOCATION_RECIPES_X86 = {
        ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_i386['R_386_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_i386['R_386_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_X64 = {
        ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }
|
||||
|
||||
|
||||
@@ -1,507 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/sections.py
|
||||
#
|
||||
# ELF sections
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from ..common.exceptions import ELFCompressionError
|
||||
from ..common.utils import struct_parse, elf_assert, parse_cstring_from_stream
|
||||
from collections import defaultdict
|
||||
from .constants import SH_FLAGS
|
||||
from .notes import iter_notes
|
||||
|
||||
import zlib
|
||||
|
||||
|
||||
class Section(object):
    """ Base class for ELF sections. Also used for all sections types that have
        no special functionality.

        Allows dictionary-like access to the section header. For example:
        > sec = Section(...)
        > sec['sh_type']  # section type

        SHF_COMPRESSED sections are handled transparently: data() returns
        the decompressed bytes and data_size/data_alignment report the
        decompressed geometry.
    """
    def __init__(self, header, name, elffile):
        self.header = header
        self.name = name
        self.elffile = elffile
        self.stream = self.elffile.stream
        self.structs = self.elffile.structs
        self._compressed = header['sh_flags'] & SH_FLAGS.SHF_COMPRESSED

        if self.compressed:
            # Read the compression header now to know about the size/alignment
            # of the decompressed data.
            header = struct_parse(self.structs.Elf_Chdr,
                                  self.stream,
                                  stream_pos=self['sh_offset'])
            self._compression_type = header['ch_type']
            self._decompressed_size = header['ch_size']
            self._decompressed_align = header['ch_addralign']
        else:
            self._decompressed_size = header['sh_size']
            self._decompressed_align = header['sh_addralign']

    @property
    def compressed(self):
        """ Is this section compressed?
        """
        return self._compressed

    @property
    def data_size(self):
        """ Return the logical size for this section's data.

            This can be different from the .sh_size header field when the
            section is compressed.
        """
        return self._decompressed_size

    @property
    def data_alignment(self):
        """ Return the logical alignment for this section's data.

            This can be different from the .sh_addralign header field when
            the section is compressed.
        """
        return self._decompressed_align

    def data(self):
        """ The section data from the file.

            Note that data is decompressed if the stored section data is
            compressed. Raises ELFCompressionError for unsupported
            compression types or on a decompressed-size mismatch.
        """
        # If this section is NOBITS, there is no data. Provide a dummy answer
        # of the right logical size.
        if self.header['sh_type'] == 'SHT_NOBITS':
            return b'\0' * self.data_size

        # If this section is compressed, inflate it.
        if self.compressed:
            c_type = self._compression_type
            if c_type == 'ELFCOMPRESS_ZLIB':
                # Read the data to decompress starting right after the
                # compression header until the end of the section.
                hdr_size = self.structs.Elf_Chdr.sizeof()
                self.stream.seek(self['sh_offset'] + hdr_size)
                compressed = self.stream.read(self['sh_size'] - hdr_size)

                decomp = zlib.decompressobj()
                result = decomp.decompress(compressed, self.data_size)
            else:
                # c_type may be an enum name (a str such as
                # 'ELFCOMPRESS_ZSTD') or a raw integer for unknown values.
                # The previous '{:#0x}' format spec raised ValueError on
                # strings, masking the intended ELFCompressionError; {!r}
                # renders both forms safely.
                raise ELFCompressionError(
                    'Unknown compression type: {!r}'.format(c_type)
                )

            if len(result) != self._decompressed_size:
                raise ELFCompressionError(
                    'Decompressed data is {} bytes long, should be {} bytes'
                    ' long'.format(len(result), self._decompressed_size)
                )
        else:
            self.stream.seek(self['sh_offset'])
            result = self.stream.read(self._decompressed_size)

        return result

    def is_null(self):
        """ Is this a null section?
        """
        return False

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def __eq__(self, other):
        # Sections compare equal when their headers do; objects without a
        # header attribute never compare equal.
        try:
            return self.header == other.header
        except AttributeError:
            return False

    def __hash__(self):
        return hash(self.header)
|
||||
|
||||
|
||||
class NullSection(Section):
    """ The SHT_NULL placeholder section.
    """
    def is_null(self):
        """ Always True: this is the null section. """
        return True
|
||||
|
||||
|
||||
class StringTableSection(Section):
    """ An ELF string table section: maps byte offsets within the section
        to NUL-terminated strings.
    """
    def get_string(self, offset):
        """ Return the string stored at the given offset, decoded as
            UTF-8 (undecodable bytes replaced); '' when nothing could be
            read.
        """
        raw = parse_cstring_from_stream(
            self.stream, self['sh_offset'] + offset)
        if not raw:
            return ''
        return raw.decode('utf-8', errors='replace')
|
||||
|
||||
|
||||
class SymbolTableIndexSection(Section):
    """ A section holding the section header table indices that correspond
        to symbols in the linked symbol table. Required whenever the symbol
        table contains an entry whose section header index is SHN_XINDEX
        (0xffff). Format described at
        https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html
    """
    def __init__(self, header, name, elffile, symboltable):
        super(SymbolTableIndexSection, self).__init__(header, name, elffile)
        self.symboltable = symboltable

    def get_section_index(self, n):
        """ Section header table index for the symbol with index #n.

            The section is a flat array with one Elf_word entry per
            symbol of the associated symbol table.
        """
        return struct_parse(
            self.elffile.structs.Elf_word(''), self.stream,
            self['sh_offset'] + n * self['sh_entsize'])
|
||||
|
||||
|
||||
class SymbolTableSection(Section):
    """ An ELF symbol table section, backed by the associated
        StringTableSection (passed to the constructor) for symbol names.
    """
    def __init__(self, header, name, elffile, stringtable):
        super(SymbolTableSection, self).__init__(header, name, elffile)
        self.stringtable = stringtable
        elf_assert(self['sh_entsize'] > 0,
                   'Expected entry size of section %r to be > 0' % name)
        elf_assert(self['sh_size'] % self['sh_entsize'] == 0,
                   'Expected section size to be a multiple of entry size in section %r' % name)
        # Lazily-built mapping from symbol name to list of table indices
        # (see get_symbol_by_name).
        self._symbol_name_map = None

    def num_symbols(self):
        """ Number of symbol entries in the table. """
        return self['sh_size'] // self['sh_entsize']

    def get_symbol(self, n):
        """ Parse and return the Symbol at index #n. """
        parsed = struct_parse(
            self.structs.Elf_Sym,
            self.stream,
            stream_pos=self['sh_offset'] + n * self['sh_entsize'])
        # The symbol's name lives in the linked string table.
        symbol_name = self.stringtable.get_string(parsed['st_name'])
        return Symbol(parsed, symbol_name)

    def get_symbol_by_name(self, name):
        """ Return the list of symbols with the given name, or None if no
            such symbol exists.
        """
        # Construct the name -> indices map lazily, on first use.
        if self._symbol_name_map is None:
            mapping = defaultdict(list)
            for idx, sym in enumerate(self.iter_symbols()):
                mapping[sym.name].append(idx)
            self._symbol_name_map = mapping
        indices = self._symbol_name_map.get(name)
        if not indices:
            return None
        return [self.get_symbol(idx) for idx in indices]

    def iter_symbols(self):
        """ Yield every symbol in table order. """
        for idx in range(self.num_symbols()):
            yield self.get_symbol(idx)
|
||||
|
||||
|
||||
class Symbol(object):
    """ One entry of a symbol table, paired with its resolved name.

        Like Section objects, supports dictionary-like access to the
        underlying entry fields.
    """
    def __init__(self, entry, name):
        self.entry = entry
        self.name = name

    def __getitem__(self, name):
        """ Dict-like field access, e.g. sym['st_value']. """
        return self.entry[name]
|
||||
|
||||
|
||||
class SUNWSyminfoTableSection(Section):
    """ A .SUNW_syminfo table section, paired with the SymbolTableSection
        given at construction time.
    """
    def __init__(self, header, name, elffile, symboltable):
        super(SUNWSyminfoTableSection, self).__init__(header, name, elffile)
        self.symboltable = symboltable

    def num_symbols(self):
        """ Number of symbols described by the table. Entry 0, which
            stores the syminfo table version rather than a symbol, is
            excluded from the count.
        """
        return self['sh_size'] // self['sh_entsize'] - 1

    def get_symbol(self, n):
        """ Return the Symbol at index #n. Valid indices start at 1:
            the first entry holds the current version of the syminfo
            table.
        """
        parsed = struct_parse(
            self.structs.Elf_Sunw_Syminfo,
            self.stream,
            stream_pos=self['sh_offset'] + n * self['sh_entsize'])
        # The name comes from the companion symbol table.
        return Symbol(parsed, self.symboltable.get_symbol(n).name)

    def iter_symbols(self):
        """ Yield every symbol, starting from index 1. """
        for idx in range(1, self.num_symbols() + 1):
            yield self.get_symbol(idx)
|
||||
|
||||
|
||||
class NoteSection(Section):
    """ An SHT_NOTE section; knows how to parse its notes.
    """
    def iter_notes(self):
        """ Yield each note in the section as a dictionary-like object
            with "n_name", "n_type", and "n_desc" fields, amongst others.
        """
        return iter_notes(self.elffile, self['sh_offset'], self['sh_size'])
|
||||
|
||||
|
||||
class StabSection(Section):
    """ An ELF .stab debugging-information section.
    """
    def iter_stabs(self):
        """ Yield each stab entry; result type is ELFStructs.Elf_Stabs.
        """
        offset = self['sh_offset']
        end = offset + self['sh_size']
        entry_size = self.structs.Elf_Stabs.sizeof()
        while offset < end:
            entry = struct_parse(
                self.structs.Elf_Stabs,
                self.stream,
                stream_pos=offset)
            # Record where this entry came from, then step to the next.
            entry['n_offset'] = offset
            offset += entry_size
            self.stream.seek(offset)
            yield entry
|
||||
|
||||
|
||||
class ARMAttribute(object):
|
||||
""" ARM attribute object - representing a build attribute of ARM ELF files.
|
||||
"""
|
||||
def __init__(self, structs, stream):
|
||||
self._tag = struct_parse(structs.Elf_Attribute_Tag, stream)
|
||||
self.extra = None
|
||||
|
||||
if self.tag in ('TAG_FILE', 'TAG_SECTION', 'TAG_SYMBOL'):
|
||||
self.value = struct_parse(structs.Elf_word('value'), stream)
|
||||
|
||||
if self.tag != 'TAG_FILE':
|
||||
self.extra = []
|
||||
s_number = struct_parse(structs.Elf_uleb128('s_number'), stream)
|
||||
|
||||
while s_number != 0:
|
||||
self.extra.append(s_number)
|
||||
s_number = struct_parse(structs.Elf_uleb128('s_number'),
|
||||
stream
|
||||
)
|
||||
|
||||
elif self.tag in ('TAG_CPU_RAW_NAME', 'TAG_CPU_NAME', 'TAG_CONFORMANCE'):
|
||||
self.value = struct_parse(structs.Elf_ntbs('value',
|
||||
encoding='utf-8'),
|
||||
stream)
|
||||
|
||||
elif self.tag == 'TAG_COMPATIBILITY':
|
||||
self.value = struct_parse(structs.Elf_uleb128('value'), stream)
|
||||
self.extra = struct_parse(structs.Elf_ntbs('vendor_name',
|
||||
encoding='utf-8'),
|
||||
stream)
|
||||
|
||||
elif self.tag == 'TAG_ALSO_COMPATIBLE_WITH':
|
||||
self.value = ARMAttribute(structs, stream)
|
||||
|
||||
if type(self.value.value) is not str:
|
||||
nul = struct_parse(structs.Elf_byte('nul'), stream)
|
||||
elf_assert(nul == 0,
|
||||
"Invalid terminating byte %r, expecting NUL." % nul)
|
||||
|
||||
else:
|
||||
self.value = struct_parse(structs.Elf_uleb128('value'), stream)
|
||||
|
||||
@property
|
||||
def tag(self):
|
||||
return self._tag['tag']
|
||||
|
||||
def __repr__(self):
|
||||
s = '<ARMAttribute (%s): %r>' % (self.tag, self.value)
|
||||
s += ' %s' % self.extra if self.extra is not None else ''
|
||||
return s
|
||||
|
||||
|
||||
class ARMAttributesSubsubsection(object):
|
||||
""" Subsubsection of an ELF .ARM.attributes section's subsection.
|
||||
"""
|
||||
def __init__(self, stream, structs, offset):
|
||||
self.stream = stream
|
||||
self.offset = offset
|
||||
self.structs = structs
|
||||
|
||||
self.header = ARMAttribute(self.structs, self.stream)
|
||||
|
||||
self.attr_start = self.stream.tell()
|
||||
|
||||
def iter_attributes(self, tag=None):
|
||||
""" Yield all attributes (limit to |tag| if specified).
|
||||
"""
|
||||
for attribute in self._make_attributes():
|
||||
if tag is None or attribute.tag == tag:
|
||||
yield attribute
|
||||
|
||||
@property
|
||||
def num_attributes(self):
|
||||
""" Number of attributes in the subsubsection.
|
||||
"""
|
||||
return sum(1 for _ in self.iter_attributes()) + 1
|
||||
|
||||
@property
|
||||
def attributes(self):
|
||||
""" List of all attributes in the subsubsection.
|
||||
"""
|
||||
return [self.header] + list(self.iter_attributes())
|
||||
|
||||
def _make_attributes(self):
|
||||
""" Create all attributes for this subsubsection except the first one
|
||||
which is the header.
|
||||
"""
|
||||
end = self.offset + self.header.value
|
||||
|
||||
self.stream.seek(self.attr_start)
|
||||
|
||||
while self.stream.tell() != end:
|
||||
yield ARMAttribute(self.structs, self.stream)
|
||||
|
||||
def __repr__(self):
|
||||
s = "<ARMAttributesSubsubsection (%s): %d bytes>"
|
||||
return s % (self.header.tag[4:], self.header.value)
|
||||
|
||||
|
||||
class ARMAttributesSubsection(object):
|
||||
""" Subsection of an ELF .ARM.attributes section.
|
||||
"""
|
||||
def __init__(self, stream, structs, offset):
|
||||
self.stream = stream
|
||||
self.offset = offset
|
||||
self.structs = structs
|
||||
|
||||
self.header = struct_parse(self.structs.Elf_Attr_Subsection_Header,
|
||||
self.stream,
|
||||
self.offset
|
||||
)
|
||||
|
||||
self.subsubsec_start = self.stream.tell()
|
||||
|
||||
def iter_subsubsections(self, scope=None):
|
||||
""" Yield all subsubsections (limit to |scope| if specified).
|
||||
"""
|
||||
for subsubsec in self._make_subsubsections():
|
||||
if scope is None or subsubsec.header.tag == scope:
|
||||
yield subsubsec
|
||||
|
||||
@property
|
||||
def num_subsubsections(self):
|
||||
""" Number of subsubsections in the subsection.
|
||||
"""
|
||||
return sum(1 for _ in self.iter_subsubsections())
|
||||
|
||||
@property
|
||||
def subsubsections(self):
|
||||
""" List of all subsubsections in the subsection.
|
||||
"""
|
||||
return list(self.iter_subsubsections())
|
||||
|
||||
def _make_subsubsections(self):
|
||||
""" Create all subsubsections for this subsection.
|
||||
"""
|
||||
end = self.offset + self['length']
|
||||
|
||||
self.stream.seek(self.subsubsec_start)
|
||||
|
||||
while self.stream.tell() != end:
|
||||
subsubsec = ARMAttributesSubsubsection(self.stream,
|
||||
self.structs,
|
||||
self.stream.tell())
|
||||
self.stream.seek(self.subsubsec_start + subsubsec.header.value)
|
||||
yield subsubsec
|
||||
|
||||
def __getitem__(self, name):
|
||||
""" Implement dict-like access to header entries.
|
||||
"""
|
||||
return self.header[name]
|
||||
|
||||
def __repr__(self):
|
||||
s = "<ARMAttributesSubsection (%s): %d bytes>"
|
||||
return s % (self.header['vendor_name'], self.header['length'])
|
||||
|
||||
|
||||
class ARMAttributesSection(Section):
|
||||
""" ELF .ARM.attributes section.
|
||||
"""
|
||||
def __init__(self, header, name, elffile):
|
||||
super(ARMAttributesSection, self).__init__(header, name, elffile)
|
||||
|
||||
fv = struct_parse(self.structs.Elf_byte('format_version'),
|
||||
self.stream,
|
||||
self['sh_offset']
|
||||
)
|
||||
|
||||
elf_assert(chr(fv) == 'A',
|
||||
"Unknown attributes version %s, expecting 'A'." % chr(fv)
|
||||
)
|
||||
|
||||
self.subsec_start = self.stream.tell()
|
||||
|
||||
def iter_subsections(self, vendor_name=None):
|
||||
""" Yield all subsections (limit to |vendor_name| if specified).
|
||||
"""
|
||||
for subsec in self._make_subsections():
|
||||
if vendor_name is None or subsec['vendor_name'] == vendor_name:
|
||||
yield subsec
|
||||
|
||||
@property
|
||||
def num_subsections(self):
|
||||
""" Number of subsections in the section.
|
||||
"""
|
||||
return sum(1 for _ in self.iter_subsections())
|
||||
|
||||
@property
|
||||
def subsections(self):
|
||||
""" List of all subsections in the section.
|
||||
"""
|
||||
return list(self.iter_subsections())
|
||||
|
||||
def _make_subsections(self):
|
||||
""" Create all subsections for this section.
|
||||
"""
|
||||
end = self['sh_offset'] + self.data_size
|
||||
|
||||
self.stream.seek(self.subsec_start)
|
||||
|
||||
while self.stream.tell() != end:
|
||||
subsec = ARMAttributesSubsection(self.stream,
|
||||
self.structs,
|
||||
self.stream.tell())
|
||||
self.stream.seek(self.subsec_start + subsec['length'])
|
||||
yield subsec
|
||||
@@ -1,121 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/segments.py
|
||||
#
|
||||
# ELF segments
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from ..construct import CString
|
||||
from ..common.utils import struct_parse
|
||||
from .constants import SH_FLAGS
|
||||
from .notes import iter_notes
|
||||
|
||||
|
||||
class Segment(object):
|
||||
def __init__(self, header, stream):
|
||||
self.header = header
|
||||
self.stream = stream
|
||||
|
||||
def data(self):
|
||||
""" The segment data from the file.
|
||||
"""
|
||||
self.stream.seek(self['p_offset'])
|
||||
return self.stream.read(self['p_filesz'])
|
||||
|
||||
def __getitem__(self, name):
|
||||
""" Implement dict-like access to header entries
|
||||
"""
|
||||
return self.header[name]
|
||||
|
||||
def section_in_segment(self, section):
|
||||
""" Is the given section contained in this segment?
|
||||
|
||||
Note: this tries to reproduce the intricate rules of the
|
||||
ELF_SECTION_IN_SEGMENT_STRICT macro of the header
|
||||
elf/include/internal.h in the source of binutils.
|
||||
"""
|
||||
# Only the 'strict' checks from ELF_SECTION_IN_SEGMENT_1 are included
|
||||
segtype = self['p_type']
|
||||
sectype = section['sh_type']
|
||||
secflags = section['sh_flags']
|
||||
|
||||
# Only PT_LOAD, PT_GNU_RELRO and PT_TLS segments can contain SHF_TLS
|
||||
# sections
|
||||
if ( secflags & SH_FLAGS.SHF_TLS and
|
||||
segtype in ('PT_TLS', 'PT_GNU_RELRO', 'PT_LOAD')):
|
||||
pass
|
||||
# PT_TLS segment contains only SHF_TLS sections, PT_PHDR no sections
|
||||
# at all
|
||||
elif ( (secflags & SH_FLAGS.SHF_TLS) == 0 and
|
||||
segtype not in ('PT_TLS', 'PT_PHDR')):
|
||||
pass
|
||||
else:
|
||||
return False
|
||||
|
||||
# PT_LOAD and similar segments only have SHF_ALLOC sections.
|
||||
if ( (secflags & SH_FLAGS.SHF_ALLOC) == 0 and
|
||||
segtype in ('PT_LOAD', 'PT_DYNAMIC', 'PT_GNU_EH_FRAME',
|
||||
'PT_GNU_RELRO', 'PT_GNU_STACK')):
|
||||
return False
|
||||
|
||||
# In ELF_SECTION_IN_SEGMENT_STRICT the flag check_vma is on, so if
|
||||
# this is an alloc section, check whether its VMA is in bounds.
|
||||
if secflags & SH_FLAGS.SHF_ALLOC:
|
||||
secaddr = section['sh_addr']
|
||||
vaddr = self['p_vaddr']
|
||||
|
||||
# This checks that the section is wholly contained in the segment.
|
||||
# The third condition is the 'strict' one - an empty section will
|
||||
# not match at the very end of the segment (unless the segment is
|
||||
# also zero size, which is handled by the second condition).
|
||||
if not (secaddr >= vaddr and
|
||||
secaddr - vaddr + section['sh_size'] <= self['p_memsz'] and
|
||||
secaddr - vaddr <= self['p_memsz'] - 1):
|
||||
return False
|
||||
|
||||
# If we've come this far and it's a NOBITS section, it's in the segment
|
||||
if sectype == 'SHT_NOBITS':
|
||||
return True
|
||||
|
||||
secoffset = section['sh_offset']
|
||||
poffset = self['p_offset']
|
||||
|
||||
# Same logic as with secaddr vs. vaddr checks above, just on offsets in
|
||||
# the file
|
||||
return (secoffset >= poffset and
|
||||
secoffset - poffset + section['sh_size'] <= self['p_filesz'] and
|
||||
secoffset - poffset <= self['p_filesz'] - 1)
|
||||
|
||||
|
||||
class InterpSegment(Segment):
|
||||
""" INTERP segment. Knows how to obtain the path to the interpreter used
|
||||
for this ELF file.
|
||||
"""
|
||||
def __init__(self, header, stream):
|
||||
super(InterpSegment, self).__init__(header, stream)
|
||||
|
||||
def get_interp_name(self):
|
||||
""" Obtain the interpreter path used for this ELF file.
|
||||
"""
|
||||
path_offset = self['p_offset']
|
||||
return struct_parse(
|
||||
CString('', encoding='utf-8'),
|
||||
self.stream,
|
||||
stream_pos=path_offset)
|
||||
|
||||
|
||||
class NoteSegment(Segment):
|
||||
""" NOTE segment. Knows how to parse notes.
|
||||
"""
|
||||
def __init__(self, header, stream, elffile):
|
||||
super(NoteSegment, self).__init__(header, stream)
|
||||
self.elffile = elffile
|
||||
|
||||
def iter_notes(self):
|
||||
|
||||
""" Yield all the notes in the segment. Each result is a dictionary-
|
||||
like object with "n_name", "n_type", and "n_desc" fields, amongst
|
||||
others.
|
||||
"""
|
||||
return iter_notes(self.elffile, self['p_offset'], self['p_filesz'])
|
||||
@@ -1,531 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools: elf/structs.py
|
||||
#
|
||||
# Encapsulation of Construct structs for parsing an ELF file, adjusted for
|
||||
# correct endianness and word-size.
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from ..construct import (
|
||||
UBInt8, UBInt16, UBInt32, UBInt64,
|
||||
ULInt8, ULInt16, ULInt32, ULInt64,
|
||||
SBInt32, SLInt32, SBInt64, SLInt64,
|
||||
Struct, Array, Enum, Padding, BitStruct, BitField, Value, String, CString,
|
||||
Switch, Field
|
||||
)
|
||||
from ..common.construct_utils import ULEB128
|
||||
from ..common.utils import roundup
|
||||
from .enums import *
|
||||
|
||||
|
||||
class ELFStructs(object):
|
||||
""" Accessible attributes:
|
||||
|
||||
Elf_{byte|half|word|word64|addr|offset|sword|xword|xsword}:
|
||||
Data chunks, as specified by the ELF standard, adjusted for
|
||||
correct endianness and word-size.
|
||||
|
||||
Elf_Ehdr:
|
||||
ELF file header
|
||||
|
||||
Elf_Phdr:
|
||||
Program header
|
||||
|
||||
Elf_Shdr:
|
||||
Section header
|
||||
|
||||
Elf_Sym:
|
||||
Symbol table entry
|
||||
|
||||
Elf_Rel, Elf_Rela:
|
||||
Entries in relocation sections
|
||||
"""
|
||||
def __init__(self, little_endian=True, elfclass=32):
|
||||
assert elfclass == 32 or elfclass == 64
|
||||
self.little_endian = little_endian
|
||||
self.elfclass = elfclass
|
||||
self.e_type = None
|
||||
self.e_machine = None
|
||||
self.e_ident_osabi = None
|
||||
|
||||
def __getstate__(self):
|
||||
return self.little_endian, self.elfclass, self.e_type, self.e_machine, self.e_ident_osabi
|
||||
|
||||
def __setstate__(self, state):
|
||||
self.little_endian, self.elfclass, e_type, e_machine, e_osabi = state
|
||||
self.create_basic_structs()
|
||||
self.create_advanced_structs(e_type, e_machine, e_osabi)
|
||||
|
||||
def create_basic_structs(self):
|
||||
""" Create word-size related structs and ehdr struct needed for
|
||||
initial determining of ELF type.
|
||||
"""
|
||||
if self.little_endian:
|
||||
self.Elf_byte = ULInt8
|
||||
self.Elf_half = ULInt16
|
||||
self.Elf_word = ULInt32
|
||||
self.Elf_word64 = ULInt64
|
||||
self.Elf_addr = ULInt32 if self.elfclass == 32 else ULInt64
|
||||
self.Elf_offset = self.Elf_addr
|
||||
self.Elf_sword = SLInt32
|
||||
self.Elf_xword = ULInt32 if self.elfclass == 32 else ULInt64
|
||||
self.Elf_sxword = SLInt32 if self.elfclass == 32 else SLInt64
|
||||
else:
|
||||
self.Elf_byte = UBInt8
|
||||
self.Elf_half = UBInt16
|
||||
self.Elf_word = UBInt32
|
||||
self.Elf_word64 = UBInt64
|
||||
self.Elf_addr = UBInt32 if self.elfclass == 32 else UBInt64
|
||||
self.Elf_offset = self.Elf_addr
|
||||
self.Elf_sword = SBInt32
|
||||
self.Elf_xword = UBInt32 if self.elfclass == 32 else UBInt64
|
||||
self.Elf_sxword = SBInt32 if self.elfclass == 32 else SBInt64
|
||||
self._create_ehdr()
|
||||
self._create_leb128()
|
||||
self._create_ntbs()
|
||||
|
||||
def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=None):
|
||||
""" Create all ELF structs except the ehdr. They may possibly depend
|
||||
on provided e_type and/or e_machine parsed from ehdr.
|
||||
"""
|
||||
self.e_type = e_type
|
||||
self.e_machine = e_machine
|
||||
self.e_ident_osabi = e_ident_osabi
|
||||
|
||||
self._create_phdr()
|
||||
self._create_shdr()
|
||||
self._create_chdr()
|
||||
self._create_sym()
|
||||
self._create_rel()
|
||||
self._create_dyn()
|
||||
self._create_sunw_syminfo()
|
||||
self._create_gnu_verneed()
|
||||
self._create_gnu_verdef()
|
||||
self._create_gnu_versym()
|
||||
self._create_gnu_abi()
|
||||
self._create_gnu_property()
|
||||
self._create_note(e_type)
|
||||
self._create_stabs()
|
||||
self._create_arm_attributes()
|
||||
self._create_elf_hash()
|
||||
self._create_gnu_hash()
|
||||
|
||||
#-------------------------------- PRIVATE --------------------------------#
|
||||
|
||||
def _create_ehdr(self):
|
||||
self.Elf_Ehdr = Struct('Elf_Ehdr',
|
||||
Struct('e_ident',
|
||||
Array(4, self.Elf_byte('EI_MAG')),
|
||||
Enum(self.Elf_byte('EI_CLASS'), **ENUM_EI_CLASS),
|
||||
Enum(self.Elf_byte('EI_DATA'), **ENUM_EI_DATA),
|
||||
Enum(self.Elf_byte('EI_VERSION'), **ENUM_E_VERSION),
|
||||
Enum(self.Elf_byte('EI_OSABI'), **ENUM_EI_OSABI),
|
||||
self.Elf_byte('EI_ABIVERSION'),
|
||||
Padding(7)
|
||||
),
|
||||
Enum(self.Elf_half('e_type'), **ENUM_E_TYPE),
|
||||
Enum(self.Elf_half('e_machine'), **ENUM_E_MACHINE),
|
||||
Enum(self.Elf_word('e_version'), **ENUM_E_VERSION),
|
||||
self.Elf_addr('e_entry'),
|
||||
self.Elf_offset('e_phoff'),
|
||||
self.Elf_offset('e_shoff'),
|
||||
self.Elf_word('e_flags'),
|
||||
self.Elf_half('e_ehsize'),
|
||||
self.Elf_half('e_phentsize'),
|
||||
self.Elf_half('e_phnum'),
|
||||
self.Elf_half('e_shentsize'),
|
||||
self.Elf_half('e_shnum'),
|
||||
self.Elf_half('e_shstrndx'),
|
||||
)
|
||||
|
||||
def _create_leb128(self):
|
||||
self.Elf_uleb128 = ULEB128
|
||||
|
||||
def _create_ntbs(self):
|
||||
self.Elf_ntbs = CString
|
||||
|
||||
def _create_phdr(self):
|
||||
p_type_dict = ENUM_P_TYPE_BASE
|
||||
if self.e_machine == 'EM_ARM':
|
||||
p_type_dict = ENUM_P_TYPE_ARM
|
||||
elif self.e_machine == 'EM_AARCH64':
|
||||
p_type_dict = ENUM_P_TYPE_AARCH64
|
||||
elif self.e_machine == 'EM_MIPS':
|
||||
p_type_dict = ENUM_P_TYPE_MIPS
|
||||
|
||||
if self.elfclass == 32:
|
||||
self.Elf_Phdr = Struct('Elf_Phdr',
|
||||
Enum(self.Elf_word('p_type'), **p_type_dict),
|
||||
self.Elf_offset('p_offset'),
|
||||
self.Elf_addr('p_vaddr'),
|
||||
self.Elf_addr('p_paddr'),
|
||||
self.Elf_word('p_filesz'),
|
||||
self.Elf_word('p_memsz'),
|
||||
self.Elf_word('p_flags'),
|
||||
self.Elf_word('p_align'),
|
||||
)
|
||||
else: # 64
|
||||
self.Elf_Phdr = Struct('Elf_Phdr',
|
||||
Enum(self.Elf_word('p_type'), **p_type_dict),
|
||||
self.Elf_word('p_flags'),
|
||||
self.Elf_offset('p_offset'),
|
||||
self.Elf_addr('p_vaddr'),
|
||||
self.Elf_addr('p_paddr'),
|
||||
self.Elf_xword('p_filesz'),
|
||||
self.Elf_xword('p_memsz'),
|
||||
self.Elf_xword('p_align'),
|
||||
)
|
||||
|
||||
def _create_shdr(self):
|
||||
"""Section header parsing.
|
||||
|
||||
Depends on e_machine because of machine-specific values in sh_type.
|
||||
"""
|
||||
sh_type_dict = ENUM_SH_TYPE_BASE
|
||||
if self.e_machine == 'EM_ARM':
|
||||
sh_type_dict = ENUM_SH_TYPE_ARM
|
||||
elif self.e_machine == 'EM_X86_64':
|
||||
sh_type_dict = ENUM_SH_TYPE_AMD64
|
||||
elif self.e_machine == 'EM_MIPS':
|
||||
sh_type_dict = ENUM_SH_TYPE_MIPS
|
||||
|
||||
self.Elf_Shdr = Struct('Elf_Shdr',
|
||||
self.Elf_word('sh_name'),
|
||||
Enum(self.Elf_word('sh_type'), **sh_type_dict),
|
||||
self.Elf_xword('sh_flags'),
|
||||
self.Elf_addr('sh_addr'),
|
||||
self.Elf_offset('sh_offset'),
|
||||
self.Elf_xword('sh_size'),
|
||||
self.Elf_word('sh_link'),
|
||||
self.Elf_word('sh_info'),
|
||||
self.Elf_xword('sh_addralign'),
|
||||
self.Elf_xword('sh_entsize'),
|
||||
)
|
||||
|
||||
def _create_chdr(self):
|
||||
# Structure of compressed sections header. It is documented in Oracle
|
||||
# "Linker and Libraries Guide", Part IV ELF Application Binary
|
||||
# Interface, Chapter 13 Object File Format, Section Compression:
|
||||
# https://docs.oracle.com/cd/E53394_01/html/E54813/section_compression.html
|
||||
fields = [
|
||||
Enum(self.Elf_word('ch_type'), **ENUM_ELFCOMPRESS_TYPE),
|
||||
self.Elf_xword('ch_size'),
|
||||
self.Elf_xword('ch_addralign'),
|
||||
]
|
||||
if self.elfclass == 64:
|
||||
fields.insert(1, self.Elf_word('ch_reserved'))
|
||||
self.Elf_Chdr = Struct('Elf_Chdr', *fields)
|
||||
|
||||
def _create_rel(self):
|
||||
# r_info is also taken apart into r_info_sym and r_info_type. This is
|
||||
# done in Value to avoid endianity issues while parsing.
|
||||
if self.elfclass == 32:
|
||||
fields = [self.Elf_xword('r_info'),
|
||||
Value('r_info_sym',
|
||||
lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF),
|
||||
Value('r_info_type',
|
||||
lambda ctx: ctx['r_info'] & 0xFF)]
|
||||
elif self.e_machine == 'EM_MIPS': # ELF64 MIPS
|
||||
fields = [
|
||||
# The MIPS ELF64 specification
|
||||
# (https://www.linux-mips.org/pub/linux/mips/doc/ABI/elf64-2.4.pdf)
|
||||
# provides a non-standard relocation structure definition.
|
||||
self.Elf_word('r_sym'),
|
||||
self.Elf_byte('r_ssym'),
|
||||
self.Elf_byte('r_type3'),
|
||||
self.Elf_byte('r_type2'),
|
||||
self.Elf_byte('r_type'),
|
||||
|
||||
# Synthetize usual fields for compatibility with other
|
||||
# architectures. This allows relocation consumers (including
|
||||
# our readelf tests) to work without worrying about MIPS64
|
||||
# oddities.
|
||||
Value('r_info_sym', lambda ctx: ctx['r_sym']),
|
||||
Value('r_info_ssym', lambda ctx: ctx['r_ssym']),
|
||||
Value('r_info_type', lambda ctx: ctx['r_type']),
|
||||
Value('r_info_type2', lambda ctx: ctx['r_type2']),
|
||||
Value('r_info_type3', lambda ctx: ctx['r_type3']),
|
||||
Value('r_info',
|
||||
lambda ctx: (ctx['r_sym'] << 32)
|
||||
| (ctx['r_ssym'] << 24)
|
||||
| (ctx['r_type3'] << 16)
|
||||
| (ctx['r_type2'] << 8)
|
||||
| ctx['r_type']),
|
||||
]
|
||||
else: # Other 64 ELFs
|
||||
fields = [self.Elf_xword('r_info'),
|
||||
Value('r_info_sym',
|
||||
lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF),
|
||||
Value('r_info_type',
|
||||
lambda ctx: ctx['r_info'] & 0xFFFFFFFF)]
|
||||
|
||||
self.Elf_Rel = Struct('Elf_Rel',
|
||||
self.Elf_addr('r_offset'),
|
||||
*fields)
|
||||
|
||||
fields_and_addend = fields + [self.Elf_sxword('r_addend')]
|
||||
self.Elf_Rela = Struct('Elf_Rela',
|
||||
self.Elf_addr('r_offset'),
|
||||
*fields_and_addend
|
||||
)
|
||||
|
||||
def _create_dyn(self):
|
||||
d_tag_dict = dict(ENUM_D_TAG_COMMON)
|
||||
if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE:
|
||||
d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine])
|
||||
elif self.e_ident_osabi == 'ELFOSABI_SOLARIS':
|
||||
d_tag_dict.update(ENUM_D_TAG_SOLARIS)
|
||||
|
||||
self.Elf_Dyn = Struct('Elf_Dyn',
|
||||
Enum(self.Elf_sxword('d_tag'), **d_tag_dict),
|
||||
self.Elf_xword('d_val'),
|
||||
Value('d_ptr', lambda ctx: ctx['d_val']),
|
||||
)
|
||||
|
||||
def _create_sym(self):
|
||||
# st_info is hierarchical. To access the type, use
|
||||
# container['st_info']['type']
|
||||
st_info_struct = BitStruct('st_info',
|
||||
Enum(BitField('bind', 4), **ENUM_ST_INFO_BIND),
|
||||
Enum(BitField('type', 4), **ENUM_ST_INFO_TYPE))
|
||||
# st_other is hierarchical. To access the visibility,
|
||||
# use container['st_other']['visibility']
|
||||
st_other_struct = BitStruct('st_other',
|
||||
# https://openpowerfoundation.org/wp-content/uploads/2016/03/ABI64BitOpenPOWERv1.1_16July2015_pub4.pdf
|
||||
# See 3.4.1 Symbol Values.
|
||||
Enum(BitField('local', 3), **ENUM_ST_LOCAL),
|
||||
Padding(2),
|
||||
Enum(BitField('visibility', 3), **ENUM_ST_VISIBILITY))
|
||||
if self.elfclass == 32:
|
||||
self.Elf_Sym = Struct('Elf_Sym',
|
||||
self.Elf_word('st_name'),
|
||||
self.Elf_addr('st_value'),
|
||||
self.Elf_word('st_size'),
|
||||
st_info_struct,
|
||||
st_other_struct,
|
||||
Enum(self.Elf_half('st_shndx'), **ENUM_ST_SHNDX),
|
||||
)
|
||||
else:
|
||||
self.Elf_Sym = Struct('Elf_Sym',
|
||||
self.Elf_word('st_name'),
|
||||
st_info_struct,
|
||||
st_other_struct,
|
||||
Enum(self.Elf_half('st_shndx'), **ENUM_ST_SHNDX),
|
||||
self.Elf_addr('st_value'),
|
||||
self.Elf_xword('st_size'),
|
||||
)
|
||||
|
||||
def _create_sunw_syminfo(self):
|
||||
self.Elf_Sunw_Syminfo = Struct('Elf_Sunw_Syminfo',
|
||||
Enum(self.Elf_half('si_boundto'), **ENUM_SUNW_SYMINFO_BOUNDTO),
|
||||
self.Elf_half('si_flags'),
|
||||
)
|
||||
|
||||
def _create_gnu_verneed(self):
|
||||
# Structure of "version needed" entries is documented in
|
||||
# Oracle "Linker and Libraries Guide", Chapter 13 Object File Format
|
||||
self.Elf_Verneed = Struct('Elf_Verneed',
|
||||
self.Elf_half('vn_version'),
|
||||
self.Elf_half('vn_cnt'),
|
||||
self.Elf_word('vn_file'),
|
||||
self.Elf_word('vn_aux'),
|
||||
self.Elf_word('vn_next'),
|
||||
)
|
||||
self.Elf_Vernaux = Struct('Elf_Vernaux',
|
||||
self.Elf_word('vna_hash'),
|
||||
self.Elf_half('vna_flags'),
|
||||
self.Elf_half('vna_other'),
|
||||
self.Elf_word('vna_name'),
|
||||
self.Elf_word('vna_next'),
|
||||
)
|
||||
|
||||
def _create_gnu_verdef(self):
|
||||
# Structure of "version definition" entries are documented in
|
||||
# Oracle "Linker and Libraries Guide", Chapter 13 Object File Format
|
||||
self.Elf_Verdef = Struct('Elf_Verdef',
|
||||
self.Elf_half('vd_version'),
|
||||
self.Elf_half('vd_flags'),
|
||||
self.Elf_half('vd_ndx'),
|
||||
self.Elf_half('vd_cnt'),
|
||||
self.Elf_word('vd_hash'),
|
||||
self.Elf_word('vd_aux'),
|
||||
self.Elf_word('vd_next'),
|
||||
)
|
||||
self.Elf_Verdaux = Struct('Elf_Verdaux',
|
||||
self.Elf_word('vda_name'),
|
||||
self.Elf_word('vda_next'),
|
||||
)
|
||||
|
||||
def _create_gnu_versym(self):
|
||||
# Structure of "version symbol" entries are documented in
|
||||
# Oracle "Linker and Libraries Guide", Chapter 13 Object File Format
|
||||
self.Elf_Versym = Struct('Elf_Versym',
|
||||
Enum(self.Elf_half('ndx'), **ENUM_VERSYM),
|
||||
)
|
||||
|
||||
def _create_gnu_abi(self):
|
||||
# Structure of GNU ABI notes is documented in
|
||||
# https://code.woboq.org/userspace/glibc/csu/abi-note.S.html
|
||||
self.Elf_abi = Struct('Elf_abi',
|
||||
Enum(self.Elf_word('abi_os'), **ENUM_NOTE_ABI_TAG_OS),
|
||||
self.Elf_word('abi_major'),
|
||||
self.Elf_word('abi_minor'),
|
||||
self.Elf_word('abi_tiny'),
|
||||
)
|
||||
|
||||
def _create_gnu_property(self):
|
||||
# Structure of GNU property notes is documented in
|
||||
# https://github.com/hjl-tools/linux-abi/wiki/linux-abi-draft.pdf
|
||||
def roundup_padding(ctx):
|
||||
if self.elfclass == 32:
|
||||
return roundup(ctx.pr_datasz, 2) - ctx.pr_datasz
|
||||
return roundup(ctx.pr_datasz, 3) - ctx.pr_datasz
|
||||
|
||||
def classify_pr_data(ctx):
|
||||
if type(ctx.pr_type) is not str:
|
||||
return None
|
||||
if ctx.pr_type.startswith('GNU_PROPERTY_X86_'):
|
||||
return ('GNU_PROPERTY_X86_*', 4, 0)
|
||||
return (ctx.pr_type, ctx.pr_datasz, self.elfclass)
|
||||
|
||||
self.Elf_Prop = Struct('Elf_Prop',
|
||||
Enum(self.Elf_word('pr_type'), **ENUM_NOTE_GNU_PROPERTY_TYPE),
|
||||
self.Elf_word('pr_datasz'),
|
||||
Switch('pr_data', classify_pr_data, {
|
||||
('GNU_PROPERTY_STACK_SIZE', 4, 32): self.Elf_word('pr_data'),
|
||||
('GNU_PROPERTY_STACK_SIZE', 8, 64): self.Elf_word64('pr_data'),
|
||||
('GNU_PROPERTY_X86_*', 4, 0): self.Elf_word('pr_data'),
|
||||
},
|
||||
default=Field('pr_data', lambda ctx: ctx.pr_datasz)
|
||||
),
|
||||
Padding(roundup_padding)
|
||||
)
|
||||
|
||||
def _create_note(self, e_type=None):
|
||||
# Structure of "PT_NOTE" section
|
||||
|
||||
self.Elf_ugid = self.Elf_half if self.elfclass == 32 and self.e_machine in {
|
||||
'EM_MN10300',
|
||||
'EM_ARM',
|
||||
'EM_CRIS',
|
||||
'EM_CYGNUS_FRV',
|
||||
'EM_386',
|
||||
'EM_M32R',
|
||||
'EM_68K',
|
||||
'EM_S390',
|
||||
'EM_SH',
|
||||
'EM_SPARC',
|
||||
} else self.Elf_word
|
||||
|
||||
self.Elf_Nhdr = Struct('Elf_Nhdr',
|
||||
self.Elf_word('n_namesz'),
|
||||
self.Elf_word('n_descsz'),
|
||||
Enum(self.Elf_word('n_type'),
|
||||
**(ENUM_NOTE_N_TYPE if e_type != "ET_CORE"
|
||||
else ENUM_CORE_NOTE_N_TYPE)),
|
||||
)
|
||||
|
||||
# A process psinfo structure according to
|
||||
# http://elixir.free-electrons.com/linux/v2.6.35/source/include/linux/elfcore.h#L84
|
||||
if self.elfclass == 32:
|
||||
self.Elf_Prpsinfo = Struct('Elf_Prpsinfo',
|
||||
self.Elf_byte('pr_state'),
|
||||
String('pr_sname', 1),
|
||||
self.Elf_byte('pr_zomb'),
|
||||
self.Elf_byte('pr_nice'),
|
||||
self.Elf_xword('pr_flag'),
|
||||
self.Elf_ugid('pr_uid'),
|
||||
self.Elf_ugid('pr_gid'),
|
||||
self.Elf_word('pr_pid'),
|
||||
self.Elf_word('pr_ppid'),
|
||||
self.Elf_word('pr_pgrp'),
|
||||
self.Elf_word('pr_sid'),
|
||||
String('pr_fname', 16),
|
||||
String('pr_psargs', 80),
|
||||
)
|
||||
else: # 64
|
||||
self.Elf_Prpsinfo = Struct('Elf_Prpsinfo',
|
||||
self.Elf_byte('pr_state'),
|
||||
String('pr_sname', 1),
|
||||
self.Elf_byte('pr_zomb'),
|
||||
self.Elf_byte('pr_nice'),
|
||||
Padding(4),
|
||||
self.Elf_xword('pr_flag'),
|
||||
self.Elf_ugid('pr_uid'),
|
||||
self.Elf_ugid('pr_gid'),
|
||||
self.Elf_word('pr_pid'),
|
||||
self.Elf_word('pr_ppid'),
|
||||
self.Elf_word('pr_pgrp'),
|
||||
self.Elf_word('pr_sid'),
|
||||
String('pr_fname', 16),
|
||||
String('pr_psargs', 80),
|
||||
)
|
||||
|
||||
# A PT_NOTE of type NT_FILE matching the definition in
|
||||
# https://chromium.googlesource.com/
|
||||
# native_client/nacl-binutils/+/upstream/master/binutils/readelf.c
|
||||
# Line 15121
|
||||
self.Elf_Nt_File = Struct('Elf_Nt_File',
|
||||
self.Elf_xword("num_map_entries"),
|
||||
self.Elf_xword("page_size"),
|
||||
Array(lambda ctx: ctx.num_map_entries,
|
||||
Struct('Elf_Nt_File_Entry',
|
||||
self.Elf_addr('vm_start'),
|
||||
self.Elf_addr('vm_end'),
|
||||
self.Elf_offset('page_offset'))),
|
||||
Array(lambda ctx: ctx.num_map_entries,
|
||||
CString('filename')))
|
||||
|
||||
def _create_stabs(self):
|
||||
# Structure of one stabs entry, see binutils/bfd/stabs.c
|
||||
# Names taken from https://sourceware.org/gdb/current/onlinedocs/stabs.html#Overview
|
||||
self.Elf_Stabs = Struct('Elf_Stabs',
|
||||
self.Elf_word('n_strx'),
|
||||
self.Elf_byte('n_type'),
|
||||
self.Elf_byte('n_other'),
|
||||
self.Elf_half('n_desc'),
|
||||
self.Elf_word('n_value'),
|
||||
)
|
||||
|
||||
def _create_arm_attributes(self):
|
||||
# Structure of a build attributes subsection header. A subsection is
|
||||
# either public to all tools that process the ELF file or private to
|
||||
# the vendor's tools.
|
||||
self.Elf_Attr_Subsection_Header = Struct('Elf_Attr_Subsection',
|
||||
self.Elf_word('length'),
|
||||
self.Elf_ntbs('vendor_name',
|
||||
encoding='utf-8')
|
||||
)
|
||||
|
||||
# Structure of a build attribute tag.
|
||||
self.Elf_Attribute_Tag = Struct('Elf_Attribute_Tag',
|
||||
Enum(self.Elf_uleb128('tag'),
|
||||
**ENUM_ATTR_TAG_ARM)
|
||||
)
|
||||
|
||||
def _create_elf_hash(self):
|
||||
# Structure of the old SYSV-style hash table header. It is documented
|
||||
# in the Oracle "Linker and Libraries Guide", Part IV ELF Application
|
||||
# Binary Interface, Chapter 14 Object File Format, Section Hash Table
|
||||
# Section:
|
||||
# https://docs.oracle.com/cd/E53394_01/html/E54813/chapter6-48031.html
|
||||
|
||||
self.Elf_Hash = Struct('Elf_Hash',
|
||||
self.Elf_word('nbuckets'),
|
||||
self.Elf_word('nchains'),
|
||||
Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')),
|
||||
Array(lambda ctx: ctx['nchains'], self.Elf_word('chains')))
|
||||
|
||||
def _create_gnu_hash(self):
|
||||
# Structure of the GNU-style hash table header. Documentation for this
|
||||
# table is mostly in the GLIBC source code, a good explanation of the
|
||||
# format can be found in this blog post:
|
||||
# https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/
|
||||
self.Gnu_Hash = Struct('Gnu_Hash',
|
||||
self.Elf_word('nbuckets'),
|
||||
self.Elf_word('symoffset'),
|
||||
self.Elf_word('bloom_size'),
|
||||
self.Elf_word('bloom_shift'),
|
||||
Array(lambda ctx: ctx['bloom_size'], self.Elf_xword('bloom')),
|
||||
Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')))
|
||||
@@ -1,116 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools example: dwarf_decode_address.py
|
||||
#
|
||||
# Decode an address in an ELF file to find out which function it belongs to
|
||||
# and from which filename/line it comes in the original source file.
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
|
||||
# If pyelftools is not installed, the example can also run from the root or
|
||||
# examples/ dir of the source distribution.
|
||||
sys.path[0:0] = ['.', '..']
|
||||
|
||||
from elftools.common.py3compat import maxint, bytes2str
|
||||
from elftools.dwarf.descriptions import describe_form_class
|
||||
from elftools.elf.elffile import ELFFile
|
||||
|
||||
|
||||
def process_file(filename, address):
|
||||
print('Processing file:', filename)
|
||||
with open(filename, 'rb') as f:
|
||||
elffile = ELFFile(f)
|
||||
|
||||
if not elffile.has_dwarf_info():
|
||||
print(' file has no DWARF info')
|
||||
return
|
||||
|
||||
# get_dwarf_info returns a DWARFInfo context object, which is the
|
||||
# starting point for all DWARF-based processing in pyelftools.
|
||||
dwarfinfo = elffile.get_dwarf_info()
|
||||
|
||||
funcname = decode_funcname(dwarfinfo, address)
|
||||
file, line = decode_file_line(dwarfinfo, address)
|
||||
|
||||
print('Function:', bytes2str(funcname))
|
||||
print('File:', bytes2str(file))
|
||||
print('Line:', line)
|
||||
|
||||
|
||||
def decode_funcname(dwarfinfo, address):
|
||||
# Go over all DIEs in the DWARF information, looking for a subprogram
|
||||
# entry with an address range that includes the given address. Note that
|
||||
# this simplifies things by disregarding subprograms that may have
|
||||
# split address ranges.
|
||||
for CU in dwarfinfo.iter_CUs():
|
||||
for DIE in CU.iter_DIEs():
|
||||
try:
|
||||
if DIE.tag == 'DW_TAG_subprogram':
|
||||
lowpc = DIE.attributes['DW_AT_low_pc'].value
|
||||
|
||||
# DWARF v4 in section 2.17 describes how to interpret the
|
||||
# DW_AT_high_pc attribute based on the class of its form.
|
||||
# For class 'address' it's taken as an absolute address
|
||||
# (similarly to DW_AT_low_pc); for class 'constant', it's
|
||||
# an offset from DW_AT_low_pc.
|
||||
highpc_attr = DIE.attributes['DW_AT_high_pc']
|
||||
highpc_attr_class = describe_form_class(highpc_attr.form)
|
||||
if highpc_attr_class == 'address':
|
||||
highpc = highpc_attr.value
|
||||
elif highpc_attr_class == 'constant':
|
||||
highpc = lowpc + highpc_attr.value
|
||||
else:
|
||||
print('Error: invalid DW_AT_high_pc class:',
|
||||
highpc_attr_class)
|
||||
continue
|
||||
|
||||
if lowpc <= address < highpc:
|
||||
return DIE.attributes['DW_AT_name'].value
|
||||
except KeyError:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def decode_file_line(dwarfinfo, address):
|
||||
# Go over all the line programs in the DWARF information, looking for
|
||||
# one that describes the given address.
|
||||
for CU in dwarfinfo.iter_CUs():
|
||||
# First, look at line programs to find the file/line for the address
|
||||
lineprog = dwarfinfo.line_program_for_CU(CU)
|
||||
prevstate = None
|
||||
for entry in lineprog.get_entries():
|
||||
# We're interested in those entries where a new state is assigned
|
||||
if entry.state is None:
|
||||
continue
|
||||
# Looking for a range of addresses in two consecutive states that
|
||||
# contain the required address.
|
||||
if prevstate and prevstate.address <= address < entry.state.address:
|
||||
filename = lineprog['file_entry'][prevstate.file - 1].name
|
||||
line = prevstate.line
|
||||
return filename, line
|
||||
if entry.state.end_sequence:
|
||||
# For the state with `end_sequence`, `address` means the address
|
||||
# of the first byte after the target machine instruction
|
||||
# sequence and other information is meaningless. We clear
|
||||
# prevstate so that it's not used in the next iteration. Address
|
||||
# info is used in the above comparison to see if we need to use
|
||||
# the line information for the prevstate.
|
||||
prevstate = None
|
||||
else:
|
||||
prevstate = entry.state
|
||||
return None, None
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if sys.argv[1] == '--test':
|
||||
process_file(sys.argv[2], 0x400503)
|
||||
sys.exit(0)
|
||||
|
||||
if len(sys.argv) < 3:
|
||||
print('Expected usage: {0} <address> <executable>'.format(sys.argv[0]))
|
||||
sys.exit(1)
|
||||
addr = int(sys.argv[1], 0)
|
||||
process_file(sys.argv[2], addr)
|
||||
@@ -1,66 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools example: dwarf_die_tree.py
|
||||
#
|
||||
# In the .debug_info section, Dwarf Information Entries (DIEs) form a tree.
|
||||
# pyelftools provides easy access to this tree, as demonstrated here.
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
|
||||
# If pyelftools is not installed, the example can also run from the root or
|
||||
# examples/ dir of the source distribution.
|
||||
sys.path[0:0] = ['.', '..']
|
||||
|
||||
from elftools.elf.elffile import ELFFile
|
||||
|
||||
|
||||
def process_file(filename):
|
||||
print('Processing file:', filename)
|
||||
with open(filename, 'rb') as f:
|
||||
elffile = ELFFile(f)
|
||||
|
||||
if not elffile.has_dwarf_info():
|
||||
print(' file has no DWARF info')
|
||||
return
|
||||
|
||||
# get_dwarf_info returns a DWARFInfo context object, which is the
|
||||
# starting point for all DWARF-based processing in pyelftools.
|
||||
dwarfinfo = elffile.get_dwarf_info()
|
||||
|
||||
for CU in dwarfinfo.iter_CUs():
|
||||
# DWARFInfo allows to iterate over the compile units contained in
|
||||
# the .debug_info section. CU is a CompileUnit object, with some
|
||||
# computed attributes (such as its offset in the section) and
|
||||
# a header which conforms to the DWARF standard. The access to
|
||||
# header elements is, as usual, via item-lookup.
|
||||
print(' Found a compile unit at offset %s, length %s' % (
|
||||
CU.cu_offset, CU['unit_length']))
|
||||
|
||||
# Start with the top DIE, the root for this CU's DIE tree
|
||||
top_DIE = CU.get_top_DIE()
|
||||
print(' Top DIE with tag=%s' % top_DIE.tag)
|
||||
|
||||
# We're interested in the filename...
|
||||
print(' name=%s' % top_DIE.get_full_path())
|
||||
|
||||
# Display DIEs recursively starting with top_DIE
|
||||
die_info_rec(top_DIE)
|
||||
|
||||
|
||||
def die_info_rec(die, indent_level=' '):
|
||||
""" A recursive function for showing information about a DIE and its
|
||||
children.
|
||||
"""
|
||||
print(indent_level + 'DIE tag=%s' % die.tag)
|
||||
child_indent = indent_level + ' '
|
||||
for child in die.iter_children():
|
||||
die_info_rec(child, child_indent)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if sys.argv[1] == '--test':
|
||||
for filename in sys.argv[2:]:
|
||||
process_file(filename)
|
||||
@@ -1,95 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools example: dwarf_lineprogram_filenames.py
|
||||
#
|
||||
# In the .debug_line section, the Dwarf line program generates a matrix
|
||||
# of address-source references. This example demonstrates accessing the state
|
||||
# of each line program entry to retrieve the underlying filenames.
|
||||
#
|
||||
# William Woodruff (william@yossarian.net)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from __future__ import print_function
|
||||
from collections import defaultdict
|
||||
import os
|
||||
import sys
|
||||
|
||||
# If pyelftools is not installed, the example can also run from the root or
|
||||
# examples/ dir of the source distribution.
|
||||
sys.path[0:0] = ['.', '..']
|
||||
|
||||
from elftools.elf.elffile import ELFFile
|
||||
|
||||
|
||||
def process_file(filename):
|
||||
print('Processing file:', filename)
|
||||
with open(filename, 'rb') as f:
|
||||
elffile = ELFFile(f)
|
||||
|
||||
if not elffile.has_dwarf_info():
|
||||
print(' file has no DWARF info')
|
||||
return
|
||||
|
||||
dwarfinfo = elffile.get_dwarf_info()
|
||||
for CU in dwarfinfo.iter_CUs():
|
||||
print(' Found a compile unit at offset %s, length %s' % (
|
||||
CU.cu_offset, CU['unit_length']))
|
||||
|
||||
# Every compilation unit in the DWARF information may or may not
|
||||
# have a corresponding line program in .debug_line.
|
||||
line_program = dwarfinfo.line_program_for_CU(CU)
|
||||
if line_program is None:
|
||||
print(' DWARF info is missing a line program for this CU')
|
||||
continue
|
||||
|
||||
# Print a reverse mapping of filename -> #entries
|
||||
line_entry_mapping(line_program)
|
||||
|
||||
|
||||
def line_entry_mapping(line_program):
|
||||
filename_map = defaultdict(int)
|
||||
|
||||
# The line program, when decoded, returns a list of line program
|
||||
# entries. Each entry contains a state, which we'll use to build
|
||||
# a reverse mapping of filename -> #entries.
|
||||
lp_entries = line_program.get_entries()
|
||||
for lpe in lp_entries:
|
||||
# We skip LPEs that don't have an associated file.
|
||||
# This can happen if instructions in the compiled binary
|
||||
# don't correspond directly to any original source file.
|
||||
if not lpe.state or lpe.state.file == 0:
|
||||
continue
|
||||
filename = lpe_filename(line_program, lpe.state.file)
|
||||
filename_map[filename] += 1
|
||||
|
||||
for filename, lpe_count in filename_map.items():
|
||||
print(" filename=%s -> %d entries" % (filename, lpe_count))
|
||||
|
||||
|
||||
def lpe_filename(line_program, file_index):
|
||||
# Retrieving the filename associated with a line program entry
|
||||
# involves two levels of indirection: we take the file index from
|
||||
# the LPE to grab the file_entry from the line program header,
|
||||
# then take the directory index from the file_entry to grab the
|
||||
# directory name from the line program header. Finally, we
|
||||
# join the (base) filename from the file_entry to the directory
|
||||
# name to get the absolute filename.
|
||||
lp_header = line_program.header
|
||||
file_entries = lp_header["file_entry"]
|
||||
|
||||
# File and directory indices are 1-indexed.
|
||||
file_entry = file_entries[file_index - 1]
|
||||
dir_index = file_entry["dir_index"]
|
||||
|
||||
# A dir_index of 0 indicates that no absolute directory was recorded during
|
||||
# compilation; return just the basename.
|
||||
if dir_index == 0:
|
||||
return file_entry.name.decode()
|
||||
|
||||
directory = lp_header["include_directory"][dir_index - 1]
|
||||
return os.path.join(directory, file_entry.name).decode()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if sys.argv[1] == '--test':
|
||||
for filename in sys.argv[2:]:
|
||||
process_file(filename)
|
||||
@@ -1,111 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools example: dwarf_location_info.py
|
||||
#
|
||||
# Examine DIE entries which have either location list values or location
|
||||
# expression values and decode that information.
|
||||
#
|
||||
# Location information can either be completely contained within a DIE
|
||||
# (using 'DW_FORM_exprloc' in DWARFv4 or 'DW_FORM_block1' in earlier
|
||||
# versions) or be a reference to a location list contained within
|
||||
# the .debug_loc section (using 'DW_FORM_sec_offset' in DWARFv4 or
|
||||
# 'DW_FORM_data4' / 'DW_FORM_data8' in earlier versions).
|
||||
#
|
||||
# The LocationParser object parses the DIE attributes and handles both
|
||||
# formats.
|
||||
#
|
||||
# The directory 'test/testfiles_for_location_info' contains test files with
|
||||
# location information represented in both DWARFv4 and DWARFv2 forms.
|
||||
#
|
||||
# Eli Bendersky (eliben@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
|
||||
# If pyelftools is not installed, the example can also run from the root or
|
||||
# examples/ dir of the source distribution.
|
||||
sys.path[0:0] = ['.', '..']
|
||||
|
||||
from elftools.common.py3compat import itervalues
|
||||
from elftools.elf.elffile import ELFFile
|
||||
from elftools.dwarf.descriptions import (
|
||||
describe_DWARF_expr, set_global_machine_arch)
|
||||
from elftools.dwarf.locationlists import (
|
||||
LocationEntry, LocationExpr, LocationParser)
|
||||
|
||||
def process_file(filename):
|
||||
print('Processing file:', filename)
|
||||
with open(filename, 'rb') as f:
|
||||
elffile = ELFFile(f)
|
||||
|
||||
if not elffile.has_dwarf_info():
|
||||
print(' file has no DWARF info')
|
||||
return
|
||||
|
||||
# get_dwarf_info returns a DWARFInfo context object, which is the
|
||||
# starting point for all DWARF-based processing in pyelftools.
|
||||
dwarfinfo = elffile.get_dwarf_info()
|
||||
|
||||
# The location lists are extracted by DWARFInfo from the .debug_loc
|
||||
# section, and returned here as a LocationLists object.
|
||||
location_lists = dwarfinfo.location_lists()
|
||||
|
||||
# This is required for the descriptions module to correctly decode
|
||||
# register names contained in DWARF expressions.
|
||||
set_global_machine_arch(elffile.get_machine_arch())
|
||||
|
||||
# Create a LocationParser object that parses the DIE attributes and
|
||||
# creates objects representing the actual location information.
|
||||
loc_parser = LocationParser(location_lists)
|
||||
|
||||
for CU in dwarfinfo.iter_CUs():
|
||||
# DWARFInfo allows to iterate over the compile units contained in
|
||||
# the .debug_info section. CU is a CompileUnit object, with some
|
||||
# computed attributes (such as its offset in the section) and
|
||||
# a header which conforms to the DWARF standard. The access to
|
||||
# header elements is, as usual, via item-lookup.
|
||||
print(' Found a compile unit at offset %s, length %s' % (
|
||||
CU.cu_offset, CU['unit_length']))
|
||||
|
||||
# A CU provides a simple API to iterate over all the DIEs in it.
|
||||
for DIE in CU.iter_DIEs():
|
||||
# Go over all attributes of the DIE. Each attribute is an
|
||||
# AttributeValue object (from elftools.dwarf.die), which we
|
||||
# can examine.
|
||||
for attr in itervalues(DIE.attributes):
|
||||
# Check if this attribute contains location information
|
||||
if loc_parser.attribute_has_location(attr, CU['version']):
|
||||
print(' DIE %s. attr %s.' % (DIE.tag, attr.name))
|
||||
loc = loc_parser.parse_from_attribute(attr,
|
||||
CU['version'])
|
||||
# We either get a list (in case the attribute is a
|
||||
# reference to the .debug_loc section) or a LocationExpr
|
||||
# object (in case the attribute itself contains location
|
||||
# information).
|
||||
if isinstance(loc, LocationExpr):
|
||||
print(' %s' % (
|
||||
describe_DWARF_expr(loc.loc_expr,
|
||||
dwarfinfo.structs, CU.cu_offset)))
|
||||
elif isinstance(loc, list):
|
||||
print(show_loclist(loc,
|
||||
dwarfinfo,
|
||||
' ', CU.cu_offset))
|
||||
|
||||
def show_loclist(loclist, dwarfinfo, indent, cu_offset):
|
||||
""" Display a location list nicely, decoding the DWARF expressions
|
||||
contained within.
|
||||
"""
|
||||
d = []
|
||||
for loc_entity in loclist:
|
||||
if isinstance(loc_entity, LocationEntry):
|
||||
d.append('%s <<%s>>' % (
|
||||
loc_entity,
|
||||
describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs, cu_offset)))
|
||||
else:
|
||||
d.append(str(loc_entity))
|
||||
return '\n'.join(indent + s for s in d)
|
||||
|
||||
if __name__ == '__main__':
|
||||
if sys.argv[1] == '--test':
|
||||
for filename in sys.argv[2:]:
|
||||
process_file(filename)
|
||||
@@ -1,116 +0,0 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
# elftools example: dwarf_pubnames_types.py
|
||||
#
|
||||
# Dump the contents of .debug_pubnames and .debug_pubtypes sections from the
|
||||
# ELF file.
|
||||
#
|
||||
# Note: sample_exe64.elf doesn't have a .debug_pubtypes section.
|
||||
#
|
||||
# Vijay Ramasami (rvijayc@gmail.com)
|
||||
# This code is in the public domain
|
||||
#-------------------------------------------------------------------------------
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
|
||||
# If pyelftools is not installed, the example can also run from the root or
|
||||
# examples/ dir of the source distribution.
|
||||
sys.path[0:0] = ['.', '..']
|
||||
|
||||
from elftools.elf.elffile import ELFFile
|
||||
from elftools.common.py3compat import bytes2str
|
||||
|
||||
def process_file(filename):
|
||||
print('Processing file:', filename)
|
||||
with open(filename, 'rb') as f:
|
||||
elffile = ELFFile(f)
|
||||
|
||||
if not elffile.has_dwarf_info():
|
||||
print(' file has no DWARF info')
|
||||
return
|
||||
|
||||
# get_dwarf_info returns a DWARFInfo context object, which is the
|
||||
# starting point for all DWARF-based processing in pyelftools.
|
||||
dwarfinfo = elffile.get_dwarf_info()
|
||||
|
||||
# get .debug_pubtypes section.
|
||||
pubnames = dwarfinfo.get_pubnames()
|
||||
if pubnames is None:
|
||||
print('ERROR: No .debug_pubnames section found in ELF.')
|
||||
else:
|
||||
print('%d entries found in .debug_pubnames' % len(pubnames))
|
||||
|
||||
print('Trying pubnames example ...')
|
||||
for name, entry in pubnames.items():
|
||||
print('%s: cu_ofs = %d, die_ofs = %d' %
|
||||
(name, entry.cu_ofs, entry.die_ofs))
|
||||
|
||||
# get the actual CU/DIE that has this information.
|
||||
print('Fetching the actual die for %s ...' % name)
|
||||
for cu in dwarfinfo.iter_CUs():
|
||||
if cu.cu_offset == entry.cu_ofs:
|
||||
for die in cu.iter_DIEs():
|
||||
if die.offset == entry.die_ofs:
|
||||
print('Die Name: %s' %
|
||||
bytes2str(die.attributes['DW_AT_name'].value))
|
||||
|
||||
# dump all entries in .debug_pubnames section.
|
||||
print('Dumping .debug_pubnames table ...')
|
||||
print('-' * 66)
|
||||
print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS'))
|
||||
print('-' * 66)
|
||||
for (name, entry) in pubnames.items():
|
||||
print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs))
|
||||
print('-' * 66)
|
||||
|
||||
# get .debug_pubtypes section.
|
||||
pubtypes = dwarfinfo.get_pubtypes()
|
||||
if pubtypes is None:
|
||||
print('ERROR: No .debug_pubtypes section found in ELF')
|
||||
else:
|
||||
print('%d entries found in .debug_pubtypes' % len(pubtypes))
|
||||
|
||||
for name, entry in pubtypes.items():
|
||||
print('%s: cu_ofs = %d, die_ofs = %d' %
|
||||
(name, entry.cu_ofs, entry.die_ofs))
|
||||
|
||||
# get the actual CU/DIE that has this information.
|
||||
print('Fetching the actual die for %s ...' % name)
|
||||
for cu in dwarfinfo.iter_CUs():
|
||||
if cu.cu_offset == entry.cu_ofs:
|
||||
for die in cu.iter_DIEs():
|
||||
if die.offset == entry.die_ofs:
|
||||
print('Die Name: %s' %
|
||||
bytes2str(die.attributes['DW_AT_name'].value))
|
||||
die_info_rec(die)
|
||||
|
||||
# dump all entries in .debug_pubtypes section.
|
||||
print('Dumping .debug_pubtypes table ...')
|
||||
print('-' * 66)
|
||||
print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS'))
|
||||
print('-' * 66)
|
||||
for (name, entry) in pubtypes.items():
|
||||
print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs))
|
||||
print('-' * 66)
|
||||
|
||||
|
||||
def die_info_rec(die, indent_level=' '):
|
||||
""" A recursive function for showing information about a DIE and its
|
||||
children.
|
||||
"""
|
||||
print(indent_level + 'DIE tag=%s, attrs=' % die.tag)
|
||||
for name, val in die.attributes.items():
|
||||
print(indent_level + ' %s = %s' % (name, val))
|
||||
child_indent = indent_level + ' '
|
||||
for child in die.iter_children():
|
||||
die_info_rec(child, child_indent)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if sys.argv[1] == '--test':
|
||||
process_file(sys.argv[2])
|
||||
sys.exit(0)
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print('Expected usage: {0} <executable>'.format(sys.argv[0]))
|
||||
sys.exit(1)
|
||||
process_file(sys.argv[1])
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user