Merge pull request #4 from JHUAPL/pyghidra_sept25

Pyghidra sept25
This commit is contained in:
evm-sec
2025-09-27 11:49:38 -04:00
committed by GitHub
147 changed files with 1565 additions and 77620 deletions

View File

@@ -1,848 +0,0 @@
#-------------------------------------------------------------------------------
#
# IDAPython script to show many features extracted from debugging strings. It's
# also able to rename functions based on the guessed function name & rename
# functions based on the source code file they belong to.
#
# Copyright (c) 2018-2019, Joxean Koret
# Licensed under the GNU Affero General Public License v3.
#
#-------------------------------------------------------------------------------
from __future__ import print_function
import os
import re
from collections import Counter
import idaapi
from idc import *
from idaapi import *
from idautils import *
from PyQt5 import QtCore, QtGui, QtWidgets
try:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
has_nltk = True
except ImportError:
has_nltk = False
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Short program name used as prefix for window titles.
PROGRAM_NAME = "IMS"
#-------------------------------------------------------------------------------
# Matches a source file path with a known source-code extension embedded in a
# string (e.g. "src/foo/bar.cpp:123"); group 1 is the full path.
SOURCE_FILES_REGEXP = r"([a-z_\/\\][a-z0-9_/\\:\-\.@]+\.(c|cc|cxx|c\+\+|cpp|h|hpp|m|rs|go|ml))($|:| )"
# Map of displayed language name -> file extensions considered part of it.
LANGS = {}
LANGS["C/C++"] = ["c", "cc", "cxx", "cpp", "h", "hpp"]
LANGS["C"] = ["c"]
LANGS["C++"] = ["cc", "cxx", "cpp", "hpp", "c++"]
LANGS["Obj-C"] = ["m"]
LANGS["Rust"] = ["rs"]
LANGS["Golang"] = ["go"]
LANGS["OCaml"] = ["ml"]
#-------------------------------------------------------------------------------
# Matches identifiers that look like (possibly C++-qualified) function names.
FUNCTION_NAMES_REGEXP = r"([a-z_][a-z0-9_]+((::)+[a-z_][a-z0-9_]+)*)"
# Matches C++ class-member names such as "ns::Class::~method(" (optional paren).
CLASS_NAMES_REGEXP = r"([a-z_][a-z0-9_]+(::(<[a-z0-9_]+>|~{0,1}[a-z0-9_]+))+)\({0,1}"
# Common English/programming words that must never be accepted as a guessed
# function name, even when they match FUNCTION_NAMES_REGEXP.
NOT_FUNCTION_NAMES = ["copyright", "char", "bool", "int", "unsigned", "long",
  "double", "float", "signed", "license", "version", "cannot", "error",
  "invalid", "null", "warning", "general", "argument", "written", "report",
  "failed", "assert", "object", "integer", "unknown", "localhost", "native",
  "memory", "system", "write", "read", "open", "close", "help", "exit", "test",
  "return", "libs", "home", "ambiguous", "internal", "request", "inserting",
  "deleting", "removing", "updating", "adding", "assertion", "flags",
  "overflow", "enabled", "disabled", "enable", "disable", "virtual", "client",
  "server", "switch", "while", "offset", "abort", "panic", "static", "updated",
  "pointer", "reason", "month", "year", "week", "hour", "minute", "second",
  'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
  'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
  'september', 'october', 'november', 'december', "arguments", "corrupt",
  "corrupted", "default", "success", "expecting", "missing", "phrase",
  "unrecognized", "undefined",
]
#-------------------------------------------------------------------------------
# Token -> set of NLTK part-of-speech tags seen for it (filled by
# nltk_preprocess); TOKEN_TYPES is the set of tags we accept (nouns, verbs,
# adjectives).
FOUND_TOKENS = {}
TOKEN_TYPES = ["NN", "NNS", "NNP", "JJ", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
def nltk_preprocess(strings):
    """Part-of-speech tag every function-name-looking token in *strings*.

    Results are accumulated in the global FOUND_TOKENS map
    (lower-cased token -> set of NLTK POS tags). Silently a no-op when
    NLTK is not installed (has_nltk is False).

    Fix: the original used a bare ``except:`` to initialize missing map
    entries, which could also swallow unrelated errors; use
    ``dict.setdefault`` instead.
    """
    if not has_nltk:
        return
    blob = "\n".join(map(str, list(strings)))
    tokens = re.findall(FUNCTION_NAMES_REGEXP, blob)
    words = [token[0] for token in tokens]
    for word, tag in nltk.pos_tag(words):
        FOUND_TOKENS.setdefault(word.lower(), set()).add(tag)
#-------------------------------------------------------------------------------
def get_strings(strtypes=None):
    """Return an idautils.Strings object set up for the given string types.

    *strtypes* defaults to [0, 1] (C and UTF-16 strings). Fix: the original
    used a mutable default argument ``[0, 1]``, which is shared across
    calls; a None sentinel avoids that pitfall while keeping the same
    default behaviour.
    """
    if strtypes is None:
        strtypes = [0, 1]
    strings = Strings()
    strings.setup(strtypes=strtypes)
    return strings
#-------------------------------------------------------------------------------
def get_lang(full_path):
    """Return the LANGS language name matching *full_path*'s extension,
    or None when the extension is not recognized."""
    ext = os.path.splitext(full_path.lower())[1].strip(".")
    for lang, extensions in LANGS.items():
        if ext in extensions:
            return lang
    return None
#-------------------------------------------------------------------------------
def add_source_file_to(d, src_langs, refs, full_path, s):
    """Record every reference in *refs* under *full_path* in map *d* and
    bump the per-language counter. Returns the updated (d, src_langs)."""
    entries = d.setdefault(full_path, [])
    lang = get_lang(full_path)
    if lang is not None:
        src_langs[lang] += 1
    for ref in refs:
        entries.append([ref, get_func_name(ref), str(s)])
    return d, src_langs
#-------------------------------------------------------------------------------
def get_source_strings(min_len = 4, strtypes = [0, 1]):
    """Scan the database for strings that reference source files.

    Returns (d, strings) where *d* maps a full source path to a list of
    [ref_ea, function_name, string_text] entries and *strings* is the
    idautils.Strings object that was scanned. Also prints per-language
    statistics when anything was found.

    NOTE(review): the mutable default ``strtypes=[0, 1]`` is shared across
    calls; harmless here because it is never mutated, but worth cleaning up.
    """
    strings = get_strings(strtypes)
    # Search string references to source files
    src_langs = Counter()
    total_files = 0
    d = {}
    for s in strings:
        if s and s.length > min_len:
            ret = re.findall(SOURCE_FILES_REGEXP, str(s), re.IGNORECASE)
            if ret and len(ret) > 0:
                refs = list(DataRefsTo(s.ea))
                if len(refs) > 0:
                    total_files += 1
                    # Group 0 of the first match is the full source path.
                    full_path = ret[0][0]
                    d, src_langs = add_source_file_to(d, src_langs, refs, full_path, s)
    # Use the loaded debugging information (if any) to find source files
    for f in list(Functions()):
        done = False
        func = idaapi.get_func(f)
        if func is not None:
            cfg = idaapi.FlowChart(func)
            for block in cfg:
                # NOTE(review): `done` is never set to True anywhere, so this
                # early-exit is currently dead — confirm intent.
                if done:
                    break
                for head in list(Heads(block.start_ea, block.end_ea)):
                    full_path = get_sourcefile(head)
                    if full_path is not None:
                        total_files += 1
                        d, src_langs = add_source_file_to(d, src_langs, [head], full_path, "Symbol: %s" % full_path)
    nltk_preprocess(strings)
    # Report the languages distribution of the discovered source files.
    if len(d) > 0 and total_files > 0:
        print("Programming languages found:\n")
        for key in src_langs:
            print(" %s %f%%" % (key.ljust(10), src_langs[key] * 100. / total_files))
        print("\n")
    return d, strings
#-------------------------------------------------------------------------------
def handler(item, column_no):
    """Double-click callback for tree items: jump to the item's address."""
    target = item.ea
    if not is_mapped(target):
        return
    jumpto(target)
#-------------------------------------------------------------------------------
class CBaseTreeViewer(PluginForm):
    """Dockable form showing source files and their referencing functions
    as a two-level tree; double-clicking a leaf jumps to its address."""
    def populate_tree(self, d):
        """Fill the tree from *d* (path -> [[ea, func_name, string], ...])."""
        # Clear previous items
        self.tree.clear()
        # Build the tree: one top-level item per source file, one child per
        # referencing location.
        for key in d:
            src_file_item = QtWidgets.QTreeWidgetItem(self.tree)
            src_file_item.setText(0, key)
            # File-level rows carry no address; BADADDR keeps handler() inert.
            src_file_item.ea = BADADDR
            for ea, name, str_data in d[key]:
                item = QtWidgets.QTreeWidgetItem(src_file_item)
                item.setText(0, "%s [0x%08x] %s" % (name, ea, str_data))
                item.ea = ea
        self.tree.itemDoubleClicked.connect(handler)
    def OnCreate(self, form):
        """Build the Qt widgets when IDA creates the form."""
        # Get parent widget
        self.parent = idaapi.PluginForm.FormToPyQtWidget(form)
        # Create tree control
        self.tree = QtWidgets.QTreeWidget()
        self.tree.setHeaderLabels(("Names",))
        self.tree.setColumnWidth(0, 100)
        # Lazily compute the data when the caller did not supply it.
        if self.d is None:
            self.d, self.s = get_source_strings()
        d = self.d
        # Create layout
        layout = QtWidgets.QVBoxLayout()
        layout.addWidget(self.tree)
        self.populate_tree(d)
        # Populate PluginForm
        self.parent.setLayout(layout)
    def Show(self, title, d = None):
        """Display the form; *d* may be precomputed get_source_strings() data."""
        self.d = d
        return PluginForm.Show(self, title, options = PluginForm.WOPN_PERSIST)
#-------------------------------------------------------------------------------
def basename(path):
    """Return the final path component, accepting both '/' and '\\' as
    separators so mixed Windows/POSIX paths work regardless of host OS."""
    last_sep = max(path.rfind("/"), path.rfind("\\"))
    return path[last_sep + 1:]
#-------------------------------------------------------------------------------
class command_handler_t(ida_kernwin.action_handler_t):
    """Generic IDA action handler that forwards activation to the owning
    object's OnCommand() method with a stored command id."""
    def __init__(self, obj, cmd_id, num_args = 1):
        # Object whose OnCommand() will be invoked on activation.
        self.obj = obj
        # Numeric command identifier passed back to OnCommand().
        self.cmd_id = cmd_id
        # Calling convention selector: 1 -> OnCommand(cmd_id),
        # anything else -> OnCommand(obj, cmd_id) (chooser-style).
        self.num_args = num_args
        ida_kernwin.action_handler_t.__init__(self)
    def activate(self, ctx):
        """Dispatch to the owner's OnCommand using the configured arity."""
        if self.num_args == 1:
            return self.obj.OnCommand(self.cmd_id)
        return self.obj.OnCommand(self.obj, self.cmd_id)
    def update(self, ctx):
        # The action is always enabled regardless of context.
        return idaapi.AST_ENABLE_ALWAYS
#-------------------------------------------------------------------------------
class CIDAMagicStringsChooser(Choose):
    """Choose subclass that supports dynamically registered right-click
    popup commands (via AddCommand / OnPopup)."""
    def __init__(self, title, columns, options):
        Choose.__init__(self, title, columns, options)
        # Registered commands: [index, action_name, menu_name, shortcut].
        self.actions = []
    def AddCommand(self, menu_name, shortcut=None):
        """Register a popup menu command; returns its numeric command id."""
        action_name = "IDAMagicStrings:%s" % menu_name.replace(" ", "")
        self.actions.append([len(self.actions), action_name, menu_name, shortcut])
        return len(self.actions)-1
    def OnPopup(self, form, popup_handle):
        """Attach every registered command to the popup menu as it opens."""
        for num, action_name, menu_name, shortcut in self.actions:
            # num_args=2 -> command_handler_t calls OnCommand(obj, cmd_id).
            handler = command_handler_t(self, num, 2)
            desc = ida_kernwin.action_desc_t(action_name, menu_name, handler, shortcut)
            ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)
#-------------------------------------------------------------------------------
class CSourceFilesChooser(CIDAMagicStringsChooser):
    """Chooser listing each source file path referenced from the binary,
    with popup commands to rename functions after their source file."""
    def __init__(self, title):
        columns = [ ["Line", 4], ["Full path", 20], ["Filename", 15], ["EA", 16], ["Function Name", 18], ["String data", 40], ]
        CIDAMagicStringsChooser.__init__(self, title, columns, Choose.CH_MULTI)
        self.n = 0
        self.icon = -1
        self.selcount = 0
        self.modal = False
        self.items = []
        self.selected_items = []
        # Build one row per (source file, referencing location) pair.
        d, s = get_source_strings()
        keys = list(d.keys())
        keys.sort()
        i = 0
        for key in keys:
            for ea, name, str_data in d[key]:
                line = ["%03d" % i, key, basename(key), "0x%08x" % ea, name, str_data]
                self.items.append(line)
                i += 1
        self.d = d
        self.s = s
    def show(self):
        """Display the chooser and register the rename popup commands.
        Returns self.d on success, False when the chooser failed to show."""
        ret = self.Show(False)
        if ret < 0:
            return False
        self.cmd_all = self.AddCommand("Rename all to filename_EA")
        self.cmd_all_sub = self.AddCommand("Rename all sub_* to filename_EA")
        self.cmd_selected = self.AddCommand("Rename selected to filename_EA")
        self.cmd_selected_sub = self.AddCommand("Rename selected sub_* to filename_EA")
        return self.d
    def OnCommand(self, n, cmd_id):
        # Additional right-click-menu command handlers.
        # NOTE(review): if cmd_id matches none of the registered commands,
        # `l` is never bound and rename_items(l) raises NameError — confirm
        # whether a trailing `else: raise` (as in CCandidateFunctionNames)
        # was intended.
        if cmd_id == self.cmd_all:
            l = list(range(len(self.items)))
        elif cmd_id == self.cmd_all_sub:
            l = []
            for i, item in enumerate(self.items):
                if item[4] is not None and item[4].startswith("sub_"):
                    l.append(i)
        elif cmd_id == self.cmd_selected:
            l = list(self.selected_items)
        elif cmd_id == self.cmd_selected_sub:
            l = []
            for i, item in enumerate(self.items):
                if item[4].startswith("sub_"):
                    if i in self.selected_items:
                        l.append(i)
        self.rename_items(l)
    def rename_items(self, items):
        """Rename the functions of the given row indexes to <filename>_<ea>."""
        for i in items:
            item = self.items[i]
            ea = int(item[3], 16)
            # Drop the extension: "foo.c" -> "foo".
            candidate, _ = os.path.splitext(item[2])
            name = "%s_%08x" % (candidate, ea)
            func = idaapi.get_func(ea)
            if func is not None:
                # Rename the owning function, not the referencing address.
                ea = func.start_ea
                set_name(ea, name, SN_CHECK)
            else:
                line = "WARNING: Cannot rename 0x%08x to %s because there is no function associated."
                print(line % (ea, name))
    def OnGetLine(self, n):
        return self.items[n]
    def OnGetSize(self):
        n = len(self.items)
        return n
    def OnDeleteLine(self, n):
        del self.items[n]
        return n
    def OnRefresh(self, n):
        return n
    def OnSelectLine(self, n):
        """Jump to the address of the first selected row."""
        self.selcount += 1
        row = self.items[n[0]]
        ea = int(row[3], 16)
        if is_mapped(ea):
            jumpto(ea)
    def OnSelectionChange(self, sel_list):
        self.selected_items = sel_list
#-------------------------------------------------------------------------------
class CCandidateFunctionNames(CIDAMagicStringsChooser):
    """Chooser listing guessed function names next to the current function
    names, with popup commands to apply the candidates."""
    def __init__(self, title, l):
        """*l* rows are [ea, current_func_name, candidate_name, strings]."""
        columns = [ ["Line", 4], ["EA", 16], ["Function Name", 25], ["Candidate", 25], ["FP?", 2], ["Strings", 50], ]
        CIDAMagicStringsChooser.__init__(self, title, columns, Choose.CH_MULTI)
        self.n = 0
        self.icon = -1
        self.selcount = 0
        self.modal = False
        self.items = []
        self.selected_items = []
        i = 0
        for item in l:
            bin_func = item[1]
            candidate = item[2]
            # "FP?" column: "1" when the candidate looks like a false positive.
            seems_false = str(int(self.looks_false(bin_func, candidate)))
            line = ["%03d" % i, "0x%08x" % item[0], item[1], item[2], seems_false, ", ".join(item[3]) ]
            self.items.append(line)
            i += 1
        # Show likely-good candidates (FP? == "0") first.
        self.items = sorted(self.items, key=lambda x: x[4])
    def show(self):
        """Display the chooser and register the rename popup commands."""
        ret = self.Show(False)
        if ret < 0:
            return False
        self.cmd_rename_all = self.AddCommand("Rename all functions")
        self.cmd_rename_sub = self.AddCommand("Rename all sub_* functions")
        self.cmd_rename_selected = self.AddCommand("Rename selected function(s)")
        self.cmd_rename_sub_sel = self.AddCommand("Rename selected sub_* function(s)")
    def OnCommand(self, n, cmd_id):
        # Additional right-click-menu command handlers.
        if cmd_id == self.cmd_rename_all:
            l = list(range(len(self.items)))
        elif cmd_id == self.cmd_rename_selected:
            l = list(self.selected_items)
        elif cmd_id == self.cmd_rename_sub:
            l = []
            for i, item in enumerate(self.items):
                if item[2].startswith("sub_"):
                    l.append(i)
        elif cmd_id == self.cmd_rename_sub_sel:
            l = []
            for i, item in enumerate(self.items):
                if item[2].startswith("sub_"):
                    if i in self.selected_items:
                        l.append(i)
        else:
            raise Exception("Unknown menu command!")
        self.rename_items(l)
    def rename_items(self, items):
        """Apply the candidate name to each of the given row indexes."""
        for i in items:
            item = self.items[i]
            ea = int(item[1], 16)
            candidate = item[3]
            set_name(ea, candidate, SN_CHECK)
    def OnGetLine(self, n):
        return self.items[n]
    def OnGetSize(self):
        n = len(self.items)
        return n
    def OnDeleteLine(self, n):
        del self.items[n]
        return n
    def OnRefresh(self, n):
        return n
    def OnSelectLine(self, n):
        """Jump to the address of the first selected row."""
        self.selcount += 1
        row = self.items[n[0]]
        ea = int(row[1], 16)
        if is_mapped(ea):
            jumpto(ea)
    def OnSelectionChange(self, sel_list):
        self.selected_items = sel_list
    def looks_false(self, bin_func, candidate):
        """Heuristic false-positive test: a candidate looks wrong when the
        function already has a real (non sub_*) name and neither name is a
        substring of the other (case-insensitive)."""
        bin_func = bin_func.lower()
        candidate = candidate.lower()
        if not bin_func.startswith("sub_"):
            if bin_func.find(candidate) == -1 and candidate.find(bin_func) == -1:
                return True
        return False
    def OnGetLineAttr(self, n):
        """Highlight likely false positives with a distinct row color."""
        item = self.items[n]
        bin_func = item[2]
        candidate = item[3]
        if self.looks_false(bin_func, candidate):
            return [0x026AFD, 0]
        return [0xFFFFFF, 0]
#-------------------------------------------------------------------------------
class CClassXRefsChooser(idaapi.Choose):
    """Minimal modal chooser listing (address, string) xref pairs."""

    def __init__(self, title, items):
        columns = [["Address", 8], ["String", 80]]
        idaapi.Choose.__init__(self, title, columns)
        self.items = items

    def OnGetLine(self, n):
        return self.items[n]

    def OnGetSize(self):
        return len(self.items)
#-------------------------------------------------------------------------------
def get_string(ea):
    """Read the string literal at *ea* and always return a (possibly
    empty) ``str``, preferring the longer of the C and the UTF-16
    interpretations.

    Fix: when the C-string read returned None, the original still
    evaluated ``len(tmp)`` while comparing against the UTF-16 candidate,
    raising TypeError; treat a failed read as length 0 instead.
    """
    tmp = idc.get_strlit_contents(ea, strtype=0)
    if tmp is None or len(tmp) == 1:
        unicode_tmp = idc.get_strlit_contents(ea, strtype=1)
        current_len = 0 if tmp is None else len(tmp)
        if unicode_tmp is not None and len(unicode_tmp) > current_len:
            tmp = unicode_tmp
    if tmp is None:
        tmp = ""
    elif not isinstance(tmp, str):
        # get_strlit_contents returns bytes; normalize to str.
        tmp = tmp.decode("utf-8")
    return tmp
#-------------------------------------------------------------------------------
def classes_handler(item, column_no):
    """Double-click handler for the classes tree: only leaf nodes jump."""
    if item.childCount() != 0:
        return
    target = item.ea
    if is_mapped(target):
        jumpto(target)
#-------------------------------------------------------------------------------
class CClassesTreeViewer(PluginForm):
    """Dockable form displaying the discovered class hierarchy as a tree
    of fully-qualified names; double-clicking a leaf jumps to its EA."""
    def populate_tree(self):
        """Build tree nodes from self.classes ([[ea, tokens], ...])."""
        # Clear previous items
        self.tree.clear()
        self.nodes = {}
        # Sort by the first name component so siblings group together.
        self.classes = sorted(self.classes, key=lambda x: x[1][0])
        for ea, tokens in self.classes:
            for i, node_name in enumerate(tokens):
                # NOTE(review): tokens.index() returns the FIRST occurrence,
                # so a path with a repeated component (e.g. A::A) computes the
                # wrong prefix; enumerate's `i` looks like the intended index.
                full_name = "::".join(tokens[:tokens.index(node_name)+1])
                if full_name not in self.nodes:
                    if full_name.find("::") == -1:
                        # Top-level component: attach directly to the tree.
                        parent = self.tree
                    else:
                        parent_name = "::".join(tokens[:tokens.index(node_name)])
                        try:
                            parent = self.nodes[parent_name]
                        except:
                            # NOTE(review): if the lookup fails, `parent` stays
                            # unbound (or stale) and the next line may misplace
                            # or crash the node — confirm intended handling.
                            print("Error adding node?", self.nodes, parent_name, str(sys.exc_info()[1]))
                    node = QtWidgets.QTreeWidgetItem(parent)
                    node.setText(0, full_name)
                    node.ea = ea
                    self.nodes[full_name] = node
        self.tree.itemDoubleClicked.connect(classes_handler)
    def OnCreate(self, form):
        """Build the Qt widgets when IDA creates the form."""
        # Get parent widget
        self.parent = idaapi.PluginForm.FormToPyQtWidget(form)
        # Create tree control
        self.tree = QtWidgets.QTreeWidget()
        self.tree.setHeaderLabels(("Classes",))
        self.tree.setColumnWidth(0, 100)
        # Create layout
        layout = QtWidgets.QVBoxLayout()
        layout.addWidget(self.tree)
        self.populate_tree()
        # Populate PluginForm
        self.parent.setLayout(layout)
    def Show(self, title, classes):
        """Display the form for the given class list ([[ea, tokens], ...])."""
        self.classes = classes
        return PluginForm.Show(self, title, options = PluginForm.WOPN_PERSIST)
#-------------------------------------------------------------------------------
class CClassesGraph(idaapi.GraphViewer):
    """Graph view of the discovered class hierarchy, with Graphviz/GML
    export commands and xref navigation on double-click."""
    def __init__(self, title, classes, final_list):
        """*classes* is [[ea, tokens], ...]; *final_list* is extended in
        place with [func_ea, func_name, qualified_name, [string]] rows for
        strings referenced from exactly one function."""
        idaapi.GraphViewer.__init__(self, title)
        self.selected = None
        self.classes = classes
        self.final_list = final_list
        self.nodes = {}       # full qualified name -> graph node id
        self.nodes_ea = {}    # node id -> set of EAs contributing to it
        self.graph = {}       # node id -> list of child node ids
        self.last_cmd = 0
        dones = set()
        for ea, tokens in self.classes:
            refs = DataRefsTo(ea)
            refs_funcs = set()
            for ref in refs:
                func = idaapi.get_func(ref)
                if func is not None:
                    refs_funcs.add(func.start_ea)
            # Only strings referenced from exactly one function give a
            # reliable function-name candidate.
            if len(refs_funcs) == 1:
                func_ea = list(refs_funcs)[0]
                if func_ea in dones:
                    continue
                dones.add(func_ea)
                func_name = get_func_name(func_ea)
                tmp = demangle_name(func_name, INF_SHORT_DN)
                if tmp is not None:
                    func_name = tmp
                element = [func_ea, func_name, "::".join(tokens), [get_string(ea)]]
                self.final_list.append(element)
    def OnRefresh(self):
        """Rebuild all nodes and parent->child edges from self.classes."""
        self.Clear()
        self.graph = {}
        for ea, tokens in self.classes:
            for node_name in tokens:
                # NOTE(review): tokens.index() picks the first occurrence, so
                # repeated components in one qualified name compute the wrong
                # prefix — same hazard as in CClassesTreeViewer.
                full_name = "::".join(tokens[:tokens.index(node_name)+1])
                if full_name not in self.nodes:
                    node_id = self.AddNode(node_name)
                    self.nodes[full_name] = node_id
                    self.graph[node_id] = []
                else:
                    node_id = self.nodes[full_name]
                try:
                    self.nodes_ea[node_id].add(ea)
                except KeyError:
                    self.nodes_ea[node_id] = set([ea])
                parent_name = "::".join(tokens[:tokens.index(node_name)])
                if parent_name != "" and parent_name in self.nodes:
                    parent_id = self.nodes[parent_name]
                    self.AddEdge(parent_id, node_id)
                    self.graph[parent_id].append(node_id)
        return True
    def OnGetText(self, node_id):
        return str(self[node_id])
    def OnDblClick(self, node_id):
        """Jump to the node's single EA, or show a chooser when several
        addresses contributed to this node."""
        eas = self.nodes_ea[node_id]
        if len(eas) == 1:
            jumpto(list(eas)[0])
        else:
            items = []
            for ea in eas:
                func = idaapi.get_func(ea)
                if func is None:
                    # Not inside a function: describe the address by its
                    # string contents, or its disassembly as a fallback.
                    s = get_strlit_contents(ea)
                    s = s.decode("utf-8")
                    if s is not None and s.find(str(self[node_id])) == -1:
                        s = get_strlit_contents(ea, strtype=1)
                    else:
                        s = GetDisasm(ea)
                else:
                    s = get_func_name(func.start_ea)
                items.append(["0x%08x" % ea, repr(s)])
            chooser = CClassXRefsChooser("XRefs to %s" % str(self[node_id]), items)
            idx = chooser.Show(1)
            if idx > -1:
                jumpto(list(eas)[idx])
    def OnCommand(self, cmd_id):
        """Handle the export commands registered in OnPopup."""
        if self.cmd_dot == cmd_id:
            # Export the hierarchy as a Graphviz .dot digraph.
            fname = ask_file(1, "*.dot", "Dot file name")
            if fname:
                f = open(fname, "w")
                buf = 'digraph G {\n graph [overlap=scale]; node [fontname=Courier]; \n\n'
                for n in self.graph:
                    name = str(self[n])
                    buf += ' a%s [shape=box, label = "%s", color="blue"]\n' % (n, name)
                buf += '\n'
                dones = set()
                for node_id in self.graph:
                    for child_id in self.graph[node_id]:
                        # Emit each edge only once.
                        s = str([node_id, child_id])
                        if s in dones:
                            continue
                        dones.add(s)
                        buf += " a%s -> a%s [style = bold]\n" % (node_id, child_id)
                buf += '\n'
                buf += '}'
                f.write(buf)
                f.close()
        elif self.cmd_gml == cmd_id:
            # Export the hierarchy in GML format.
            fname = ask_file(1, "*.gml", "GML file name")
            if fname:
                f = open(fname, "w")
                buf = 'graph [ \n'
                for n in self.graph:
                    name = str(self[n])
                    buf += 'node [ id %s \n label "%s"\n fill "blue" \n type "oval"\n LabelGraphics [ type "text" ] ] \n' % (n, name)
                buf += '\n'
                dones = set()
                for node_id in self.graph:
                    for child_id in self.graph[node_id]:
                        # Emit each edge only once.
                        s = str([node_id, child_id])
                        if s in dones:
                            continue
                        dones.add(s)
                        buf += " edge [ source %s \n target %s ]\n" % (node_id, child_id)
                buf += '\n'
                buf += ']'
                f.write(buf)
                f.close()
    def OnPopup(self, form, popup_handle):
        """Attach the two export commands to the right-click popup."""
        self.cmd_dot = 0
        cmd_handler = command_handler_t(self, self.cmd_dot)
        desc = ida_kernwin.action_desc_t("IDAMagicStrings:GraphvizExport", "Export to Graphviz",
                                         cmd_handler, "F2")
        ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)
        self.cmd_gml = 1
        cmd_handler = command_handler_t(self, self.cmd_gml)
        desc = ida_kernwin.action_desc_t("IDAMagicStrings:GmlExport","Export to GML",
                                         cmd_handler, "F3")
        ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)
    def OnClick(self, item):
        self.selected = item
        return True
    def Show(self):
        if not idaapi.GraphViewer.Show(self):
            return False
        return True
#-------------------------------------------------------------------------------
def show_tree(d = None):
    """Open the source-code tree viewer, optionally pre-populated with *d*."""
    viewer = CBaseTreeViewer()
    viewer.Show(PROGRAM_NAME + ": Source code tree", d)
#-------------------------------------------------------------------------------
def seems_function_name(candidate):
    """Heuristic filter: True when *candidate* plausibly names a function
    (long enough, not a known common word, not all upper-case)."""
    if len(candidate) < 6 or candidate.lower() in NOT_FUNCTION_NAMES:
        return False
    # All-uppercase identifiers (constants/macros) are rejected.
    return candidate.upper() != candidate
#-------------------------------------------------------------------------------
class CFakeString:
    """Minimal stand-in mimicking an idautils string item: an effective
    address plus a textual payload."""

    def __init__(self, ea, s):
        self.ea = ea  # address associated with the pseudo-string
        self.s = s    # the textual payload

    def __str__(self):
        return str(self.s)

    def __repr__(self):
        return str(self)
#-------------------------------------------------------------------------------
def find_function_names(strings_list):
    """Guess function names and class hierarchies from debugging strings.

    Returns a 4-tuple:
      func_names       - function start EA -> set of candidate names
      raw_func_strings - function start EA -> set of raw strings seen
      rarity           - candidate name -> set of functions it appeared in
                         (a candidate unique to one function is reliable)
      class_objects    - [[ea, qualified-name tokens], ...] for C++ names

    Fixes: the dict-initialization try/except blocks (one of which used a
    bare ``except:`` that could hide real errors) are replaced with
    ``dict.setdefault``.
    """
    rarity = {}
    func_names = {}
    raw_func_strings = {}
    class_objects = []
    class_tmp_names = []
    # Harvest already-known C++-style symbol names (demangling plain ones)
    # as pseudo-strings so they feed the class hierarchy detection too.
    for ea, name in Names():
        func = idaapi.get_func(ea)
        if func is None:
            continue
        true_name = name
        if name.find("::") == -1:
            name = demangle_name(name, INF_SHORT_DN)
            if name is not None and name != "" and name.find("::") > -1:
                true_name = name
        if true_name.find("::") > -1:
            class_tmp_names.append(CFakeString(ea, true_name))
    class_tmp_names.extend(strings_list)
    for s in class_tmp_names:
        # Find class members
        class_ret = re.findall(CLASS_NAMES_REGEXP, str(s), re.IGNORECASE)
        for element in class_ret:
            candidate = element[0]
            if candidate.find("::") > 0:
                tokens = candidate.split("::")
                # NOTE(review): this compares the bare token list against
                # [ea, tokens] pairs, so it never matches; preserved as-is
                # to avoid changing behaviour.
                if tokens not in class_objects:
                    class_objects.append([s.ea, tokens])
        # Find plain function names
        ret = re.findall(FUNCTION_NAMES_REGEXP, str(s), re.IGNORECASE)
        if len(ret) > 0:
            candidate = ret[0][0]
            if seems_function_name(candidate):
                refs = DataRefsTo(s.ea)
                for ref in refs:
                    func = idaapi.get_func(ref)
                    if func is None:
                        continue
                    key = func.start_ea
                    if has_nltk:
                        # Only accept words NLTK tagged as noun/verb/adjective.
                        if candidate not in FOUND_TOKENS:
                            continue
                        if not any(t in FOUND_TOKENS[candidate] for t in TOKEN_TYPES):
                            continue
                    rarity.setdefault(candidate, set()).add(key)
                    func_names.setdefault(key, set()).add(candidate)
                    raw_func_strings.setdefault(key, set()).add(str(s))
    return func_names, raw_func_strings, rarity, class_objects
#-------------------------------------------------------------------------------
def show_function_names(strings_list):
    """Run the name-guessing analysis and display the result viewers
    (class graph, class tree, and the candidate-function-names chooser)."""
    l = find_function_names(strings_list)
    func_names, raw_func_strings, rarity, classes = l
    final_list = []
    for key in func_names:
        # Keep only candidates that appear in exactly one function; a
        # function with exactly one such candidate gets a row.
        candidates = set()
        for candidate in func_names[key]:
            if len(rarity[candidate]) == 1:
                candidates.add(candidate)
        if len(candidates) == 1:
            raw_strings = list(raw_func_strings[key])
            raw_strings = list(map(repr, raw_strings))
            func_name = get_func_name(key)
            tmp = demangle_name(func_name, INF_SHORT_DN)
            if tmp is not None:
                func_name = tmp
            final_list.append([key, func_name, list(candidates)[0], raw_strings])
    if len(classes) > 0:
        # The graph constructor may append further rows to final_list.
        class_graph = CClassesGraph(PROGRAM_NAME + ": Classes Hierarchy", classes, final_list)
        class_graph.Show()
        class_tree = CClassesTreeViewer()
        class_tree.Show(PROGRAM_NAME + ": Classes Tree", classes)
        final_list = class_graph.final_list
    if len(final_list) > 0:
        cfn = CCandidateFunctionNames(PROGRAM_NAME + ": Candidate Function Names", final_list)
        cfn.show()
#-------------------------------------------------------------------------------
def main():
    """Entry point: show the source files chooser, the source tree and the
    candidate function names derived from the database's strings."""
    ch = CSourceFilesChooser(PROGRAM_NAME + ": Source code files")
    if len(ch.items) > 0:
        ch.show()
    d = ch.d
    if len(d) > 0:
        show_tree(d)
    # Uses the strings already collected by the chooser's constructor.
    show_function_names(ch.s)

if __name__ == "__main__":
    main()

View File

@@ -1,153 +0,0 @@
##############################################################################################
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
#################################################################
#### CodeCut - Detecting Object File Boundaries in IDA Pro ####
#################################################################
**** Terminology ****
I tend to use the term "module" for a set of related functions within a binary
that came from a single object file. So you will see the terms "module" and
"object file" used interchangeably in the CC source and documentation.
**** Dependencies ****
CodeCut relies on:
Natural Language Toolkit (NLTK) - https://www.nltk.org
Snap.py - https://snap.stanford.edu/snappy/
**** Source Files ****
cc_main.py - Main entry point - simply load this up with the
"File -> Script file..." option in IDA.
lfa.py - Analysis engine for LFA.
mc.py - Analysis engine for MaxCut.
basicutils_7x.py - Provides an API to IDA - maybe one day we'll get this
ported to Ghidra!
map_read.py - For research purposes - compares a ground truth .map
file (from ld) to a .map file from CC and produces
a score. See RECON slides or the code itself for more
info. You need to add the option -Map=<target>.map to
the linker options in a Makefile to get a .map file.
The syntax to map_read is:
python map_read.py <ground truth file> <CC map file>
**** MaxCut Parameters ****
- Right now there is only one parameter for MaxCut, a value for the maximum
module size (currently set to 16K).
**** LFA Parameters & Interpolation ****
A couple areas for research:
- The idea behind LFA is that we throw out "external" calls - we can't
determine this exactly in a binary so we throw out calls that are above a
certain threshold. This is set to 4K in the code but it could be tweaked.
- There is a threshold set for edge detection - plus a little bit of extra
logic (value has to be positive and 2 of last 3 values were negative). You
can either vary this threshold or write your own edge_detect() function.
- Currently "calls to" affinity and "calls from" affinity are treated as
separate scores. If one of these scores is zero an interpolation from
the previous score is used - just a simple linear equation assuming
decreasing scores. This could be improved a number of ways but could
be replaced with an actual interpolation between scores.
- If both "calls to" affinity and "calls from" affinity for a function are 0
the function is skipped and is essentially treated like it's not there.
This happens for functions with no references or where all references are
above the "external" threshold. This means there can be gaps between the
modules in the output list.
- The portion of code that tries to name object files based on common strings
is completely researchy and open ended. Lots of things to play with there.
**** MaxCut Parameters & Interpolation ****
- The only real parameter for MaxCut is a THRESHOLD variable that corresponds to the size at which the algorithm will stop subdividing modules. A threshold of 4K (0x1000) seems to provide similar sized modules to LFA. A threshold of 8K (0x2000) seems to be a good upper bound. A good area of research would be making this not a static cutoff but maybe deciding to stop subdividing based on a connectedness measurement or something along those lines.
**** Output Files ****
CodeCut produces 7 files:
<target>_cc_results.csv - Raw score output from LFA and MaxCut, including where
edges are detected. Graphs can fairly easily be
generated in your favorite spreadsheet program.
<target>_{lfa,mc}_labels.py - Script that can be used to label your DB with CC's
output. After determining module boundaries, CC
attempts to guess the name (fun!) by looking at
common strings used by the module, for both the
LFA and MaxCut module lists. You can use this
script as a scratchpad to name unnamed modules as you
determine what they are, or you can also use other
functions in basicutils to change module names later.
<target>_{lfa,mc}_map.map - A .map file similar to the output from the ld. This is
for the purposes of comparing to a ground truth .map
file to test CC when you have source code.
<target>_{lfa,mc}_mod_graph.gv - a Graphviz graph file of the module relationships
This is a directed graph where a -> b indicates
that a function in module a calls a function in
module b. This may take a long time to render if
you have a large binary (more than a couple
hundred modules detected). For smaller binaries
this can pretty clearly communicate the software
architecture immediately. For larger binaries
this will show you graphically the most heavily
used modules in the binary.
You can use sfdp to render the graph into a PNG file with a command line like:
sfdp -x -Goverlap=scale -Tpng -Goutputorder=edgesfirst -Nstyle=filled -Nfillcolor=white <target>_lfa_mod_graph.gv > <target>.png
A really nice hierarchical graph can be obtained by adding:
ranksep=0
nodesep=0
to the .gv file and running:
dot -x -Goverlap=scale -Tpng -Goutputorder=edgesfirst -Nstyle=filled -Nfillcolor=white <target>.gv > <target>.png
**** "Canonical" Names ****
NOTE on IDA and Canonical Names:
AFAICT IDA doesn't really have a concept of source file / object files in
the database (it does with source-level debugging but that's it I think).
In my ideal world, I'd write a nice GUI plugin to manage the object file
names and regions, and then you'd be able to select how to display object/
function names in the disassembly. For now though I have to save both the
object name and function name in the filename.
For now, my hacky workaround is to name modules and functions in camel case
(e.g. ReadNetworkString, or HtmlParsingEngine), and then combine them together
in a nasty snake case "canonical" format, that looks like:
<ObjectName>_<FunctionName>_<Address>
That way I can parse out function and object names to be able to rename
objects. I am open to suggestions on better ways to do this.

View File

@@ -1,366 +0,0 @@
##############################################################################################
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
# basicutils - a version-agnostic API for IDA Pro with some (slightly) higher level functionality
# This is the 7.x version - see basicutils_6x for the 6.x version
import os
import ida_bytes
import ida_funcs
import ida_nalt
import ida_ua
import ida_name
import idc
import struct
import idautils
import ida_idaapi
import ida_segment
import re
BADADDR = ida_idaapi.BADADDR
def SegByName(n):
    """Return (start_ea, end_ea) of the segment named *n*, or
    (BADADDR, BADADDR) when no such segment exists."""
    seg = ida_segment.get_segm_by_name(n)
    if seg and seg.start_ea != ida_idaapi.BADADDR:
        return (seg.start_ea, seg.end_ea)
    return (ida_idaapi.BADADDR, ida_idaapi.BADADDR)
def GetFunctionName(x):
    # Version-agnostic wrapper around idc.get_func_name().
    return idc.get_func_name(x)
def GetInputFile():
    # Version-agnostic wrapper around idc.get_root_filename().
    return idc.get_root_filename()
def GetIdbFile():
    # Version-agnostic wrapper around idc.get_idb_path().
    return idc.get_idb_path()
def GetRootName():
    # Path of the input binary placed next to the IDB (IDB dir + input name).
    return os.path.join(os.path.dirname(GetIdbFile()), os.path.basename(GetInputFile()))
def NextFunction(x):
    # Start EA of the next function after address x (BADADDR when none).
    return idc.get_next_func(x)
def PrevFunction(x):
    # Start EA of the previous function before address x (BADADDR when none).
    return idc.get_prev_func(x)
# Maximum opcode length in bytes (presumably the x86 15-byte limit — unused
# in this view; confirm before relying on it).
MAX_OPCODE_LEN = 15

def PrevInstr(ea):
    """Return the address of the instruction preceding *ea*."""
    # TODO this will return an inst_t type. Need to figure out how to populate it/make workflow happy
    out = ida_ua.insn_t()
    ida_ua.decode_prev_insn(out, ea)
    return out.ea
def CodeRefsTo(target):
    """Code references to *target*, excluding ordinary fall-through flow."""
    return idautils.CodeRefsTo(target, 0)
def ForEveryUniqXrefTo(target, fun):
    """Call fun(xref) for each code xref to *target*, skipping consecutive
    xrefs that originate in the same function.

    NOTE(review): dedup only compares against the immediately preceding
    xref's function start, so non-adjacent xrefs from one function are all
    visited -- confirm this is the intended "uniq" semantics.
    """
    prev_func = 0
    for xref in idautils.CodeRefsTo(target, 0):
        if idc.get_func_attr(xref, idc.FUNCATTR_START) != prev_func:
            fun(xref)
        prev_func = idc.get_func_attr(xref, idc.FUNCATTR_START)
def ForEveryXrefTo(target, fun):
    """Call fun(xref) for every code xref to *target*."""
    for xref in idautils.CodeRefsTo(target, 0):
        fun(xref)
def ForEveryUniqXrefToD(target, fun):
    """Like ForEveryUniqXrefTo, but calls fun(xref, target)."""
    prev_func = 0
    for xref in idautils.CodeRefsTo(target, 0):
        if idc.get_func_attr(xref, idc.FUNCATTR_START) != prev_func:
            fun(xref, target)
        prev_func = idc.get_func_attr(xref, idc.FUNCATTR_START)
def ForEveryXrefToD(target, fun):
    """Call fun(xref, target) for every code xref to *target*."""
    for xref in idautils.CodeRefsTo(target, 0):
        fun(xref, target)
def ForEveryFuncInDb(fun):
    """Apply fun(ea) to the start address of every function in the database."""
    ea = NextFunction(0)
    while ea != ida_idaapi.BADADDR:
        fun(ea)
        ea = NextFunction(ea)
def ForEveryFuncInSeg(seg, fun):
    """Apply fun(ea) to the start of every function in segment *seg*.

    Fixes: the original ignored the *seg* argument and always looked up
    ".text", and printed every address (debug leftover).  When the segment
    is not found we fall back to iterating every function in the database
    (end stays BADADDR, matching the original fallback).
    """
    start, end = SegByName(seg)
    if start == BADADDR:
        start = NextFunction(0)
        end = BADADDR
    f = start
    while f < end:
        fun(f)
        f = NextFunction(f)
def NFuncUp(fun, n):
    """Return the start of the function *n* functions before *fun*.

    Returns BADADDR if the database runs out before *n* steps.
    """
    steps = 0
    ea = fun
    while steps < n and ea != ida_idaapi.BADADDR:
        ea = PrevFunction(ea)
        steps += 1
    return ea
def NFuncDown(fun, n):
    """Return the start of the function *n* functions after *fun*.

    Returns BADADDR if the database runs out before *n* steps.
    """
    steps = 0
    ea = fun
    while steps < n and ea != ida_idaapi.BADADDR:
        ea = NextFunction(ea)
        steps += 1
    return ea
def FuncMidPt(fun):
    """Return the address midway through the function containing *fun*.

    Fixes: the original used "/" which yields a float under Python 3;
    integer division keeps the result a valid int effective address.
    """
    fstart = idc.get_func_attr(fun, idc.FUNCATTR_START)
    fend = idc.get_func_attr(fun, idc.FUNCATTR_END)
    return fstart + ((fend - fstart) // 2)
def FuncXrefsFrom(fun):
    """Return the set of function entry points called from function *fun*.

    Only code references that land exactly on a function start are kept.
    """
    callees = set()
    for insn_ea in idautils.FuncItems(fun):
        for ref in idautils.CodeRefsFrom(insn_ea, 0):
            if ref == idc.get_func_attr(ref, idc.FUNCATTR_START):
                callees.add(ref)
    return callees
def XrefFromRange(fun):
    """Return (lowest, highest) callee entry point of *fun*, or (0, 0) if it calls nothing."""
    callees = FuncXrefsFrom(fun)
    return (min(callees), max(callees)) if callees else (0, 0)
def ProgramAddrRange():
    """Span from the first function to the last function in the database.

    NOTE(review): in IDA 7, ida_funcs.get_prev_func/get_next_func return
    func_t objects rather than addresses -- confirm this subtraction
    operates on the intended types.
    """
    return ida_funcs.get_prev_func(ida_idaapi.BADADDR) - ida_funcs.get_next_func(0)
def MemCopy(dest, src, length):
    """Patch *length* bytes at *dest* with the bytes currently stored at *src*."""
    for offset in range(length):
        ida_bytes.patch_byte(dest + offset, ida_bytes.get_byte(src + offset))
def PrefixRange(start, end, prefix):
    """Prepend *prefix* to every still-unnamed ("sub_") function name in [start, end)."""
    ea = start
    while ea < end:
        name = idc.get_func_name(ea)
        if name.startswith("sub_"):
            renamed = prefix + name
            print("Renaming %s to %s\n" % (name, renamed))
            ida_name.set_name(ea, renamed)
        ea = NextFunction(ea)
def snakeToCamelCase(s):
    """Convert a snake_case identifier *s* to camelCase.

    Leading underscores are stripped; each remaining underscore is dropped
    and the following character upper-cased.

    Fixes: the original raised IndexError when the name ended with an
    underscore (it unconditionally read f[x+1]); a trailing underscore is
    now simply dropped.  Also removes an unused counter variable.
    """
    f = s.lstrip("_")
    nf = ""
    x = 0
    while x < len(f):
        if f[x] == "_" and x + 1 < len(f):
            nf += f[x + 1].upper()
            x += 2
        else:
            if f[x] != "_":  # trailing underscore: drop it
                nf += f[x]
            x += 1
    return nf
def isSnakeCase(s):
    """True if *s* is snake_case: two or more alnum words joined by underscores.

    Fixes: the pattern used "\\Z" in a non-raw string, an invalid escape
    sequence under Python 3; now a raw string.
    """
    return re.match(r"[a-zA-Z0-9]+(_[a-zA-Z0-9]+)+\Z", s) is not None
#Todo - right now this is going to miss something like FooBARFunction
def isCamelCase(s):
    """True if *s* is CamelCase: two or more [A-Z][a-z0-9]+ words.

    Fixes: the pattern used "\\Z" in a non-raw string, an invalid escape
    sequence under Python 3; now a raw string.
    """
    return re.match(r"([A-Z][a-z0-9]+)([A-Z][a-z0-9]+)+\Z", s) is not None
#Todo - weed out if it's all uppercase or all uppercase and _, etc.
def isUCSnakeCase(s):
    """True if *s* is UPPER_SNAKE_CASE: uppercase/digit words joined by underscores.

    Fixes: the pattern used "\\Z" in a non-raw string, an invalid escape
    sequence under Python 3; now a raw string.
    """
    return re.match(r"[A-Z0-9]+(_[A-Z0-9]+)+\Z", s) is not None
def isPlausibleFunction(s):
    """True if *s* plausibly names a function: snake_case (but not
    UPPER_SNAKE_CASE, which suggests a macro/constant) or CamelCase."""
    if isSnakeCase(s):
        return not isUCSnakeCase(s)
    return isCamelCase(s)
def PrependStrToFuncName(f, s):
    """Prepend string *s* to the current name of the function at *f*."""
    ida_name.set_name(f, s + idc.get_func_name(f))
#The "canonical" name format (for now) is <module name>_<func name>_<address>
#where <module_name> and <func_name> are in camel case.
#This is not ideal for a number of reasons but this is a workaround for now
#Return just the "function name" part of the canonical name
def GetCanonicalName(f):
n = idc.get_func_name(f)
parts = n.split("_")
if len(parts) == 3:
return parts[1]
else:
return None
#Put function in canonical format, given the function name and module name
def NameCanonical(f, mod_name, func_name):
    """Force-rename the function at *f* to the canonical <mod>_<func>_<addr> form."""
    canonical = "%s_%s_%08x" % (mod_name, func_name, f)
    print("Renaming %s to %s\n" % (idc.get_func_name(f), canonical))
    ida_name.force_name(f, canonical)
#Put function in canonical format when it doesn't have a name, but you know the module name
def RenameFuncWithAddr(f, s):
    """Canonically rename the (unnamed) function at *f* as <s>_unk_<address>."""
    NameCanonical(f, s, "unk")
#Use this if you have pre-existing named functions in the DB that are in non-canonical format
def RenameRangeWithAddr(start, end, s):
    """Canonicalize every function in [start, end] under module name *s*.

    Unnamed ("sub_") functions become <s>_unk_<addr>; named ones keep their
    existing name as the <func> part.
    """
    ea = start
    while ea <= end:
        name = idc.get_func_name(ea)
        if name.startswith("sub_"):
            RenameFuncWithAddr(ea, s)
        else:
            NameCanonical(ea, s, name)
        ea = NextFunction(ea)
#Rename a function in canonical format without changing the module name
def CanonicalFuncRename(f, name):
    """Replace the <func> part of the canonical name at *f* with *name*."""
    old = idc.get_func_name(f)
    mod_part = old.split("_")[0]
    new_name = "%s_%s_%08x" % (mod_part, name, f)
    print("Renaming %s to %s\n" % (old, new_name))
    ida_name.set_name(f, new_name)
#Rename the module name without changing the function name
def RenameFuncWithNewMod(f, mod):
    """Replace the <mod> part of the canonical name at *f* with *mod*."""
    old = idc.get_func_name(f)
    func_part = old.split("_")[1]
    new_name = "%s_%s_%08x" % (mod, func_part, f)
    print("Renaming %s to %s\n" % (old, new_name))
    ida_name.set_name(f, new_name)
#Rename a module (all functions that start with <mod>_)
def RenameMod(orig, new):
    """Move every function whose name starts with "<orig>_" into module *new*."""
    ea = idc.get_next_func(0)
    while ea != BADADDR:
        if idc.get_func_name(ea).startswith(orig + "_"):
            RenameFuncWithNewMod(ea, new)
        ea = NextFunction(ea)
#Just rename the module over a given range (can be used to split a module and give part a new name)
def RenameModRange(start, end, new):
    """Give every function in [start, end] the module name *new*, keeping func names."""
    ea = start
    while ea <= end:
        RenameFuncWithNewMod(ea, new)
        ea = NextFunction(ea)
#Given a range of functions, some of which may have names and module names
# and a module name, put names in canonical format
def CanonicalizeRange(start, end, mod):
    """Put every function name in [start, end] into canonical <mod>_<func>_<addr> form."""
    ea = start
    while ea <= end:
        name = idc.get_func_name(ea)
        # Names already prefixed with the module are assumed canonical.
        if not name.startswith(mod + "_"):
            if name.startswith("sub_"):
                RenameFuncWithAddr(ea, mod)
            elif "_" in name:
                # snake_case name: convert to camelCase for the <func> part.
                NameCanonical(ea, mod, snakeToCamelCase(name))
            else:
                NameCanonical(ea, mod, name)
        ea = NextFunction(ea)
#Returns a string that is the concatenation of all of the string references from a function, separated by <sep>
#Iterates through every item in function and looks for data references that are strings
def CompileTextFromFunction(f, sep):
    """Concatenate every string literal referenced by function *f*, separated by *sep*.

    Fixes: the original called idc.GetStrLitContents(), an IDA 6.x API that
    no longer exists in 7.x, and concatenated its bytes result onto a str.
    Now uses this module's GetStrLitContents() helper and decodes, matching
    CompileTextFromRange().
    """
    s = ""
    for item in idautils.FuncItems(f):
        for ref in idautils.DataRefsFrom(item):
            t = ida_nalt.get_str_type(ref)
            # 0 == STRTYPE_C (8-bit), 3 == STRTYPE_C_16 (wide)
            if t == 0 or t == 3:
                lit = GetStrLitContents(ref)
                if lit:
                    s += " " + sep + " " + lit.decode("utf-8")
    return s
#Returns a string which is the concatenation all of the string references
# for an address range in the program, separated by <sep>
#Similar to above, but iterates over the whole set of functions in the given range
def CompileTextFromRange(start, end, sep):
    """Concatenate string literals referenced by each function in [start, end],
    separated by *sep*."""
    text = ""
    ea = start
    while ea <= end:
        for item in idautils.FuncItems(ea):
            for ref in idautils.DataRefsFrom(item):
                strtype = ida_nalt.get_str_type(ref)
                # 0 == STRTYPE_C (8-bit), 3 == STRTYPE_C_16 (wide)
                if strtype in (0, 3):
                    text += " " + sep + " " + GetStrLitContents(ref).decode("utf-8")
        ea = NextFunction(ea)
    return text
#Returns a string which is a concatenation of all the function names in the given range
# separated by <sep>
def CompileFuncNamesFromRangeAsText(start, end, sep):
    """Concatenate every user-visible (non "sub_") function name in [start, end],
    separated by *sep*."""
    names = ""
    ea = start
    while ea <= end:
        name = idc.get_func_name(ea)
        if not name.startswith("sub_"):
            names += " " + sep + " " + name
        ea = NextFunction(ea)
    return names
#helper function which checks for both ASCII and Unicode strings at the given ea
def GetStrLitContents(ea):
    """Return the string literal at *ea* as bytes, trying UTF-16 first, then 8-bit C.

    Fixes: on failure the original returned the str "" while successful
    lookups return bytes; callers immediately .decode() the result, so the
    failure sentinel is now b"" for a consistent bytes return type.
    """
    length = ida_bytes.get_max_strlit_length(ea, ida_nalt.STRTYPE_C_16)
    if length > 0:
        # A non-zero UTF-16 length means this is likely a wide string.
        return ida_bytes.get_strlit_contents(ea, length, ida_nalt.STRTYPE_C_16)
    # If we didn't get a good length out of C_16, try 8-bit strings.
    length = ida_bytes.get_max_strlit_length(ea, ida_nalt.STRTYPE_C)
    if length > 0:
        return ida_bytes.get_strlit_contents(ea, length, ida_nalt.STRTYPE_C)
    return b""

View File

@@ -1,161 +0,0 @@
##############################################################################################
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
import basicutils_7x as basicutils
import json
import os
import modnaming
## Utilities
#escape_for_graphviz()
#Return the string escaped for usage in a GraphViz file
def escape_for_graphviz(string):
    """Return *string* double-quoted and escaped (JSON escaping is a superset
    of what GraphViz requires)."""
    quoted = json.dumps(string)
    return quoted
## CodeCut Basics
## A couple of functions for working with function and module lists and outputting results
#locate_module()
#Return the module information for a given function
#This assumes that the module list is in order, but not necessarily contiguous
def locate_module(module_list, f):
    """Return the module (object with .start/.end) containing address *f*, or None.

    *module_list* must be sorted by address but may have gaps.

    Fixes: the original computed the correct answer in ``ret`` but returned
    the loop variable ``m``, so addresses falling in a gap between modules
    (or past the last module) were misattributed to a nearby module, and an
    empty list raised NameError.
    """
    ret = None
    for m in module_list:
        if f < m.start:
            # f falls in a gap before this module; no module contains it.
            break
        if m.start <= f <= m.end:
            ret = m
            break
    return ret
#gen_mod_graph()
#Output a module-to-module call graph in GraphViz format:
#for each function <f> of each module m_1, look up the module m_2 of every
#function <f> calls; if m_2 exists, add edge m_1 -> m_2.
def gen_mod_graph(module_list, suffix):
    """Write a module-to-module call graph to <root>_<suffix>_mod_graph.gv."""
    edges = set()
    for m in module_list:
        f = m.start
        while f <= m.end:
            for xref in basicutils.FuncXrefsFrom(f):
                target = locate_module(module_list, xref)
                if target:
                    edges.add((m.name, target.name))
            f = basicutils.NextFunction(f)
    root_name = basicutils.GetRootName()
    out = open(root_name + "_" + suffix + "_mod_graph.gv", "w")
    out.write("digraph g {\n")
    for (src, dst) in edges:
        out.write("%s -> %s\n" % (escape_for_graphviz(src), escape_for_graphviz(dst)))
    out.write("}\n")
    out.close()
#gen_rename_script()
#Output the module list with names as a Python script
#This script can then be run on the database if in the same directory as the basicutils libraries
#Look at basicutils.RenameRangeWithAddr to see the "canonical" name format -
# you can also tweak that function to use a different naming convention
def gen_rename_script(module_list, suffix):
    """Write <root>_<suffix>_labels.py, a standalone script that renames each
    module's address range via basicutils.RenameRangeWithAddr.

    Fixes: the generated script called the bare builtin ``reload()``, which
    does not exist under Python 3 (IDA 7.4+); it now uses
    importlib.reload().
    """
    root_name = basicutils.GetRootName()
    out = open(root_name + "_" + suffix + "_labels.py", "w")
    out.write("import importlib\n")
    out.write("import basicutils_7x as basicutils\n")
    out.write("\ndef go():\n")
    for m in module_list:
        out.write("\tbasicutils.RenameRangeWithAddr(0x%x,0x%x,%r)\n" % (m.start, m.end, m.name))
    out.write("\n")
    out.write("if __name__ == \"__main__\":\n")
    out.write("\timportlib.reload(basicutils)\n")
    out.write("\tgo()\n")
    out.close()
#gen_map_file()
#Produce a .map file similar to that produced by the ld option -Map=foo.map
#Use map_read.py to test accuracy when a ground truth map file is available
def gen_map_file(module_list, suffix):
    """Write module boundaries to <root>_<suffix>_map.map in ld -Map style."""
    root_name = basicutils.GetRootName()
    out = open(root_name + "_" + suffix + "_map.map", "w")
    for m in module_list:
        size_str = "0x%x" % (m.end - m.start)
        out.write("%s0x%016x%s %s\n" % (" .text".ljust(16), m.start, size_str.rjust(11), m.name))
    out.close()
#print_results():
#Write all of the results to <target>.csv - which can be opened in your favorite spreadsheet program
def print_results(function_list, module_list1, module_list2):
    """Write per-function LFA/MaxCut results to <root>_cc_results.csv.

    Fixes: the original never closed the output file; a with-block now
    guarantees it is flushed and closed.
    """
    root_name = basicutils.GetRootName()
    with open(root_name + "_cc_results.csv", "w") as out:
        # Header row
        out.write("Function,Function #,LFA Score 1,LFA Score 2,LFA Total,LFA Edge,MC Edge,Function Name,Suggested Mod Name (LFA), Suggested Mod Name(MC),Source Str Ref\n")
        for c, f in enumerate(function_list):
            fname = basicutils.GetFunctionName(f.loc)
            m1 = locate_module(module_list1, f.loc)
            m2 = locate_module(module_list2, f.loc)
            mname1 = m1.name
            mname2 = m2.name
            # hacky - should actually find the extent of the function;
            # for now we just skip the string refs for the last one
            if c < len(function_list) - 1:
                nf = basicutils.NextFunction(f.loc)
                func_str_ref, _score = modnaming.source_file_strings(f.loc, nf - 1)
            else:
                func_str_ref = ""
            out.write("0x%08x, %d , %f, %f, %f, %d, %d, %s, %s, %s, %s\n" % (
                f.loc, c + 1, f.score1, f.score2, f.total_score,
                f.edge[0], f.edge[1], fname, mname1, mname2, func_str_ref))

View File

@@ -1,63 +0,0 @@
##############################################################################################
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
import maxcut
import lfa
import module
import modnaming
import cc_base
import basicutils_7x as basicutils
import snap_cg
import imp
def go():
    """Run the complete CodeCut pipeline and write all report files.

    Returns True on completion.  Output files are written next to the IDB.
    """
    #Do LFA and MaxCut Analysis to find module boundaries
    lfa_funclist, lfa_modlist = lfa.analyze()
    merge_flist, maxcut_modlist = maxcut.analyze(lfa_funclist)
    #Guess names for the modules using NLP
    lfa_modlist = modnaming.guess_module_names(lfa_modlist)
    maxcut_modlist = modnaming.guess_module_names(maxcut_modlist)
    #Output all results as .csv
    cc_base.print_results(merge_flist, lfa_modlist, maxcut_modlist)
    #Output module-to-module call graph as a Graphviz .gv file
    cc_base.gen_mod_graph(lfa_modlist, "lfa")
    cc_base.gen_mod_graph(maxcut_modlist, "mc")
    #Output a Python script that will rename modules
    cc_base.gen_rename_script(lfa_modlist, "lfa")
    cc_base.gen_rename_script(maxcut_modlist, "mc")
    #Output .map file (for comparison against ground truth, when available)
    cc_base.gen_map_file(lfa_modlist, "lfa")
    cc_base.gen_map_file(maxcut_modlist, "mc")
    return True
if __name__ == "__main__":
imp.reload(modnaming)
imp.reload(module)
imp.reload(cc_base)
imp.reload(lfa)
imp.reload(maxcut)
imp.reload(snap_cg)
imp.reload(basicutils)
go()

View File

@@ -1,103 +0,0 @@
import idc
import ida_kernwin
import imp
import snap_cg
import lfa
import maxcut
import module
import cc_base
import modnaming
import basicutils_7x as basicutils
from PyQt5 import QtCore, QtGui, QtWidgets
from IDAMagicStrings import get_source_strings
#-------------------------------------------------------------------------------
def handler(item, column_no):
    """Tree double-click handler: jump the disassembly view to the item's address."""
    # Module-level rows are created with ignore=True and are not navigable.
    if item.ignore:
        return
    ea = item.ea
    # NOTE(review): is_mapped/jumpto are not imported in this file; they
    # presumably come from IDA's script globals (ida_bytes.is_mapped /
    # ida_kernwin.jumpto in IDA 7) -- confirm.
    if is_mapped(ea):
        jumpto(ea)
#-------------------------------------------------------------------------------
class CBaseTreeViewer(ida_kernwin.PluginForm):
    """Dockable tree view grouping functions by their guessed source module."""

    def populate_tree(self):
        """Rebuild the module/function tree from LFA boundaries and source strings."""
        # Clear previous items
        self.tree.clear()
        # Get source file names
        self.dict, _ = get_source_strings()
        module_names = {}
        for key in self.dict:
            for values in self.dict[key]:
                ea, module_name = values[0], values[2]
                module_names[ea] = module_name
        self.modules_cache = {}
        #Do LFA and MaxCut Analysis to find module boundaries
        _, lfa_modlist = lfa.analyze()
        for module_data in lfa_modlist:
            # Default label is the address range; replaced by a source-file
            # name when any string reference falls inside the module.
            module_name = "Module 0x%08x:0x%08x" % (module_data.start, module_data.end)
            for ea in module_names:
                if ea >= module_data.start and ea <= module_data.end:
                    module_name = module_names[ea]
                    break
            if module_name in self.modules_cache:
                item = self.modules_cache[module_name]
            else:
                item = QtWidgets.QTreeWidgetItem(self.tree)
                item.setText(0, module_name)
                item.ea = module_data.start
                item.ignore = True
                self.modules_cache[module_name] = item
            # NOTE(review): Functions() is not imported in this file;
            # presumably idautils.Functions via IDA's script globals -- confirm.
            for func in Functions(module_data.start, module_data.end):
                node = QtWidgets.QTreeWidgetItem(item)
                node.setText(0, "0x%08x: %s" % (func, idc.get_func_name(func)))
                node.ea = func
                node.ignore = False
        self.tree.itemDoubleClicked.connect(handler)

    def OnCreate(self, form):
        """PluginForm callback: build the Qt widgets inside the IDA form."""
        # Get parent widget
        self.parent = ida_kernwin.PluginForm.FormToPyQtWidget(form)
        # Create tree control
        self.tree = QtWidgets.QTreeWidget()
        self.tree.setHeaderLabels(("Names",))
        self.tree.setColumnWidth(0, 100)
        # Create layout
        layout = QtWidgets.QVBoxLayout()
        layout.addWidget(self.tree)
        self.populate_tree()
        # Populate PluginForm
        self.parent.setLayout(layout)

    def Show(self, title):
        """Display the form; WOPN_PERSIST keeps it alive when closed/reopened."""
        return ida_kernwin.PluginForm.Show(self, title, options = ida_kernwin.PluginForm.WOPN_PERSIST)
#-------------------------------------------------------------------------------
def main():
    """Create the module/function tree viewer and show it."""
    viewer = CBaseTreeViewer()
    viewer.Show("Object Files")
if __name__ == "__main__":
imp.reload(modnaming)
imp.reload(module)
imp.reload(cc_base)
imp.reload(lfa)
imp.reload(maxcut)
imp.reload(snap_cg)
imp.reload(basicutils)
main()

View File

@@ -1,26 +0,0 @@
This is a dataset for exploring other solutions to the CodeCut problem or
improving LFA.
Each of the targets is an ELF file with DWARF debug information, as well as
a .map file that was produced by ld at link time (showing object file
boundaries). Some of the larger targets have a trimmed .map file
(with "_trim") in the filename - this is basically just the object file
boundaries to make parsing faster.
You can use map_read.py to test LFA output (or any other solution to the
CodeCut problem)
Syntax:
map_read.py <ground truth map file> <CodeCut/LFA map file>
This will output a three-part score: match %, gap %, and underlap %.
Expected results for LFA:
Gnuchess - Linux binary (x86) 76.1 3.2 20.7
PX4 Firmware / NuttX (ARM) 82.2 13.6 4.2
GoodFET 41 Firmware (msp430) 76.1 0 23.9
Tmote Sky Firmware / Contiki (msp430) 93.3 0 6.7
NXP Httpd Demo / FreeRTOS (ARM) 86.7 1.4 11.9
A perfect result would be 100% match with no gap and no underlap.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,907 +0,0 @@
.text 0x0000000000402470 0x2e012
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
.text.unlikely
0x0000000000402470 0x0 /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o
.text.unlikely
0x0000000000402470 0x0 main.o
.text.unlikely
0x0000000000402470 0x0 components.o
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(cmd.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(debug.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(epd.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(genmove.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(init.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(move.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(output.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(players.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(pgn.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(solve.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(swap.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(util.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(engine.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(lexpgn.o)
.text.unlikely
0x0000000000402470 0x0 frontend/libfrontend.a(atak.o)
.text.unlikely
0x0000000000402470 0x0 adapter/libadapter.a(main.o)
.text.unlikely
0x0000000000402470 0x0 adapter/libadapter.a(option.o)
.text.unlikely
0x0000000000402470 0x0 adapter/libadapter.a(piece.o)
.text.unlikely
0x0000000000402470 0x0 adapter/libadapter.a(square.o)
.text.unlikely
0x0000000000402470 0x0 adapter/libadapter.a(uci.o)
.text.unlikely
0x0000000000402470 0x0 adapter/libadapter.a(util.o)
.text.unlikely
0x0000000000402470 0xd4 adapter/libadapter.a(adapter.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(attack.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(board.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(book.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(book_make.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(book_merge.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(colour.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(engine.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(epd.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(fen.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(game.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(hash.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(io.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(line.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(list.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(move.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(move_do.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(move_gen.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(move_legal.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(parse.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(pgn.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(posix.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(random.o)
.text.unlikely
0x0000000000402544 0x0 adapter/libadapter.a(san.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(main.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(move_do.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(option.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(pawn.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(piece.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(protocol.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(pst.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(random.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(search.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(search_full.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(see.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(sort.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(square.o)
.text.unlikely
0x0000000000402544 0x0 engine/libengine.a(trans.o)
.text.unlikely
0x0000000000402544 0x1c engine/libengine.a(util.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(value.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(vector.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(attack.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(board.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(book.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(eval.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(fen.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(hash.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(list.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(material.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(move.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(move_check.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(move_evasion.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(move_gen.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(move_legal.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(posix.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(pv.o)
.text.unlikely
0x0000000000402560 0x0 engine/libengine.a(recog.o)
.text.unlikely
0x0000000000402560 0x0 /usr/lib/x86_64-linux-gnu/libc_nonshared.a(elf-init.oS)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
.text.startup 0x0000000000402560 0x79b main.o
0x0000000000402560 main
*(.text.hot .text.hot.*)
*(.text .stub .text.* .gnu.linkonce.t.*)
*fill* 0x0000000000402cfb 0x5
.text 0x0000000000402d00 0x2a /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crt1.o
0x0000000000402d00 _start
.text 0x0000000000402d2a 0x0 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o
*fill* 0x0000000000402d2a 0x6
.text 0x0000000000402d30 0xc6 /usr/lib/gcc/x86_64-linux-gnu/5/crtbegin.o
.text 0x0000000000402df6 0x0 main.o
*fill* 0x0000000000402df6 0xa
.text 0x0000000000402e00 0x1ab components.o
0x0000000000402e00 engine_func(void*)
0x0000000000402e20 InitAdapter()
0x0000000000402eb0 InitEngine()
0x0000000000402f10 adapter_func(void*)
0x0000000000402f30 TerminateAdapterEngine()
*fill* 0x0000000000402fab 0x5
.text 0x0000000000402fb0 0x2863 frontend/libfrontend.a(cmd.o)
0x0000000000402fb0 cmd_variant()
0x0000000000402fc0 cmd_bk()
0x0000000000402fd0 cmd_movenow()
0x0000000000402fe0 cmd_protover()
0x0000000000402ff0 cmd_otim()
0x0000000000403000 cmd_ics()
0x0000000000403010 cmd_hint()
0x0000000000403020 cmd_hard()
0x0000000000403030 cmd_easy()
0x0000000000403040 cmd_accepted()
0x0000000000403050 cmd_activate()
0x0000000000403080 cmd_black()
0x00000000004030b0 cmd_graphic()
0x00000000004030e0 cmd_nographic()
0x0000000000403110 cmd_random()
0x0000000000403140 cmd_switch()
0x0000000000403170 cmd_white()
0x00000000004031a0 cmd_test()
0x00000000004031d0 cmd_analyze()
0x0000000000403200 cmd_force()
0x0000000000403230 cmd_manual()
0x0000000000403260 cmd_nopost()
0x0000000000403290 cmd_quit()
0x0000000000403370 cmd_hash()
0x0000000000403420 cmd_null()
0x00000000004034d0 cmd_time()
0x00000000004035b0 cmd_depth()
0x0000000000403660 cmd_ping()
0x00000000004036a0 cmd_post()
0x00000000004036e0 cmd_go()
0x0000000000403780 cmd_level()
0x00000000004038d0 cmd_st()
0x0000000000403990 cmd_rating()
0x0000000000403a10 cmd_new()
0x0000000000403a90 cmd_undo()
0x0000000000403b60 cmd_remove()
0x0000000000403c40 cmd_result()
0x0000000000403cf0 cmd_solve()
0x0000000000403d00 cmd_usage()
0x0000000000403f60 cmd_usermove()
0x0000000000404110 cmd_help()
0x0000000000404270 cmd_version()
0x00000000004042b0 cmd_exit()
0x00000000004042f0 cmd_xboard()
0x0000000000404390 cmd_book()
0x00000000004045f0 cmd_pgnload()
0x0000000000404760 cmd_edit()
0x0000000000404790 cmd_memory()
0x0000000000404860 cmd_list()
0x00000000004048f0 cmd_name()
0x0000000000404a20 cmd_next()
0x0000000000404af0 cmd_previous()
0x0000000000404bc0 cmd_last()
0x0000000000404d30 cmd_first()
0x0000000000404d50 cmd_pgnreplay()
0x0000000000404fb0 cmd_save()
0x0000000000404fd0 cmd_pgnsave()
0x0000000000404ff0 cmd_show()
0x0000000000405270 cmd_rejected()
0x0000000000405280 parse_input()
0x0000000000405550 check_board()
0x00000000004055d0 cmd_load()
0x0000000000405780 cmd_setboard()
*fill* 0x0000000000405813 0xd
.text 0x0000000000405820 0x23 frontend/libfrontend.a(debug.o)
0x0000000000405820 dbg_open(char const*)
0x0000000000405830 dbg_printf(char const*, ...)
0x0000000000405840 dbg_close()
*fill* 0x0000000000405843 0xd
.text 0x0000000000405850 0xeee frontend/libfrontend.a(epd.o)
0x0000000000405850 ParseEPD(char*)
0x0000000000406210 ReadEPDFile(char const*, short)
0x00000000004063d0 LoadEPD(char*)
0x00000000004064a0 SaveEPD(char*)
*fill* 0x000000000040673e 0x2
.text 0x0000000000406740 0x3460 frontend/libfrontend.a(genmove.o)
0x0000000000406740 GenMoves(short)
0x0000000000407890 GenNonCaptures(short)
0x00000000004084a0 GenCaptures(short)
0x0000000000409290 GenCheckEscapes(short)
0x0000000000409ab0 FilterIllegalMoves(short)
.text 0x0000000000409ba0 0x1091 frontend/libfrontend.a(init.o)
0x0000000000409ba0 InitLzArray()
0x0000000000409be0 InitBitPosArray()
0x0000000000409c20 InitMoveArray()
0x0000000000409d30 InitRay()
0x0000000000409e80 InitFromToRay()
0x0000000000409fc0 InitRankFileBit()
0x000000000040a010 InitBitCount()
0x000000000040a080 InitRotAtak()
0x000000000040a720 InitVars()
0x000000000040aaa0 Initialize()
0x000000000040abd0 NewPosition()
*fill* 0x000000000040ac31 0xf
.text 0x000000000040ac40 0x1d9f frontend/libfrontend.a(move.o)
0x000000000040ac40 MakeMove(int, int*)
0x000000000040b600 UnmakeMove(int, int*)
0x000000000040bb20 SANMove(int, int)
0x000000000040be80 IsInMoveList(int, int, int, char)
0x000000000040bf10 AlgbrMove(int)
0x000000000040bfa0 ValidateMove(char*)
*fill* 0x000000000040c9df 0x1
.text 0x000000000040c9e0 0xd9a frontend/libfrontend.a(output.o)
0x000000000040c9e0 ShowTime()
0x000000000040c9f0 ShowMoveList(int)
0x000000000040cac0 ShowSmallBoard()
0x000000000040ce20 ShowBoard()
0x000000000040d560 ShowCBoard()
0x000000000040d5e0 ShowMvboard()
0x000000000040d650 ShowGame()
*fill* 0x000000000040d77a 0x6
.text 0x000000000040d780 0x572 frontend/libfrontend.a(players.o)
0x000000000040d870 DBSortPlayer(char const*)
0x000000000040d910 DBWritePlayer()
0x000000000040d9a0 DBReadPlayer()
0x000000000040da30 DBListPlayer(char const*)
0x000000000040db10 DBSearchPlayer(char const*)
0x000000000040db90 DBUpdatePlayer(char const*, char const*)
*fill* 0x000000000040dcf2 0xe
.text 0x000000000040dd00 0x6da frontend/libfrontend.a(pgn.o)
0x000000000040dd00 PGNSaveToFile(char const*, char const*)
0x000000000040e270 PGNReadFromFile(char const*, int)
0x000000000040e390 IsTrustedPlayer(char const*)
*fill* 0x000000000040e3da 0x6
.text 0x000000000040e3e0 0x205 frontend/libfrontend.a(solve.o)
0x000000000040e3e0 Solve(char*)
*fill* 0x000000000040e5e5 0xb
.text 0x000000000040e5f0 0x513 frontend/libfrontend.a(swap.o)
0x000000000040e5f0 AddXrayPiece(int, int, int, unsigned long*, unsigned long*)
0x000000000040e700 SwapOff(int)
*fill* 0x000000000040eb03 0xd
.text 0x000000000040eb10 0x371 frontend/libfrontend.a(util.o)
0x000000000040eb10 UpdateFriends()
0x000000000040eb90 UpdateCBoard()
0x000000000040ec70 UpdateMvboard()
0x000000000040ecc0 ValidateBoard()
*fill* 0x000000000040ee81 0xf
.text 0x000000000040ee90 0xf42 frontend/libfrontend.a(engine.o)
0x000000000040efa0 InitFrontend()
0x000000000040efc0 SendToEngine(char*)
0x000000000040f040 ReadFromEngine()
0x000000000040f1c0 ReadFromUser()
0x000000000040f310 SetDataToEngine(char const*)
0x000000000040f330 ExpectAnswerFromEngine(int)
0x000000000040f340 ShowPrompt()
0x000000000040f430 NextUserCmd()
0x000000000040f610 NextEngineCmd()
0x000000000040f8b0 SetUserInputValidMove(int)
0x000000000040f8c0 ChangeColor(int)
0x000000000040f8d0 SetAutoGo(int)
0x000000000040f8e0 GetAutoGo()
0x000000000040f8f0 SolvePosition(char*, char const*)
0x000000000040fae0 ForwardUserInputToEngine()
0x000000000040fc90 ForwardEngineOutputToUser()
*fill* 0x000000000040fdd2 0xe
.text 0x000000000040fde0 0x22f5 frontend/libfrontend.a(lexpgn.o)
0x000000000040ff80 return_append_str(char*, char const*)
0x0000000000410020 append_str(char**, char const*)
0x0000000000410040 append_comment(char const*)
0x00000000004100a0 yy_switch_to_buffer(yy_buffer_state*)
0x0000000000410150 yy_delete_buffer(yy_buffer_state*)
0x00000000004101c0 yy_flush_buffer(yy_buffer_state*)
0x00000000004102d0 yy_create_buffer(_IO_FILE*, int)
0x0000000000410330 yyrestart(_IO_FILE*)
0x0000000000410400 yylex()
0x0000000000411cd0 yypush_buffer_state(yy_buffer_state*)
0x0000000000411d90 yypop_buffer_state()
0x0000000000411e20 yy_scan_buffer(char*, unsigned long)
0x0000000000411ec0 yy_scan_bytes(char const*, unsigned long)
0x0000000000411f40 yy_scan_string(char const*)
0x0000000000411f60 yyget_lineno()
0x0000000000411f70 yyget_in()
0x0000000000411f80 yyget_out()
0x0000000000411f90 yyget_leng()
0x0000000000411fa0 yyget_text()
0x0000000000411fb0 yyset_lineno(int)
0x0000000000411fc0 yyset_in(_IO_FILE*)
0x0000000000411fd0 yyset_out(_IO_FILE*)
0x0000000000411fe0 yyget_debug()
0x0000000000411ff0 yyset_debug(int)
0x0000000000412000 yylex_destroy()
0x00000000004120b0 yyalloc(unsigned long)
0x00000000004120c0 yyrealloc(void*, unsigned long)
0x00000000004120d0 yyfree(void*)
*fill* 0x00000000004120d5 0xb
.text 0x00000000004120e0 0x4ff frontend/libfrontend.a(atak.o)
0x00000000004120e0 SqAtakd(short, short)
0x00000000004122c0 AttackTo(int, int)
0x0000000000412480 PinnedOnKing(int, int)
*fill* 0x00000000004125df 0x1
.text 0x00000000004125e0 0x5cc adapter/libadapter.a(main.o)
0x00000000004129c0 adapter::main_adapter(int, char**)
0x0000000000412b40 adapter::quit()
*fill* 0x0000000000412bac 0x4
.text 0x0000000000412bb0 0x3e5 adapter/libadapter.a(option.o)
0x0000000000412bb0 adapter::option_set(char const*, char const*)
0x0000000000412c20 adapter::option_init()
0x0000000000412e50 adapter::option_get(char const*)
0x0000000000412ec0 adapter::option_get_bool(char const*)
0x0000000000412f50 adapter::option_get_double(char const*)
0x0000000000412f70 adapter::option_get_int(char const*)
0x0000000000412f90 adapter::option_get_string(char const*)
*fill* 0x0000000000412f95 0xb
.text 0x0000000000412fa0 0x1bb adapter/libadapter.a(piece.o)
0x0000000000412fa0 adapter::piece_init()
0x0000000000412ff0 adapter::piece_is_ok(int)
0x0000000000413010 adapter::piece_make_pawn(int)
0x0000000000413020 adapter::piece_pawn_opp(int)
0x0000000000413030 adapter::piece_colour(int)
0x0000000000413040 adapter::piece_type(int)
0x0000000000413050 adapter::piece_is_pawn(int)
0x0000000000413060 adapter::piece_is_knight(int)
0x0000000000413070 adapter::piece_is_bishop(int)
0x0000000000413080 adapter::piece_is_rook(int)
0x0000000000413090 adapter::piece_is_queen(int)
0x00000000004130a0 adapter::piece_is_king(int)
0x00000000004130b0 adapter::piece_is_slider(int)
0x00000000004130c0 adapter::piece_to_12(int)
0x00000000004130d0 adapter::piece_from_12(int)
0x00000000004130e0 adapter::piece_to_char(int)
0x0000000000413100 adapter::piece_from_char(int)
0x0000000000413140 adapter::char_is_piece(int)
*fill* 0x000000000041315b 0x5
.text 0x0000000000413160 0x1f7 adapter/libadapter.a(square.o)
0x0000000000413160 adapter::square_init()
0x00000000004131b0 adapter::square_is_ok(int)
0x00000000004131d0 adapter::square_make(int, int)
0x00000000004131e0 adapter::square_file(int)
0x00000000004131f0 adapter::square_rank(int)
0x0000000000413200 adapter::square_side_rank(int, int)
0x0000000000413230 adapter::square_from_64(int)
0x0000000000413240 adapter::square_to_64(int)
0x0000000000413250 adapter::square_is_promote(int)
0x0000000000413270 adapter::square_ep_dual(int)
0x0000000000413280 adapter::square_colour(int)
0x0000000000413290 adapter::file_from_char(int)
0x00000000004132a0 adapter::rank_from_char(int)
0x00000000004132b0 adapter::file_to_char(int)
0x00000000004132c0 adapter::rank_to_char(int)
0x00000000004132d0 adapter::char_is_file(int)
0x00000000004132e0 adapter::char_is_rank(int)
0x00000000004132f0 adapter::square_to_string(int, char*, int)
0x0000000000413320 adapter::square_from_string(char const*)
*fill* 0x0000000000413357 0x9
.text 0x0000000000413360 0x1328 adapter/libadapter.a(uci.o)
0x0000000000413360 adapter::uci_close(adapter::uci_t*)
0x00000000004133d0 adapter::uci_clear(adapter::uci_t*)
0x00000000004134a0 adapter::uci_send_isready(adapter::uci_t*)
0x00000000004134c0 adapter::uci_send_stop(adapter::uci_t*)
0x00000000004134e0 adapter::uci_send_ucinewgame(adapter::uci_t*)
0x0000000000413510 adapter::uci_option_exist(adapter::uci_t*, char const*)
0x0000000000413570 adapter::uci_send_option(adapter::uci_t*, char const*, char const*, ...)
0x00000000004136e0 adapter::uci_parse(adapter::uci_t*, char const*)
0x00000000004144b0 adapter::uci_open(adapter::uci_t*, adapter::engine_t*)
0x0000000000414590 adapter::uci_send_isready_sync(adapter::uci_t*)
0x0000000000414610 adapter::uci_send_stop_sync(adapter::uci_t*)
*fill* 0x0000000000414688 0x8
.text 0x0000000000414690 0x81f adapter/libadapter.a(util.o)
0x0000000000414690 adapter::util_init()
0x00000000004146d0 adapter::my_random_init()
0x00000000004146f0 adapter::my_random_int(int)
0x0000000000414720 adapter::my_random_double()
0x0000000000414740 adapter::my_atoll(char const*)
0x0000000000414780 adapter::my_round(double)
0x00000000004147a0 adapter::my_free(void*)
0x00000000004147b0 adapter::my_log_open(char const*)
0x00000000004147f0 adapter::my_log_close()
0x0000000000414810 adapter::my_log(char const*, ...)
0x00000000004148e0 adapter::my_fatal(char const*, ...)
0x0000000000414a30 adapter::my_malloc(int)
0x0000000000414a60 adapter::my_realloc(void*, int)
0x0000000000414aa0 adapter::my_file_read_line(_IO_FILE*, char*, int)
0x0000000000414b30 adapter::my_string_empty(char const*)
0x0000000000414b50 adapter::my_string_equal(char const*, char const*)
0x0000000000414b70 adapter::my_string_case_equal(char const*, char const*)
0x0000000000414bd0 adapter::my_strdup(char const*)
0x0000000000414c20 adapter::my_string_clear(char const**)
0x0000000000414c40 adapter::my_string_set(char const**, char const*)
0x0000000000414c70 adapter::my_timer_reset(adapter::my_timer_t*)
0x0000000000414c90 adapter::my_timer_start(adapter::my_timer_t*)
0x0000000000414cb0 adapter::my_timer_stop(adapter::my_timer_t*)
0x0000000000414cf0 adapter::my_timer_elapsed_real(adapter::my_timer_t const*)
0x0000000000414d40 adapter::my_timer_elapsed_cpu(adapter::my_timer_t const*)
0x0000000000414d90 adapter::my_timer_cpu_usage(adapter::my_timer_t const*)
0x0000000000414e90 adapter::compute_pkgdatadir()
*fill* 0x0000000000414eaf 0x1
.text 0x0000000000414eb0 0x2a53 adapter/libadapter.a(adapter.o)
0x0000000000417620 adapter::adapter_loop()
*fill* 0x0000000000417903 0xd
.text 0x0000000000417910 0x32e adapter/libadapter.a(attack.o)
0x0000000000417910 adapter::attack_init()
0x0000000000417a50 adapter::piece_attack(adapter::board_t const*, int, int, int)
0x0000000000417ad0 adapter::is_attacked(adapter::board_t const*, int, int)
0x0000000000417b30 adapter::is_in_check(adapter::board_t const*, int)
0x0000000000417b60 adapter::is_pinned(adapter::board_t const*, int, int, int)
*fill* 0x0000000000417c3e 0x2
.text 0x0000000000417c40 0x70a adapter/libadapter.a(board.o)
0x0000000000417c40 adapter::board_is_ok(adapter::board_t const*)
0x0000000000417c50 adapter::board_clear(adapter::board_t*)
0x0000000000417d50 adapter::board_start(adapter::board_t*)
0x0000000000417d60 adapter::board_copy(adapter::board_t*, adapter::board_t const*)
0x0000000000417d70 adapter::board_equal(adapter::board_t const*, adapter::board_t const*)
0x0000000000417e40 adapter::board_init_list(adapter::board_t*)
0x00000000004180a0 adapter::board_flags(adapter::board_t const*)
0x00000000004180e0 adapter::board_can_play(adapter::board_t const*)
0x0000000000418160 adapter::board_mobility(adapter::board_t const*)
0x00000000004181b0 adapter::board_is_check(adapter::board_t const*)
0x00000000004181c0 adapter::board_is_mate(adapter::board_t const*)
0x00000000004181f0 adapter::board_is_stalemate(adapter::board_t const*)
0x0000000000418220 adapter::king_pos(adapter::board_t const*, int)
0x0000000000418230 adapter::board_disp(adapter::board_t const*)
*fill* 0x000000000041834a 0x6
.text 0x0000000000418350 0xa9f adapter/libadapter.a(book.o)
0x00000000004185b0 adapter::book_clear()
0x00000000004185d0 adapter::book_open(char const*, int)
0x00000000004187a0 adapter::book_close()
0x00000000004187e0 adapter::is_in_book(adapter::board_t const*)
0x0000000000418870 adapter::book_move(adapter::board_t const*, bool)
0x0000000000418980 adapter::book_move(adapter::board_t const*, bool, bool)
0x0000000000418ae0 adapter::book_disp(adapter::board_t const*)
0x0000000000418c50 adapter::book_learn_move(adapter::board_t const*, int, int)
0x0000000000418db0 adapter::book_flush()
*fill* 0x0000000000418def 0x1
.text 0x0000000000418df0 0xb1a adapter/libadapter.a(book_make.o)
0x00000000004192b0 adapter::book_make(int, char**)
*fill* 0x000000000041990a 0x6
.text 0x0000000000419910 0x6a1 adapter/libadapter.a(book_merge.o)
0x0000000000419c10 adapter::book_merge(int, char**)
*fill* 0x0000000000419fb1 0xf
.text 0x0000000000419fc0 0x46 adapter/libadapter.a(colour.o)
0x0000000000419fc0 adapter::colour_is_ok(int)
0x0000000000419fd0 adapter::colour_is_white(int)
0x0000000000419fe0 adapter::colour_is_black(int)
0x0000000000419ff0 adapter::colour_equal(int, int)
0x000000000041a000 adapter::colour_opp(int)
*fill* 0x000000000041a006 0xa
.text 0x000000000041a010 0x29d adapter/libadapter.a(engine.o)
0x000000000041a010 adapter::engine_is_ok(adapter::engine_t const*)
0x000000000041a030 adapter::engine_open(adapter::engine_t*)
0x000000000041a050 adapter::engine_close(adapter::engine_t*)
0x000000000041a060 adapter::engine_get(adapter::engine_t*, char*, int)
0x000000000041a0b0 adapter::engine_send(adapter::engine_t*, char const*, ...)
0x000000000041a1b0 adapter::engine_send_queue(adapter::engine_t*, char const*, ...)
*fill* 0x000000000041a2ad 0x3
.text 0x000000000041a2b0 0xb1c adapter/libadapter.a(epd.o)
0x000000000041a430 adapter::epd_get_op(char const*, char const*, char*, int)
0x000000000041aaf0 adapter::epd_test(int, char**)
*fill* 0x000000000041adcc 0x4
.text 0x000000000041add0 0xad0 adapter/libadapter.a(fen.o)
0x000000000041add0 adapter::fen_fatal(char const*, int, char const*)
0x000000000041af70 adapter::board_from_fen(adapter::board_t*, char const*)
0x000000000041b560 adapter::board_to_fen(adapter::board_t const*, char*, int)
.text 0x000000000041b8a0 0x518 adapter/libadapter.a(game.o)
0x000000000041ba30 adapter::game_is_ok(adapter::game_t const*)
0x000000000041ba70 adapter::game_init(adapter::game_t*, char const*)
0x000000000041bac0 adapter::game_clear(adapter::game_t*)
0x000000000041bad0 adapter::game_status(adapter::game_t const*)
0x000000000041bae0 adapter::game_size(adapter::game_t const*)
0x000000000041baf0 adapter::game_pos(adapter::game_t const*)
0x000000000041bb00 adapter::game_move(adapter::game_t const*, int)
0x000000000041bb10 adapter::game_get_board(adapter::game_t const*, adapter::board_t*, int)
0x000000000041bb90 adapter::game_turn(adapter::game_t const*)
0x000000000041bba0 adapter::game_move_nb(adapter::game_t const*)
0x000000000041bbb0 adapter::game_add_move(adapter::game_t*, int)
0x000000000041bc40 adapter::game_goto(adapter::game_t*, int)
0x000000000041bcd0 adapter::game_rem_move(adapter::game_t*)
0x000000000041bd00 adapter::game_disp(adapter::game_t const*)
*fill* 0x000000000041bdb8 0x8
.text 0x000000000041bdc0 0x1a6 adapter/libadapter.a(hash.o)
0x000000000041bdc0 adapter::hash_init()
0x000000000041be20 adapter::hash_piece_key(int, int)
0x000000000041be50 adapter::hash_key(adapter::board_t const*)
0x000000000041bf10 adapter::hash_castle_key(int)
0x000000000041bf20 adapter::hash_ep_key(int)
0x000000000041bf40 adapter::hash_turn_key(int)
*fill* 0x000000000041bf66 0xa
.text 0x000000000041bf70 0x65f adapter/libadapter.a(io.o)
0x000000000041bf70 adapter::io_is_ok(adapter::io_t const*)
0x000000000041bfb0 adapter::io_init(adapter::io_t*)
0x000000000041bfd0 adapter::io_close(adapter::io_t*)
0x000000000041c030 adapter::io_get_update(adapter::io_t*)
0x000000000041c0e0 adapter::io_line_ready(adapter::io_t const*)
0x000000000041c110 adapter::io_get_line(adapter::io_t*, char*, int)
0x000000000041c220 adapter::io_send(adapter::io_t*, char const*, ...)
0x000000000041c460 adapter::io_send_queue(adapter::io_t*, char const*, ...)
*fill* 0x000000000041c5cf 0x1
.text 0x000000000041c5d0 0x3cd adapter/libadapter.a(line.o)
0x000000000041c5d0 adapter::line_is_ok(unsigned short const*)
0x000000000041c610 adapter::line_clear(unsigned short*)
0x000000000041c620 adapter::line_copy(unsigned short*, unsigned short const*)
0x000000000041c640 adapter::line_from_can(unsigned short*, adapter::board_t const*, char const*, int)
0x000000000041c760 adapter::line_to_can(unsigned short const*, adapter::board_t const*, char*, int)
0x000000000041c870 adapter::line_to_san(unsigned short const*, adapter::board_t const*, char*, int)
*fill* 0x000000000041c99d 0x3
.text 0x000000000041c9a0 0x55a adapter/libadapter.a(list.o)
0x000000000041c9a0 adapter::list_is_ok(adapter::list_t const*)
0x000000000041c9c0 adapter::list_clear(adapter::list_t*)
0x000000000041c9d0 adapter::list_add(adapter::list_t*, int, int)
0x000000000041c9f0 adapter::list_remove(adapter::list_t*, int)
0x000000000041ca30 adapter::list_is_empty(adapter::list_t const*)
0x000000000041ca40 adapter::list_size(adapter::list_t const*)
0x000000000041ca50 adapter::list_move(adapter::list_t const*, int)
0x000000000041ca60 adapter::list_value(adapter::list_t const*, int)
0x000000000041ca70 adapter::list_copy(adapter::list_t*, adapter::list_t const*)
0x000000000041cad0 adapter::list_move_to_front(adapter::list_t*, int)
0x000000000041cb20 adapter::list_note(adapter::list_t*)
0x000000000041cb60 adapter::list_sort(adapter::list_t*)
0x000000000041cc10 adapter::list_contain(adapter::list_t const*, int)
0x000000000041cc50 adapter::list_equal(adapter::list_t*, adapter::list_t*)
0x000000000041ce40 adapter::list_disp(adapter::list_t const*, adapter::board_t const*)
*fill* 0x000000000041cefa 0x6
.text 0x000000000041cf00 0x661 adapter/libadapter.a(move.o)
0x000000000041cf00 adapter::move_is_ok(int)
0x000000000041cf10 adapter::move_make(int, int)
0x000000000041cf40 adapter::move_make_flags(int, int, int)
0x000000000041cf70 adapter::move_from(int)
0x000000000041cf80 adapter::move_to(int)
0x000000000041cf90 adapter::move_promote_hack(int)
0x000000000041cfa0 adapter::move_is_promote(int)
0x000000000041cfb0 adapter::move_is_en_passant(int, adapter::board_t const*)
0x000000000041d000 adapter::move_is_capture(int, adapter::board_t const*)
0x000000000041d030 adapter::move_is_castle(int, adapter::board_t const*)
0x000000000041d180 adapter::move_piece(int, adapter::board_t const*)
0x000000000041d1a0 adapter::move_capture(int, adapter::board_t const*)
0x000000000041d1f0 adapter::move_promote(int, adapter::board_t const*)
0x000000000041d220 adapter::move_is_check(int, adapter::board_t const*)
0x000000000041d280 adapter::move_is_mate(int, adapter::board_t const*)
0x000000000041d2e0 adapter::move_to_can(int, adapter::board_t const*, char*, int)
0x000000000041d300 adapter::move_from_can(char const*, adapter::board_t const*)
0x000000000041d500 adapter::move_order(int)
0x000000000041d510 adapter::move_disp(int, adapter::board_t const*)
*fill* 0x000000000041d561 0xf
.text 0x000000000041d570 0x649 adapter/libadapter.a(move_do.o)
0x000000000041d7c0 adapter::move_do(adapter::board_t*, int)
*fill* 0x000000000041dbb9 0x7
.text 0x000000000041dbc0 0x832 adapter/libadapter.a(move_gen.o)
0x000000000041dc30 adapter::gen_moves(adapter::list_t*, adapter::board_t const*)
0x000000000041e3d0 adapter::gen_legal_moves(adapter::list_t*, adapter::board_t const*)
*fill* 0x000000000041e3f2 0xe
.text 0x000000000041e400 0x175 adapter/libadapter.a(move_legal.o)
0x000000000041e400 adapter::move_is_pseudo(int, adapter::board_t const*)
0x000000000041e450 adapter::pseudo_is_legal(int, adapter::board_t const*)
0x000000000041e4c0 adapter::move_is_legal(int, adapter::board_t const*)
0x000000000041e4f0 adapter::filter_legal(adapter::list_t*, adapter::board_t const*)
*fill* 0x000000000041e575 0xb
.text 0x000000000041e580 0x5c7 adapter/libadapter.a(parse.o)
0x000000000041e680 adapter::match(char*, char const*)
0x000000000041e690 adapter::parse_is_ok(adapter::parse_t const*)
0x000000000041e6f0 adapter::parse_open(adapter::parse_t*, char const*)
0x000000000041e710 adapter::parse_close(adapter::parse_t*)
0x000000000041e760 adapter::parse_add_keyword(adapter::parse_t*, char const*)
0x000000000041e790 adapter::parse_get_word(adapter::parse_t*, char*, int)
0x000000000041e860 adapter::parse_get_string(adapter::parse_t*, char*, int)
*fill* 0x000000000041eb47 0x9
.text 0x000000000041eb50 0xb49 adapter/libadapter.a(pgn.o)
0x000000000041f1b0 adapter::pgn_open(adapter::pgn_t*, char const*)
0x000000000041f290 adapter::pgn_close(adapter::pgn_t*)
0x000000000041f2a0 adapter::pgn_next_game(adapter::pgn_t*)
0x000000000041f4f0 adapter::pgn_next_move(adapter::pgn_t*, char*, int)
*fill* 0x000000000041f699 0x7
.text 0x000000000041f6a0 0x1da adapter/libadapter.a(posix.o)
0x000000000041f6a0 adapter::input_available()
0x000000000041f750 adapter::now_real()
0x000000000041f7f0 adapter::now_cpu()
*fill* 0x000000000041f87a 0x6
.text 0x000000000041f880 0x1c adapter/libadapter.a(random.o)
0x000000000041f880 adapter::random_init()
0x000000000041f890 adapter::random_64(int)
*fill* 0x000000000041f89c 0x4
.text 0x000000000041f8a0 0xc99 adapter/libadapter.a(san.o)
0x000000000041fd50 adapter::move_to_san(int, adapter::board_t const*, char*, int)
0x000000000041fd70 adapter::move_from_san(char const*, adapter::board_t const*)
0x0000000000420490 adapter::move_from_san_debug(char const*, adapter::board_t const*)
*fill* 0x0000000000420539 0x7
.text 0x0000000000420540 0xae engine/libengine.a(main.o)
0x0000000000420540 engine::main_engine(int, char**)
*fill* 0x00000000004205ee 0x2
.text 0x00000000004205f0 0xc77 engine/libengine.a(move_do.o)
0x0000000000420b90 engine::move_do_init()
0x0000000000420bf0 engine::move_do(engine::board_t*, int, engine::undo_t*)
0x0000000000420fc0 engine::move_undo(engine::board_t*, int, engine::undo_t const*)
0x0000000000421190 engine::move_do_null(engine::board_t*, engine::undo_t*)
0x0000000000421230 engine::move_undo_null(engine::board_t*, engine::undo_t const*)
*fill* 0x0000000000421267 0x9
.text 0x0000000000421270 0x245 engine/libengine.a(option.o)
0x0000000000421270 engine::option_list()
0x00000000004212f0 engine::option_set(char const*, char const*)
0x0000000000421360 engine::option_init()
0x0000000000421390 engine::option_get(char const*)
0x0000000000421400 engine::option_get_bool(char const*)
0x0000000000421490 engine::option_get_int(char const*)
0x00000000004214b0 engine::option_get_string(char const*)
*fill* 0x00000000004214b5 0xb
.text 0x00000000004214c0 0x9e0 engine/libengine.a(pawn.o)
0x00000000004214c0 engine::pawn_init_bit()
0x0000000000421630 engine::pawn_init()
0x00000000004216c0 engine::pawn_clear()
0x0000000000421720 engine::pawn_alloc()
0x0000000000421760 engine::pawn_get_info(engine::pawn_info_t*, engine::board_t const*)
0x0000000000421e80 engine::quad(int, int, int)
.text 0x0000000000421ea0 0x1a7 engine/libengine.a(piece.o)
0x0000000000421ea0 engine::piece_init()
0x0000000000421fc0 engine::piece_is_ok(int)
0x0000000000421fe0 engine::piece_from_12(int)
0x0000000000421ff0 engine::piece_to_char(int)
0x0000000000422010 engine::piece_from_char(int)
*fill* 0x0000000000422047 0x9
.text 0x0000000000422050 0xeb0 engine/libengine.a(protocol.o)
0x00000000004220b0 engine::get(char*, int)
0x00000000004220e0 engine::send(char const*, ...)
0x0000000000422e70 engine::loop()
0x0000000000422ec0 engine::event()
.text 0x0000000000422f00 0x59b engine/libengine.a(pst.o)
0x0000000000422f00 engine::pst_init()
*fill* 0x000000000042349b 0x5
.text 0x00000000004234a0 0x2 engine/libengine.a(random.o)
0x00000000004234a0 engine::random_init()
*fill* 0x00000000004234a2 0xe
.text 0x00000000004234b0 0x8e3 engine/libengine.a(search.o)
0x00000000004234b0 engine::depth_is_ok(int)
0x00000000004234c0 engine::height_is_ok(int)
0x00000000004234d0 engine::search_clear()
0x00000000004235e0 engine::search_update_current()
0x0000000000423660 engine::search_update_best()
0x00000000004238e0 engine::search()
0x0000000000423bf0 engine::search_update_root()
0x0000000000423c70 engine::search_check()
*fill* 0x0000000000423d93 0xd
.text 0x0000000000423da0 0x18c5 engine/libengine.a(search_full.o)
0x00000000004253f0 engine::search_full_init(engine::list_t*, engine::board_t*)
0x0000000000425660 engine::search_full_root(engine::list_t*, engine::board_t*, int, int)
*fill* 0x0000000000425665 0xb
.text 0x0000000000425670 0x663 engine/libengine.a(see.o)
0x00000000004259b0 engine::see_move(int, engine::board_t const*)
0x0000000000425c20 engine::see_square(engine::board_t const*, int, int)
*fill* 0x0000000000425cd3 0xd
.text 0x0000000000425ce0 0xc87 engine/libengine.a(sort.o)
0x0000000000425e40 engine::sort_init()
0x0000000000425fa0 engine::sort_next(engine::sort_t*)
0x0000000000426340 engine::sort_init_qs(engine::sort_t*, engine::board_t*, engine::attack_t const*, bool)
0x00000000004263b0 engine::sort_next_qs(engine::sort_t*)
0x0000000000426590 engine::good_move(int, engine::board_t const*, int, int)
0x0000000000426650 engine::history_good(int, engine::board_t const*)
0x00000000004266f0 engine::history_bad(int, engine::board_t const*)
0x0000000000426780 engine::note_moves(engine::list_t*, engine::board_t const*, int, int)
0x00000000004268c0 engine::sort_init(engine::sort_t*, engine::board_t*, engine::attack_t const*, int, int, int)
*fill* 0x0000000000426967 0x9
.text 0x0000000000426970 0x120 engine/libengine.a(square.o)
0x0000000000426970 engine::square_init()
0x00000000004269f0 engine::file_from_char(int)
0x0000000000426a00 engine::rank_from_char(int)
0x0000000000426a10 engine::file_to_char(int)
0x0000000000426a20 engine::rank_to_char(int)
0x0000000000426a30 engine::square_to_string(int, char*, int)
0x0000000000426a60 engine::square_from_string(char const*)
.text 0x0000000000426a90 0x4a3 engine/libengine.a(trans.o)
0x0000000000426a90 engine::trans_is_ok(engine::trans const*)
0x0000000000426af0 engine::trans_free(engine::trans*)
0x0000000000426b20 engine::trans_clear(engine::trans*)
0x0000000000426bd0 engine::trans_init(engine::trans*)
0x0000000000426c50 engine::trans_alloc(engine::trans*)
0x0000000000426cd0 engine::trans_inc_date(engine::trans*)
0x0000000000426d40 engine::trans_store(engine::trans*, unsigned long long, int, int, int, int)
0x0000000000426e90 engine::trans_retrieve(engine::trans*, unsigned long long, int*, int*, int*, int*, int*)
0x0000000000426f00 engine::trans_stats(engine::trans const*)
*fill* 0x0000000000426f33 0xd
.text 0x0000000000426f40 0x586 engine/libengine.a(util.o)
0x0000000000426f40 engine::util_init()
0x0000000000426f80 engine::my_random_init()
0x0000000000426fa0 engine::my_random(int)
0x0000000000426fd0 engine::my_atoll(char const*)
0x0000000000427010 engine::my_round(double)
0x0000000000427030 engine::my_free(void*)
0x0000000000427040 engine::my_fatal(char const*, ...)
0x0000000000427100 engine::my_malloc(int)
0x0000000000427120 engine::my_file_read_line(_IO_FILE*, char*, int)
0x00000000004271a0 engine::my_string_empty(char const*)
0x00000000004271c0 engine::my_string_equal(char const*, char const*)
0x0000000000427220 engine::my_strdup(char const*)
0x0000000000427260 engine::my_string_clear(char const**)
0x0000000000427280 engine::my_string_set(char const**, char const*)
0x00000000004272b0 engine::my_timer_reset(engine::my_timer_t*)
0x00000000004272d0 engine::my_timer_start(engine::my_timer_t*)
0x00000000004272f0 engine::my_timer_stop(engine::my_timer_t*)
0x0000000000427330 engine::my_timer_elapsed_real(engine::my_timer_t const*)
0x0000000000427380 engine::my_timer_elapsed_cpu(engine::my_timer_t const*)
0x00000000004273d0 engine::my_timer_cpu_usage(engine::my_timer_t const*)
*fill* 0x00000000004274c6 0xa
.text 0x00000000004274d0 0x19a engine/libengine.a(value.o)
0x00000000004274d0 engine::value_init()
0x0000000000427580 engine::value_is_ok(int)
0x0000000000427590 engine::range_is_ok(int, int)
0x00000000004275c0 engine::value_is_mate(int)
0x00000000004275d0 engine::value_to_trans(int, int)
0x0000000000427600 engine::value_from_trans(int, int)
0x0000000000427630 engine::value_to_mate(int)
*fill* 0x000000000042766a 0x6
.text 0x0000000000427670 0xc6 engine/libengine.a(vector.o)
0x0000000000427670 engine::vector_init()
0x00000000004276f0 engine::delta_is_ok(int)
0x0000000000427710 engine::inc_is_ok(int)
*fill* 0x0000000000427736 0xa
.text 0x0000000000427740 0xae9 engine/libengine.a(attack.o)
0x0000000000427740 engine::attack_init()
0x0000000000427e00 engine::is_attacked(engine::board_t const*, int, int)
0x0000000000427ea0 engine::line_is_empty(engine::board_t const*, int, int)
0x0000000000427ee0 engine::is_pinned(engine::board_t const*, int, int)
0x0000000000427f60 engine::attack_is_ok(engine::attack_t const*)
0x0000000000428010 engine::attack_set(engine::attack_t*, engine::board_t const*)
0x0000000000428160 engine::piece_attack_king(engine::board_t const*, int, int, int)
*fill* 0x0000000000428229 0x7
.text 0x0000000000428230 0x8cb engine/libengine.a(board.o)
0x0000000000428230 engine::board_is_ok(engine::board_t const*)
0x0000000000428240 engine::board_clear(engine::board_t*)
0x00000000004282b0 engine::board_copy(engine::board_t*, engine::board_t const*)
0x00000000004282d0 engine::board_is_legal(engine::board_t const*)
0x0000000000428300 engine::board_is_check(engine::board_t const*)
0x0000000000428320 engine::board_is_mate(engine::board_t const*)
0x0000000000428380 engine::board_is_stalemate(engine::board_t*)
0x0000000000428440 engine::board_is_repetition(engine::board_t const*)
0x00000000004284e0 engine::board_opening(engine::board_t const*)
0x0000000000428590 engine::board_endgame(engine::board_t const*)
0x0000000000428640 engine::board_init_list(engine::board_t*)
*fill* 0x0000000000428afb 0x5
.text 0x0000000000428b00 0x586 engine/libengine.a(book.o)
0x0000000000428c40 engine::book_init()
0x0000000000428c60 engine::book_open(char const*)
0x0000000000428db0 engine::book_close()
0x0000000000428df0 engine::book_move(engine::board_t*)
*fill* 0x0000000000429086 0xa
.text 0x0000000000429090 0x2012 engine/libengine.a(eval.o)
0x0000000000429890 engine::eval_init()
0x0000000000429ab0 engine::eval(engine::board_t const*)
*fill* 0x000000000042b0a2 0xe
.text 0x000000000042b0b0 0x639 engine/libengine.a(fen.o)
0x000000000042b0b0 engine::board_from_fen(engine::board_t*, char const*)
0x000000000042b4e0 engine::board_to_fen(engine::board_t const*, char*, int)
*fill* 0x000000000042b6e9 0x7
.text 0x000000000042b6f0 0x260 engine/libengine.a(hash.o)
0x000000000042b6f0 engine::hash_init()
0x000000000042b730 engine::hash_key(engine::board_t const*)
0x000000000042b830 engine::hash_pawn_key(engine::board_t const*)
0x000000000042b890 engine::hash_material_key(engine::board_t const*)
0x000000000042b8e0 engine::hash_piece_key(int, int)
0x000000000042b910 engine::hash_castle_key(int)
0x000000000042b930 engine::hash_ep_key(int)
0x000000000042b940 engine::hash_turn_key(int)
.text 0x000000000042b950 0x287 engine/libengine.a(list.o)
0x000000000042b950 engine::list_is_ok(engine::list_t const*)
0x000000000042b970 engine::list_remove(engine::list_t*, int)
0x000000000042b9b0 engine::list_copy(engine::list_t*, engine::list_t const*)
0x000000000042ba00 engine::list_sort(engine::list_t*)
0x000000000042baa0 engine::list_contain(engine::list_t const*, int)
0x000000000042bae0 engine::list_note(engine::list_t*)
0x000000000042bb20 engine::list_filter(engine::list_t*, engine::board_t*, bool (*)(int, engine::board_t*), bool)
*fill* 0x000000000042bbd7 0x9
.text 0x000000000042bbe0 0xdac engine/libengine.a(material.o)
0x000000000042bbe0 engine::material_init()
0x000000000042bc30 engine::material_clear()
0x000000000042bc90 engine::material_alloc()
0x000000000042bcd0 engine::material_get_info(engine::material_info_t*, engine::board_t const*)
*fill* 0x000000000042c98c 0x4
.text 0x000000000042c990 0x34a engine/libengine.a(move.o)
0x000000000042c990 engine::move_is_ok(int)
0x000000000042c9b0 engine::move_promote(int)
0x000000000042c9e0 engine::move_order(int)
0x000000000042ca00 engine::move_is_capture(int, engine::board_t const*)
0x000000000042ca30 engine::move_is_under_promote(int)
0x000000000042ca60 engine::move_is_tactical(int, engine::board_t const*)
0x000000000042ca90 engine::move_capture(int, engine::board_t const*)
0x000000000042cad0 engine::move_to_string(int, char*, int)
0x000000000042cbc0 engine::move_from_string(char const*, engine::board_t const*)
*fill* 0x000000000042ccda 0x6
.text 0x000000000042cce0 0xa96 engine/libengine.a(move_check.o)
0x000000000042cd70 engine::gen_quiet_checks(engine::list_t*, engine::board_t*)
0x000000000042d640 engine::move_is_check(int, engine::board_t*)
*fill* 0x000000000042d776 0xa
.text 0x000000000042d780 0x7c3 engine/libengine.a(move_evasion.o)
0x000000000042ded0 engine::gen_legal_evasions(engine::list_t*, engine::board_t const*, engine::attack_t const*)
0x000000000042dee0 engine::gen_pseudo_evasions(engine::list_t*, engine::board_t const*, engine::attack_t const*)
0x000000000042def0 engine::legal_evasion_exist(engine::board_t const*, engine::attack_t const*)
*fill* 0x000000000042df43 0xd
.text 0x000000000042df50 0x1a8a engine/libengine.a(move_gen.o)
0x000000000042e1f0 engine::gen_quiet_moves(engine::list_t*, engine::board_t const*)
0x000000000042ebd0 engine::add_pawn_move(engine::list_t*, int, int)
0x000000000042ec40 engine::gen_moves(engine::list_t*, engine::board_t const*)
0x000000000042ef00 engine::gen_legal_moves(engine::list_t*, engine::board_t*)
0x000000000042ef80 engine::gen_captures(engine::list_t*, engine::board_t const*)
0x000000000042f9a0 engine::add_promote(engine::list_t*, int)
*fill* 0x000000000042f9da 0x6
.text 0x000000000042f9e0 0x3a3 engine/libengine.a(move_legal.o)
0x000000000042fa30 engine::move_is_pseudo(int, engine::board_t*)
0x000000000042fb50 engine::quiet_is_pseudo(int, engine::board_t*)
0x000000000042fc50 engine::pseudo_is_legal(int, engine::board_t*)
*fill* 0x000000000042fd83 0xd
.text 0x000000000042fd90 0x20a engine/libengine.a(posix.o)
0x000000000042fd90 engine::input_available()
0x000000000042fe70 engine::now_real()
0x000000000042ff10 engine::now_cpu()
*fill* 0x000000000042ff9a 0x6
.text 0x000000000042ffa0 0x127 engine/libengine.a(pv.o)
0x000000000042ffa0 engine::pv_is_ok(unsigned short const*)
0x0000000000430000 engine::pv_copy(unsigned short*, unsigned short const*)
0x0000000000430020 engine::pv_cat(unsigned short*, unsigned short const*, int)
0x0000000000430040 engine::pv_to_string(unsigned short const*, char*, int)
*fill* 0x00000000004300c7 0x9
.text 0x00000000004300d0 0x340 engine/libengine.a(recog.o)
0x00000000004301f0 engine::recog_draw(engine::board_t const*)
.text 0x0000000000430410 0x72 /usr/lib/x86_64-linux-gnu/libc_nonshared.a(elf-init.oS)
0x0000000000430410 __libc_csu_init
0x0000000000430480 __libc_csu_fini
.text 0x0000000000430482 0x0 /usr/lib/gcc/x86_64-linux-gnu/5/crtend.o
.text 0x0000000000430482 0x0 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crtn.o
*(.gnu.warning)
.fini 0x0000000000430484 0x9
*(SORT(.fini))
.fini 0x0000000000430484 0x4 /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/crti.o
0x0000000000430484 _fini

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,78 +0,0 @@
import idc
import ida_kernwin
import imp
import snap_cg
import lfa
import maxcut
import module
import cc_base
import modnaming
import basicutils_7x as basicutils
from PyQt5 import QtCore, QtGui, QtWidgets
#-------------------------------------------------------------------------------
def handler(item, column_no):
    """Tree double-click callback: jump the disassembly view to the item's address.

    `item` is a QTreeWidgetItem carrying an `ea` attribute; `column_no`
    is supplied by the Qt signal and is unused.
    """
    target = item.ea
    # Guard clause: only navigate to addresses that exist in the database.
    if not is_mapped(target):
        return
    jumpto(target)
#-------------------------------------------------------------------------------
class CBaseTreeViewer(ida_kernwin.PluginForm):
    """Dockable tree view of module boundaries recovered by LFA/MaxCut analysis.

    Each top-level item is a detected module (address range); its children
    are the functions inside that range.  Double-clicking any entry jumps
    the disassembly view to the corresponding address.
    """

    def populate_tree(self):
        """(Re)build the tree contents from a fresh LFA analysis pass."""
        # Clear previous items
        self.tree.clear()
        # Do LFA and MaxCut analysis to find module boundaries
        _, lfa_modlist = lfa.analyze()
        for module_data in lfa_modlist:
            module_name = "Module 0x%08x:0x%08x" % (module_data.start, module_data.end)
            item = QtWidgets.QTreeWidgetItem(self.tree)
            item.setText(0, module_name)
            # Stash the navigation target on the item for the click handler.
            item.ea = module_data.start
            for func in Functions(module_data.start, module_data.end):
                node = QtWidgets.QTreeWidgetItem(item)
                node.setText(0, "0x%08x: %s" % (func, idc.get_func_name(func)))
                node.ea = func

    def OnCreate(self, form):
        """IDA callback on form creation: build the Qt widgets and fill the tree."""
        # Get parent widget
        self.parent = ida_kernwin.PluginForm.FormToPyQtWidget(form)
        # Create tree control
        self.tree = QtWidgets.QTreeWidget()
        self.tree.setHeaderLabels(("Names",))
        self.tree.setColumnWidth(0, 100)
        # Connect the double-click handler exactly once, here.  The original
        # code connected it inside populate_tree(), which adds a duplicate
        # connection (and a duplicate jump) every time the tree is rebuilt.
        self.tree.itemDoubleClicked.connect(handler)
        # Create layout
        layout = QtWidgets.QVBoxLayout()
        layout.addWidget(self.tree)
        self.populate_tree()
        # Populate PluginForm
        self.parent.setLayout(layout)

    def Show(self, title):
        """Display the form persistently under the given title."""
        return ida_kernwin.PluginForm.Show(
            self, title, options=ida_kernwin.PluginForm.WOPN_PERSIST)
#-------------------------------------------------------------------------------
def main():
    """Entry point: open the module tree viewer."""
    viewer = CBaseTreeViewer()
    viewer.Show("Object Files")
if __name__ == "__main__":
    # `imp` is deprecated since Python 3.4 and removed in 3.12; use importlib.
    import importlib
    # Reload dependencies in the same order as the original script so stale
    # copies from a previous run inside IDA are refreshed before use.
    for _mod in (modnaming, module, cc_base, lfa, maxcut, snap_cg, basicutils):
        importlib.reload(_mod)
    main()

View File

@@ -1,292 +0,0 @@
##############################################################################################
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
################################################################################
### Object File Boundary Detection in IDA Pro with Local Function Affinity ###
################################################################################
# LFA Metric
# Local Function Affinity (LFA) is a measurement of the direction a function
# is being "pulled" by the functions it calls and the functions that call it.
# By looking at an average of the log of the distance between these functions
# we get a measurement of whether the function is related to functions in the
# positive or negative direction.
# Edge Detection
# In a standard C/C++ development environment, the project is divided into
# multiple source files, which are compiled to object files, then linked into
# the final binary in order. If external references are eliminated (LFA does
# this imperfectly by just eliminating calls whose distance is above a chosen
# threshold) we would expect to see LFA starting positive, switching to
# negative over the course of a source file, then switching back to positive
# at the beginning of the next file. So object file boundaries show up as
# sharp negative-to-positive transitions in the LFA score.
# What is code anyway?
# Don't get too hung up on "object file boundaries" - for LFA (or any other
# attempt to solve the problem) to be perfect, the design and implementation
# of the code would have to be perfect. What LFA is really finding is clusters
# of functionality, that should be more or less related to object files
# but it will often break up large object files into multiple clusters or
# detect 2 or 3 related object files as one file.
IDA_VERSION = 7
import basicutils_7x as basicutils
#External dependencies
import math
#CodeCut dependencies
import cc_base
import module
#Threshold above which a function call is considered "external"
#For published research - 0x1000 = 4K
MAX_CALL = 0x1000
#This is a list of the LFA scores for all functions
g_function_list = []
#This is a list of modules a.k.a. object files after the edge_detect()
#function is executed
g_module_list = []
#func_callers_weight(f):
#Return the LFA score for functions that this functions calls (i.e. the "calls from" score)
#If there are no references, return 0
def func_callers_weight(f):
    """Average signed log-distance of the calls made *from* function f.

    The sign encodes direction (negative = target at a lower address).
    Calls farther than MAX_CALL are treated as external and ignored.
    Returns 0 when no in-range references exist.
    """
    total = 0
    count = 0
    for target in basicutils.FuncXrefsFrom(f):
        delta = target - f
        if abs(delta) > MAX_CALL:
            continue
        # A recursive call (distance 0) contributes 0 to the sum.
        magnitude = math.log(abs(delta)) if delta != 0 else 0
        total += -magnitude if delta < 0 else magnitude
        count += 1
    return total / count if count else 0
#func_callee_weight(f):
#Return the LFA score for calls where this function is the "callee" (i.e. the "calls to" score)
#If there are no references, return 0
def func_callee_weight(f):
    """Average signed log-distance of calls made *to* function f (callee score).

    The sign encodes direction (negative = caller at a lower address).
    Calls farther than MAX_CALL are treated as external and ignored.
    Returns 0 when no in-range references exist.
    (Fix: removed the unused local `a` present in the original.)
    """
    fc = 0
    fs = 0
    for xref in basicutils.CodeRefsTo(f):
        dist = abs(xref - f)
        # Long-distance calls are assumed to be cross-module ("external").
        if dist > MAX_CALL:
            continue
        if dist != 0:
            logdist = math.log(dist)
        else:
            # recursive function call: contributes 0
            logdist = 0
        if xref - f < 0:
            fs -= logdist
        else:
            fs += logdist
        fc += 1
    if fc == 0:
        return 0
    return fs / fc
#func_call_weight(start,end):
#Iterate over each function in the range and calculated the LFA scores
# If both scores are 0, skip the function altogether, exclude it from the list
# If one score is 0, interpolate that score from the previous score
def func_call_weight(f_start, f_end):
    """Compute LFA scores for every function in [f_start, f_end).

    Appends a module.func_info entry to g_function_list for each function:
    - both scores 0 (no usable refs): entry is marked lfa_skip=1
    - one score 0: that score is interpolated from the previous score with
      an assumed negative linear slope
    Fixes vs. original: when f_start == 0 the original reassigned f_end but
    the loop bound was the already-captured `fe`, so the BADADDR upper bound
    never took effect; also removed the dead locals (`prevscore`, `line`,
    `total_score`) the original computed but never used.
    """
    global g_function_list
    c = 1
    f = f_start
    fe = f_end
    if f == 0:
        f = basicutils.NextFunction(0)
        fe = basicutils.BADADDR  # BUG FIX: original updated f_end, not the loop bound
    prevscore_1 = 0
    prevscore_2 = 0
    z1 = 0
    z2 = 0
    # for each function in range
    while f < fe:
        score_1 = func_callers_weight(f)
        score_2 = func_callee_weight(f)
        # If both scores are 0 (no references, or all above threshold),
        # record the function but mark it skipped for edge detection.
        if score_1 == 0 and score_2 == 0:
            prevscore_1 = 0
            prevscore_2 = 0
            z1 = 1
            z2 = 1
            finf = module.func_info(f, 0, 0)
            finf.lfa_skip = 1
            g_function_list.append(finf)
            f = basicutils.NextFunction(f)
            continue
        # Interpolate a missing score from the previous one, sliding it
        # down by .4 per consecutive missing sample.
        if score_1 == 0:
            score_1 = prevscore_1 - z1 * .4
            z1 += 1
        else:
            prevscore_1 = score_1
            z1 = 1
        if score_2 == 0:
            score_2 = prevscore_2 - z2 * .4
            z2 += 1
        else:
            prevscore_2 = score_2
            z2 = 1
        finf = module.func_info(f, score_1, score_2)
        finf.lfa_skip = 0
        g_function_list.append(finf)
        f = basicutils.NextFunction(f)
        c += 1
#get_last _three and get_lfa_start:
#Previously LFA would just skip functions if they had no caller or callee score
#it would effectively drop them. This meant that when doing edge detection we
#knew every function in the function list had a score. Now we're putting all
#functions in the function list, and we have a "skip" field if LFA should skip it
#for scoring purposes. So these functions help parse that skip field, since for
#edge detection we look at the previous three scores.
def get_last_three(index):
    """Return the last three non-skipped entries of g_function_list before *index*.

    Entries are returned most-recent first. On failure prints an error and
    returns (0, 0, 0) — callers must tolerate that sentinel.
    Fix: the original loop condition was `i > 0`, which never examined
    g_function_list[0]; use `i >= 0` so the first entry can count.
    """
    found = []
    i = index - 1
    while len(found) < 3 and i >= 0:
        if g_function_list[i].lfa_skip == 0:
            found.append(g_function_list[i])
        i -= 1
    if len(found) == 3:
        return found[0], found[1], found[2]
    print("Error: could not find 3 scored entries before index: %d (%d,%d)" % (index, i, len(found)))
    return 0, 0, 0
def get_lfa_start():
    """Return the index just past the first four scored (non-skipped) entries.

    Edge detection looks back at the previous three scores, so it cannot
    start before four scored samples exist.
    """
    scored = 0
    i = 0
    while scored < 4:
        if g_function_list[i].lfa_skip == 0:
            scored += 1
        i += 1
    return i
#edge_detect():
# Determine boundaries between object files
# Edge condition is a delta of at least 2 where the current score is positive
# and 2 of the last 3 scores were negative (negative trend)
def edge_detect():
    """Mark object-file boundaries in g_function_list and build g_module_list.

    An edge is declared at a function whose total LFA score is positive and
    jumps by more than EDGE_THRESHOLD over the previous scored function,
    while the median of the last three scores is negative (a negative trend).
    Modules are then assigned as contiguous spans between edges.
    """
    global g_function_list
    global g_module_list
    #For published research
    EDGE_THRESHOLD = 2
    # Start only after four scored entries exist (look-back window needs them).
    c=get_lfa_start()
    #do edge detection
    while (c<len(g_function_list)):
        if (g_function_list[c].lfa_skip == 0):
            # Previous three scored entries, most recent first.
            f_1,f_2,f_3 = get_last_three(c)
            p_1 = f_1.total_score
            p_2 = f_2.total_score
            p_3 = f_3.total_score
            s = g_function_list[c].total_score
            #if score is positive and it is diff of at least 2 from previous
            #and the previous function was not an edge
            if ((not f_1.edge[0] == 1) and (s > 0) and ((s - p_1) > EDGE_THRESHOLD)):
                #if 2 of last 3 were negative
                # (sorted median < 0 means at least two of the three are negative)
                m = sorted([p_1,p_2,p_3])
                if (m[1] < 0):
                    g_function_list[c].edge[0]=1
        c+=1
    #assign modules based on where the edges are
    c=0
    mod_start = g_function_list[0].loc
    while(c<len(g_function_list)):
        f = g_function_list[c]
        if (f.edge[0] == 1):
            #change from previous code, this will make the modules contiguous
            b_mod = module.bin_module(mod_start,f.loc-1,0,"")
            mod_start = f.loc #set the start of the next module to this function (where edge was detected)
            g_module_list.append(b_mod)
        c+=1
#Main entry point - returns an LFA module list and a global function list (with the LFA module edges marked)
def analyze():
    """Run LFA end-to-end; return (g_function_list, g_module_list).

    Analyzes the .text segment if one exists, otherwise everything from the
    first function in the database onward.
    """
    global g_function_list
    global g_module_list
    start, end = basicutils.SegByName(".text")
    if start == basicutils.BADADDR:
        # No .text segment: fall back to the whole function range.
        start = basicutils.NextFunction(0)
        end = basicutils.BADADDR
    # Score every function, then locate the object-file boundaries.
    func_call_weight(start, end)
    edge_detect()
    return g_function_list, g_module_list

View File

@@ -1,343 +0,0 @@
#!/usr/bin/python
##############################################################################################
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
import sys
#Syntax: map_read.py <ground truth .map file> <LFA produced .map file>
#Reads the two map files and outputs a score
#Score is % overlap, % underlap, and % gap (the sum of which should be 100%)
#Raw list of modules
g_mod_list1 = []
g_mod_list2 = []
#"Reconciled" module list - after modules have been combined to represent best alignment
g_rec_list1 = []
g_rec_list2 = []
#name
#offset - starting address of the module
#mlen - length of the module
#reach - end address of the module (offset + mlen)
#gap - when collapsing two modules,
class bin_mod:
    """One .text module parsed from a linker .map file."""

    def __init__(self, n, o, ml):
        # module name / starting address / length in bytes
        self.name = n
        self.offset = o
        self.mlen = ml
        # end address (offset + length)
        self.reach = o + ml
        # accumulated gap area picked up when modules are collapsed together
        self.gap = 0
#map_parse(function, mlist):
#Parse a gcc/ld formatted .map file
# (mlist == 1): ground truth map, saved to g_mod_list1
# (mlist == 2): LFA map, saved to g_mod_list2
def map_parse(f,mlist):
    """Parse a gcc/ld formatted .map file into a global module list.

    Only " .text" lines are considered. Fixed column offsets are assumed:
    segment name in [0:15], "0x..."-address in [16:34], length in [35:45],
    module name from column 46 on. Consecutive entries with the same name
    are merged into one module.
    mlist == 1 -> ground-truth map, stored in g_mod_list1
    mlist == 2 -> comparison (LFA) map, stored in g_mod_list2
    """
    global g_mod_list1
    global g_mod_list2
    line = f.readline()
    prev_name = ""
    while (line != ""):
        # Skip anything that is not a .text record long enough to parse.
        if (not line.startswith(" .text") or (len(line) < 17)):
            line = f.readline()
            continue
        #line wrap case
        # When the segment name is too long, ld wraps the record: the name is
        # on this line and the address/length columns are on the next one.
        if not ((line[16] == '0') and (line[17] == 'x')):
            seg = line.strip()
            line = f.readline()
        else:
            seg = line[0:15].strip()
        offset = int(line[16:34],16)
        mlen = int(line[35:45].strip(),16)
        name = line[46:].strip()
        # Zero-address / zero-length records carry no useful module data.
        if (offset == 0) or (mlen == 0):
            line = f.readline()
            continue
        if (name == prev_name):
            # Same object file as the previous record: extend that module
            # instead of appending a new one.
            if (mlist == 1):
                new_reach = offset+mlen
                begin = g_mod_list1[-1].offset
                new_len = new_reach-begin
                g_mod_list1[-1].mlen = new_len
                g_mod_list1[-1].reach = new_reach
            else:
                new_reach = offset+mlen
                begin = g_mod_list2[-1].offset
                new_len = new_reach-begin
                g_mod_list2[-1].mlen = new_len
                g_mod_list2[-1].reach = new_reach
        else:
            bm = bin_mod(name,offset,mlen)
            if (mlist == 1):
                g_mod_list1.append(bm)
            else:
                g_mod_list2.append(bm)
        #read next line
        line = f.readline()
        prev_name = name
#map_print():
#Print both ground truth and LFA map output
def map_print(n):
    """Print one parsed module list: n == 1 is ground truth, anything else is the LFA map."""
    if n == 1:
        print("Map 1 (ground truth):")
        mods = g_mod_list1
    else:
        print("Map 2:")
        mods = g_mod_list2
    print("# of modules: %d" % len(mods))
    for mod in mods:
        print("Name: %s Offset: %x Len: %x" % (mod.name, mod.offset, mod.mlen))
#score_underlap(module1,module2):
#opposite of overlap - actually "disjoint areas" might be more accurate
#For the purposes of scoring this is the area of m1 that m2 doesn't cover
#to ensure that the underlap does not get counted twice
def score_underlap(m1, m2):
    """Area of m1 that m2 does not cover (disjoint area, counted once).

    Only m1's uncovered portion is measured so that iterating pairwise over
    two contiguous lists never counts the same disjoint area twice.
    """
    covered_start = max(m1.offset, m2.offset)
    covered_end = min(m1.reach, m2.reach)
    return abs(m1.offset - covered_start) + abs(m1.reach - covered_end)
#mod_underlap(m1,m2):
#Like score underlap but this is a simpler calculation for use with module list reconciliation
def mod_underlap(m1, m2):
    """Simpler disjoint-area metric (endpoint deltas) used during reconciliation."""
    return abs(m1.offset - m2.offset) + abs(m1.reach - m2.reach)
#mod_collapse(module1,module2):
#Return a module object that is the combination of the two modules
#Does not update either of the global module lists
def mod_collapse(m1, m2):
    """Return a new module spanning both inputs, accumulating their gap areas.

    Neither global module list is modified.
    """
    lo = min(m1.offset, m2.offset)
    hi = max(m1.reach, m2.reach)
    merged = bin_mod(m1.name + "_and_" + m2.name, lo, hi - lo)
    merged.gap = m1.gap + m2.gap
    # Works regardless of module order: the correct difference is positive,
    # the wrong one negative, and max() picks the real gap between them.
    merged.gap += max(m2.offset - m1.reach, m1.offset - m2.reach)
    return merged
#mod_print(m):
#Print a single module
def mod_print(m):
    """Print a module's address range (and gap, if any) without a trailing newline."""
    print("%08x - %08x" % (m.offset, m.reach), end=' ')
    if m.gap != 0:
        print(" gap: %x" % m.gap, end=' ')
#rec_list_print():
#Print side by side the reconciled module lists
def rec_list_print():
    """Print the two reconciled module lists side by side with per-pair underlap."""
    n1 = len(g_rec_list1)
    n2 = len(g_rec_list2)
    if n1 != n2:
        print("Error: List lengths don't match, not fully reconciled (%d and %d)." % (n1, n2))
        return
    for i in range(n1):
        mod_print(g_rec_list1[i])
        mod_print(g_rec_list2[i])
        print("u: %x" % (score_underlap(g_rec_list1[i], g_rec_list2[i])))
#final_score():
#Determine the scores by iterating through the reconciled module lists
#and tallying underlap areas and gap areas
def final_score():
    """Tally underlap and gap area over the reconciled lists; print and return the score.

    Returns (underlap + gaps) / total_length, i.e. lower is better.
    """
    start = min(g_rec_list1[0].offset, g_rec_list2[0].offset)
    end = max(g_rec_list1[-1].reach, g_rec_list2[-1].reach)
    n1 = len(g_rec_list1)
    n2 = len(g_rec_list2)
    if n1 != n2:
        print("Error: List lengths don't match, not fully reconciled (%d and %d)." % (n1, n2))
        return
    underlap = 0
    gaps = 0
    for i in range(0, n1):
        underlap += score_underlap(g_rec_list1[i], g_rec_list2[i])
        # Only count gaps from the "compare" map file (the LFA-generated one).
        gaps += g_rec_list2[i].gap
    total = end - start
    # Overlap area = total area - (underlaps + gaps)
    good_area = total - (underlap + gaps)
    print("Length: 0x%x Good: 0x%x (%2f) Underlap: 0x%x (%2f) Gaps: 0x%x (%2f)" % (total, good_area, good_area*100.0/total, underlap, underlap*100.0/total, gaps, gaps*100.0/total))
    return (underlap + gaps) / 1.0 / total
#map_reconcile():
#Attempt to combine modules in either list to make the maps more similar
#When combining modules, keep track of gaps between the modules so we can account for that in the overall score
#This might seem like cheating, but here's why it's not:
# - we want to give the algorithm credit if it finds a couple of clusters of functionality within a .o file
# (i.e. it says one .o file is really 2 or 3 .o files)
# - we want to give the algorithm credit if it says nearby .o files are so inter-related that they are essentially one
# (i.e. it says that 2 or 3 adjacent .o files are really one .o file)
#
#I'm definitely open to suggestions on better ways to do this
def map_reconcile():
    """Greedily collapse adjacent modules in either list so the maps align.

    For each pair (m1, m2), the longer side stays fixed while consecutive
    modules on the shorter side are collapsed as long as the underlap keeps
    shrinking. Collapsing records inter-module gaps so final_score() can
    penalize them. Results go to g_rec_list1 / g_rec_list2.
    """
    i1 = 0
    i2 = 0
    while (i1 < len(g_mod_list1)) and (i2 < len(g_mod_list2)):
        m1 = g_mod_list1[i1]
        m2 = g_mod_list2[i2]
        #"reach" - aka the end of the current modules under consideration
        m1r = m1.reach
        m2r = m2.reach
        #current underlap
        po = mod_underlap(m1,m2)
        # sentinel: larger than any plausible underlap
        pc = 0x10000000000
        print(" m1 (%d): " % i1, end=' ')
        mod_print(m1)
        print(" m2 (%d): " % i2, end=' ')
        mod_print(m2)
        print(" underlap: %x" % (po))
        # d flags that further collapsing stopped improving the underlap
        d=0
        #module 1 is longer than module 2, so attempt to collapse modules in list 2 to optimize
        if (m1r > m2r):
            nm2 = g_mod_list2[i2]
            # NOTE(review): if the loop below never runs (i2 already last index),
            # pnm2 is used afterwards without being bound -> NameError. Confirm
            # inputs cannot reach this branch with a single trailing module.
            #add/collapse m2 modules, but check to see if makes it better
            while (d == 0) and (i2+1 < len(g_mod_list2)):
                pnm2 = nm2
                nm2 = mod_collapse(nm2,g_mod_list2[i2+1])
                pc = mod_underlap(m1, nm2)
                print("nm2 (%d): (%x)" % (i2+1,pc), end=' ')
                mod_print(nm2)
                print("")
                if (pc < po):
                    po = pc
                    i2+=1
                else:
                    d=1
            print("Collapsed m2 (%d): " % i2, end=' ')
            mod_print(pnm2)
            print("")
            #add final collapsed modules to reconciled list
            g_rec_list1.append(m1)
            g_rec_list2.append(pnm2)
        #module 2 is longer than module 1, so attempt to collapse modules in list 1 to optimize
        else:
            nm1 = g_mod_list1[i1]
            # NOTE(review): same potential unbound pnm1 as pnm2 above.
            while (d==0) and (i1+1 < len(g_mod_list1)):
                pnm1 = nm1
                nm1 = mod_collapse(nm1,g_mod_list1[i1+1])
                pc = mod_underlap(nm1, m2)
                print("nm1 (%d): (%x)" % (i1 + 1, pc), end=' ')
                mod_print(nm1)
                print("")
                if (pc < po):
                    po = pc
                    i1 += 1
                else:
                    d=1
            print("Collapsed m1 (%d): " % i1, end=' ')
            mod_print(pnm1)
            print("")
            g_rec_list1.append(pnm1)
            g_rec_list2.append(m2)
        i1+=1
        i2+=1
        print("")
    #end case
    #if we've got one module left on either side,
    #collapse all the other modules on the other side to match
    if (i1 == len(g_mod_list1)-1):
        m1 = g_mod_list1[i1]
        print("end m1 (%d):" % (i1), end=' ')
        mod_print(m1)
        print("")
        nm2 = g_mod_list2[i2]
        i2 += 1
        while (i2 < len(g_mod_list2)):
            nm2 = mod_collapse(nm2,g_mod_list2[i2])
            print("end nm2 (%d):" % (i2), end=' ')
            mod_print(nm2)
            print("")
            i2 += 1
        g_rec_list1.append(m1)
        g_rec_list2.append(nm2)
    if (i2 == len(g_mod_list2)-1):
        m2 = g_mod_list2[i2]
        print("end m2 (%d):" % (i2), end=' ')
        mod_print(m2)
        print("")
        nm1 = g_mod_list1[i1]
        i1 += 1
        while (i1 < len(g_mod_list1)):
            nm1 = mod_collapse(nm1,g_mod_list1[i1])
            print("end nm1 (%d):" % (i1), end=' ')
            mod_print(nm1)
            print("")
            i1 += 1
        g_rec_list1.append(nm1)
        g_rec_list2.append(m2)
#"ground truth" map file
f = open(sys.argv[1], 'r')
map_parse(f,1)
#map file to compare
f2 = open(sys.argv[2], 'r')
map_parse(f2,2)
map_print(1)
map_print(2)
#"Reconcile" maps to make them more similar - see comment above for why we do this
map_reconcile()
#Print reconciled map
rec_list_print()
#Print score
print("Score: %f" % (final_score()))
f.close()
f2.close()

View File

@@ -1,180 +0,0 @@
##############################################################################################
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
###############################################################
### Object File Boundary Detection in IDA Pro with MaxCut ###
###############################################################
import snap
import sys
import snap_cg
import module
g_maxcut_modlist = []
#make_subgraph()
#returns a Snap subgraph for just the address region specified
#(i.e. the subgraph will not have any edges that originate outside the region
#or terminate outside the region)
def make_subgraph(region_start, region_end, graph):
    """Return a Snap subgraph containing only nodes inside [region_start, region_end].

    Edges that start or end outside the region are dropped by GetSubGraph.
    """
    print("make_subgraph: start: 0x%x and end: 0x%x" % (region_start, region_end))
    node_ids = snap.TIntV()
    # Linear scan over nodes; node IDs are function start addresses.
    for node in graph.Nodes():
        node_id = node.GetId()
        if region_start <= node_id <= region_end:
            node_ids.Add(node_id)
        elif node_id > region_end:
            # Nodes are visited in increasing ID order; nothing further matches.
            break
    return snap.GetSubGraph(graph, node_ids)
#make_cut()
#This function analyzes the region specified and returns the cut address for the address with the
#maximum score, i.e. the address that has the highest average distance call length of function calls
#that go across the address. If multiple addresses with zero calls are found (inf. score) the one
#closest to the middle of the region is returned.
def make_cut(region_start, region_end, graph):
    """Return the best cut address inside [region_start, region_end].

    For each candidate address (one below each node/function start), score it
    as the average call distance of edges crossing it; the maximum-score
    address is returned. If any candidate has zero crossing edges, the
    zero-crossing candidate closest to the region's center wins instead.
    """
    print("make_cut: start: 0x%x end: 0x%x" % (region_start,region_end))
    weight = {}
    z = 0
    zeroes = []
    for Node in graph.Nodes():
        start = Node.GetId()
        #iterate only over nodes in this region
        # candidate cut point: the address just before this function
        cut_address = start - 1
        if cut_address < region_start:
            continue
        weight[cut_address] = 0
        edge_count = 0
        for Edge in graph.Edges():
            edge_start = Edge.GetSrcNId()
            edge_end = Edge.GetDstNId()
            #only look at edges that cross the possible cut address
            #handle both cases for the directed graph
            if (edge_start < cut_address and edge_end > cut_address) or (edge_end < cut_address and edge_start > cut_address):
                weight[cut_address] += abs(edge_end - edge_start)
                edge_count +=1
        #If we have a place where we have no edges crossing - keep track of it
        #We will pick the place closest to the center of the module
        if edge_count == 0:
            print(" returning 0 weight count at: 0x%0x" % cut_address)
            z+=1
            zeroes.append(cut_address)
            weight[cut_address] = 0
        else:
            # average crossing-call distance for this candidate
            weight[cut_address] = weight[cut_address]/ edge_count
    #if we had edges with zero crossings, pick the one closest to the center
    if (z > 0):
        print(" total of %d zero weight counts" % (z))
        center = region_start + ((region_end-region_start)/2)
        min_dist = sys.maxsize
        for i in range(z):
            dist = abs(center - zeroes[i])
            if dist < min_dist:
                min_dist = dist
                min_zero = zeroes[i]
        print(" returning zero cut at addr: %x" % min_zero)
        return min_zero
    #otherwise pick the edge with the maximum weight score
    max_weight=0
    # NOTE(review): if every candidate weight were 0 here (e.g. no candidates
    # at all), max_addr would be unbound below — confirm callers always pass
    # regions containing at least one weighted cut point.
    for addr,w in weight.items():
        if w > max_weight:
            max_addr = addr
            max_weight = w
    print(" returning max weight: %f at addr: 0x%x" % (max_weight,max_addr))
    return max_addr
#do_cutting()
#This is the main recursive algorithm for MaxCut
#Find a cut address, split the graph into two subgraphs, and recurse on those subgraphs
#Stop if the area being cut is below a particular threshold
def do_cutting(start, end, graph):
    """Recursive MaxCut driver: split [start, end] at the best cut address.

    Recursion stops when a region is at most THRESHOLD bytes or has a single
    node; such regions are appended to g_maxcut_modlist as modules.
    """
    node_count = graph.GetNodes()
    print("do_cutting: start: 0x%x end: 0x%x nodes: 0x%x" % (start, end, node_count))
    # For published research the threshold was 0x1000 (4K).
    THRESHOLD = 0x1000
    if (end - start) <= THRESHOLD or node_count <= 1:
        # Base case: region is small enough — record it as one module.
        print("Module 0x%x to 0x%x" % (start, end))
        g_maxcut_modlist.append(module.bin_module(start, end, 0, ""))
        return
    cut_address = make_cut(start, end, graph)
    left = make_subgraph(start, cut_address, graph)
    right = make_subgraph(cut_address + 1, end, graph)
    do_cutting(start, cut_address, left)
    do_cutting(cut_address + 1, end, right)
#func_list_annotate()
#This function copies our list of modules into the function list
#This allows us to have a single function list with modules from multiple algorithms (LFA and MaxCut)
def func_list_annotate(flist):
    """Copy MaxCut module boundaries into the function list as edge markers.

    For each module start in g_maxcut_modlist, the first function at or past
    that address gets edge[1] set to 1. Returns the annotated list, or None
    if a module start lies beyond the last function.
    Fix: the original evaluated flist[c].loc before checking c against
    len(flist), raising IndexError instead of reporting the mismatch.
    """
    c = 0
    m = 0
    while m < len(g_maxcut_modlist):
        start = g_maxcut_modlist[m].start
        # Bounds check must come first to avoid indexing past the list end.
        while c < len(flist) and flist[c].loc < start:
            c += 1
        if c == len(flist):
            print("Error: Maxcut module list does not reconcile with function list")
            return None
        flist[c].edge[1] = 1
        m += 1
    return flist
#Main entry point
#Returns a global function list (annotated with MaxCut edges) and a global module list
def analyze(flist):
    """Run MaxCut over the whole call graph.

    Returns (annotated function list, g_maxcut_modlist).
    """
    # do_cutting recurses once per split; raise the limit for large binaries.
    sys.setrecursionlimit(5000)
    call_graph = snap_cg.create_snap_cg()
    # Find the lowest and highest node IDs (function addresses) in the graph.
    lowest = sys.maxsize
    highest = 0
    for node in call_graph.Nodes():
        node_id = node.GetId()
        lowest = min(lowest, node_id)
        highest = max(highest, node_id)
    do_cutting(lowest, highest, call_graph)
    return func_list_annotate(flist), g_maxcut_modlist

View File

@@ -1,309 +0,0 @@
##############################################################################################
# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
IDA_VERSION = 7
if (IDA_VERSION < 7):
import idc
import struct
import idautils
import basicutils_6x as basicutils
else:
import ida_idaapi
import ida_idc
import ida_funcs
import ida_nalt
import ida_segment
import idautils
import basicutils_7x as basicutils
import math
import nltk
import nltk.collocations
import re
### NLP Section ###
# This section of code attempts to name the modules based on common strings in the string references
# Not really based on any sound science or anything - your mileage may heavily vary. :-D
#string_range_tokenize(start,end,sep):
#Compile all string references between start and end as a list of strings (called "tokens")
# <sep> should be a nonsense word, and will show up in the list
def string_range_tokenize(start, end, sep):
    """Tokenize all string references in [start, end] for NLP phrase scoring.

    <sep> should be a nonsense word used as a string separator; it remains
    present in the returned token list. Format specifiers, punctuation, and
    English/code stop words are removed; everything is lowercased.
    Fixes vs. original: raw strings for regex patterns, the unused counter
    `c` removed, and the stop-word filter loop replaced by a comprehension.
    """
    # All string references in the range, concatenated into one string.
    t = basicutils.CompileTextFromRange(start, end, sep)
    # Enable this if you already have function names and want them in the mix:
    # t += basicutils.CompileFuncNamesFromRangeAsText(start, end, sep)
    # Remove printf/sprintf format strings.
    tc = re.sub(r"%[0-9A-Za-z]+", " ", t)
    # Convert dash to underscore.
    tc = re.sub(r"-", "_", tc)
    # Replace _ with space - breaks up snake case and paths; note a path used
    # throughout the binary may dominate results if left joined.
    tc = re.sub(r"_", " ", tc)
    # Replace / and \ with a space.
    tc = re.sub(r"[/\\]", " ", tc)
    # Keep only alphanumerics, whitespace, . (for .c, .cpp, etc.) and _.
    tc = re.sub(r"[^A-Za-z0-9_\.\s]", " ", tc)
    # Lowercase: this is the base token set to work with.
    tokens = [tk.lower() for tk in tc.split()]
    # English stop words (list from the MIT *bow project).
    eng_stopw = {"about","all","am","an","and","are","as","at","be","been","but","by","can","cannot","did","do","does","doing","done","for","from","had","has","have","having","if","in","is","it","its","of","on","that","the","these","they","this","those","to","too","want","wants","was","what","which","will","with","would"}
    # "Code" stop words: common words in debugging strings.
    code_sw = {"error","err","errlog","log","return","returned","byte","bytes","status","len","length","size","ok","0x","warning","fail","failed","failure","invalid","illegal","param","parameter","done","complete","assert","assertion","cant","didnt","class","foundation","cdecl","stdcall","thiscall"}
    stopw = eng_stopw.union(code_sw)
    return [tk for tk in tokens if tk not in stopw]
#bracket_strings(start,end,b_brack,e_brack):
#Return the most common string in the range <star,end> that begins with b_brack and ends with e_brack
# The count of how many times this string appeared is also returned
#I find somewhat often people format debug strings like "[MOD_NAME] Function X did Y!"
#This function is called by guess_module_names() - if you see this format with different brackets
#you can edit that call
def bracket_strings(start, end, b_brack, e_brack):
    """Most common bracketed tag in [start, end], e.g. "[MOD_NAME] message".

    Returns (tag, count); ("", 0) when no tag is found. Called by
    guess_module_names() - adjust that call site for different bracket chars.
    """
    sep = "tzvlw"
    text = basicutils.CompileTextFromRange(start, end, sep)
    tokens = [tk.lower() for tk in text.split(sep)]
    tags = []
    for tok in tokens:
        tok = tok.strip()
        if not tok.startswith(b_brack):
            continue
        inner = tok[1:tok.find(e_brack)]
        # Drop short junk like [-], [+], [*]; also skip debug prefixes
        # formatted as [0x%x].
        if len(inner) > 3 and inner != "0x%x":
            tags.append(inner)
    print("bracket_strings tokens:")
    print(tokens)
    print(tags)
    if not tags:
        return ("", 0)
    freq = nltk.FreqDist(tags)
    top, count = freq.most_common(1)[0]
    return (top, count)
#source_file_strings(start,end):
#Return the most common string that looks like a source file name in the given range
# The count of how many times this string appeared is also returned
def source_file_strings(start, end):
    """Most common source-file-looking string (*.c / *.cpp / *.cc) in the range.

    Returns (filename, count); ("", 0) when none found. Filenames keep their
    original case (lowercasing would destroy camelCase names).
    Fixes vs. original: endswith() takes a tuple of suffixes in one call, and
    the trailing filename is taken after the *rightmost* path separator of
    either kind (the original preferred "/" even when "\\" appeared later).
    """
    sep = "tzvlw"
    t = basicutils.CompileTextFromRange(start, end, sep)
    # For each string: strip quotes and commas, then split on spaces.
    # Keep _ (filenames) and / \ (paths) intact.
    tokens = []
    for raw in t.split(sep):
        cleaned = re.sub(r"[\"\'\,]", " ", raw.strip())
        tokens.extend(cleaned.split(" "))
    b = []
    for tk in tokens:
        tk = tk.strip()
        if tk.endswith((".c", ".cpp", ".cc")):
            # If there's a dir path, only use the trailing filename. This could
            # be tweaked when the directory structure itself is meaningful
            # (e.g. multiple source directories with meaningful names).
            cut = max(tk.rfind("/"), tk.rfind("\\"))
            b.append(tk[cut + 1:] if cut != -1 else tk)
    print("source_file_strings tokens:")
    print(b)
    # A possible improvement when several names tie: sort, uniquify, and name
    # the module foo.c_and_bar.c.
    if not b:
        return ("", 0)
    f = nltk.FreqDist(b)
    return (f.most_common(1)[0][0], f.most_common(1)[0][1])
#common_strings(start,end):
#Return a list of the common strings in the given range
#Uses NLTK to generate a list of unigrams, bigrams, and trigrams (1 word, 2 word phrase, 3 word phrase)
#If the trigram score > 1/2 * bigram score, the most common trigram is used
#If the bigram score > 1/2 * unigram score, the most common bigram is used
#Otherwise the most common unigram (single word is used)
def common_strings(start, end):
    """Return (most common phrase, score) for strings in the address range.

    Builds unigram/bigram/trigram frequency distributions over the tokenized
    string references and prefers the longer phrase when its count is at
    least half the shorter phrase's count.
    """
    # Need at least this many tokens to say anything meaningful.
    CS_THRESHOLD = 6
    sep = "tvlwz"
    tokens = string_range_tokenize(start, end, sep)
    # BUG FIX: this must be a real copy. The original aliased the list
    # ("u_tokens = tokens"), so deleting separator tokens from u_tokens also
    # removed them from tokens and let bigrams/trigrams span unrelated
    # source strings despite the sep-filtering below.
    u_tokens = [tk for tk in tokens if tk != sep]
    print("common_strings tokens:")
    print(tokens)
    if len(u_tokens) < CS_THRESHOLD:
        # Too few tokens to name this module meaningfully.
        return ("", 0)
    f = nltk.FreqDist(u_tokens)
    u_gram = f.most_common(1)[0][0]
    u_gram_score = f.most_common(1)[0][1]
    # Bigrams: drop any pair that crosses a string boundary (contains sep).
    bgs = [bg for bg in nltk.bigrams(tokens) if sep not in bg]
    if len(bgs) != 0:
        fs = nltk.FreqDist(bgs)
        b_gram = fs.most_common(1)[0][0]
        b_str = b_gram[0] + "_" + b_gram[1]
        b_gram_score = fs.most_common(1)[0][1]
    else:
        b_str = ""
        b_gram_score = 0
    # Trigrams, with the same boundary filtering.
    tgs = [tg for tg in nltk.trigrams(tokens) if sep not in tg]
    if len(tgs) != 0:
        ft = nltk.FreqDist(tgs)
        t_gram = ft.most_common(1)[0][0]
        t_str = t_gram[0] + "_" + t_gram[1] + "_" + t_gram[2]
        t_gram_score = ft.most_common(1)[0][1]
    else:
        t_str = ""
        t_gram_score = 0
    # Prefer the longer phrase when it holds at least half the count of the
    # next shorter phrase.
    if (b_gram_score * 2 >= u_gram_score):
        if (t_gram_score * 2 >= b_gram_score):
            ret = t_str
            ret_s = t_gram_score
        else:
            ret = b_str
            ret_s = b_gram_score
    else:
        ret = u_gram
        ret_s = u_gram_score
    return (ret, ret_s)
### End of NLP Section ###
#guess_module_names():
#Use the NLP section (above) to guess the names of modules and add them to the global module list
#Attempts to find common bracket strings (e.g. "[MOD_NAME] Debug print!")
#then source file names (most often left over from calls to assert())
#then common trigram/bigram/unigrams
#You can tweak the switchover thresholds below.
def guess_module_names(module_list):
    """Name each module in module_list in place and return the list.

    Strategy order: repeated [bracket] tags, then repeated source file
    names, then common n-grams; falls back to umod<N> when nothing repeats
    often enough. Each threshold is the minimum repeat count required for
    that strategy's result to be accepted.
    """
    # idea - make score threshold based on the size of the module
    # (e.g. smaller modules should have a smaller threshold)
    C_SCORE_THRESHOLD = 3
    S_SCORE_THRESHOLD = 1
    B_SCORE_THRESHOLD = 1
    unnamed_count = 0
    for mod in module_list:
        name, score = bracket_strings(mod.start, mod.end, "[", "]")
        if score < B_SCORE_THRESHOLD:
            name, score = source_file_strings(mod.start, mod.end)
            if score < S_SCORE_THRESHOLD:
                name, score = common_strings(mod.start, mod.end)
                if score < C_SCORE_THRESHOLD:
                    # Nothing usable: fall back to umod0, umod1, ...
                    # A "word cloud" over the module's strings could help here.
                    name = "umod%d" % (unnamed_count)
                    unnamed_count += 1
        mod.name = name
        mod.score = score
        print("%08x - %08x : %s (%d)" % (mod.start, mod.end, name, score))
    return module_list

View File

@@ -1,52 +0,0 @@
##############################################################################################
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
# Per-function record produced by the LFA and MaxCut passes; the function
# lists they return are lists of these.
class func_info():
    def __init__(self, loc, score1, score2):
        # Effective address of the function.
        self.loc = loc
        # "Calls from" local function affinity score.
        self.score1 = score1
        # "Calls to" local function affinity score.
        self.score2 = score2
        self.total_score = score1 + score2
        # Set to 1 when LFA "skipped" (did not score) this function.
        self.lfa_skip = 0
        # Set by edge_detect(): 1 means this function starts a new module.
        # Index 0 is the LFA verdict, index 1 the MaxCut verdict.
        self.edge = [0, 0]

    def __repr__(self):
        return "Function: 0x%08x" % (self.loc)

    def __str__(self):
        return repr(self)
# An object-file ("module") boundary as identified by LFA or MaxCut.
class bin_module():
    def __init__(self, start, end, score, name):
        self.start = start
        self.end = end
        # Currently unused.
        self.score = score
        self.name = name

    def __repr__(self):
        text = "Module at 0x%08x:0x%08x" % (self.start, self.end)
        # Only mention the name when one has been assigned.
        if self.name not in ("", None):
            text += " (name %s)" % self.name
        return text

    def __str__(self):
        return repr(self)

View File

@@ -1,67 +0,0 @@
##############################################################################################
# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
## This code creates a Snap PNGraph object that represents the call graph of a binary
## (the .text section)
import snap
import sys
import idc
import struct
import idautils
import basicutils_7x as basicutils
MAX_DIST = 0
UGraph = []
def add_edge(f, t):
    """Record a call-graph edge in the global UGraph for the xref from f to t.

    f is the address of the referencing (caller) instruction; t is the
    referenced (callee) function start. Edges are keyed on the caller's
    function start address. No-op when f is not inside a named function.
    """
    global UGraph
    n = basicutils.GetFunctionName(f)
    if n != "":
        #since we're only doing one edge for each xref, we'll do weight based on distance from the middle of the caller to the callee
        f_start = idc.get_func_attr(f, idc.FUNCATTR_START)
        # Nodes are normally pre-added by create_snap_cg(); adding one here
        # means the linear pass missed this function.
        if (not UGraph.IsNode(f_start)):
            print("Error: had to add node (to): %08x" % f_start)
            UGraph.AddNode(f_start)
        print("%08x -> %08x" % (f_start, t))
        # NOTE(review): the edge is added as (t -> f_start), i.e. callee to
        # caller — confirm this reversed direction is intentional downstream.
        UGraph.AddEdge(t,f_start)
        #print "s_%#x -> s_%#x" % (f_start,t)," [len = ",get_weight(func_mid, t), "]"
def add_node(f):
    """Add a call-graph edge (via add_edge) for every xref targeting function f."""
    basicutils.ForEveryXrefToD(f, add_edge)
def create_snap_cg():
    """Build and return the global Snap PNGraph call graph of the .text section.

    Nodes are function start addresses; edges are added per xref by
    add_node()/add_edge(). Prints each node's degree for debugging.
    """
    global UGraph
    UGraph= snap.PNGraph.New()
    #Add every function linearly, this makes sure the nodes are in order
    basicutils.ForEveryFuncInSeg(".text",UGraph.AddNode)
    # Second pass wires up the edges once all nodes exist.
    basicutils.ForEveryFuncInSeg(".text",add_node)
    for NI in UGraph.Nodes():
        print("node id 0x%x with out-degree %d and in-degree %d" %(
            NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()))
    return UGraph

View File

@@ -1,15 +0,0 @@
The "data" directory is intended to hold data files that will be used by this module and will
not end up in the .jar file, but will be present in the zip or tar file. Typically, data
files are placed here rather than in the resources directory if the user may need to edit them.
An optional data/languages directory can exist for the purpose of containing various Sleigh language
specification files and importer opinion files.
The data/buildLanguage.xml is used for building the contents of the data/languages directory.
The skel language definition has been commented out within the skel.ldefs file so that the
skeleton language does not show up within Ghidra.
See the Sleigh language documentation (docs/languages/index.html) for details on the Sleigh
language specification syntax.

View File

@@ -1,348 +0,0 @@
##############################################################################################
# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
#
# This material is based upon work supported by the Defense Advanced Research
# Projects Agency (DARPA) and Naval Information Warfare Center Pacific (NIWC Pacific)
# under Contract Number N66001-20-C-4024.
#
import sys
import math
import nltk
import nltk.collocations
import re
# Debug logging hook: re-enable the print call below to see diagnostics.
def debug_print(x):
    """Swallow debug output; uncomment the print to make it visible."""
    #print(x)
    pass
### NLP Section ###
# This section of code attempts to name the modules based on common strings in the string references
# Not really based on any sound science or anything - your mileage may heavily vary. :-D
#string_range_tokenize(t):
#Take a long string and convert it into a list of tokens. If using a separator, this will appear in the token list
def string_range_tokenize(t):
    """Tokenize a blob of concatenated binary strings into naming-relevant words.

    Splits *t* on whitespace, lowercases every token, and drops stop words:
    common English words, common debug-string words, and the code stop word
    list from Joxean Koret's "IDAMagicStrings". If a separator token was used
    to build *t*, it survives tokenization and appears in the output list.

    :param t: text compiled from a module's string references
    :return: list of lowercased tokens with stop words removed
    """
    # Historical preprocessing experiments (printf format-string stripping,
    # dash/underscore conversion, path splitting, punctuation removal) were
    # deliberately disabled; see VCS history for the commented variants.
    #lowercase it - and store this as the original set of tokens to work with
    tokens = [tk.lower() for tk in t.split()]
    # English stop words (list from the MIT *bow project).
    eng_stopw = {"about","all","am","an","and","are","as","at","be","been","but","by","can","cannot","did","do","does","doing","done","for","from","had","has","have","having","if","in","is","it","its","of","on","that","the","these","they","this","those","to","too","want","wants","was","what","which","will","with","would"}
    # Common words in debugging strings that carry no naming signal.
    code_sw = {"error","err","errlog","log","return","returned","byte","bytes","status","len","length","size","ok","0x","warning","fail","failed","failure","invalid","illegal","param","parameter","done","complete","assert","assertion","cant","didnt","class","foundation","cdecl","stdcall","thiscall"}
    # Code stop words from Joxean Koret's "IDAMagicStrings".
    jk_sw = {"copyright", "char", "bool", "int", "unsigned", "long",
        "double", "float", "signed", "license", "version", "cannot", "error",
        "invalid", "null", "warning", "general", "argument", "written", "report",
        "failed", "assert", "object", "integer", "unknown", "localhost", "native",
        "memory", "system", "write", "read", "open", "close", "help", "exit", "test",
        "return", "libs", "home", "ambiguous", "internal", "request", "inserting",
        "deleting", "removing", "updating", "adding", "assertion", "flags",
        "overflow", "enabled", "disabled", "enable", "disable", "virtual", "client",
        "server", "switch", "while", "offset", "abort", "panic", "static", "updated",
        "pointer", "reason", "month", "year", "week", "hour", "minute", "second",
        'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
        'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
        'september', 'october', 'november', 'december', "arguments", "corrupt",
        "corrupted", "default", "success", "expecting", "missing", "phrase",
        "unrecognized", "undefined"}
    stopw = eng_stopw | code_sw | jk_sw
    # FIX: the original kept an unused counter and a loop variable that
    # shadowed parameter t; a comprehension filters in one pass and keeps order.
    return [tk for tk in tokens if tk not in stopw]
#bracket_strings(t,b_brack,e_brack):
#Return the most common string in the text that begins with b_brack and ends with e_brack
# The count of how many times this string appeared is also returned
#I find somewhat often people format debug strings like "[MOD_NAME] Function X did Y!"
#This function is called by guess_module_names() - if you see this format with different brackets
#you can edit that call
def bracket_strings(t, b_brack, e_brack, sep):
    """Return (most common bracket-tag string, occurrence count) in text *t*.

    *t* is a blob of strings joined by *sep*; a tag is the text between
    b_brack and e_brack at the start of a string (e.g. "MOD" in
    "[MOD] Debug print!").
    """
    tokens = [tk.lower() for tk in t.split(sep)]
    # Don't use the NLP tokenizer here: it strips the brackets we key on.
    b = []
    for tk in tokens:
        tk = tk.strip()
        if tk.startswith(b_brack):
            # BUG FIX: tk.find() returns -1 when the closing bracket is
            # missing, which silently chopped the last character; fall back
            # to the full remainder instead (matches the newer copy of this
            # module).
            b_contents = tk[1:tk.find(e_brack)] if e_brack in tk else tk[1:]
            # Hack to skip [-], [+], [*] style tags (anything this short).
            if (len(b_contents) > 3):
                # Hack for debug prints that start with a [0x%x] address tag.
                if (b_contents != "0x%x"):
                    b.append(b_contents)
    debug_print("bracket_strings tokens:")
    debug_print(tokens)
    debug_print(b)
    u_gram = ""
    u_gram_score = 0
    if (len(b) > 0):
        f = nltk.FreqDist(b)
        u_gram = f.most_common(1)[0][0]
        u_gram_score = f.most_common(1)[0][1]
    return (u_gram, u_gram_score)
#is_source_file_str(f):
#return True if the file string ends with one of the source file extensions
#This uses structure borrowed from Joxean Koret's IDAMagicStrings
# Map of language name -> list of source-file extensions.
LANGS = {}
LANGS["C/C++"] = ["c", "cc", "cxx", "cpp", "h", "hpp"]
LANGS["C"] = ["c"]
LANGS["C++"] = ["cc", "cxx", "cpp", "hpp", "c++"]
LANGS["Obj-C"] = ["m"]
LANGS["Rust"] = ["rs"]
LANGS["Golang"] = ["go"]
LANGS["OCaml"] = ["ml"]

def is_source_file_str(f):
    """Return True if *f* ends with a known source-file extension."""
    # Flattened any() over all extension lists replaces the original
    # nested for-loops with early return.
    return any(f.endswith("." + ext) for exts in LANGS.values() for ext in exts)
#source_file_strings(start,end):
#Return the most common string that looks like a source file name in the given text string
# The count of how many times this string appeared is also returned
def source_file_strings(t, sep):
    """Return (most common source-file name, occurrence count) for text *t*."""
    # Keep original case here: lowercasing would destroy camelCase file names.
    raw = t.split(sep)
    # Per string: drop quotes/commas (keep _ for file names, / and \ for
    # paths), then split on spaces to build the candidate word list.
    words = []
    for s in raw:
        cleaned = re.sub("[\"\'\,]", " ", s.strip())
        words.extend(cleaned.split(" "))
    debug_print("source_file_strings tokens2:")
    debug_print(words)
    hits = []
    for w in words:
        w = w.strip()
        if is_source_file_str(w):
            # Keep just the basename. Directory structure could be meaningful
            # for some codebases (multiple named source dirs) but is dropped here.
            cut = w.rfind("/")
            if cut == -1:
                cut = w.rfind("\\")
            hits.append(w[cut + 1:])
    debug_print("source_file_strings tokens:")
    debug_print(raw)
    debug_print(hits)
    # Possible improvement for multiple hits: sort, uniquify, and emit a
    # combined name like foo.c_and_bar.c.
    best = ""
    best_score = 0
    if hits:
        dist = nltk.FreqDist(hits)
        best, best_score = dist.most_common(1)[0]
    return (best, best_score)
#common_strings(t, sep):
#Return a list of the common strings in the string "t" - lines separated by "sep"
#Uses NLTK to generate a list of unigrams, bigrams, and trigrams (1 word, 2 word phrase, 3 word phrase)
#If the trigram score > 1/2 * bigram score, the most common trigram is used
#If the bigram score > 1/2 * unigram score, the most common bigram is used
#Otherwise the most common unigram (single word is used)
def common_strings(t, sep):
    """Return (most common phrase, score) for the sep-joined string blob *t*.

    Builds unigram/bigram/trigram frequency distributions over the tokenized
    strings. A longer phrase wins when it repeats more than once and its
    count is at least half the shorter phrase's count.
    """
    # Need at least this many tokens to say anything meaningful.
    CS_THRESHOLD = 6
    tokens = string_range_tokenize(t)
    # BUG FIX: this must be a real copy. The original aliased the list
    # ("u_tokens = tokens"), so removing separator tokens from u_tokens also
    # removed them from tokens and let bigrams/trigrams span unrelated
    # source strings despite the sep-filtering below.
    u_tokens = [tk for tk in tokens if tk != sep]
    debug_print("common_strings tokens:")
    debug_print(tokens)
    if len(u_tokens) < CS_THRESHOLD:
        return ("", 0)
    f = nltk.FreqDist(u_tokens)
    u_gram = f.most_common(1)[0][0]
    u_gram_score = f.most_common(1)[0][1]
    # Bigrams: drop any pair that crosses a string boundary (contains sep).
    bgs = [bg for bg in nltk.bigrams(tokens) if sep not in bg]
    debug_print("Bigrams:")
    debug_print(bgs)
    if (len(bgs) != 0):
        fs = nltk.FreqDist(bgs)
        b_gram = fs.most_common(1)[0][0]
        b_str = b_gram[0] + "_" + b_gram[1]
        b_gram_score = fs.most_common(1)[0][1]
    else:
        b_str = ""
        b_gram_score = 0
    # Trigrams, with the same boundary filtering.
    tgs = [tg for tg in nltk.trigrams(tokens) if sep not in tg]
    debug_print("Trigrams:")
    debug_print(tgs)
    if (len(tgs) != 0):
        ft = nltk.FreqDist(tgs)
        t_gram = ft.most_common(1)[0][0]
        t_str = t_gram[0] + "_" + t_gram[1] + "_" + t_gram[2]
        t_gram_score = ft.most_common(1)[0][1]
    else:
        t_str = ""
        t_gram_score = 0
    debug_print("1: %s - %d 2: %s - %d 3: %s - %d\n" % (u_gram,u_gram_score,b_str,b_gram_score,t_str,t_gram_score))
    # Prefer longer phrases only when they actually repeat (> 1) and hold at
    # least half the count of the next shorter phrase.
    if (b_gram_score > 1) and (b_gram_score * 2 >= u_gram_score):
        if (t_gram_score > 1) and (t_gram_score * 2 >= b_gram_score):
            ret = t_str
            ret_s = t_gram_score
        else:
            ret = b_str
            ret_s = b_gram_score
    else:
        ret = u_gram
        ret_s = u_gram_score
    return (ret, ret_s)
### End of NLP Section ###
#guess_module_names():
#Use the NLP section (above) to guess the names of modules and add them to the global module list
#Attempts to find common bracket strings (e.g. "[MOD_NAME] Debug print!")
#then source file names (most often left over from calls to assert())
#then common trigram/bigram/unigrams
#You can tweak the switchover thresholds below.
def guess_module_names(t, sep):
    """Guess a module name from the sep-joined string blob *t*.

    Strategy order: repeated [bracket] tags, then repeated source file
    names, then common n-grams; returns "unknown" when nothing repeats
    often enough. Each threshold is the minimum repeat count required for
    that strategy's result to be accepted.
    """
    # idea - make score threshold based on the size of the module
    # (e.g. smaller modules should have a smaller threshold)
    C_SCORE_THRESHOLD = 4  # we need to see at least <N> occurrences of a string set in order to pick that name
    S_SCORE_THRESHOLD = 2  # if we see <N> occurrences of foo.c we'll pick "foo.c"
    B_SCORE_THRESHOLD = 2  # if we see <N> occurrences of [foo] we'll pick "foo"
    name, score = bracket_strings(t, "[", "]", sep)
    debug_print("bracket name: %s score: %d" % (name, score))
    if score < B_SCORE_THRESHOLD:
        name, score = source_file_strings(t, sep)
        debug_print("source name: %s score: %d" % (name, score))
        if score < S_SCORE_THRESHOLD:
            name, score = common_strings(t, sep)
            debug_print("common name: %s score: %d" % (name, score))
            if score < C_SCORE_THRESHOLD:
                # Nothing repeated often enough to trust.
                name = "unknown"
    return name
def main():
    """Read one line of sep-joined strings from stdin and print a guessed name."""
    sep = "tzvlw"
    # The Java side joins every string with sep before sending, so a single
    # line (no embedded newlines) carries the whole input.
    text = input()
    print(guess_module_names(text, sep))
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,74 @@
# @category CodeCut
# @menupath CodeCut.ModNaming (Run)
# @toolbar codecut.png
# @runtime PyGhidra
# (C) 2022 The Johns Hopkins University Applied Physics Laboratory LLC
# (JHU/APL). All Rights Reserved.
#
# This material may be only be used, modified, or reproduced by or for
# the U.S. Government pursuant to the license rights granted under the
# clauses at DFARS 252.227-7013/7014 or FAR 52.227-14. For any other
# permission, please contact the Office of Technology Transfer at
# JHU/APL.
#
# NO WARRANTY, NO LIABILITY. THIS MATERIAL IS PROVIDED "AS IS." JHU/APL
# MAKES NO REPRESENTATION OR WARRANTY WITH RESPECT TO THE PERFORMANCE OF
# THE MATERIALS, INCLUDING THEIR SAFETY, EFFECTIVENESS, OR COMMERCIAL
# VIABILITY, AND DISCLAIMS ALL WARRANTIES IN THE MATERIAL, WHETHER
# EXPRESS OR IMPLIED, INCLUDING (BUT NOT LIMITED TO) ANY AND ALL IMPLIED
# WARRANTIES OF PERFORMANCE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE, AND NON-INFRINGEMENT OF INTELLECTUAL PROPERTY OR OTHER THIRD
# PARTY RIGHTS. ANY USER OF THE MATERIAL ASSUMES THE ENTIRE RISK AND
# LIABILITY FOR USING THE MATERIAL. IN NO EVENT SHALL JHU/APL BE LIABLE
# TO ANY USER OF THE MATERIAL FOR ANY ACTUAL, INDIRECT, CONSEQUENTIAL,
# SPECIAL OR OTHER DAMAGES ARISING FROM THE USE OF, OR INABILITY TO USE,
# THE MATERIAL, INCLUDING, BUT NOT LIMITED TO, ANY DAMAGES FOR LOST
# PROFITS.
#
# HAVE A NICE DAY.
# This material is based upon work supported by the Defense Advanced Research
# Projects Agency (DARPA) and Naval Information Warfare Center Pacific (NIWC Pacific)
# under Contract Number N66001-20-C-4024.
from dependency_bootstrap import DependencyManager
# Declare required third-party packages as {"import name": "pip name"}
# (the two differ for e.g. "sklearn": "scikit-learn").
deps = DependencyManager(
    {"nltk": "nltk"})
# Verify the packages import, prompting the user to pip-install any that are
# missing, before the `from modnaming import *` below needs them.
# NOTE(review): `println` is a Ghidra-script built-in — confirm it is in
# scope under PyGhidra here; the rest of this script uses plain print().
if not deps.ensure_or_prompt():
    println("[ModNaming] Required Python packages not available, exiting.")
    exit(1)
from modnaming import *
import sys
import json
# Pass Ghidra context + args into your package entry point
# run(currentProgram, state, monitor, *args)
def main():
    """Script entry: read sep-joined strings from args[0], write the guessed name to args[1]."""
    # getScriptArgs() is provided by the Ghidra scripting environment.
    args = list(getScriptArgs())
    # args[0]: input file of separator-joined strings produced by the caller.
    with open(args[0], "r") as f:
        t = f.read()
    # Separator token inserted between strings when the input was built.
    sep = "tzvlw"
    name = guess_module_names(t, sep)
    # args[1]: output file that receives the single guessed module name.
    with open(args[1], "w") as f:
        f.write(name)
    print("Successfully guessed module name: ", name)

if __name__ == "__main__":
    main()

View File

@@ -1,3 +1,6 @@
#@category CodeCut
#@runtime PyGhidra
#
## Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC
## (JHU/APL). All Rights Reserved.
#

View File

@@ -35,11 +35,11 @@ def decompile_user_functions_in_range(
current_program.getAddressFactory().getAddress(end_address_str)
if start_address is None or end_address is None:
print 'Invalid address range specified.'
print('Invalid address range specified.')
return
if start_address >= end_address:
print 'Invalid address range: start address should be less than end address.'
print('Invalid address range: start address should be less than end address.')
return
decompiler = DecompInterface()

View File

@@ -0,0 +1,100 @@
from __future__ import annotations
import sys, os, io, importlib, subprocess
from typing import Dict, List, Tuple
# list the packages you need
# dictionary of "import name" : "pip name"
# for when they differ, e.g. "sklearn": "scikit-learn"
class DependencyManager:
    """
    Minimal dependency manager for Ghidra Python (PyGhidra/CPython).

    - Takes a dict {import_name: pip_name}.
    - Prompts the user to install missing ones via a Swing/Ghidra popup.
    - Reloads site/import caches so new installs are importable immediately.
    """

    def __init__(self, packages: Dict[str, str], *, title: str = "Missing Python Packages"):
        self.packages = packages
        self.title = title

    # -------- public API --------

    def ensure_or_prompt(self) -> bool:
        """Return True when every declared package imports, installing (with consent) if needed."""
        _, missing = self._try_imports(list(self.packages.keys()))
        if not missing:
            return True
        if not self._ask_to_install(missing):
            return False
        pip_names = [self.packages[name] for name in missing]
        if not self._pip_install(pip_names):
            return False
        self._reload_paths()
        _, still = self._try_imports(missing)
        if still:
            print("[deps] Still missing after install:", still)
            return False
        return True

    # -------- internals --------

    def _try_imports(self, names: List[str]) -> Tuple[List[str], List[str]]:
        """Partition *names* into (importable, missing)."""
        ok, missing = [], []
        for n in names:
            try:
                importlib.import_module(n)
                ok.append(n)
            except Exception:
                missing.append(n)
        return ok, missing

    def _ask_to_install(self, missing: List[str]) -> bool:
        """Ask the user (GUI dialog when available) whether to pip-install *missing*."""
        # Prefer Ghidra OptionDialog (GUI-safe)
        try:
            from docking.widgets import OptionDialog
            lines = ["The following Python packages are required and missing:\n"]
            lines += [f"  • import '{name}' (pip install {self.packages[name]})" for name in missing]
            lines += ["", "Install them now with pip?"]
            msg = "\n".join(lines)
            return OptionDialog.showYesNoDialog(None, self.title, msg) == OptionDialog.YES_OPTION
        except Exception:
            # Headless fallback is unlikely in-tool, but just in case:
            print(f"{self.title}: will install {', '.join(self.packages[n] for n in missing)}")
            return True

    def _pip_install(self, pip_names: List[str]) -> bool:
        """Run pip install in-process; return True on success, False otherwise."""
        args = ["install", "--upgrade", "--no-input"] + pip_names
        print(f"[deps] pip {' '.join(args)}")
        # Suppress pip's version check and ensure no interactive prompts.
        # NOTE(review): env is built but never passed anywhere — pip runs
        # in-process below; confirm whether these settings were meant for a
        # subprocess fallback.
        env = dict(os.environ)
        env.setdefault("PIP_DISABLE_PIP_VERSION_CHECK", "1")
        env.setdefault("PYTHONWARNINGS", "ignore")  # optional: quiet noisy warnings
        # pip 20+: use cli.main
        from pip._internal.cli.main import main as pip_main  # type: ignore
        try:
            code = pip_main(args)
        except SystemExit as e:  # pip may call sys.exit()
            code = e.code if e.code is not None else 0
        # BUG FIX: pip may sys.exit() with a *string* message; the original
        # int(e.code) raised ValueError here. Treat non-int exits as failure.
        if not isinstance(code, int):
            print(f"[deps] pip (in-process) failed: {code}")
            return False
        if code == 0:
            return True
        # BUG FIX: the failure path previously fell through, implicitly
        # returning None; make the contract explicit.
        print(f"[deps] pip (in-process) failed with code {code}")
        return False

    def _reload_paths(self) -> None:
        """Refresh the import machinery so freshly installed packages resolve."""
        importlib.invalidate_caches()
        try:
            import site
            importlib.reload(site)  # process site-packages & .pth files
        except Exception:
            pass
        try:
            import pkg_resources  # type: ignore
            pkg_resources.working_set.__init__()  # rebuild dist cache
        except Exception:
            pass

View File

@@ -1,3 +1,4 @@
# @category CodeCut
from ghidra.program.model.listing import Function
from ghidra.program.model.symbol import SourceType
@@ -39,6 +40,7 @@ def get_referenced_function_signatures_base(function, monitor):
return signatures
def getFunctionReferences(function, monitor):
refs = set()
instructions = \
@@ -47,9 +49,10 @@ def getFunctionReferences(function, monitor):
for instr in instructions:
flowType = instr.getFlowType()
if flowType.isCall():
target = instr.getOperandReferences(0)[0].getToAddress()
func = \
function.getProgram().getFunctionManager().getFunctionAt(target)
oprefs = instr.getOperandReferences(0)
if not oprefs: continue
target = oprefs[0].getToAddress()
func = function.getProgram().getFunctionManager().getFunctionAt(target)
if func is not None:
refs.add(func)
return refs

View File

@@ -17,7 +17,7 @@ def get_global_variables(program, start_addr, end_addr):
#set.addRange(start_addr, end_addr)
print(start_address, end_address)
print(addrset)
#for symbol in symbol_table.getAllSymbols(False):
for symbol in symbol_table.getSymbols(addrset,SymbolType.LABEL,True):
print(symbol)
@@ -27,30 +27,30 @@ def get_global_variables(program, start_addr, end_addr):
if (program.getListing().getDataAt(symbol.getAddress())):
global_vars.append(symbol)
'''
'''
def is_user_defined(var):
var_name = var.getName()
var_addr = var.getAddress()
if var_name.startswith('__') or var_name.startswith('_'):
return False
if var_name.startswith('imp_') or var_name.startswith('thunk_'):
return False
if var_name.startswith('fde_') or var_name.startswith('cie_'):
return False
if var_name.startswith('completed.0') \
or var_name.startswith('data_start'):
return False
if var_addr.toString().startswith('EXTERNAL:'):
return False
section_name = program.getMemory().getBlock(var_addr).getName()
#if section_name not in ['.data', '.bss']:
# return False
return True
'''

View File

@@ -0,0 +1,351 @@
##############################################################################################
# Copyright 2022 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# HAVE A NICE DAY.
#
# This material is based upon work supported by the Defense Advanced Research
# Projects Agency (DARPA) and Naval Information Warfare Center Pacific (NIWC Pacific)
# under Contract Number N66001-20-C-4024.
#
import sys
print(sys.executable)
import sys
import math
import nltk
import nltk.collocations
import re
# Debug logging hook: re-enable the print call below to see diagnostics.
def debug_print(x):
    """Swallow debug output; uncomment the print to make it visible."""
    #print(x)
    pass
### NLP Section ###
# This section of code attempts to name the modules based on common strings in the string references
# Not really based on any sound science or anything - your mileage may heavily vary. :-D
#string_range_tokenize(t):
#Take a long string and convert it into a list of tokens. If using a separator, this will appear in the token list
def string_range_tokenize(t):
    """Tokenize a blob of concatenated binary strings into naming-relevant words.

    Splits *t* on whitespace, lowercases every token, and drops stop words:
    common English words, common debug-string words, and the code stop word
    list from Joxean Koret's "IDAMagicStrings". If a separator token was used
    to build *t*, it survives tokenization and appears in the output list.

    :param t: text compiled from a module's string references
    :return: list of lowercased tokens with stop words removed
    """
    # Historical preprocessing experiments (printf format-string stripping,
    # dash/underscore conversion, path splitting, punctuation removal) were
    # deliberately disabled; see VCS history for the commented variants.
    #lowercase it - and store this as the original set of tokens to work with
    tokens = [tk.lower() for tk in t.split()]
    # English stop words (list from the MIT *bow project).
    eng_stopw = {"about","all","am","an","and","are","as","at","be","been","but","by","can","cannot","did","do","does","doing","done","for","from","had","has","have","having","if","in","is","it","its","of","on","that","the","these","they","this","those","to","too","want","wants","was","what","which","will","with","would"}
    # Common words in debugging strings that carry no naming signal.
    code_sw = {"error","err","errlog","log","return","returned","byte","bytes","status","len","length","size","ok","0x","warning","fail","failed","failure","invalid","illegal","param","parameter","done","complete","assert","assertion","cant","didnt","class","foundation","cdecl","stdcall","thiscall"}
    # Code stop words from Joxean Koret's "IDAMagicStrings".
    jk_sw = {"copyright", "char", "bool", "int", "unsigned", "long",
        "double", "float", "signed", "license", "version", "cannot", "error",
        "invalid", "null", "warning", "general", "argument", "written", "report",
        "failed", "assert", "object", "integer", "unknown", "localhost", "native",
        "memory", "system", "write", "read", "open", "close", "help", "exit", "test",
        "return", "libs", "home", "ambiguous", "internal", "request", "inserting",
        "deleting", "removing", "updating", "adding", "assertion", "flags",
        "overflow", "enabled", "disabled", "enable", "disable", "virtual", "client",
        "server", "switch", "while", "offset", "abort", "panic", "static", "updated",
        "pointer", "reason", "month", "year", "week", "hour", "minute", "second",
        'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
        'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
        'september', 'october', 'november', 'december', "arguments", "corrupt",
        "corrupted", "default", "success", "expecting", "missing", "phrase",
        "unrecognized", "undefined"}
    stopw = eng_stopw | code_sw | jk_sw
    # FIX: the original kept an unused counter and a loop variable that
    # shadowed parameter t; a comprehension filters in one pass and keeps order.
    return [tk for tk in tokens if tk not in stopw]
#bracket_strings(t,b_brack,e_brack):
#Return the most common string in the text that begins with b_brack and ends with e_brack
# The count of how many times this string appeared is also returned
#I find somewhat often people format debug strings like "[MOD_NAME] Function X did Y!"
#This function is called by guess_module_names() - if you see this format with different brackets
#you can edit that call
def bracket_strings(t, b_brack, e_brack, sep):
    """Return the most common bracket-tag string in t, plus its count.

    t is a sep-delimited blob of strings; a "bracket tag" is the content
    between b_brack and e_brack at the start of a string (a common debug
    format such as "[MOD_NAME] Function X did Y!").  Tags of length <= 3
    (e.g. "[-]", "[+]", "[*]") and the literal "0x%x" are skipped.
    Returns ("", 0) when nothing qualifies.
    """
    tokens = [chunk.lower() for chunk in t.split(sep)]
    # Tokenize manually: the NLP tokenizer would strip the brackets we need.
    found = []
    for tok in tokens:
        tok = tok.strip()
        if not tok.startswith(b_brack):
            continue
        end = tok.find(e_brack)
        content = tok[1:end] if end != -1 else tok[1:]
        # Filter short [-]/[+]/[*] tags and debug prints starting with [0x%x].
        if len(content) > 3 and content != "0x%x":
            found.append(content)
    debug_print("bracket_strings tokens:")
    debug_print(tokens)
    debug_print(found)
    if not found:
        return ("", 0)
    best, count = nltk.FreqDist(found).most_common(1)[0]
    return (best, count)
#is_source_file_str(f):
#return True if the file string ends with one of the source file extensions
#This uses structure borrowed from Joxean Koret's IDAMagicStrings
# Language -> source-file extension table (structure borrowed from
# Joxean Koret's IDAMagicStrings).
LANGS = {
    "C/C++": ["c", "cc", "cxx", "cpp", "h", "hpp"],
    "C": ["c"],
    "C++": ["cc", "cxx", "cpp", "hpp", "c++"],
    "Obj-C": ["m"],
    "Rust": ["rs"],
    "Golang": ["go"],
    "OCaml": ["ml"],
}
def is_source_file_str(f):
    """Return True if f ends with one of the source-file extensions in LANGS."""
    return any(
        f.endswith("." + ext)
        for exts in LANGS.values()
        for ext in exts
    )
#source_file_strings(start,end):
#Return the most common string that looks like a source file name in the given text string
# The count of how many times this string appeared is also returned
def source_file_strings(t, sep):
    """Return the most common source-file-looking string in t, plus its count.

    t is a sep-delimited blob of strings (source file names are most often
    left over from calls to assert()).  Case is preserved on purpose —
    normalizing would lose camelCase names.  Returns ("", 0) on no match.
    """
    tokens = [chunk for chunk in t.split(sep)]
    # Per record: drop quotes and commas (keeping "_" for filenames and
    # "/" / "\" for paths), then split on spaces into word-level tokens.
    words = []
    for record in tokens:
        cleaned = re.sub("[\"\',]", " ", record.strip())
        words.extend(cleaned.split(" "))
    debug_print("source_file_strings tokens2:")
    debug_print(words)
    hits = []
    for word in words:
        word = word.strip()
        if not is_source_file_str(word):
            continue
        # Keep only the trailing filename when a directory path is present.
        # This could be tweaked if the directory structure is part of the
        # software architecture (multiple meaningfully-named source dirs).
        if word.rfind("/") != -1:
            hits.append(word[word.rfind("/") + 1:])
        elif word.rfind("\\") != -1:
            hits.append(word[word.rfind("\\") + 1:])
        else:
            hits.append(word)
    debug_print("source_file_strings tokens:")
    debug_print(tokens)
    debug_print(hits)
    # A nicer approach (when several files tie) would be to sort, uniquify,
    # and build a combined name like foo.c_and_bar.c.
    if not hits:
        return ("", 0)
    best, count = nltk.FreqDist(hits).most_common(1)[0]
    return (best, count)
#common_strings(t, sep):
#Return a list of the common strings in the string "t" - lines separated by "sep"
#Uses NLTK to generate a list of unigrams, bigrams, and trigrams (1 word, 2 word phrase, 3 word phrase)
#If the trigram score > 1/2 * bigram score, the most common trigram is used
#If the bigram score > 1/2 * unigram score, the most common bigram is used
#Otherwise the most common unigram (single word is used)
def common_strings(t, sep):
    """Return (phrase, count) for the most common uni/bi/trigram in t.

    t is tokenized with string_range_tokenize(); lines are separated by sep.
    The most common trigram is preferred when its count is > 1 and at least
    half the bigram count; likewise the bigram over the unigram.  Returns
    ("", 0) when there are fewer than CS_THRESHOLD usable tokens.
    """
    CS_THRESHOLD = 6
    tokens = string_range_tokenize(t)
    # BUGFIX: the original did `u_tokens = tokens`, which aliases instead of
    # copying, so deleting separators from u_tokens also mutated tokens and
    # the sep filters on the n-grams below never fired.  Keep sep inside
    # `tokens` so bigrams/trigrams spanning two unrelated source strings can
    # be detected and discarded.
    u_tokens = [tok for tok in tokens if tok != sep]
    debug_print("common_strings tokens:")
    debug_print(tokens)
    if len(u_tokens) < CS_THRESHOLD:
        return ("", 0)
    f = nltk.FreqDist(u_tokens)
    u_gram, u_gram_score = f.most_common(1)[0]
    # Bigrams: drop any pair that straddles a record separator.
    bgs = [bg for bg in nltk.bigrams(tokens) if sep not in bg]
    debug_print("Bigrams:")
    debug_print(bgs)
    if bgs:
        fs = nltk.FreqDist(bgs)
        b_gram, b_gram_score = fs.most_common(1)[0]
        b_str = b_gram[0] + "_" + b_gram[1]
    else:
        b_str = ""
        b_gram_score = 0
    # Trigrams: same separator filtering.
    tgs = [tg for tg in nltk.trigrams(tokens) if sep not in tg]
    debug_print("Trigrams:")
    debug_print(tgs)
    if tgs:
        ft = nltk.FreqDist(tgs)
        t_gram, t_gram_score = ft.most_common(1)[0]
        t_str = t_gram[0] + "_" + t_gram[1] + "_" + t_gram[2]
    else:
        t_str = ""
        t_gram_score = 0
    debug_print("1: %s - %d 2: %s - %d 3: %s - %d\n" % (u_gram, u_gram_score, b_str, b_gram_score, t_str, t_gram_score))
    # Prefer longer phrases when they occur at least half as often as the
    # next-shorter one (and more than once).
    if (b_gram_score > 1) and (b_gram_score * 2 >= u_gram_score):
        if (t_gram_score > 1) and (t_gram_score * 2 >= b_gram_score):
            return (t_str, t_gram_score)
        return (b_str, b_gram_score)
    return (u_gram, u_gram_score)
### End of NLP Section ###
#guess_module_names():
#Use the NLP section (above) to guess the names of modules and add them to the global module list
#Attempts to find common bracket strings (e.g. "[MOD_NAME] Debug print!")
#then source file names (most often left over from calls to assert())
#then common trigram/bigram/unigrams
#You can tweak the switchover thresholds below.
def guess_module_names(t, sep):
    """Guess a module's name from its string references.

    Tries, in order: bracket tags ("[MOD_NAME] Debug print!"), source file
    names (most often left over from assert() calls), then common
    trigram/bigram/unigram phrases.  Each stage only wins when its best
    candidate repeats at least <threshold> times; otherwise "unknown".
    """
    # Idea: scale the thresholds with module size (smaller modules could use
    # smaller thresholds).
    C_SCORE_THRESHOLD = 4  # common phrase must occur at least this many times
    S_SCORE_THRESHOLD = 2  # "foo.c" must occur at least this many times
    B_SCORE_THRESHOLD = 2  # "[foo]" must occur at least this many times
    (name, scr) = bracket_strings(t, "[", "]", sep)
    debug_print("bracket name: %s score: %d" % (name, scr))
    if scr >= B_SCORE_THRESHOLD:
        return name
    (name, scr) = source_file_strings(t, sep)
    debug_print("source name: %s score: %d" % (name, scr))
    if scr >= S_SCORE_THRESHOLD:
        return name
    (name, scr) = common_strings(t, sep)
    debug_print("common name: %s score: %d" % (name, scr))
    if scr >= C_SCORE_THRESHOLD:
        return name
    # No stage produced a confident name.
    return "unknown"
def main():
    """Read one sep-joined line of strings from stdin; print the guessed name."""
    sep = "tzvlw"
    # The Java side joins all strings with sep and sends them as a single
    # line (no embedded newlines), so one input() call reads everything.
    text = input()
    print(guess_module_names(text, sep))


if __name__ == "__main__":
    main()

View File

@@ -1,122 +0,0 @@
Changelog
=========
+ Version 0.28 (2022.02.03)
- Added a method for returning the index of a section by name (#331)
- Allow filtering by section types in iter_sections (#345)
- Support Android compressed rel/rela sections (#357)
- Initial support for PPC64LE (#360)
- Initial DWARF v5 support (#363 with several follow-ups)
- Fixed parsing for structures containing uids or gids in core
dumps (#354)
- Allow filtering by segment types in iter_segments (#375)
- Add support for .note.gnu.property (#386)
- Update readelf tests to work with more recent version of
readelf (#387)
- Add support for note GNU_PROPERTY_X86_FEATURE_1_AND (#388)
+ Version 0.27 (2020.10.27)
- Print addend for RELA relocations without symbol (#292)
- Implement symbol lookup for {GNU,}HashSection (#290)
- Major rewrite of expression parsing
- Cached random access to CUs and DIEs (#264)
- GNU expressions (#303)
- Support parsing LSDA pointers from FDEs (#308)
- Add support for DW_OP_GNU_push_tls_address in expressions (#315)
- Some initial support for AArch64 little-endian (#318)
- Support for ELF files with a large number of sections (#333)
- Some minimal support for DWARFv1 (#335)
- Many small bug fixes; see git log.
+ Version 0.26 (2019.12.05)
- Call relocation for ARM v3 (#194)
- More complete architecture coverage for ENUM_E_MACHINE (#206)
- Support for .debug_pubtypes and .debug_pubnames sections (#208)
- Support for DWARF v4 location lists (#214)
- Decode strings in dynamic string tables (#217)
- Improve symbol table handling in dynamic segments (#219)
- Improved handling of location information (#225)
- Avoid deprecation warnings in Python 3.7+
- Add DWARF v5 OPs (#240)
- Handle many new translation forms and constants
- Lazy DIE parsing to speed up partial parsing of DWARF info (#249)
+ Version 0.25 (2018.09.01)
- Make parsing of SH_TYPE and PT_TYPE fields dependent on the machine
(e_machine header field), making it possible to support conflicting type
enums between different machines (#71 and #121).
- Add parsing and readelf dumping for .eh_frame (#155)
- Support compressed sections (#152)
- Better support for parsing core dumps (#147)
- More comprehensive handling of ARM relocations (#121)
- Convert all ascii encoding to utf-8 encoding (#182)
- Don't attempt to hex/string dump SHT_NOBITS sections in readelf (#119).
- Test with Python 3.6
- Minor bugfixes (#118)
- Cleanup: Use argparse instead of optparse
- Make readelf comparison tests run in parallel using multiprocessing; cuts
testing time 3-5x
- Improvements in MIPS flags handling (#165)
+ Version 0.24 (2016.08.04)
- Retrieve symbols by name - get_symbol_by_name (#58).
- Symbol/section names are strings internally now, not bytestrings (this may
affect API usage in Python 3) (#76).
- Added DT_MIPS_* constants to ENUM_D_TAG (#79)
- Made dwarf_decode_address example a bit more useful for command-line
invocation.
- More DWARF v4 support w.r.t decoding function ranges; DW_AT_high_pc value
is now either absolute or relative to DW_AT_low_pc, depending on the class
of the form encoded in the file. Also #89.
- Support for SHT_NOTE sections (#109)
- Support for .debug_aranges section (#108)
- Support for zlib-compressed debug sections (#102)
- Support for DWARF v4 line programs (#82)
+ Version 0.23 (2014.11.08)
- Minimal Python 2.x version raised to 2.7
- Basic support for MIPS (contributed by Karl Vogel).
- Support for PT_NOTE segment parsing (contributed by Alex Deymo).
- Support for parsing symbol table in dynamic segment
(contributed by Nam T. Nguyen).
+ Version 0.22 (2014.03.30)
- pyelftools repository moved to https://github.com/eliben/pyelftools
- Support for version sections - contributed by Yann Rouillard.
- Better ARM support (including AArch64) - contributed by Dobromir Stefanov.
- Added some initial support for parsing Solaris OpenCSW ELF files
(contributed by Yann Rouillard).
- Added some initial support for DWARF4 (as generated by gcc 4.8)
and DWARF generated by recent versions of Clang (3.3).
- Added the get_full_path utility method to DIEs that have an associated
file name / path (based on pull request #16 by Shaheed Haque).
- Set up Travis CI integration.
+ Version 0.21 (2013.04.17)
- Added new example: dwarf_decode_address - decode function name and
file & line information from an address.
- Issue #7: parsing incorrect DWARF was made a bit more forgiving for cases
where serialized DIE trees have extra NULLs at the end.
- Very initial support for ARM ELF files (Matthew Fernandez - pull
request #6).
- Support for dumping the dynamic section (Mike Frysinger - pull
request #7).
- Output of scripts/readelf.py now matches that of binutils 2.23.52.
- Added more machine EM_ values to ENUM_E_TYPE.
+ Version 0.20 (2012.01.27)
- Python 3 support
- Fixed some problems with running tests
- Issue #2: made all examples run (and test/run_examples_test.py pass)
on Windows.
+ Version 0.10 - Initial public release (2012.01.06)

View File

@@ -1,32 +0,0 @@
pyelftools is in the public domain (see below if you need more details).
pyelftools uses the construct library for structured parsing of a binary
stream. construct is packaged in pyelftools/construct - see its LICENSE
file for the license.
-------------------------------------------------------------------------------
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

View File

@@ -1,8 +0,0 @@
recursive-include elftools *.py
recursive-include scripts *.py
recursive-include examples *.py *.elf *.out
recursive-include test *.py *.elf *.arm *.mips *.o
include README.rst
include LICENSE
include CHANGES
include tox.ini

View File

@@ -1,55 +0,0 @@
==========
pyelftools
==========
.. image:: https://github.com/eliben/pyelftools/workflows/pyelftools-tests/badge.svg
:align: center
:target: https://github.com/eliben/pyelftools/actions
**pyelftools** is a pure-Python library for parsing and analyzing ELF files
and DWARF debugging information. See the
`User's guide <https://github.com/eliben/pyelftools/wiki/User's-guide>`_
for more details.
Pre-requisites
--------------
As a user of **pyelftools**, one only needs Python to run. It works with
Python versions 2.7 and 3.x (x >= 5). For hacking on **pyelftools** the
requirements are a bit more strict, please see the
`hacking guide <https://github.com/eliben/pyelftools/wiki/Hacking-guide>`_.
Installing
----------
**pyelftools** can be installed from PyPI (Python package index)::
> pip install pyelftools
Alternatively, you can download the source distribution for the most recent and
historic versions from the *Downloads* tab on the `pyelftools project page
<https://github.com/eliben/pyelftools>`_ (by going to *Tags*). Then, you can
install from source, as usual::
> python setup.py install
Since **pyelftools** is a work in progress, it's recommended to have the most
recent version of the code. This can be done by downloading the `master zip
file <https://github.com/eliben/pyelftools/archive/master.zip>`_ or just
cloning the Git repository.
Since **pyelftools** has no external dependencies, it's also easy to use it
without installing, by locally adjusting ``PYTHONPATH``.
How to use it?
--------------
**pyelftools** is a regular Python library: you import and invoke it from your
own code. For a detailed usage guide and links to examples, please consult the
`user's guide <https://github.com/eliben/pyelftools/wiki/User's-guide>`_.
License
-------
**pyelftools** is open source software. Its code is in the public domain. See
the ``LICENSE`` file for more details.

View File

@@ -1,36 +0,0 @@
New version
-----------
* Update elftools/__init__.py
* Update setup.py
* Update CHANGES
* Tag in git (v0.xx)
construct
---------
construct seems to be maintained again - they also backported my Python 3 fixes.
Theoretically, I can remove construct from pyelftools and use it as a dependency
instead. I don't really have time to play with this now, but may do so in the
future.
Preparing a new release
-----------------------
* Run 'tox' tests (with '-r' to create new venvs)
* Make sure new version was updated everywhere appropriate
* Run ``python setup.py build sdist bdist_wheel`` (no 'upload' yet)
* Untar the created ``dist/pyelftools-x.y.tar.gz`` and make sure
everything looks ok
* Now build with upload to send it to PyPi
* Tag new version in git
* Test with pip install from some new virtualenv
Distribution
------------
1. First install Twine (https://packaging.python.org/tutorials/packaging-projects/)
2. python3 -m twine upload dist/*, but make sure ``setup.py`` was already run
and the updated whl and tarball are in dist/.
Credentials for PyPI are stored in ~/.pypirc

View File

@@ -1,7 +0,0 @@
#-------------------------------------------------------------------------------
# elftools
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
__version__ = '0.28'

View File

@@ -1,91 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: common/construct_utils.py
#
# Some complementary construct utilities
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..construct import (
Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil,
Rename, SizeofError
)
class RepeatUntilExcluding(Subconstruct):
    """ A version of construct's RepeatUntil that doesn't include the last
        element (which caused the repeat to exit) in the return value.

        Only parsing is currently implemented.

        P.S. removed some code duplication
    """
    __slots__ = ["predicate"]
    def __init__(self, predicate, subcon):
        # predicate: callable (subobj, context) -> bool; True stops the repeat
        # and the terminating element is discarded.
        Subconstruct.__init__(self, subcon)
        self.predicate = predicate
        self._clear_flag(self.FLAG_COPY_CONTEXT)
        self._set_flag(self.FLAG_DYNAMIC)
    def _parse(self, stream, context):
        """Parse subcon repeatedly until predicate fires; return the list of
        parsed elements (excluding the terminator)."""
        obj = []
        try:
            context_for_subcon = context
            if self.subcon.conflags & self.FLAG_COPY_CONTEXT:
                context_for_subcon = context.__copy__()
            while True:
                subobj = self.subcon._parse(stream, context_for_subcon)
                if self.predicate(subobj, context):
                    break
                obj.append(subobj)
        except ConstructError as ex:
            # Stream ran out before the predicate matched.
            raise ArrayError("missing terminator", ex)
        return obj
    def _build(self, obj, stream, context):
        # Building is intentionally unsupported (parse-only construct).
        raise NotImplementedError('no building')
    def _sizeof(self, context):
        # Size depends on the data, so it cannot be computed statically.
        raise SizeofError("can't calculate size")
def _LEB128_reader():
""" Read LEB128 variable-length data from the stream. The data is terminated
by a byte with 0 in its highest bit.
"""
return RepeatUntil(
lambda obj, ctx: ord(obj) < 0x80,
Field(None, 1))
class _ULEB128Adapter(Adapter):
""" An adapter for ULEB128, given a sequence of bytes in a sub-construct.
"""
def _decode(self, obj, context):
value = 0
for b in reversed(obj):
value = (value << 7) + (ord(b) & 0x7F)
return value
class _SLEB128Adapter(Adapter):
""" An adapter for SLEB128, given a sequence of bytes in a sub-construct.
"""
def _decode(self, obj, context):
value = 0
for b in reversed(obj):
value = (value << 7) + (ord(b) & 0x7F)
if ord(obj[-1]) & 0x40:
# negative -> sign extend
value |= - (1 << (7 * len(obj)))
return value
def ULEB128(name):
""" A construct creator for ULEB128 encoding.
"""
return Rename(name, _ULEB128Adapter(_LEB128_reader()))
def SLEB128(name):
""" A construct creator for SLEB128 encoding.
"""
return Rename(name, _SLEB128Adapter(_LEB128_reader()))

View File

@@ -1,22 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: common/exceptions.py
#
# Exception classes for elftools
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
class ELFError(Exception):
pass
class ELFRelocationError(ELFError):
pass
class ELFParseError(ELFError):
pass
class ELFCompressionError(ELFError):
pass
class DWARFError(Exception):
pass

View File

@@ -1,82 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: common/py3compat.py
#
# Python 2/3 compatibility code
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import sys
PY3 = sys.version_info[0] == 3
if PY3:
import io
StringIO = io.StringIO
BytesIO = io.BytesIO
# Functions for acting on bytestrings and strings. In Python 2 and 3,
# strings and bytes are the same and chr/ord can be used to convert between
# numeric byte values and their string representations. In Python 3, bytes
# and strings are different types and bytes hold numeric values when
# iterated over.
def bytes2hex(b, sep=''):
if not sep:
return b.hex()
return sep.join(map('{:02x}'.format, b))
def bytes2str(b): return b.decode('latin-1')
def str2bytes(s): return s.encode('latin-1')
def int2byte(i): return bytes((i,))
def byte2int(b): return b
def iterbytes(b):
"""Return an iterator over the elements of a bytes object.
For example, for b'abc' yields b'a', b'b' and then b'c'.
"""
for i in range(len(b)):
yield b[i:i+1]
ifilter = filter
maxint = sys.maxsize
else:
import cStringIO
StringIO = BytesIO = cStringIO.StringIO
def bytes2hex(b, sep=''):
res = b.encode('hex')
if not sep:
return res
return sep.join(res[i:i+2] for i in range(0, len(res), 2))
def bytes2str(b): return b
def str2bytes(s): return s
int2byte = chr
byte2int = ord
def iterbytes(b):
return iter(b)
from itertools import ifilter
maxint = sys.maxint
def iterkeys(d):
"""Return an iterator over the keys of a dictionary."""
return getattr(d, 'keys' if PY3 else 'iterkeys')()
def itervalues(d):
"""Return an iterator over the values of a dictionary."""
return getattr(d, 'values' if PY3 else 'itervalues')()
def iteritems(d):
"""Return an iterator over the items of a dictionary."""
return getattr(d, 'items' if PY3 else 'iteritems')()
try:
from collections.abc import Mapping # python >= 3.3
except ImportError:
from collections import Mapping # python < 3.3

View File

@@ -1,114 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: common/utils.py
#
# Miscellaneous utilities for elftools
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from contextlib import contextmanager
from .exceptions import ELFParseError, ELFError, DWARFError
from .py3compat import int2byte
from ..construct import ConstructError, ULInt8
def merge_dicts(*dicts):
    """Given any number of dicts, merge them into a new one.

    Later dicts win on key collisions; the inputs are not modified.
    """
    result = {}
    for d in dicts:
        result.update(d)
    return result
def bytelist2string(bytelist):
""" Convert a list of byte values (e.g. [0x10 0x20 0x00]) to a bytes object
(e.g. b'\x10\x20\x00').
"""
return b''.join(int2byte(b) for b in bytelist)
def struct_parse(struct, stream, stream_pos=None):
""" Convenience function for using the given struct to parse a stream.
If stream_pos is provided, the stream is seeked to this position before
the parsing is done. Otherwise, the current position of the stream is
used.
Wraps the error thrown by construct with ELFParseError.
"""
try:
if stream_pos is not None:
stream.seek(stream_pos)
return struct.parse_stream(stream)
except ConstructError as e:
raise ELFParseError(str(e))
def parse_cstring_from_stream(stream, stream_pos=None):
""" Parse a C-string from the given stream. The string is returned without
the terminating \x00 byte. If the terminating byte wasn't found, None
is returned (the stream is exhausted).
If stream_pos is provided, the stream is seeked to this position before
the parsing is done. Otherwise, the current position of the stream is
used.
Note: a bytes object is returned here, because this is what's read from
the binary file.
"""
if stream_pos is not None:
stream.seek(stream_pos)
CHUNKSIZE = 64
chunks = []
found = False
while True:
chunk = stream.read(CHUNKSIZE)
end_index = chunk.find(b'\x00')
if end_index >= 0:
chunks.append(chunk[:end_index])
found = True
break
else:
chunks.append(chunk)
if len(chunk) < CHUNKSIZE:
break
return b''.join(chunks) if found else None
def elf_assert(cond, msg=''):
""" Assert that cond is True, otherwise raise ELFError(msg)
"""
_assert_with_exception(cond, msg, ELFError)
def dwarf_assert(cond, msg=''):
""" Assert that cond is True, otherwise raise DWARFError(msg)
"""
_assert_with_exception(cond, msg, DWARFError)
@contextmanager
def preserve_stream_pos(stream):
""" Usage:
# stream has some position FOO (return value of stream.tell())
with preserve_stream_pos(stream):
# do stuff that manipulates the stream
# stream still has position FOO
"""
saved_pos = stream.tell()
yield
stream.seek(saved_pos)
def roundup(num, bits):
""" Round up a number to nearest multiple of 2^bits. The result is a number
where the least significant bits passed in bits are 0.
"""
return (num - 1 | (1 << bits) - 1) + 1
def read_blob(stream, length):
"""Read length bytes from stream, return a list of ints
"""
return [struct_parse(ULInt8(''), stream) for i in range(length)]
#------------------------- PRIVATE -------------------------
def _assert_with_exception(cond, msg, exception_type):
if not cond:
raise exception_type(msg)

View File

@@ -1,19 +0,0 @@
Copyright (C) 2009 Tomer Filiba, 2010-2011 Corbin Simpson
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,13 +0,0 @@
construct is a Python library for declarative parsing and building of binary
data. This is my fork of construct 2, with some modifications for Python 3
and bug fixes. The construct website is http://construct.readthedocs.org
pyelftools carries construct around because construct has been abandoned for
a long time and didn't get bugfixes; it also didn't work with Python 3.
These days (Feb 2018) construct is maintained again, but its APIs have
undergone extensive changes that would require rewriting all of the
construct-facing code in pyelftools. I'm still evaluating the pros/cons of
this effort. See https://github.com/eliben/pyelftools/issues/180 for details.
LICENSE is the original license.

View File

@@ -1,110 +0,0 @@
"""
#### ####
## #### ## ## #### ###### ##### ## ## #### ###### ## ##
## ## ## ### ## ## ## ## ## ## ## ## ## #### ##
## ## ## ###### ### ## ##### ## ## ## ## ##
## ## ## ## ### ## ## ## ## ## ## ## ## ##
#### #### ## ## #### ## ## ## ##### #### ## ######
Parsing made even more fun (and faster too)
Homepage:
http://construct.wikispaces.com (including online tutorial)
Typical usage:
>>> from construct import *
Hands-on example:
>>> from construct import *
>>> s = Struct("foo",
... UBInt8("a"),
... UBInt16("b"),
... )
>>> s.parse("\\x01\\x02\\x03")
Container(a = 1, b = 515)
>>> print s.parse("\\x01\\x02\\x03")
Container:
a = 1
b = 515
>>> s.build(Container(a = 1, b = 0x0203))
"\\x01\\x02\\x03"
"""
from .core import *
from .adapters import *
from .macros import *
from .debug import Probe, Debugger
#===============================================================================
# Metadata
#===============================================================================
__author__ = "tomer filiba (tomerfiliba [at] gmail.com)"
__maintainer__ = "Corbin Simpson <MostAwesomeDude@gmail.com>"
__version__ = "2.06"
#===============================================================================
# Shorthand expressions
#===============================================================================
Bits = BitField
Byte = UBInt8
Bytes = Field
Const = ConstAdapter
Tunnel = TunnelAdapter
Embed = Embedded
#===============================================================================
# Deprecated names
# Next scheduled name cleanout: 2.1
#===============================================================================
import functools, warnings
def deprecated(f):
@functools.wraps(f)
def wrapper(*args, **kwargs):
warnings.warn(
"This name is deprecated, use %s instead" % f.__name__,
DeprecationWarning, stacklevel=2)
return f(*args, **kwargs)
return wrapper
MetaBytes = deprecated(MetaField)
GreedyRepeater = deprecated(GreedyRange)
OptionalGreedyRepeater = deprecated(OptionalGreedyRange)
Repeater = deprecated(Range)
StrictRepeater = deprecated(Array)
MetaRepeater = deprecated(Array)
OneOfValidator = deprecated(OneOf)
NoneOfValidator = deprecated(NoneOf)
#===============================================================================
# exposed names
#===============================================================================
__all__ = [
'AdaptationError', 'Adapter', 'Alias', 'Aligned', 'AlignedStruct',
'Anchor', 'Array', 'ArrayError', 'BFloat32', 'BFloat64', 'Bit', 'BitField',
'BitIntegerAdapter', 'BitIntegerError', 'BitStruct', 'Bits', 'Bitwise',
'Buffered', 'Byte', 'Bytes', 'CString', 'CStringAdapter', 'Const',
'ConstAdapter', 'ConstError', 'Construct', 'ConstructError', 'Container',
'Debugger', 'Embed', 'Embedded', 'EmbeddedBitStruct', 'Enum', 'ExprAdapter',
'Field', 'FieldError', 'Flag', 'FlagsAdapter', 'FlagsContainer',
'FlagsEnum', 'FormatField', 'GreedyRange', 'GreedyRepeater',
'HexDumpAdapter', 'If', 'IfThenElse', 'IndexingAdapter', 'LFloat32',
'LFloat64', 'LazyBound', 'LengthValueAdapter', 'ListContainer',
'MappingAdapter', 'MappingError', 'MetaArray', 'MetaBytes', 'MetaField',
'MetaRepeater', 'NFloat32', 'NFloat64', 'Nibble', 'NoneOf',
'NoneOfValidator', 'Octet', 'OnDemand', 'OnDemandPointer', 'OneOf',
'OneOfValidator', 'OpenRange', 'Optional', 'OptionalGreedyRange',
'OptionalGreedyRepeater', 'PaddedStringAdapter', 'Padding',
'PaddingAdapter', 'PaddingError', 'PascalString', 'Pass', 'Peek',
'Pointer', 'PrefixedArray', 'Probe', 'Range', 'RangeError', 'Reconfig',
'Rename', 'RepeatUntil', 'Repeater', 'Restream', 'SBInt16', 'SBInt32',
'SBInt64', 'SBInt8', 'SLInt16', 'SLInt32', 'SLInt64', 'SLInt8', 'SNInt16',
'SNInt32', 'SNInt64', 'SNInt8', 'Select', 'SelectError', 'Sequence',
'SizeofError', 'SlicingAdapter', 'StaticField', 'StrictRepeater', 'String',
'StringAdapter', 'Struct', 'Subconstruct', 'Switch', 'SwitchError',
'SymmetricMapping', 'Terminator', 'TerminatorError', 'Tunnel',
'TunnelAdapter', 'UBInt16', 'UBInt32', 'UBInt64', 'UBInt8', 'ULInt16',
'ULInt32', 'ULInt64', 'ULInt8', 'UNInt16', 'UNInt32', 'UNInt64', 'UNInt8',
'Union', 'ValidationError', 'Validator', 'Value', "Magic",
]

View File

@@ -1,470 +0,0 @@
from .core import Adapter, AdaptationError, Pass
from .lib import int_to_bin, bin_to_int, swap_bytes
from .lib import FlagsContainer, HexString
from .lib.py3compat import BytesIO, decodebytes
#===============================================================================
# exceptions
#===============================================================================
class BitIntegerError(AdaptationError):
    # Raised by BitIntegerAdapter, e.g. when encoding a negative value into
    # an unsigned bit field.
    __slots__ = []
class MappingError(AdaptationError):
    # Raised by MappingAdapter when an object has no mapping entry and no
    # default was supplied.
    __slots__ = []
class ConstError(AdaptationError):
    # Raised by ConstAdapter when the parsed/built value differs from the
    # expected constant.
    __slots__ = []
class ValidationError(AdaptationError):
    # Raised by Validator subclasses (OneOf/NoneOf) on an invalid object.
    __slots__ = []
class PaddingError(AdaptationError):
    # Raised by PaddingAdapter (strict mode) on a padding-pattern mismatch.
    __slots__ = []
#===============================================================================
# adapters
#===============================================================================
class BitIntegerAdapter(Adapter):
    """
    Adapter for bit-integers: converts bit-strings (one byte per bit, as
    produced by int_to_bin) to integers and back. See BitField.

    Parameters:
    * subcon - the subcon to adapt
    * width - the size of the subcon, in bits
    * swapped - whether to swap byte order (little endian/big endian).
      default is False (big endian)
    * signed - whether the value is signed (two's complement). the default
      is False (unsigned)
    * bytesize - number of bits per byte, used for byte-swapping (if swapped).
      default is 8.
    """
    __slots__ = ["width", "swapped", "signed", "bytesize"]
    def __init__(self, subcon, width, swapped = False, signed = False,
                 bytesize = 8):
        Adapter.__init__(self, subcon)
        self.width = width
        self.swapped = swapped
        self.signed = signed
        self.bytesize = bytesize
    def _encode(self, obj, context):
        # an unsigned field cannot represent negative values
        if not self.signed and obj < 0:
            raise BitIntegerError("object is negative, but field is not signed",
                obj)
        bits = int_to_bin(obj, width = self.width)
        if not self.swapped:
            return bits
        return swap_bytes(bits, bytesize = self.bytesize)
    def _decode(self, obj, context):
        raw = swap_bytes(obj, bytesize = self.bytesize) if self.swapped else obj
        return bin_to_int(raw, signed = self.signed)
class MappingAdapter(Adapter):
    """
    Adapter that translates objects through a pair of lookup tables.
    See SymmetricMapping and Enum.

    Parameters:
    * subcon - the subcon to map
    * decoding - the decoding (parsing) mapping (a dict)
    * encoding - the encoding (building) mapping (a dict)
    * decdefault - fallback when an object is missing from the decoding
      mapping: NotImplemented (the default) raises MappingError, `Pass`
      returns the unmapped object as-is, anything else is returned verbatim
    * encdefault - same semantics, for the encoding mapping
    """
    __slots__ = ["encoding", "decoding", "encdefault", "decdefault"]
    def __init__(self, subcon, decoding, encoding,
                 decdefault = NotImplemented, encdefault = NotImplemented):
        Adapter.__init__(self, subcon)
        self.decoding = decoding
        self.encoding = encoding
        self.decdefault = decdefault
        self.encdefault = encdefault
    def _encode(self, obj, context):
        # TypeError covers unhashable objects, KeyError missing entries
        try:
            return self.encoding[obj]
        except (KeyError, TypeError):
            fallback = self.encdefault
            if fallback is NotImplemented:
                raise MappingError("no encoding mapping for %r [%s]" % (
                    obj, self.subcon.name))
            return obj if fallback is Pass else fallback
    def _decode(self, obj, context):
        try:
            return self.decoding[obj]
        except (KeyError, TypeError):
            fallback = self.decdefault
            if fallback is NotImplemented:
                raise MappingError("no decoding mapping for %r [%s]" % (
                    obj, self.subcon.name))
            return obj if fallback is Pass else fallback
class FlagsAdapter(Adapter):
    """
    Adapter between an integer bitmask and a FlagsContainer of booleans.
    Not intended for direct usage; see FlagsEnum.

    Parameters:
    * subcon - the subcon to extract
    * flags - a dictionary mapping flag-names to their bit value
    """
    __slots__ = ["flags"]
    def __init__(self, subcon, flags):
        Adapter.__init__(self, subcon)
        self.flags = flags
    def _encode(self, obj, context):
        mask = 0
        for flag_name, bit in self.flags.items():
            # a missing attribute counts as "flag not set"
            if getattr(obj, flag_name, False):
                mask |= bit
        return mask
    def _decode(self, obj, context):
        decoded = FlagsContainer()
        for flag_name, bit in self.flags.items():
            setattr(decoded, flag_name, bool(obj & bit))
        return decoded
class StringAdapter(Adapter):
    """
    Adapter for strings: converts between raw bytes and (optionally) a
    decoded text string. See String.

    Parameters:
    * subcon - the subcon to convert
    * encoding - the character encoding name (e.g., "utf8"), or None to
      return raw bytes (usually 8-bit ASCII).
    """
    __slots__ = ["encoding"]
    def __init__(self, subcon, encoding = None):
        Adapter.__init__(self, subcon)
        self.encoding = encoding
    def _encode(self, obj, context):
        return obj.encode(self.encoding) if self.encoding else obj
    def _decode(self, obj, context):
        return obj.decode(self.encoding) if self.encoding else obj
class PaddedStringAdapter(Adapter):
    r"""
    Adapter for padded strings: strips padding when parsing, applies
    (and if necessary trims) padding when building. See String.

    Parameters:
    * subcon - the subcon to adapt
    * padchar - the padding character. default is b"\x00".
    * paddir - where padding is placed ("right", "left", or "center").
      the default is "right".
    * trimdir - where trimming takes place ("right" or "left") when the
      given string is too long for the field. the default is "right".
      trimming is only meaningful for building.
    """
    __slots__ = ["padchar", "paddir", "trimdir"]
    def __init__(self, subcon, padchar = b"\x00", paddir = "right",
                 trimdir = "right"):
        if paddir not in ("right", "left", "center"):
            raise ValueError("paddir must be 'right', 'left' or 'center'",
                paddir)
        if trimdir not in ("right", "left"):
            raise ValueError("trimdir must be 'right' or 'left'", trimdir)
        Adapter.__init__(self, subcon)
        self.padchar = padchar
        self.paddir = paddir
        self.trimdir = trimdir
    def _decode(self, obj, context):
        if self.paddir == "left":
            return obj.lstrip(self.padchar)
        if self.paddir == "right":
            return obj.rstrip(self.padchar)
        return obj.strip(self.padchar)
    def _encode(self, obj, context):
        size = self._sizeof(context)
        # pick the str/bytes justification method matching the pad direction
        pad = {"right": obj.ljust, "left": obj.rjust}.get(self.paddir,
                                                          obj.center)
        padded = pad(size, self.padchar)
        if len(padded) <= size:
            return padded
        return padded[:size] if self.trimdir == "right" else padded[-size:]
class LengthValueAdapter(Adapter):
    """
    Adapter for (length, value) pairs: parsing keeps only the value,
    building derives the length from the value itself.
    See PrefixedArray and PascalString.

    Parameters:
    * subcon - the subcon returning a length-value pair
    """
    __slots__ = []
    def _encode(self, obj, context):
        return len(obj), obj
    def _decode(self, obj, context):
        # index 0 holds the length, which is redundant once parsed
        return obj[1]
class CStringAdapter(StringAdapter):
    r"""
    Adapter for C-style strings (strings followed by a terminator char).

    Parameters:
    * subcon - the subcon to convert
    * terminators - a sequence of terminator chars. default is b"\x00".
    * encoding - the character encoding to use (e.g., "utf8"), or None to
      return raw bytes. the terminator characters are not affected by the
      encoding.
    """
    __slots__ = ["terminators"]
    def __init__(self, subcon, terminators = b"\x00", encoding = None):
        StringAdapter.__init__(self, subcon, encoding = encoding)
        self.terminators = terminators
    def _encode(self, obj, context):
        # always append the first terminator when building
        terminator = self.terminators[0:1]
        return StringAdapter._encode(self, obj, context) + terminator
    def _decode(self, obj, context):
        # obj is a sequence of byte chunks; the last one is the terminator
        payload = b''.join(obj[:-1])
        return StringAdapter._decode(self, payload, context)
class TunnelAdapter(Adapter):
    """
    Adapter for tunneling (as in protocol tunneling): one construct layered
    upon another. Parsing is bottom-up -- the lower layer parses the raw
    data (it must yield a string/bytes), which is then re-parsed by the
    upper layer. Building is top-down -- the upper layer builds the data,
    and the lower layer writes it to the stream.

    Parameters:
    * subcon - the lower layer subcon
    * inner_subcon - the upper layer (tunneled/nested) subcon

    Example:
    # a pascal string containing compressed data (zlib encoding), so first
    # the string is read, decompressed, and finally re-parsed as an array
    # of UBInt16
    TunnelAdapter(
        PascalString("data", encoding = "zlib"),
        GreedyRange(UBInt16("elements"))
    )
    """
    __slots__ = ["inner_subcon"]
    def __init__(self, subcon, inner_subcon):
        Adapter.__init__(self, subcon)
        self.inner_subcon = inner_subcon
    def _decode(self, obj, context):
        inner_stream = BytesIO(obj)
        return self.inner_subcon._parse(inner_stream, context)
    def _encode(self, obj, context):
        buf = BytesIO()
        self.inner_subcon._build(obj, buf, context)
        return buf.getvalue()
class ExprAdapter(Adapter):
    """
    A generic adapter that accepts 'encoder' and 'decoder' as parameters. You
    can use ExprAdapter instead of writing a full-blown class when only a
    simple expression is needed.
    Parameters:
    * subcon - the subcon to adapt
    * encoder - a function that takes (obj, context) and returns an encoded
      version of obj
    * decoder - a function that takes (obj, context) and returns a decoded
      version of obj
    Example:
    ExprAdapter(UBInt8("foo"),
        encoder = lambda obj, ctx: obj / 4,
        decoder = lambda obj, ctx: obj * 4,
    )
    """
    # _encode/_decode are declared as *instance* attributes here: assigning
    # the supplied callables shadows the Adapter method hooks per instance.
    __slots__ = ["_encode", "_decode"]
    def __init__(self, subcon, encoder, decoder):
        Adapter.__init__(self, subcon)
        self._encode = encoder
        self._decode = decoder
class HexDumpAdapter(Adapter):
    """
    Adapter that wraps parsed bytes in a HexString, so they render as a
    hex dump when converted to a string.

    Parameters:
    * subcon - the subcon to wrap
    * linesize - bytes per dumped line. default is 16.
    """
    __slots__ = ["linesize"]
    def __init__(self, subcon, linesize = 16):
        Adapter.__init__(self, subcon)
        self.linesize = linesize
    def _decode(self, obj, context):
        return HexString(obj, linesize = self.linesize)
    def _encode(self, obj, context):
        # building passes the bytes through untouched
        return obj
class ConstAdapter(Adapter):
    """
    Adapter for enforcing a constant value ("magic numbers"). When decoding,
    the parsed value is checked against the constant; when building, the
    constant is substituted in (None means "use the constant").

    Parameters:
    * subcon - the subcon to validate
    * value - the expected value

    Example:
    Const(Field("signature", 2), "MZ")
    """
    __slots__ = ["value"]
    def __init__(self, subcon, value):
        Adapter.__init__(self, subcon)
        self.value = value
    def _encode(self, obj, context):
        if obj is not None and obj != self.value:
            raise ConstError("expected %r, found %r" % (self.value, obj))
        return self.value
    def _decode(self, obj, context):
        if obj != self.value:
            raise ConstError("expected %r, found %r" % (self.value, obj))
        return obj
class SlicingAdapter(Adapter):
    """
    Adapter for slicing a list (getting a slice from that list).

    Parameters:
    * subcon - the subcon to slice
    * start - start index
    * stop - stop index (or None for up-to-end)
    * step - step (or None for every element)
    """
    __slots__ = ["start", "stop", "step"]
    def __init__(self, subcon, start, stop = None, step = None):
        # FIX: `step` was documented and listed in __slots__ but was never
        # accepted nor assigned, so any access to self.step raised
        # AttributeError. It now defaults to None, which slices identically
        # to the previous behavior.
        Adapter.__init__(self, subcon)
        self.start = start
        self.stop = stop
        self.step = step
    def _encode(self, obj, context):
        # NOTE(review): building only re-pads the leading `start` items;
        # elements dropped by stop/step cannot be reconstructed here.
        if self.start is None:
            return obj
        return [None] * self.start + obj
    def _decode(self, obj, context):
        return obj[self.start:self.stop:self.step]
class IndexingAdapter(Adapter):
    """
    Adapter for indexing a list (getting a single item from that list).

    Parameters:
    * subcon - the subcon to index
    * index - the index of the list to get
    """
    __slots__ = ["index"]
    def __init__(self, subcon, index):
        Adapter.__init__(self, subcon)
        # FIX: use isinstance instead of an exact `type(...) is int` check,
        # so int subclasses are accepted as well.
        if not isinstance(index, int):
            raise TypeError("index must be an integer", type(index))
        self.index = index
    def _encode(self, obj, context):
        # pad with None placeholders so the value lands at `index`
        return [None] * self.index + [obj]
    def _decode(self, obj, context):
        return obj[self.index]
class PaddingAdapter(Adapter):
    r"""
    Adapter for padding: builds a run of the pattern byte, and (optionally)
    verifies the parsed padding matches it.

    Parameters:
    * subcon - the subcon to pad
    * pattern - the padding pattern (character as byte). default is b"\x00"
    * strict - whether or not to verify, during parsing, that the given
      padding matches the padding pattern. default is False (unstrict)
    """
    __slots__ = ["pattern", "strict"]
    def __init__(self, subcon, pattern = b"\x00", strict = False):
        Adapter.__init__(self, subcon)
        self.pattern = pattern
        self.strict = strict
    def _encode(self, obj, context):
        return self.pattern * self._sizeof(context)
    def _decode(self, obj, context):
        if not self.strict:
            return obj
        expected = self.pattern * self._sizeof(context)
        if obj != expected:
            raise PaddingError("expected %r, found %r" % (expected, obj))
        return obj
#===============================================================================
# validators
#===============================================================================
class Validator(Adapter):
    """
    Abstract base for validators: checks a condition on the encoded/decoded
    object and raises ValidationError when it does not hold.
    Override _validate(obj, context) in deriving classes.

    Parameters:
    * subcon - the subcon to validate
    """
    __slots__ = []
    def _decode(self, obj, context):
        if self._validate(obj, context):
            return obj
        raise ValidationError("invalid object", obj)
    def _encode(self, obj, context):
        # building applies exactly the same check as parsing
        return self._decode(obj, context)
    def _validate(self, obj, context):
        raise NotImplementedError()
class OneOf(Validator):
    """
    Validates that the object is one of the listed values.
    :param ``Construct`` subcon: object to validate
    :param iterable valids: a set of valid values
    >>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x05")
    5
    >>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08")
    Traceback (most recent call last):
        ...
    construct.core.ValidationError: ('invalid object', 8)
    >>>
    >>> OneOf(UBInt8("foo"), [4,5,6,7]).build(5)
    '\\x05'
    >>> OneOf(UBInt8("foo"), [4,5,6,7]).build(9)
    Traceback (most recent call last):
        ...
    construct.core.ValidationError: ('invalid object', 9)
    """
    __slots__ = ["valids"]
    def __init__(self, subcon, valids):
        Validator.__init__(self, subcon)
        self.valids = valids
    def _validate(self, obj, context):
        # membership test; pass a set as `valids` for O(1) lookups
        return obj in self.valids
class NoneOf(Validator):
    """
    Validates that the object is none of the listed values.
    :param ``Construct`` subcon: object to validate
    :param iterable invalids: a set of invalid values
    >>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08")
    8
    >>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x06")
    Traceback (most recent call last):
        ...
    construct.core.ValidationError: ('invalid object', 6)
    """
    __slots__ = ["invalids"]
    def __init__(self, subcon, invalids):
        Validator.__init__(self, subcon)
        self.invalids = invalids
    def _validate(self, obj, context):
        # inverse membership test; pass a set as `invalids` for O(1) lookups
        return obj not in self.invalids

View File

@@ -1,133 +0,0 @@
"""
Debugging utilities for constructs
"""
from __future__ import print_function
import sys
import traceback
import pdb
import inspect
from .core import Construct, Subconstruct
from .lib import HexString, Container, ListContainer
class Probe(Construct):
    """
    A probe: dumps the context, stack frames, and stream content to the screen
    to aid the debugging process. A probe consumes nothing from the stream and
    produces no value. See also Debugger.

    Parameters:
    * name - the display name
    * show_stream - whether or not to show stream contents. default is True.
      the stream must be seekable.
    * show_context - whether or not to show the context. default is True.
    * show_stack - whether or not to show the upper stack frames. default
      is True.
    * stream_lookahead - the number of bytes to dump when show_stream is set.
      default is 100.

    Example:
    Struct("foo",
        UBInt8("a"),
        Probe("between a and b"),
        UBInt8("b"),
    )
    """
    __slots__ = [
        "printname", "show_stream", "show_context", "show_stack",
        "stream_lookahead"
    ]
    # class-wide counter used to auto-name anonymous probes
    counter = 0
    def __init__(self, name = None, show_stream = True,
                 show_context = True, show_stack = True,
                 stream_lookahead = 100):
        Construct.__init__(self, None)
        if name is None:
            Probe.counter += 1
            name = "<unnamed %d>" % (Probe.counter,)
        self.printname = name
        self.show_stream = show_stream
        self.show_context = show_context
        self.show_stack = show_stack
        self.stream_lookahead = stream_lookahead
    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.printname)
    def _parse(self, stream, context):
        # only prints; returns None (the probe parses nothing)
        self.printout(stream, context)
    def _build(self, obj, stream, context):
        # only prints; writes nothing to the stream
        self.printout(stream, context)
    def _sizeof(self, context):
        return 0
    def printout(self, stream, context):
        """Dump the requested diagnostics (stream/context/stack) to stdout."""
        obj = Container()
        if self.show_stream:
            obj.stream_position = stream.tell()
            follows = stream.read(self.stream_lookahead)
            if not follows:
                obj.following_stream_data = "EOF reached"
            else:
                # rewind so the probe does not consume the peeked bytes
                stream.seek(-len(follows), 1)
                obj.following_stream_data = HexString(follows)
            # FIX: a stray Python-2 style `print` statement used to sit here;
            # under `from __future__ import print_function` it evaluated the
            # function object without calling it (a no-op) and was removed.
        if self.show_context:
            obj.context = context
        if self.show_stack:
            obj.stack = ListContainer()
            # skip this frame and the outermost frame
            frames = [s[0] for s in inspect.stack()][1:-1]
            frames.reverse()
            for f in frames:
                a = Container()
                a.__update__(f.f_locals)
                obj.stack.append(a)
        print("=" * 80)
        print("Probe", self.printname)
        print(obj)
        print("=" * 80)
class Debugger(Subconstruct):
    """
    A pdb-based debugger. When an exception occurs in the subcon, a debugger
    will appear and allow you to debug the error (and even fix on-the-fly).
    Parameters:
    * subcon - the subcon to debug
    Example:
    Debugger(
        Enum(UBInt8("foo"),
            a = 1,
            b = 2,
            c = 3
        )
    )
    """
    # retval is only set while debugging a parse failure; it lets the user
    # supply a substitute return value from within pdb.
    __slots__ = ["retval"]
    def _parse(self, stream, context):
        try:
            return self.subcon._parse(stream, context)
        except Exception:
            # NotImplemented is the sentinel for "no replacement provided"
            self.retval = NotImplemented
            self.handle_exc("(you can set the value of 'self.retval', "
                "which will be returned)")
            if self.retval is NotImplemented:
                raise
            else:
                return self.retval
    def _build(self, obj, stream, context):
        try:
            self.subcon._build(obj, stream, context)
        except Exception:
            # NOTE(review): unlike _parse, the exception is swallowed after
            # the pdb session ends (build continues) -- confirm intended.
            self.handle_exc()
    def handle_exc(self, msg = None):
        # Print the traceback (minus this frame) and open a post-mortem pdb
        # session on the failing frame.
        print("=" * 80)
        print("Debugging exception of %s:" % (self.subcon,))
        print("".join(traceback.format_exception(*sys.exc_info())[1:]))
        if msg:
            print(msg)
        pdb.post_mortem(sys.exc_info()[2])
        print("=" * 80)

View File

@@ -1,7 +0,0 @@
from .binary import (
int_to_bin, bin_to_int, swap_bytes, encode_bin, decode_bin)
from .bitstream import BitStreamReader, BitStreamWriter
from .container import (Container, FlagsContainer, ListContainer,
LazyContainer)
from .hex import HexString, hexdump

View File

@@ -1,117 +0,0 @@
from .py3compat import int2byte
def int_to_bin(number, width=32):
    r"""
    Convert an integer into its binary representation as a bytes object,
    one byte per bit ('\x00' or '\x01'), MSBit first.

    `width` is the number of bits to generate. If width exceeds the bits
    needed, the value is sign-extended; if it is smaller, the representation
    is trimmed to the lowest `width` bits. Negative numbers are rendered in
    two's complement.

    Examples:
    >>> int_to_bin(19, 5)
    b'\x01\x00\x00\x01\x01'
    >>> int_to_bin(19, 8)
    b'\x00\x00\x00\x01\x00\x00\x01\x01'
    """
    if number < 0:
        # two's complement within `width` bits
        number += 1 << width
    bits = bytearray(width)
    for pos in range(width - 1, -1, -1):
        if not number:
            break
        bits[pos] = number & 1
        number >>= 1
    return bytes(bits)
# Accepted encodings of a single bit: raw byte values 0/1, ASCII '0'/'1'
# (48/49), plus the single-character-string forms seen when iterating a
# bytes object under Python 2.
_bit_values = {
    0: 0,
    1: 1,
    48: 0, # '0'
    49: 1, # '1'

    # The following are for Python 2, in which iteration over a bytes object
    # yields single-character bytes and not integers.
    '\x00': 0,
    '\x01': 1,
    '0': 0,
    '1': 1,
}
def bin_to_int(bits, signed=False):
    r"""
    Logical opposite of int_to_bin. Both '0' and '\x00' are considered zero,
    and both '1' and '\x01' are considered one. Set `signed` to True to
    interpret the number as a two's complement signed integer.
    """
    # FIX: removed an unused local (`ptr = 0`) left over from an earlier
    # implementation; behavior is unchanged.
    number = 0
    bias = 0
    if signed and _bit_values[bits[0]] == 1:
        # MSBit set: strip it and subtract the corresponding power of two
        bits = bits[1:]
        bias = 1 << len(bits)
    for b in bits:
        number <<= 1
        number |= _bit_values[b]
    return number - bias
def swap_bytes(bits, bytesize=8):
    r"""
    Reverse the order of `bytesize`-bit groups in a bit representation
    (an endianness byte swap over a b'' bit string).

    Example:
    >>> swap_bytes(b'00011011', 2)
    b'11100100'
    """
    groups = [bits[pos : pos + bytesize]
              for pos in range(0, len(bits), bytesize)]
    return b"".join(reversed(groups))
# Lookup tables between single byte values and their 8-bit expansion (as
# produced by int_to_bin). _char_to_bin is keyed by BOTH the int value and
# the one-byte bytes object, so iteration over a bytes object works on
# Python 2 (yields chars) and Python 3 (yields ints).
# NOTE: the loop variable `bin` shadows the builtin of the same name, but
# only during module import.
_char_to_bin = {}
_bin_to_char = {}
for i in range(256):
    ch = int2byte(i)
    bin = int_to_bin(i, 8)
    # Populate with for both keys i and ch, to support Python 2 & 3
    _char_to_bin[ch] = bin
    _char_to_bin[i] = bin
    _bin_to_char[bin] = ch
def encode_bin(data):
    """
    Create a binary representation of the given b'' object. Assume 8-bit
    ASCII. Example:
    >>> encode_bin('ab')
    b"\x00\x01\x01\x00\x00\x00\x00\x01\x00\x01\x01\x00\x00\x00\x01\x00"

    NOTE(review): on Python 3 the input must be bytes (iteration must yield
    ints or one-byte bytes, the keys of _char_to_bin); the str form shown in
    the example only worked on Python 2 -- confirm callers pass bytes.
    """
    return b"".join(_char_to_bin[ch] for ch in data)
def decode_bin(data):
    r"""
    Logical opposite of encode_bin: collapse a bit representation (one
    b'\x00'/b'\x01' byte per bit, 8 bits per output byte) back into the
    bytes it encodes.

    Raises ValueError if the input length is not a multiple of 8.
    """
    # FIX: the docstring previously read "Locical opposite of decode_bin" --
    # a typo plus a self-reference (this function inverts encode_bin).
    if len(data) & 7:
        raise ValueError("Data length must be a multiple of 8")
    # one table lookup per 8-bit group
    return b"".join(_bin_to_char[data[i : i + 8]]
                    for i in range(0, len(data), 8))

View File

@@ -1,77 +0,0 @@
from .binary import encode_bin, decode_bin
class BitStreamReader(object):
    """
    Wraps a byte stream and exposes it as a stream of bits, one byte per bit
    as produced by encode_bin. Bits left over from a partially consumed byte
    are kept in an internal buffer for the next read.
    """
    __slots__ = ["substream", "buffer", "total_size"]
    def __init__(self, substream):
        self.substream = substream
        self.total_size = 0
        # FIX: the buffer holds leftover *bits* (bytes, as returned by
        # encode_bin); it was initialized to the str "" which broke bytes
        # concatenation in read() on Python 3.
        self.buffer = b""
    def close(self):
        if self.total_size % 8 != 0:
            raise ValueError("total size of read data must be a multiple of 8",
                self.total_size)
    def tell(self):
        return self.substream.tell()
    def seek(self, pos, whence = 0):
        # seeking invalidates any buffered bits
        self.buffer = b""
        self.total_size = 0
        self.substream.seek(pos, whence)
    def read(self, count):
        """Return `count` bits (as a bytes bit-string of that length)."""
        if count < 0:
            raise ValueError("count cannot be negative")
        buffered = len(self.buffer)
        if count == 0:
            data = b""
        elif count <= buffered:
            # satisfied entirely from the leftover-bit buffer
            data = self.buffer[:count]
            self.buffer = self.buffer[count:]
        else:
            data = self.buffer
            count -= buffered
            # read just enough whole bytes to cover the remaining bits
            nbytes = count // 8
            if count & 7:
                nbytes += 1
            buf = encode_bin(self.substream.read(nbytes))
            data += buf[:count]
            self.buffer = buf[count:]
        self.total_size += len(data)
        return data
class BitStreamWriter(object):
    """
    Wraps a byte stream and accepts writes of bit-strings (bytes, one byte
    per bit as produced by encode_bin), flushing them to the substream as
    whole bytes.
    """
    __slots__ = ["substream", "buffer", "pos"]
    def __init__(self, substream):
        self.substream = substream
        self.buffer = []
        self.pos = 0
    def close(self):
        self.flush()
    def flush(self):
        # FIX: bit data is bytes on Python 3, so join with b"" (the previous
        # str "".join raised TypeError on bytes chunks)
        raw = decode_bin(b"".join(self.buffer))
        self.substream.write(raw)
        self.buffer = []
        self.pos = 0
    def tell(self):
        return self.substream.tell() + self.pos // 8
    def seek(self, pos, whence = 0):
        self.flush()
        self.substream.seek(pos, whence)
    def write(self, data):
        if not data:
            return
        # FIX: bit-strings are bytes on Python 3; the previous check
        # (`type(data) is not str`) rejected every valid write.
        if not isinstance(data, bytes):
            raise TypeError("data must be bytes, not %r" % (type(data),))
        self.buffer.append(data)

View File

@@ -1,161 +0,0 @@
"""
Various containers.
"""
from pprint import pformat
from .py3compat import MutableMapping
def recursion_lock(retval, lock_name = "__recursion_lock__"):
    """
    Method decorator guarding against re-entrant calls on the same instance:
    while a decorated method is running, any nested call on that instance
    short-circuits and returns `retval` instead of recursing forever.
    The lock flag is stored on the instance under `lock_name`.
    """
    def decorate(method):
        def guarded(self, *args, **kw):
            if getattr(self, lock_name, False):
                # already inside this call on this instance -- bail out
                return retval
            setattr(self, lock_name, True)
            try:
                return method(self, *args, **kw)
            finally:
                setattr(self, lock_name, False)
        guarded.__name__ = method.__name__
        return guarded
    return decorate
class Container(MutableMapping):
    """
    A generic bag of named attributes -- the common way to express parsed
    data. Item access and attribute access are interchangeable because the
    instance dictionary itself serves as the mapping.
    """
    def __init__(self, **kw):
        self.__dict__ = kw

    # -- core dictionary interface ------------------------------------
    def __getitem__(self, key):
        return self.__dict__[key]
    def __setitem__(self, key, value):
        self.__dict__[key] = value
    def __delitem__(self, key):
        del self.__dict__[key]
    def __len__(self):
        return len(self.__dict__)
    def __iter__(self):
        return iter(self.__dict__)
    def keys(self):
        return self.__dict__.keys()
    def __contains__(self, key):
        return key in self.__dict__

    # -- extended dictionary interface --------------------------------
    def update(self, other):
        self.__dict__.update(other)
    __update__ = update

    # -- rich comparisons ---------------------------------------------
    def __eq__(self, other):
        # anything without a __dict__ simply compares unequal
        try:
            return self.__dict__ == other.__dict__
        except AttributeError:
            return False
    def __ne__(self, other):
        return not self == other

    # -- copy interface ------------------------------------------------
    def copy(self):
        return self.__class__(**self.__dict__)
    __copy__ = copy

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.__dict__))
    def __str__(self):
        return "%s(%s)" % (self.__class__.__name__, str(self.__dict__))
class FlagsContainer(Container):
    """
    A Container specialized for flag sets: its str() form shows only the
    flags that are set, skipping names that start with an underscore.
    """
    @recursion_lock("<...>")
    def __str__(self):
        shown = dict((name, self[name]) for name in self
                     if self[name] and not name.startswith("_"))
        return "%s(%s)" % (self.__class__.__name__, pformat(shown))
class ListContainer(list):
    """
    A container for lists: a plain list whose str() pretty-prints via
    pformat, guarded against recursive self-reference (printed as "[...]").
    """
    # slot for the flag used by the recursion_lock decorator
    __slots__ = ["__recursion_lock__"]
    @recursion_lock("[...]")
    def __str__(self):
        return pformat(self)
class LazyContainer(object):
    """
    A deferred parse result: remembers the subcon, stream and stream
    position, and only parses (via `read`) when `value` is first accessed.
    The parsed result is cached; `has_value` tells whether it was read yet.
    """
    __slots__ = ["subcon", "stream", "pos", "context", "_value"]
    def __init__(self, subcon, stream, pos, context):
        self.subcon = subcon
        self.stream = stream
        self.pos = pos
        self.context = context
        # NotImplemented is the "not read yet" sentinel
        self._value = NotImplemented
    def __eq__(self, other):
        try:
            return self._value == other._value
        except AttributeError:
            return False
    def __ne__(self, other):
        return not (self == other)
    def __str__(self):
        return self.__pretty_str__()
    def __pretty_str__(self, nesting = 1, indentation = " "):
        if self._value is NotImplemented:
            body = "<unread>"
        elif hasattr(self._value, "__pretty_str__"):
            body = self._value.__pretty_str__(nesting, indentation)
        else:
            body = str(self._value)
        return "%s: %s" % (self.__class__.__name__, body)
    def read(self):
        """Parse now (seeking back to the remembered position)."""
        self.stream.seek(self.pos)
        return self.subcon._parse(self.stream, self.context)
    def dispose(self):
        """Drop all references so the stream/subcon can be reclaimed."""
        self.subcon = None
        self.stream = None
        self.context = None
        self.pos = None
    def _get_value(self):
        # parse lazily on first access, then serve the cached result
        if self._value is NotImplemented:
            self._value = self.read()
        return self._value
    value = property(_get_value)
    has_value = property(lambda self: self._value is not NotImplemented)

View File

@@ -1,43 +0,0 @@
from .py3compat import byte2int, int2byte, bytes2str
# Map an integer in the inclusive range 0-255 to its string byte
# representation: printable ASCII (32-127) maps to the character itself,
# everything else to "." (used for hexdump's right-hand column).
_printable = dict((i, ".") for i in range(256))
_printable.update((i, bytes2str(int2byte(i))) for i in range(32, 128))
def hexdump(data, linesize):
    """
    Render `data` (a bytes object) as classic hex-dump lines of `linesize`
    bytes each: offset, hex bytes, printable characters. Returns a list of
    str lines. Offsets use 4 hex digits for data under 64 KiB, 8 otherwise.
    """
    if len(data) < 65536:
        fmt = "%%04X %%-%ds %%s"
    else:
        fmt = "%%08X %%-%ds %%s"
    fmt = fmt % (3 * linesize - 1,)
    prettylines = []
    for offset in range(0, len(data), linesize):
        chunk = data[offset : offset + linesize]
        hextext = " ".join('%02x' % byte2int(b) for b in chunk)
        rawtext = "".join(_printable[byte2int(b)] for b in chunk)
        prettylines.append(fmt % (offset, str(hextext), str(rawtext)))
    return prettylines
class HexString(bytes):
    """
    A bytes subclass that renders as a hex dump when its string
    representation is requested (the dump is produced lazily by hexdump).
    """
    def __new__(cls, data, *args, **kwargs):
        # bytes is immutable, so the payload must be set in __new__;
        # extra args (linesize) are consumed by __init__
        return bytes.__new__(cls, data)
    def __init__(self, data, linesize = 16):
        self.linesize = linesize
    def __str__(self):
        if not self:
            return "''"
        return "\n" + "\n".join(hexdump(self, self.linesize))

View File

@@ -1,74 +0,0 @@
#-------------------------------------------------------------------------------
# py3compat.py
#
# Some Python2&3 compatibility code
#-------------------------------------------------------------------------------
import sys
# True when running under Python 3; selects which shim set is defined below.
PY3 = sys.version_info[0] == 3
try:
    from collections.abc import MutableMapping # python >= 3.3
except ImportError:
    from collections import MutableMapping # python < 3.3
if PY3:
    import io
    StringIO = io.StringIO
    BytesIO = io.BytesIO
    def bchr(i):
        """ When iterating over b'...' in Python 2 you get single b'_' chars
        and in Python 3 you get integers. Call bchr to always turn this
        to single b'_' chars.
        """
        return bytes((i,))
    def u(s):
        # str is already unicode on Python 3
        return s
    def int2byte(i):
        # int 0-255 -> single byte
        return bytes((i,))
    def byte2int(b):
        # iterating bytes on Python 3 already yields ints
        return b
    def str2bytes(s):
        return s.encode("latin-1")
    def str2unicode(s):
        return s
    def bytes2str(b):
        return b.decode('latin-1')
    def decodebytes(b, encoding):
        # NOTE(review): despite the name this *encodes* a str into bytes
        # (bytes(str, encoding)) -- confirm against callers before renaming.
        return bytes(b, encoding)
    advance_iterator = next
else:
    # Python 2 shims (names like `unicode` and cStringIO only exist there)
    import cStringIO
    StringIO = BytesIO = cStringIO.StringIO
    int2byte = chr
    byte2int = ord
    bchr = lambda i: i
    def u(s):
        return unicode(s, "unicode_escape")
    def str2bytes(s):
        return s
    def str2unicode(s):
        return unicode(s, "unicode_escape")
    def bytes2str(b):
        return b
    def decodebytes(b, encoding):
        return b.decode(encoding)
    def advance_iterator(it):
        return it.next()

View File

@@ -1,634 +0,0 @@
from .lib.py3compat import int2byte
from .lib import (BitStreamReader, BitStreamWriter, encode_bin,
decode_bin)
from .core import (Struct, MetaField, StaticField, FormatField,
OnDemand, Pointer, Switch, Value, RepeatUntil, MetaArray, Sequence, Range,
Select, Pass, SizeofError, Buffered, Restream, Reconfig)
from .adapters import (BitIntegerAdapter, PaddingAdapter,
ConstAdapter, CStringAdapter, LengthValueAdapter, IndexingAdapter,
PaddedStringAdapter, FlagsAdapter, StringAdapter, MappingAdapter)
#===============================================================================
# fields
#===============================================================================
def Field(name, length):
    """
    A field consisting of a specified number of bytes.

    :param str name: the name of the field
    :param length: either an integer (StaticField) or a callable taking the
        context and returning the length (MetaField)
    """
    return MetaField(name, length) if callable(length) else StaticField(name, length)
def BitField(name, length, swapped = False, signed = False, bytesize = 8):
    """
    BitFields, as the name suggests, are fields that operate on raw, unaligned
    bits, and therefore must be enclosed in a BitStruct. Using them is very
    similar to all normal fields: they take a name and a length (in bits).
    :param str name: name of the field
    :param int length: number of bits in the field, or a function that takes
                       the context as its argument and returns the length
    :param bool swapped: whether the value is byte-swapped
    :param bool signed: whether the value is signed
    :param int bytesize: number of bits per byte, for byte-swapping
    >>> foo = BitStruct("foo",
    ...     BitField("a", 3),
    ...     Flag("b"),
    ...     Padding(3),
    ...     Nibble("c"),
    ...     BitField("d", 5),
    ... )
    >>> foo.parse("\\xe1\\x1f")
    Container(a = 7, b = False, c = 8, d = 31)
    >>> foo = BitStruct("foo",
    ...     BitField("a", 3),
    ...     Flag("b"),
    ...     Padding(3),
    ...     Nibble("c"),
    ...     Struct("bar",
    ...         Nibble("d"),
    ...         Bit("e"),
    ...     )
    ... )
    >>> foo.parse("\\xe1\\x1f")
    Container(a = 7, b = False, bar = Container(d = 15, e = 1), c = 8)
    """
    raw_field = Field(name, length)
    return BitIntegerAdapter(raw_field, length, swapped = swapped,
                             signed = signed, bytesize = bytesize)
def Padding(length, pattern = b"\x00", strict = False):
    r"""
    A padding field whose parsed value is discarded.

    * length - the length of the field: an integer, or a function taking
      the context and returning the length
    * pattern - the padding pattern (character/byte) to use. default is b"\x00"
    * strict - whether or not to raise an exception if the actual padding
      pattern mismatches the desired pattern. default is False.
    """
    anonymous = Field(None, length)
    return PaddingAdapter(anonymous, pattern = pattern, strict = strict)
def Flag(name, truth = 1, falsehood = 0, default = False):
    """
    A flag: a one-byte field mapped onto the ``bool`` type.

    .. note:: This construct works with both bit and byte contexts.
    .. warning:: Flags default to False, not True. This is different from the
                 C and Python way of thinking about truth, and may be subject
                 to change in the future.

    :param str name: field name
    :param int truth: value of truth (default 1)
    :param int falsehood: value of falsehood (default 0)
    :param bool default: default value (default False)
    """
    mapping = {True: int2byte(truth), False: int2byte(falsehood)}
    return SymmetricMapping(Field(name, 1), mapping, default = default)
#===============================================================================
# field shortcuts
#===============================================================================
# -- sub-byte field shortcuts (all require an enclosing BitStruct) ----------
def Bit(name):
    """a 1-bit BitField; must be enclosed in a BitStruct"""
    return BitField(name, 1)
def Nibble(name):
    """a 4-bit BitField; must be enclosed in a BitStruct"""
    return BitField(name, 4)
def Octet(name):
    """an 8-bit BitField; must be enclosed in a BitStruct"""
    return BitField(name, 8)
# -- big endian integer shortcuts (struct format prefix ">") ----------------
def UBInt8(name):
    """unsigned, big endian 8-bit integer"""
    return FormatField(name, ">", "B")
def UBInt16(name):
    """unsigned, big endian 16-bit integer"""
    return FormatField(name, ">", "H")
def UBInt32(name):
    """unsigned, big endian 32-bit integer"""
    return FormatField(name, ">", "L")
def UBInt64(name):
    """unsigned, big endian 64-bit integer"""
    return FormatField(name, ">", "Q")
def SBInt8(name):
    """signed, big endian 8-bit integer"""
    return FormatField(name, ">", "b")
def SBInt16(name):
    """signed, big endian 16-bit integer"""
    return FormatField(name, ">", "h")
def SBInt32(name):
    """signed, big endian 32-bit integer"""
    return FormatField(name, ">", "l")
def SBInt64(name):
    """signed, big endian 64-bit integer"""
    return FormatField(name, ">", "q")
def ULInt8(name):
"""unsigned, little endian 8-bit integer"""
return FormatField(name, "<", "B")
def ULInt16(name):
"""unsigned, little endian 16-bit integer"""
return FormatField(name, "<", "H")
def ULInt32(name):
"""unsigned, little endian 32-bit integer"""
return FormatField(name, "<", "L")
def ULInt64(name):
"""unsigned, little endian 64-bit integer"""
return FormatField(name, "<", "Q")
def SLInt8(name):
"""signed, little endian 8-bit integer"""
return FormatField(name, "<", "b")
def SLInt16(name):
"""signed, little endian 16-bit integer"""
return FormatField(name, "<", "h")
def SLInt32(name):
"""signed, little endian 32-bit integer"""
return FormatField(name, "<", "l")
def SLInt64(name):
"""signed, little endian 64-bit integer"""
return FormatField(name, "<", "q")
def UNInt8(name):
"""unsigned, native endianity 8-bit integer"""
return FormatField(name, "=", "B")
def UNInt16(name):
"""unsigned, native endianity 16-bit integer"""
return FormatField(name, "=", "H")
def UNInt32(name):
"""unsigned, native endianity 32-bit integer"""
return FormatField(name, "=", "L")
def UNInt64(name):
"""unsigned, native endianity 64-bit integer"""
return FormatField(name, "=", "Q")
def SNInt8(name):
"""signed, native endianity 8-bit integer"""
return FormatField(name, "=", "b")
def SNInt16(name):
"""signed, native endianity 16-bit integer"""
return FormatField(name, "=", "h")
def SNInt32(name):
"""signed, native endianity 32-bit integer"""
return FormatField(name, "=", "l")
def SNInt64(name):
"""signed, native endianity 64-bit integer"""
return FormatField(name, "=", "q")
def BFloat32(name):
"""big endian, 32-bit IEEE floating point number"""
return FormatField(name, ">", "f")
def LFloat32(name):
"""little endian, 32-bit IEEE floating point number"""
return FormatField(name, "<", "f")
def NFloat32(name):
"""native endianity, 32-bit IEEE floating point number"""
return FormatField(name, "=", "f")
def BFloat64(name):
"""big endian, 64-bit IEEE floating point number"""
return FormatField(name, ">", "d")
def LFloat64(name):
"""little endian, 64-bit IEEE floating point number"""
return FormatField(name, "<", "d")
def NFloat64(name):
"""native endianity, 64-bit IEEE floating point number"""
return FormatField(name, "=", "d")
#===============================================================================
# arrays
#===============================================================================
def Array(count, subcon):
    """
    Repeats the given unit a fixed number of times.

    :param int count: number of times to repeat (may also be a function of
                      the context returning the count)
    :param ``Construct`` subcon: construct to repeat

    >>> c = Array(4, UBInt8("foo"))
    >>> c.parse("\\x01\\x02\\x03\\x04")
    [1, 2, 3, 4]
    >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06")
    [1, 2, 3, 4]
    >>> c.build([5,6,7,8])
    '\\x05\\x06\\x07\\x08'
    >>> c.build([5,6,7,8,9])
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 4..4, found 5
    """
    if callable(count):
        return MetaArray(count, subcon)
    # Constant count: wrap it in a trivial context function, and clear the
    # DYNAMIC flag since the size is known up front.
    arr = MetaArray(lambda ctx: count, subcon)
    arr._clear_flag(arr.FLAG_DYNAMIC)
    return arr
def PrefixedArray(subcon, length_field = UBInt8("length")):
    """An array prefixed by a length field.

    * subcon - the construct to be repeated
    * length_field - a construct returning an integer (the element count)
    """
    body = Sequence(subcon.name,
        length_field,
        Array(lambda ctx: ctx[length_field.name], subcon),
        nested = False,
    )
    return LengthValueAdapter(body)
def OpenRange(mincount, subcon):
    """A Range from `mincount` up to the platform's maximum integer."""
    import sys
    return Range(mincount, sys.maxsize, subcon)
def GreedyRange(subcon):
    """
    Repeats the given unit one or more times, consuming as much input as
    possible.

    :param ``Construct`` subcon: construct to repeat

    >>> from construct import GreedyRange, UBInt8
    >>> c = GreedyRange(UBInt8("foo"))
    >>> c.parse("\\x01")
    [1]
    >>> c.parse("\\x01\\x02\\x03")
    [1, 2, 3]
    >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06")
    [1, 2, 3, 4, 5, 6]
    >>> c.parse("")
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 1..2147483647, found 0
    >>> c.build([1,2])
    '\\x01\\x02'
    >>> c.build([])
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 1..2147483647, found 0
    """
    return OpenRange(1, subcon)
def OptionalGreedyRange(subcon):
    """
    Repeats the given unit zero or more times. This repeater never fails,
    since it accepts lists of any length.

    :param ``Construct`` subcon: construct to repeat

    >>> from construct import OptionalGreedyRange, UBInt8
    >>> c = OptionalGreedyRange(UBInt8("foo"))
    >>> c.parse("")
    []
    >>> c.parse("\\x01\\x02")
    [1, 2]
    >>> c.build([])
    ''
    >>> c.build([1,2])
    '\\x01\\x02'
    """
    return OpenRange(0, subcon)
#===============================================================================
# subconstructs
#===============================================================================
def Optional(subcon):
    """An optional construct: if parsing fails, returns None instead of
    raising.

    * subcon - the construct to optionally parse or build
    """
    return Select(subcon.name, subcon, Pass)
def Bitwise(subcon):
    """Converts the stream to bits and passes the bitstream to `subcon`.

    * subcon - a bitwise construct (usually a BitField)
    """
    # Subcons larger than MAX_BUFFER are wrapped by Restream rather than
    # Buffered. Implementation detail, don't stick your nose in :)
    MAX_BUFFER = 1024 * 8
    def resizer(length):
        # Bit lengths must translate to a whole number of bytes.
        if length & 7:
            raise SizeofError("size must be a multiple of 8", length)
        return length >> 3
    # Note: sizeof() is only evaluated for non-dynamic subcons (short-circuit).
    use_buffered = (not subcon._is_flag(subcon.FLAG_DYNAMIC)
                    and subcon.sizeof() < MAX_BUFFER)
    if use_buffered:
        return Buffered(subcon,
            encoder = decode_bin,
            decoder = encode_bin,
            resizer = resizer,
        )
    return Restream(subcon,
        stream_reader = BitStreamReader,
        stream_writer = BitStreamWriter,
        resizer = resizer)
def Aligned(subcon, modulus = 4, pattern = b"\x00"):
    r"""Aligns `subcon` to a `modulus` boundary using a padding pattern.

    * subcon - the subcon to align
    * modulus - the modulus boundary (default is 4)
    * pattern - the padding pattern (default is \x00)
    """
    if modulus < 2:
        raise ValueError("modulus must be >= 2", modulus)
    def padlength(ctx):
        # Bytes needed to reach the next multiple of `modulus`
        # (equivalent to (modulus - size % modulus) % modulus).
        return -subcon._sizeof(ctx) % modulus
    return SeqOfOne(subcon.name,
        subcon,
        Padding(padlength, pattern = pattern),
        nested = False,
    )
def SeqOfOne(name, *args, **kw):
    """A sequence in which only the first element is meaningful; the rest
    are discarded.

    * name - the name of the sequence
    * args - subconstructs
    * kw - any keyword arguments to Sequence
    """
    seq = Sequence(name, *args, **kw)
    return IndexingAdapter(seq, index = 0)
def Embedded(subcon):
    """Embeds a struct into the enclosing struct, flattening its fields.

    * subcon - the struct to embed
    """
    return Reconfig(subcon.name, subcon, subcon.FLAG_EMBED)
def Rename(newname, subcon):
    """Renames an existing construct without otherwise altering it.

    * newname - the new name
    * subcon - the subcon to rename
    """
    return Reconfig(newname, subcon)
def Alias(newname, oldname):
    """Creates an alias for an existing element in a struct.

    * newname - the new name
    * oldname - the name of an existing element
    """
    return Value(newname, lambda ctx: ctx[oldname])
#===============================================================================
# mapping
#===============================================================================
def SymmetricMapping(subcon, mapping, default = NotImplemented):
    """Defines a symmetrical mapping: a->b, b->a.

    * subcon - the subcon to map
    * mapping - the encoding mapping (a dict); the decoding mapping is
      achieved by reversing this mapping
    * default - the default value to use when no mapping is found. If no
      default value is given, an exception is raised. Setting to Pass would
      return the value "as is" (unmapped).
    """
    decoding = {v: k for k, v in mapping.items()}
    return MappingAdapter(subcon,
        encoding = mapping,
        decoding = decoding,
        encdefault = default,
        decdefault = default,
    )
def Enum(subcon, **kw):
    """A set of named values mapping.

    * subcon - the subcon to map
    * kw - keyword arguments which serve as the encoding mapping
    * _default_ - an optional, keyword-only argument that specifies the
      default value to use when the mapping is undefined. If not given,
      an exception is raised when the mapping is undefined. Use `Pass` to
      pass the unmapped value as-is.
    """
    # Pull the sentinel out of kw before using it as the mapping.
    default = kw.pop("_default_", NotImplemented)
    return SymmetricMapping(subcon, kw, default)
def FlagsEnum(subcon, **kw):
    """A set of flag values mapping (bitmask semantics).

    * subcon - the subcon to map
    * kw - keyword arguments which serve as the encoding mapping
    """
    return FlagsAdapter(subcon, kw)
#===============================================================================
# structs
#===============================================================================
def AlignedStruct(name, *subcons, **kw):
    """A struct whose fields are all individually aligned.

    * name - the name of the struct
    * subcons - the subcons that make up this structure
    * kw - keyword arguments to pass to Aligned: 'modulus' and 'pattern'
    """
    aligned_fields = [Aligned(sc, **kw) for sc in subcons]
    return Struct(name, *aligned_fields)
def BitStruct(name, *subcons):
    """A struct of bitwise fields.

    * name - the name of the struct
    * subcons - the subcons that make up this structure
    """
    return Bitwise(Struct(name, *subcons))
def EmbeddedBitStruct(*subcons):
    """An embedded (anonymous) BitStruct; no name is necessary.

    * subcons - the subcons that make up this structure
    """
    return Bitwise(Embedded(Struct(None, *subcons)))
#===============================================================================
# strings
#===============================================================================
def String(name, length, encoding=None, padchar=None, paddir="right",
           trimdir="right"):
    """
    A configurable, fixed-length string field.

    The padding character must be specified for padding and trimming to work.

    :param str name: name
    :param int length: length, in bytes
    :param str encoding: encoding (e.g. "utf8") or None for no encoding
    :param str padchar: optional character to pad out strings
    :param str paddir: direction to pad out strings; one of "right", "left",
                       or "both"
    :param str trimdir: direction to trim strings; one of "right", "left"

    >>> from construct import String
    >>> String("foo", 5).parse("hello")
    'hello'
    >>>
    >>> String("foo", 12, encoding = "utf8").parse("hello joh\\xd4\\x83n")
    u'hello joh\\u0503n'
    >>>
    >>> foo = String("foo", 10, padchar = "X", paddir = "right")
    >>> foo.parse("helloXXXXX")
    'hello'
    >>> foo.build("hello")
    'helloXXXXX'
    """
    base = StringAdapter(Field(name, length), encoding=encoding)
    if padchar is None:
        return base
    # Only wrap in the padding adapter when a pad character was supplied.
    return PaddedStringAdapter(base, padchar=padchar, paddir=paddir,
                               trimdir=trimdir)
def PascalString(name, length_field=UBInt8("length"), encoding=None):
    """
    A length-prefixed string.

    ``PascalString`` is named after the string types of Pascal, which are
    length-prefixed. Lisp strings also follow this convention.

    The length field will appear in the same ``Container`` as the
    ``PascalString``, with the given name.

    :param str name: name
    :param ``Construct`` length_field: a field which will store the length of
                                       the string
    :param str encoding: encoding (e.g. "utf8") or None for no encoding

    >>> foo = PascalString("foo")
    >>> foo.parse("\\x05hello")
    'hello'
    >>> foo.build("hello world")
    '\\x0bhello world'
    >>>
    >>> foo = PascalString("foo", length_field = UBInt16("length"))
    >>> foo.parse("\\x00\\x05hello")
    'hello'
    >>> foo.build("hello")
    '\\x00\\x05hello'
    """
    data_field = Field("data", lambda ctx: ctx[length_field.name])
    pair = Sequence(name, length_field, data_field)
    return StringAdapter(LengthValueAdapter(pair), encoding=encoding)
def CString(name, terminators=b"\x00", encoding=None,
            char_field=Field(None, 1)):
    """
    A string ending in a terminator.

    ``CString`` is similar to the strings of C, C++, and other related
    programming languages.

    By default, the terminator is the NULL byte (b``0x00``).

    :param str name: name
    :param iterable terminators: sequence of valid terminators, in order of
                                 preference
    :param str encoding: encoding (e.g. "utf8") or None for no encoding
    :param ``Construct`` char_field: construct representing a single character

    >>> foo = CString("foo")
    >>> foo.parse(b"hello\\x00")
    b'hello'
    >>> foo.build(b"hello")
    b'hello\\x00'
    >>> foo = CString("foo", terminators = b"XYZ")
    >>> foo.parse(b"helloX")
    b'hello'
    >>> foo.parse(b"helloY")
    b'hello'
    >>> foo.parse(b"helloZ")
    b'hello'
    >>> foo.build(b"hello")
    b'helloX'
    """
    chars = RepeatUntil(lambda obj, ctx: obj in terminators, char_field)
    adapted = CStringAdapter(chars,
        terminators=terminators,
        encoding=encoding,
    )
    return Rename(name, adapted)
#===============================================================================
# conditional
#===============================================================================
def IfThenElse(name, predicate, then_subcon, else_subcon):
    """An if-then-else conditional construct: if the predicate indicates True,
    `then_subcon` is used; otherwise `else_subcon`.

    * name - the name of the construct
    * predicate - a function taking the context as an argument and returning
      True or False
    * then_subcon - the subcon used when the predicate returns True
    * else_subcon - the subcon used when the predicate returns False
    """
    branches = {
        True: then_subcon,
        False: else_subcon,
    }
    return Switch(name, lambda ctx: bool(predicate(ctx)), branches)
def If(predicate, subcon, elsevalue = None):
    """An if-then conditional construct: if the predicate indicates True,
    subcon is used; otherwise `elsevalue` is returned instead.

    * predicate - a function taking the context as an argument and returning
      True or False
    * subcon - the subcon used when the predicate returns True
    * elsevalue - the value used should the predicate return False
      (None by default)
    """
    fallback = Value("elsevalue", lambda ctx: elsevalue)
    return IfThenElse(subcon.name, predicate, subcon, fallback)
#===============================================================================
# misc
#===============================================================================
def OnDemandPointer(offsetfunc, subcon, force_build = True):
    """An on-demand pointer.

    * offsetfunc - a function taking the context as an argument and returning
      the absolute stream position
    * subcon - the subcon that will be parsed from the `offsetfunc()` stream
      position on demand
    * force_build - see OnDemand (True by default)
    """
    target = Pointer(offsetfunc, subcon)
    return OnDemand(target,
        advance_stream = False,
        force_build = force_build,
    )
def Magic(data):
    """An anonymous constant-bytes field (e.g. a file's magic number)."""
    return ConstAdapter(Field(None, len(data)), data)

View File

@@ -1,79 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/abbrevtable.py
#
# DWARF abbreviation table
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..common.utils import struct_parse, dwarf_assert
class AbbrevTable(object):
    """ Represents a DWARF abbreviation table.
    """
    def __init__(self, structs, stream, offset):
        """ Create new abbreviation table. Parses the actual table from the
            stream and stores it internally.

            structs:
                A DWARFStructs instance for parsing the data

            stream, offset:
                The stream and offset into the stream where this abbreviation
                table lives.
        """
        self.structs = structs
        self.stream = stream
        self.offset = offset
        self._abbrev_map = self._parse_abbrev_table()

    def get_abbrev(self, code):
        """ Get the AbbrevDecl for a given code. Raise KeyError if no
            declaration for this code exists.
        """
        return self._abbrev_map[code]

    def _parse_abbrev_table(self):
        """ Parse the abbrev table from the stream and return a dict mapping
            declaration code -> AbbrevDecl.
        """
        # Renamed from 'map' so the builtin is not shadowed.
        abbrev_map = {}
        self.stream.seek(self.offset)
        while True:
            decl_code = struct_parse(
                struct=self.structs.Dwarf_uleb128(''),
                stream=self.stream)
            # A ULEB128 code of 0 terminates the table.
            if decl_code == 0:
                break
            declaration = struct_parse(
                struct=self.structs.Dwarf_abbrev_declaration,
                stream=self.stream)
            abbrev_map[decl_code] = AbbrevDecl(decl_code, declaration)
        return abbrev_map
class AbbrevDecl(object):
    """ Wraps a parsed abbreviation declaration, exposing its fields with
        dict-like access, and adding some convenience methods.

        The abbreviation declaration represents an "entry" that points to it.
    """
    def __init__(self, code, decl):
        # The abbreviation code and the parsed declaration it wraps.
        self.code = code
        self.decl = decl

    def has_children(self):
        """ Does the entry have children?
        """
        return self['children_flag'] == 'DW_CHILDREN_yes'

    def iter_attr_specs(self):
        """ Iterate over the attribute specifications for the entry, yielding
            (name, form) pairs.
        """
        return ((spec.name, spec.form) for spec in self['attr_spec'])

    def __getitem__(self, entry):
        # Delegate subscripting straight to the wrapped declaration.
        return self.decl[entry]

View File

@@ -1,113 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/aranges.py
#
# DWARF aranges section decoding (.debug_aranges)
#
# Dorothy Chen (dorothchen@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
from collections import namedtuple
from ..common.utils import struct_parse
from bisect import bisect_right
import math
# An entry in the aranges table;
#   begin_addr:  The beginning address in the CU
#   length:      The length of the address range in this entry
#   info_offset: The CU's offset into .debug_info
# See 6.1.2 in the DWARF4 docs for an explanation of the remaining fields.
ARangeEntry = namedtuple('ARangeEntry', [
    'begin_addr', 'length', 'info_offset',
    'unit_length', 'version', 'address_size', 'segment_size'])
class ARanges(object):
    """ ARanges table in DWARF (decodes the .debug_aranges section).

        stream, size:
            A stream holding the .debug_aranges section, and its size

        structs:
            A DWARFStructs instance for parsing the data
    """
    def __init__(self, stream, size, structs):
        self.stream = stream
        self.size = size
        self.structs = structs

        # Get entries of aranges table in the form of ARangeEntry tuples
        self.entries = self._get_entries()

        # Sort entries by the beginning address
        self.entries.sort(key=lambda entry: entry.begin_addr)

        # Create list of keys (first addresses) for better searching
        # (parallel to self.entries, used with bisect below)
        self.keys = [entry.begin_addr for entry in self.entries]

    def cu_offset_at_addr(self, addr):
        """ Given an address, get the offset of the CU it belongs to, where
            'offset' refers to the offset in the .debug_info section.
            Returns None if the address falls in no recorded range.
        """
        # bisect_right finds the rightmost entry starting at or before addr;
        # then verify addr actually lies within that entry's range.
        tup = self.entries[bisect_right(self.keys, addr) - 1]
        if tup.begin_addr <= addr < tup.begin_addr + tup.length:
            return tup.info_offset
        else:
            return None

    #------ PRIVATE ------#
    def _get_entries(self):
        """ Parse the section and return a list of ARangeEntry tuples, one
            for each range of addresses.
        """
        self.stream.seek(0)
        entries = []
        offset = 0

        # one loop == one "set" == one CU
        while offset < self.size:
            aranges_header = struct_parse(self.structs.Dwarf_aranges_header,
                self.stream, offset)
            addr_size = self._get_addr_size_struct(aranges_header["address_size"])

            # No segmentation
            if aranges_header["segment_size"] == 0:
                # pad to nearest multiple of tuple size
                tuple_size = aranges_header["address_size"] * 2
                fp = self.stream.tell()
                seek_to = int(math.ceil(fp/float(tuple_size)) * tuple_size)
                self.stream.seek(seek_to)

                # entries in this set/CU; a (0, 0) pair terminates the set
                addr = struct_parse(addr_size('addr'), self.stream)
                length = struct_parse(addr_size('length'), self.stream)
                while addr != 0 or length != 0:
                    # 'begin_addr length info_offset version address_size segment_size'
                    entries.append(
                        ARangeEntry(begin_addr=addr,
                            length=length,
                            info_offset=aranges_header["debug_info_offset"],
                            unit_length=aranges_header["unit_length"],
                            version=aranges_header["version"],
                            address_size=aranges_header["address_size"],
                            segment_size=aranges_header["segment_size"]))
                    addr = struct_parse(addr_size('addr'), self.stream)
                    length = struct_parse(addr_size('length'), self.stream)
            # Segmentation exists in executable
            elif aranges_header["segment_size"] != 0:
                raise NotImplementedError("Segmentation not implemented")

            # Advance to the next set: unit_length does not include the
            # initial length field itself.
            offset = (offset
                + aranges_header.unit_length
                + self.structs.initial_length_field_size())

        return entries

    def _get_addr_size_struct(self, addr_header_value):
        """ Given this set's header value (int) for the address size,
            get the Construct representation of that size.
            Only 4- and 8-byte addresses are supported.
        """
        if addr_header_value == 4:
            return self.structs.Dwarf_uint32
        else:
            assert addr_header_value == 8
            return self.structs.Dwarf_uint64

View File

@@ -1,724 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/callframe.py
#
# DWARF call frame information
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import copy
from collections import namedtuple
from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos)
from ..common.py3compat import iterbytes, iterkeys
from ..construct import Struct, Switch
from .enums import DW_EH_encoding_flags
from .structs import DWARFStructs
from .constants import *
class CallFrameInfo(object):
""" DWARF CFI (Call Frame Info)
Note that this also supports unwinding information as found in .eh_frame
sections: its format differs slightly from the one in .debug_frame. See
<http://www.airs.com/blog/archives/460>.
stream, size:
A stream holding the .debug_frame section, and the size of the
section in it.
address:
Virtual address for this section. This is used to decode relative
addresses.
base_structs:
The structs to be used as the base for parsing this section.
Eventually, each entry gets its own structs based on the initial
length field it starts with. The address_size, however, is taken
from base_structs. This appears to be a limitation of the DWARFv3
standard, fixed in v4.
A discussion I had on dwarf-discuss confirms this.
So for DWARFv4 we'll take the address size from the CIE header,
but for earlier versions will use the elfclass of the containing
file; more sophisticated methods are used by libdwarf and others,
such as guessing which CU contains which FDEs (based on their
address ranges) and taking the address_size from those CUs.
"""
def __init__(self, stream, size, address, base_structs,
for_eh_frame=False):
self.stream = stream
self.size = size
self.address = address
self.base_structs = base_structs
self.entries = None
# Map between an offset in the stream and the entry object found at this
# offset. Useful for assigning CIE to FDEs according to the CIE_pointer
# header field which contains a stream offset.
self._entry_cache = {}
# The .eh_frame and .debug_frame section use almost the same CFI
# encoding, but there are tiny variations we need to handle during
# parsing.
self.for_eh_frame = for_eh_frame
def get_entries(self):
""" Get a list of entries that constitute this CFI. The list consists
of CIE or FDE objects, in the order of their appearance in the
section.
"""
if self.entries is None:
self.entries = self._parse_entries()
return self.entries
#-------------------------
def _parse_entries(self):
entries = []
offset = 0
while offset < self.size:
entries.append(self._parse_entry_at(offset))
offset = self.stream.tell()
return entries
def _parse_entry_at(self, offset):
""" Parse an entry from self.stream starting with the given offset.
Return the entry object. self.stream will point right after the
entry.
"""
if offset in self._entry_cache:
return self._entry_cache[offset]
entry_length = struct_parse(
self.base_structs.Dwarf_uint32(''), self.stream, offset)
if self.for_eh_frame and entry_length == 0:
return ZERO(offset)
dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32
entry_structs = DWARFStructs(
little_endian=self.base_structs.little_endian,
dwarf_format=dwarf_format,
address_size=self.base_structs.address_size)
# Read the next field to see whether this is a CIE or FDE
CIE_id = struct_parse(
entry_structs.Dwarf_offset(''), self.stream)
if self.for_eh_frame:
is_CIE = CIE_id == 0
else:
is_CIE = (
(dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or
CIE_id == 0xFFFFFFFFFFFFFFFF)
# Parse the header, which goes up to and excluding the sequence of
# instructions.
if is_CIE:
header_struct = (entry_structs.EH_CIE_header
if self.for_eh_frame else
entry_structs.Dwarf_CIE_header)
header = struct_parse(
header_struct, self.stream, offset)
else:
header = self._parse_fde_header(entry_structs, offset)
# If this is DWARF version 4 or later, we can have a more precise
# address size, read from the CIE header.
if not self.for_eh_frame and entry_structs.dwarf_version >= 4:
entry_structs = DWARFStructs(
little_endian=entry_structs.little_endian,
dwarf_format=entry_structs.dwarf_format,
address_size=header.address_size)
# If the augmentation string is not empty, hope to find a length field
# in order to skip the data specified augmentation.
if is_CIE:
aug_bytes, aug_dict = self._parse_cie_augmentation(
header, entry_structs)
else:
cie = self._parse_cie_for_fde(offset, header, entry_structs)
aug_bytes = self._read_augmentation_data(entry_structs)
lsda_encoding = cie.augmentation_dict.get('LSDA_encoding', DW_EH_encoding_flags['DW_EH_PE_omit'])
if lsda_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']:
# parse LSDA pointer
lsda_pointer = self._parse_lsda_pointer(entry_structs,
self.stream.tell() - len(aug_bytes),
lsda_encoding)
else:
lsda_pointer = None
# For convenience, compute the end offset for this entry
end_offset = (
offset + header.length +
entry_structs.initial_length_field_size())
# At this point self.stream is at the start of the instruction list
# for this entry
instructions = self._parse_instructions(
entry_structs, self.stream.tell(), end_offset)
if is_CIE:
self._entry_cache[offset] = CIE(
header=header, instructions=instructions, offset=offset,
augmentation_dict=aug_dict,
augmentation_bytes=aug_bytes,
structs=entry_structs)
else: # FDE
cie = self._parse_cie_for_fde(offset, header, entry_structs)
self._entry_cache[offset] = FDE(
header=header, instructions=instructions, offset=offset,
structs=entry_structs, cie=cie,
augmentation_bytes=aug_bytes,
lsda_pointer=lsda_pointer,
)
return self._entry_cache[offset]
def _parse_instructions(self, structs, offset, end_offset):
""" Parse a list of CFI instructions from self.stream, starting with
the offset and until (not including) end_offset.
Return a list of CallFrameInstruction objects.
"""
instructions = []
while offset < end_offset:
opcode = struct_parse(structs.Dwarf_uint8(''), self.stream, offset)
args = []
primary = opcode & _PRIMARY_MASK
primary_arg = opcode & _PRIMARY_ARG_MASK
if primary == DW_CFA_advance_loc:
args = [primary_arg]
elif primary == DW_CFA_offset:
args = [
primary_arg,
struct_parse(structs.Dwarf_uleb128(''), self.stream)]
elif primary == DW_CFA_restore:
args = [primary_arg]
# primary == 0 and real opcode is extended
elif opcode in (DW_CFA_nop, DW_CFA_remember_state,
DW_CFA_restore_state):
args = []
elif opcode == DW_CFA_set_loc:
args = [
struct_parse(structs.Dwarf_target_addr(''), self.stream)]
elif opcode == DW_CFA_advance_loc1:
args = [struct_parse(structs.Dwarf_uint8(''), self.stream)]
elif opcode == DW_CFA_advance_loc2:
args = [struct_parse(structs.Dwarf_uint16(''), self.stream)]
elif opcode == DW_CFA_advance_loc4:
args = [struct_parse(structs.Dwarf_uint32(''), self.stream)]
elif opcode in (DW_CFA_offset_extended, DW_CFA_register,
DW_CFA_def_cfa, DW_CFA_val_offset):
args = [
struct_parse(structs.Dwarf_uleb128(''), self.stream),
struct_parse(structs.Dwarf_uleb128(''), self.stream)]
elif opcode in (DW_CFA_restore_extended, DW_CFA_undefined,
DW_CFA_same_value, DW_CFA_def_cfa_register,
DW_CFA_def_cfa_offset):
args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)]
elif opcode == DW_CFA_def_cfa_offset_sf:
args = [struct_parse(structs.Dwarf_sleb128(''), self.stream)]
elif opcode == DW_CFA_def_cfa_expression:
args = [struct_parse(
structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
elif opcode in (DW_CFA_expression, DW_CFA_val_expression):
args = [
struct_parse(structs.Dwarf_uleb128(''), self.stream),
struct_parse(
structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
elif opcode in (DW_CFA_offset_extended_sf,
DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf):
args = [
struct_parse(structs.Dwarf_uleb128(''), self.stream),
struct_parse(structs.Dwarf_sleb128(''), self.stream)]
elif opcode == DW_CFA_GNU_args_size:
args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)]
else:
dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode)
instructions.append(CallFrameInstruction(opcode=opcode, args=args))
offset = self.stream.tell()
return instructions
def _parse_cie_for_fde(self, fde_offset, fde_header, entry_structs):
""" Parse the CIE that corresponds to an FDE.
"""
# Determine the offset of the CIE that corresponds to this FDE
if self.for_eh_frame:
# CIE_pointer contains the offset for a reverse displacement from
# the section offset of the CIE_pointer field itself (not from the
# FDE header offset).
cie_displacement = fde_header['CIE_pointer']
cie_offset = (fde_offset + entry_structs.dwarf_format // 8
- cie_displacement)
else:
cie_offset = fde_header['CIE_pointer']
# Then read it
with preserve_stream_pos(self.stream):
return self._parse_entry_at(cie_offset)
def _parse_cie_augmentation(self, header, entry_structs):
""" Parse CIE augmentation data from the annotation string in `header`.
Return a tuple that contains 1) the augmentation data as a string
(without the length field) and 2) the augmentation data as a dict.
"""
augmentation = header.get('augmentation')
if not augmentation:
return ('', {})
# Augmentation parsing works in minimal mode here: we need the length
# field to be able to skip unhandled augmentation fields.
assert augmentation.startswith(b'z'), (
'Unhandled augmentation string: {}'.format(repr(augmentation)))
available_fields = {
b'z': entry_structs.Dwarf_uleb128('length'),
b'L': entry_structs.Dwarf_uint8('LSDA_encoding'),
b'R': entry_structs.Dwarf_uint8('FDE_encoding'),
b'S': True,
b'P': Struct(
'personality',
entry_structs.Dwarf_uint8('encoding'),
Switch('function', lambda ctx: ctx.encoding & 0x0f, {
enc: fld_cons('function')
for enc, fld_cons
in self._eh_encoding_to_field(entry_structs).items()})),
}
# Build the Struct we will be using to parse the augmentation data.
# Stop as soon as we are not able to match the augmentation string.
fields = []
aug_dict = {}
for b in iterbytes(augmentation):
try:
fld = available_fields[b]
except KeyError:
break
if fld is True:
aug_dict[fld] = True
else:
fields.append(fld)
# Read the augmentation twice: once with the Struct, once for the raw
# bytes. Read the raw bytes last so we are sure we leave the stream
# pointing right after the augmentation: the Struct may be incomplete
# (missing trailing fields) due to an unknown char: see the KeyError
# above.
offset = self.stream.tell()
struct = Struct('Augmentation_Data', *fields)
aug_dict.update(struct_parse(struct, self.stream, offset))
self.stream.seek(offset)
aug_bytes = self._read_augmentation_data(entry_structs)
return (aug_bytes, aug_dict)
def _read_augmentation_data(self, entry_structs):
""" Read augmentation data.
This assumes that the augmentation string starts with 'z', i.e. that
augmentation data is prefixed by a length field, which is not returned.
"""
if not self.for_eh_frame:
return b''
augmentation_data_length = struct_parse(
Struct('Dummy_Augmentation_Data',
entry_structs.Dwarf_uleb128('length')),
self.stream)['length']
return self.stream.read(augmentation_data_length)
def _parse_lsda_pointer(self, structs, stream_offset, encoding):
""" Parse bytes to get an LSDA pointer.
The basic encoding (lower four bits of the encoding) describes how the values are encoded in a CIE or an FDE.
The modifier (upper four bits of the encoding) describes how the raw values, after decoded using a basic
encoding, should be modified before using.
Ref: https://www.airs.com/blog/archives/460
"""
assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit']
basic_encoding = encoding & 0x0f
modifier = encoding & 0xf0
formats = self._eh_encoding_to_field(structs)
ptr = struct_parse(
Struct('Augmentation_Data',
formats[basic_encoding]('LSDA_pointer')),
self.stream, stream_pos=stream_offset)['LSDA_pointer']
if modifier == DW_EH_encoding_flags['DW_EH_PE_absptr']:
pass
elif modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
ptr += self.address + stream_offset
else:
assert False, 'Unsupported encoding modifier for LSDA pointer: {:#x}'.format(modifier)
return ptr
def _parse_fde_header(self, entry_structs, offset):
    """ Compute a struct to parse the header of the current FDE, parse it
    and return the resulting dict.
    """
    if not self.for_eh_frame:
        # .debug_frame FDEs have a fixed header layout; parse directly.
        return struct_parse(entry_structs.Dwarf_FDE_header, self.stream,
                            offset)

    # .eh_frame FDEs encode initial_location/address_range according to the
    # CIE's FDE_encoding, so the header Struct must be completed dynamically
    # after the CIE has been fetched.
    fields = [entry_structs.Dwarf_initial_length('length'),
              entry_structs.Dwarf_offset('CIE_pointer')]

    # Parse the couple of header fields that are always here so we can
    # fetch the corresponding CIE.
    minimal_header = struct_parse(Struct('eh_frame_minimal_header',
                                         *fields), self.stream, offset)
    cie = self._parse_cie_for_fde(offset, minimal_header, entry_structs)
    initial_location_offset = self.stream.tell()

    # Try to parse the initial location. We need the initial location in
    # order to create a meaningful FDE, so assume it's there. Omission does
    # not seem to happen in practice.
    encoding = cie.augmentation_dict['FDE_encoding']
    assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit']
    basic_encoding = encoding & 0x0f
    encoding_modifier = encoding & 0xf0

    # Depending on the specified encoding, complete the header Struct
    formats = self._eh_encoding_to_field(entry_structs)
    fields.append(formats[basic_encoding]('initial_location'))
    fields.append(formats[basic_encoding]('address_range'))
    result = struct_parse(Struct('Dwarf_FDE_header', *fields),
                          self.stream, offset)

    if encoding_modifier == 0:
        pass
    elif encoding_modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
        # Start address is relative to the address of the
        # "initial_location" field.
        result['initial_location'] += (
            self.address + initial_location_offset)
    else:
        # NOTE(review): the message formats `encoding`, although the branch
        # rejects `encoding_modifier` — consider reporting the modifier.
        assert False, 'Unsupported encoding: {:#x}'.format(encoding)

    return result
@staticmethod
def _eh_encoding_to_field(entry_structs):
    """
    Return a mapping from basic encodings (DW_EH_encoding_flags) to the
    corresponding field constructors (for instance
    entry_structs.Dwarf_uint32).
    """
    ctor_by_name = {
        'DW_EH_PE_absptr':  entry_structs.Dwarf_target_addr,
        'DW_EH_PE_uleb128': entry_structs.Dwarf_uleb128,
        'DW_EH_PE_udata2':  entry_structs.Dwarf_uint16,
        'DW_EH_PE_udata4':  entry_structs.Dwarf_uint32,
        'DW_EH_PE_udata8':  entry_structs.Dwarf_uint64,
        'DW_EH_PE_sleb128': entry_structs.Dwarf_sleb128,
        'DW_EH_PE_sdata2':  entry_structs.Dwarf_int16,
        'DW_EH_PE_sdata4':  entry_structs.Dwarf_int32,
        'DW_EH_PE_sdata8':  entry_structs.Dwarf_int64,
    }
    # Key the result by the numeric encoding values.
    return {DW_EH_encoding_flags[name]: ctor
            for name, ctor in ctor_by_name.items()}
def instruction_name(opcode):
    """ Given an opcode, return the instruction name.
    """
    # Primary opcodes carry their argument in the low 6 bits, so only the
    # top 2 bits identify the instruction; extended opcodes (primary bits
    # all zero) are looked up whole.
    primary = opcode & _PRIMARY_MASK
    return _OPCODE_NAME_MAP[primary] if primary else _OPCODE_NAME_MAP[opcode]
class CallFrameInstruction(object):
    """ A single decoded instruction from the CFI section.

    opcode: the numeric instruction opcode, exactly as it appears in the
            section.
    args:   list of decoded arguments (arguments embedded in the low bits
            of primary opcodes are included here too, when applicable).
    """
    def __init__(self, opcode, args):
        self.opcode = opcode
        self.args = args

    def __repr__(self):
        name = instruction_name(self.opcode)
        return '%s (0x%x): %s' % (name, self.opcode, self.args)
class CFIEntry(object):
    """ A common base class for CFI entries.
    Contains a header and a list of instructions (CallFrameInstruction).
    offset: the offset of this entry from the beginning of the section
    cie: for FDEs, a CIE pointer is required
    augmentation_dict: Augmentation data as a parsed struct (dict): see
        CallFrameInfo._parse_cie_augmentation and
        http://www.airs.com/blog/archives/460.
    augmentation_bytes: Augmentation data as a chain of bytes: see
        CallFrameInfo._parse_cie_augmentation and
        http://www.airs.com/blog/archives/460.
    """
    def __init__(self, header, structs, instructions, offset,
                 augmentation_dict=None, augmentation_bytes=b'', cie=None):
        self.header = header
        self.structs = structs
        self.instructions = instructions
        self.offset = offset
        self.cie = cie
        # Cache for the decoded table; computed lazily by get_decoded().
        self._decoded_table = None
        self.augmentation_dict = augmentation_dict if augmentation_dict else {}
        self.augmentation_bytes = augmentation_bytes

    def get_decoded(self):
        """ Decode the CFI contained in this entry and return a
        DecodedCallFrameTable object representing it. See the documentation
        of that class to understand how to interpret the decoded table.
        """
        if self._decoded_table is None:
            self._decoded_table = self._decode_CFI_table()
        return self._decoded_table

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _decode_CFI_table(self):
        """ Decode the instructions contained in the given CFI entry and return
        a DecodedCallFrameTable.
        """
        if isinstance(self, CIE):
            # For a CIE, initialize cur_line to an "empty" line
            cie = self
            cur_line = dict(pc=0, cfa=CFARule(reg=None, offset=0))
            reg_order = []
        else: # FDE
            # For a FDE, we need to decode the attached CIE first, because its
            # decoded table is needed. Its "initial instructions" describe a
            # line that serves as the base (first) line in the FDE's table.
            cie = self.cie
            cie_decoded_table = cie.get_decoded()
            if len(cie_decoded_table.table) > 0:
                last_line_in_CIE = copy.copy(cie_decoded_table.table[-1])
                cur_line = copy.copy(last_line_in_CIE)
            else:
                cur_line = dict(cfa=CFARule(reg=None, offset=0))
            cur_line['pc'] = self['initial_location']
            reg_order = copy.copy(cie_decoded_table.reg_order)

        table = []

        # Keeps a stack for the use of DW_CFA_{remember|restore}_state
        # instructions.
        line_stack = []

        def _add_to_order(regnum):
            # DW_CFA_restore and others remove registers from cur_line,
            # but they stay in reg_order. Avoid duplicates.
            if regnum not in reg_order:
                reg_order.append(regnum)

        for instr in self.instructions:
            # Throughout this loop, cur_line is the current line. Some
            # instructions add it to the table, but most instructions just
            # update it without adding it to the table.
            name = instruction_name(instr.opcode)

            if name == 'DW_CFA_set_loc':
                table.append(copy.copy(cur_line))
                cur_line['pc'] = instr.args[0]
            elif name in ( 'DW_CFA_advance_loc1', 'DW_CFA_advance_loc2',
                    'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
                table.append(copy.copy(cur_line))
                cur_line['pc'] += instr.args[0] * cie['code_alignment_factor']
            elif name == 'DW_CFA_def_cfa':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1])
            elif name == 'DW_CFA_def_cfa_sf':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1] * cie['code_alignment_factor'])
            elif name == 'DW_CFA_def_cfa_register':
                # Keep the previous offset, change only the register.
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=cur_line['cfa'].offset)
            elif name == 'DW_CFA_def_cfa_offset':
                # Keep the previous register, change only the offset.
                cur_line['cfa'] = CFARule(
                    reg=cur_line['cfa'].reg,
                    offset=instr.args[0])
            elif name == 'DW_CFA_def_cfa_expression':
                cur_line['cfa'] = CFARule(expr=instr.args[0])
            elif name == 'DW_CFA_undefined':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.UNDEFINED)
            elif name == 'DW_CFA_same_value':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.SAME_VALUE)
            elif name in ( 'DW_CFA_offset', 'DW_CFA_offset_extended',
                    'DW_CFA_offset_extended_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name in ('DW_CFA_val_offset', 'DW_CFA_val_offset_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_register':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.REGISTER,
                    instr.args[1])
            elif name == 'DW_CFA_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.EXPRESSION,
                    instr.args[1])
            elif name == 'DW_CFA_val_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_EXPRESSION,
                    instr.args[1])
            elif name in ('DW_CFA_restore', 'DW_CFA_restore_extended'):
                _add_to_order(instr.args[0])
                dwarf_assert(
                    isinstance(self, FDE),
                    '%s instruction must be in a FDE' % name)
                # NOTE(review): last_line_in_CIE is only bound above when the
                # CIE's decoded table is non-empty; a restore against a CIE
                # that produced no table rows would raise NameError here —
                # confirm whether such input can occur.
                if instr.args[0] in last_line_in_CIE:
                    cur_line[instr.args[0]] = last_line_in_CIE[instr.args[0]]
                else:
                    cur_line.pop(instr.args[0], None)
            elif name == 'DW_CFA_remember_state':
                line_stack.append(copy.deepcopy(cur_line))
            elif name == 'DW_CFA_restore_state':
                # Restore the remembered line, but keep the current pc.
                pc = cur_line['pc']
                cur_line = line_stack.pop()
                cur_line['pc'] = pc

        # The current line is appended to the table after all instructions
        # have ended, if there were instructions.
        if cur_line['cfa'].reg is not None or len(cur_line) > 2:
            table.append(cur_line)

        return DecodedCallFrameTable(table=table, reg_order=reg_order)
# A CIE and FDE have exactly the same functionality, except that a FDE has
# a pointer to its CIE. The functionality was wholly encapsulated in CFIEntry,
# so the CIE and FDE classes exists separately for identification (instead
# of having an explicit "entry_type" field in CFIEntry).
#
class CIE(CFIEntry):
    # A CIE behaves exactly like CFIEntry; the subclass exists only so that
    # CIEs can be told apart from FDEs via isinstance checks (instead of an
    # explicit "entry_type" field on CFIEntry).
    pass
class FDE(CFIEntry):
    """ A Frame Description Entry: a CFIEntry that additionally carries a
    pointer to its parent CIE and, optionally, an LSDA pointer.
    """
    def __init__(self, header, structs, instructions, offset,
                 augmentation_bytes=None, cie=None, lsda_pointer=None):
        super(FDE, self).__init__(
            header, structs, instructions, offset,
            augmentation_bytes=augmentation_bytes, cie=cie)
        self.lsda_pointer = lsda_pointer
class ZERO(object):
    """ End marker for a sequence of CIE/FDE entries.

    Specific to `.eh_frame` sections — this kind of entry does not exist in
    pure DWARF. `readelf` displays these as "ZERO terminator", hence the
    class name.
    """
    def __init__(self, offset):
        self.offset = offset
class RegisterRule(object):
    """ A rule for locating one register's saved value in a call frame.

    Each rule is a type (enumeration following DWARFv3 section 6.4.1) plus
    an optional argument augmenting that type.
    """
    UNDEFINED = 'UNDEFINED'
    SAME_VALUE = 'SAME_VALUE'
    OFFSET = 'OFFSET'
    VAL_OFFSET = 'VAL_OFFSET'
    REGISTER = 'REGISTER'
    EXPRESSION = 'EXPRESSION'
    VAL_EXPRESSION = 'VAL_EXPRESSION'
    ARCHITECTURAL = 'ARCHITECTURAL'

    def __init__(self, type, arg=None):
        self.type = type
        self.arg = arg

    def __repr__(self):
        return 'RegisterRule({}, {})'.format(self.type, self.arg)
class CFARule(object):
    """ A rule for computing the CFA at each location: either a
    register + offset pair, or a DWARF expression.
    """
    def __init__(self, reg=None, offset=None, expr=None):
        self.reg = reg
        self.offset = offset
        self.expr = expr

    def __repr__(self):
        return 'CFARule(reg={}, offset={}, expr={})'.format(
            self.reg, self.offset, self.expr)
# The decoded CFI for an entry is one large table (DWARFv3 section 6.4.1);
# DecodedCallFrameTable groups that table with the register appearance order.
#
# table:
#   A list of dicts, one per "line" of the decoded table. Two keys are
#   special: 'pc' holds the location/program counter (LOC) and 'cfa' holds
#   the CFARule for locating the CFA on that line. All remaining keys are
#   register numbers mapping to RegisterRule values that describe the rules
#   for those registers.
#
# reg_order:
#   Register numbers described in the table, in order of first appearance.
#
DecodedCallFrameTable = namedtuple(
    'DecodedCallFrameTable', ['table', 'reg_order'])
#---------------- PRIVATE ----------------#

# Primary opcodes keep the instruction in the top 2 bits and pack an
# argument into the low 6 bits (DWARFv3 7.23).
_PRIMARY_MASK = 0b11000000
_PRIMARY_ARG_MASK = 0b00111111

# Maps opcode values to DW_CFA_* names. Built by scanning the module globals
# for the constants brought in from the constants module via `import *`.
# Iterating `list(globals())` replaces the py3compat `iterkeys` shim: it is
# equivalent on both Python 2 and 3 and drops the extra helper dependency.
_OPCODE_NAME_MAP = {}
for name in list(globals()):
    if name.startswith('DW_CFA'):
        _OPCODE_NAME_MAP[globals()[name]] = name

View File

@@ -1,226 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/compileunit.py
#
# DWARF compile unit
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from bisect import bisect_right
from .die import DIE
from ..common.utils import dwarf_assert
class CompileUnit(object):
    """ A DWARF compilation unit (CU).

    A normal compilation unit typically represents the text and data
    contributed to an executable by a single relocatable object file.
    It may be derived from several source files,
    including pre-processed "include files".

    Serves as a container and context to DIEs that describe objects and code
    belonging to a compilation unit.

    CU header entries can be accessed as dict keys from this object, i.e.
        cu = CompileUnit(...)
        cu['version']  # version field of the CU header

    To get the top-level DIE describing the compilation unit, call the
    get_top_DIE method.
    """
    def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset):
        """ header:
                CU header for this compile unit

            dwarfinfo:
                The DWARFInfo context object which created this one

            structs:
                A DWARFStructs instance suitable for this compile unit

            cu_offset:
                Offset in the stream to the beginning of this CU (its header)

            cu_die_offset:
                Offset in the stream of the top DIE of this CU
        """
        self.dwarfinfo = dwarfinfo
        self.header = header
        self.structs = structs
        self.cu_offset = cu_offset
        self.cu_die_offset = cu_die_offset

        # The abbreviation table for this CU. Filled lazily when DIEs are
        # requested.
        self._abbrev_table = None

        # A list of DIEs belonging to this CU.
        # This list is lazily constructed as DIEs are iterated over.
        self._dielist = []
        # A list of file offsets, corresponding (by index) to the DIEs
        # in `self._dielist`. This list exists separately from
        # `self._dielist` to make it binary searchable, enabling the
        # DIE population strategy used in `iter_DIE_children`.
        # Like `self._dielist`, this list is lazily constructed
        # as DIEs are iterated over.
        self._diemap = []

    def dwarf_format(self):
        """ Get the DWARF format (32 or 64) for this CU
        """
        return self.structs.dwarf_format

    def get_abbrev_table(self):
        """ Get the abbreviation table (AbbrevTable object) for this CU
        """
        if self._abbrev_table is None:
            self._abbrev_table = self.dwarfinfo.get_abbrev_table(
                self['debug_abbrev_offset'])
        return self._abbrev_table

    def get_top_DIE(self):
        """ Get the top DIE (which is either a DW_TAG_compile_unit or
        DW_TAG_partial_unit) of this CU
        """
        # Note that a top DIE always has minimal offset and is therefore
        # at the beginning of our lists, so no bisect is required.
        if len(self._diemap) > 0:
            return self._dielist[0]

        top = DIE(
                cu=self,
                stream=self.dwarfinfo.debug_info_sec.stream,
                offset=self.cu_die_offset)

        self._dielist.insert(0, top)
        self._diemap.insert(0, self.cu_die_offset)

        return top

    @property
    def size(self):
        # Total size of the CU in the stream: the unit_length value plus the
        # size of the initial length field itself.
        return self['unit_length'] + self.structs.initial_length_field_size()

    def get_DIE_from_refaddr(self, refaddr):
        """ Obtain a DIE contained in this CU from a reference.

            refaddr:
                The offset into the .debug_info section, which must be
                contained in this CU or a DWARFError will be raised.

            When using a reference attribute with a form that is relative
            to the compile unit, add the compile unit's .cu_offset before
            calling this function.
        """
        # All DIEs are after the cu header and within the unit
        dwarf_assert(
            self.cu_die_offset <= refaddr < self.cu_offset + self.size,
            'refaddr %s not in DIE range of CU %s' % (refaddr, self.cu_offset))

        return self._get_cached_DIE(refaddr)

    def iter_DIEs(self):
        """ Iterate over all the DIEs in the CU, in order of their appearance.
        Note that null DIEs will also be returned.
        """
        return self._iter_DIE_subtree(self.get_top_DIE())

    def iter_DIE_children(self, die):
        """ Given a DIE, yields either its children, without null DIE list
        terminator, or nothing, if that DIE has no children.

        The null DIE terminator is saved in that DIE when iteration ended.
        """
        if not die.has_children:
            return

        # `cur_offset` tracks the stream offset of the next DIE to yield
        # as we iterate over our children,
        cur_offset = die.offset + die.size

        while True:
            child = self._get_cached_DIE(cur_offset)

            child.set_parent(die)

            if child.is_null():
                die._terminator = child
                return

            yield child

            if not child.has_children:
                cur_offset += child.size
            elif "DW_AT_sibling" in child.attributes:
                # Fast path: the producer recorded where the next sibling
                # starts, so the child's subtree can be skipped entirely.
                sibling = child.attributes["DW_AT_sibling"]
                cur_offset = sibling.value + self.cu_offset
            else:
                # If no DW_AT_sibling attribute is provided by the producer
                # then the whole child subtree must be parsed to find its next
                # sibling. There is one zero byte representing null DIE
                # terminating children list. It is used to locate child subtree
                # bounds.

                # If children are not parsed yet, this instruction will manage
                # to recursive call of this function which will result in
                # setting of `_terminator` attribute of the `child`.
                if child._terminator is None:
                    for _ in self.iter_DIE_children(child):
                        pass

                cur_offset = child._terminator.offset + child._terminator.size

    #------ PRIVATE ------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _iter_DIE_subtree(self, die):
        """ Given a DIE, this yields it with its subtree including null DIEs
        (child list terminators).
        """
        yield die
        if die.has_children:
            for c in die.iter_children():
                for d in self._iter_DIE_subtree(c):
                    yield d
            yield die._terminator

    def _get_cached_DIE(self, offset):
        """ Given a DIE offset, look it up in the cache. If not present,
        parse the DIE and insert it into the cache.

            offset:
                The offset of the DIE in the debug_info section to retrieve.

        The stream reference is copied from the top DIE. The top die will
        also be parsed and cached if needed.

        See also get_DIE_from_refaddr(self, refaddr).
        """
        # The top die must be in the cache if any DIE is in the cache.
        # The stream is the same for all DIEs in this CU, so populate
        # the top DIE and obtain a reference to its stream.
        top_die_stream = self.get_top_DIE().stream

        # `offset` is the offset in the stream of the DIE we want to return.
        # The map is maintained as a parallel array to the list. We call
        # bisect each time to ensure new DIEs are inserted in the correct
        # order within both `self._dielist` and `self._diemap`.
        i = bisect_right(self._diemap, offset)

        # Note that `self._diemap` cannot be empty because the top DIE
        # was inserted by the call to .get_top_DIE(). Also it has the minimal
        # offset, so the bisect_right insert point will always be at least 1.
        if offset == self._diemap[i - 1]:
            die = self._dielist[i - 1]
        else:
            die = DIE(cu=self, stream=top_die_stream, offset=offset)
            self._dielist.insert(i, die)
            self._diemap.insert(i, offset)

        return die

View File

@@ -1,224 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/constants.py
#
# Constants and flags
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
# Inline codes
#
DW_INL_not_inlined = 0
DW_INL_inlined = 1
DW_INL_declared_not_inlined = 2
DW_INL_declared_inlined = 3

# Source languages
#
DW_LANG_C89 = 0x0001
DW_LANG_C = 0x0002
DW_LANG_Ada83 = 0x0003
DW_LANG_C_plus_plus = 0x0004
DW_LANG_Cobol74 = 0x0005
DW_LANG_Cobol85 = 0x0006
DW_LANG_Fortran77 = 0x0007
DW_LANG_Fortran90 = 0x0008
DW_LANG_Pascal83 = 0x0009
DW_LANG_Modula2 = 0x000a
DW_LANG_Java = 0x000b
DW_LANG_C99 = 0x000c
DW_LANG_Ada95 = 0x000d
DW_LANG_Fortran95 = 0x000e
DW_LANG_PLI = 0x000f
DW_LANG_ObjC = 0x0010
DW_LANG_ObjC_plus_plus = 0x0011
DW_LANG_UPC = 0x0012
DW_LANG_D = 0x0013
DW_LANG_Python = 0x0014
DW_LANG_OpenCL = 0x0015
DW_LANG_Go = 0x0016
DW_LANG_Modula3 = 0x0017
DW_LANG_Haskell = 0x0018
DW_LANG_C_plus_plus_03 = 0x0019
DW_LANG_C_plus_plus_11 = 0x001a
DW_LANG_OCaml = 0x001b
DW_LANG_Rust = 0x001c
DW_LANG_C11 = 0x001d
DW_LANG_Swift = 0x001e
DW_LANG_Julia = 0x001f
DW_LANG_Dylan = 0x0020
DW_LANG_C_plus_plus_14 = 0x0021
DW_LANG_Fortran03 = 0x0022
DW_LANG_Fortran08 = 0x0023
DW_LANG_RenderScript = 0x0024
DW_LANG_BLISS = 0x0025
# Vendor extensions (values at or above DW_LANG_lo_user = 0x8000)
DW_LANG_Mips_Assembler = 0x8001
DW_LANG_Upc = 0x8765
DW_LANG_HP_Bliss = 0x8003
DW_LANG_HP_Basic91 = 0x8004
DW_LANG_HP_Pascal91 = 0x8005
DW_LANG_HP_IMacro = 0x8006
DW_LANG_HP_Assembler = 0x8007
DW_LANG_GOOGLE_RenderScript = 0x8e57
DW_LANG_BORLAND_Delphi = 0xb000

# Encoding
#
DW_ATE_void = 0x0
DW_ATE_address = 0x1
DW_ATE_boolean = 0x2
DW_ATE_complex_float = 0x3
DW_ATE_float = 0x4
DW_ATE_signed = 0x5
DW_ATE_signed_char = 0x6
DW_ATE_unsigned = 0x7
DW_ATE_unsigned_char = 0x8
DW_ATE_imaginary_float = 0x9
DW_ATE_packed_decimal = 0xa
DW_ATE_numeric_string = 0xb
DW_ATE_edited = 0xc
DW_ATE_signed_fixed = 0xd
DW_ATE_unsigned_fixed = 0xe
DW_ATE_decimal_float = 0xf
DW_ATE_UTF = 0x10
DW_ATE_UCS = 0x11
DW_ATE_ASCII = 0x12
DW_ATE_lo_user = 0x80
DW_ATE_hi_user = 0xff
# HP vendor extensions (overlap the lo_user..hi_user range)
DW_ATE_HP_float80 = 0x80
DW_ATE_HP_complex_float80 = 0x81
DW_ATE_HP_float128 = 0x82
DW_ATE_HP_complex_float128 = 0x83
DW_ATE_HP_floathpintel = 0x84
DW_ATE_HP_imaginary_float80 = 0x85
DW_ATE_HP_imaginary_float128 = 0x86

# Access
#
DW_ACCESS_public = 1
DW_ACCESS_protected = 2
DW_ACCESS_private = 3

# Visibility
#
DW_VIS_local = 1
DW_VIS_exported = 2
DW_VIS_qualified = 3

# Virtuality
#
DW_VIRTUALITY_none = 0
DW_VIRTUALITY_virtual = 1
DW_VIRTUALITY_pure_virtual = 2

# ID case
#
DW_ID_case_sensitive = 0
DW_ID_up_case = 1
DW_ID_down_case = 2
DW_ID_case_insensitive = 3

# Calling convention
#
DW_CC_normal = 0x1
DW_CC_program = 0x2
DW_CC_nocall = 0x3

# Ordering
#
DW_ORD_row_major = 0
DW_ORD_col_major = 1

# Line program opcodes
#
DW_LNS_copy = 0x01
DW_LNS_advance_pc = 0x02
DW_LNS_advance_line = 0x03
DW_LNS_set_file = 0x04
DW_LNS_set_column = 0x05
DW_LNS_negate_stmt = 0x06
DW_LNS_set_basic_block = 0x07
DW_LNS_const_add_pc = 0x08
DW_LNS_fixed_advance_pc = 0x09
DW_LNS_set_prologue_end = 0x0a
DW_LNS_set_epilogue_begin = 0x0b
DW_LNS_set_isa = 0x0c
# Extended line program opcodes (separate numbering space from DW_LNS_*)
DW_LNE_end_sequence = 0x01
DW_LNE_set_address = 0x02
DW_LNE_define_file = 0x03
DW_LNE_set_discriminator = 0x04
DW_LNE_lo_user = 0x80
DW_LNE_hi_user = 0xff

# Line program header content types
#
DW_LNCT_path = 0x01
DW_LNCT_directory_index = 0x02
DW_LNCT_timestamp = 0x03
DW_LNCT_size = 0x04
DW_LNCT_MD5 = 0x05
DW_LNCT_lo_user = 0x2000
DW_LNCT_hi_user = 0x3fff

# Call frame instructions
#
# Note that the first 3 instructions have the so-called "primary opcode"
# (as described in DWARFv3 7.23), so only their highest 2 bits take part
# in the opcode decoding. They are kept as constants with the low bits masked
# out, and the callframe module knows how to handle this.
# The other instructions use an "extended opcode" encoded just in the low 6
# bits, with the high 2 bits zero, so these constants are exactly as they
# would appear in an actual file.
#
DW_CFA_advance_loc = 0b01000000
DW_CFA_offset = 0b10000000
DW_CFA_restore = 0b11000000
DW_CFA_nop = 0x00
DW_CFA_set_loc = 0x01
DW_CFA_advance_loc1 = 0x02
DW_CFA_advance_loc2 = 0x03
DW_CFA_advance_loc4 = 0x04
DW_CFA_offset_extended = 0x05
DW_CFA_restore_extended = 0x06
DW_CFA_undefined = 0x07
DW_CFA_same_value = 0x08
DW_CFA_register = 0x09
DW_CFA_remember_state = 0x0a
DW_CFA_restore_state = 0x0b
DW_CFA_def_cfa = 0x0c
DW_CFA_def_cfa_register = 0x0d
DW_CFA_def_cfa_offset = 0x0e
DW_CFA_def_cfa_expression = 0x0f
DW_CFA_expression = 0x10
DW_CFA_offset_extended_sf = 0x11
DW_CFA_def_cfa_sf = 0x12
DW_CFA_def_cfa_offset_sf = 0x13
DW_CFA_val_offset = 0x14
DW_CFA_val_offset_sf = 0x15
DW_CFA_val_expression = 0x16
DW_CFA_GNU_args_size = 0x2e

# Compilation unit types
#
# DWARFv5 introduces the "unit_type" field to each CU header, allowing
# individual CUs to indicate whether they're complete, partial, and so forth.
# See DWARFv5 3.1 ("Unit Entries") and 7.5.1 ("Unit Headers").
DW_UT_compile = 0x01
DW_UT_type = 0x02
DW_UT_partial = 0x03
DW_UT_skeleton = 0x04
DW_UT_split_compile = 0x05
DW_UT_split_type = 0x06
DW_UT_lo_user = 0x80
DW_UT_hi_user = 0xff
View File

@@ -1,649 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/descriptions.py
#
# Textual descriptions of the various values and enums of DWARF
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import defaultdict
from .constants import *
from .dwarf_expr import DWARFExprParser
from .die import DIE
from ..common.utils import preserve_stream_pos, dwarf_assert
from ..common.py3compat import bytes2str
from .callframe import instruction_name, CIE, FDE
def set_global_machine_arch(machine_arch):
    """ Set the module-global machine architecture string, used as the
    default by describe_reg_name and the CFI instruction descriptions.
    """
    global _MACHINE_ARCH
    _MACHINE_ARCH = machine_arch
def describe_attr_value(attr, die, section_offset):
    """ Given an attribute attr, return the textual representation of its
    value, suitable for tools like readelf.

    To cover all cases, this function needs some extra arguments:

    die: the DIE this attribute was extracted from
    section_offset: offset in the stream of the section the DIE belongs to
    """
    value_text = _ATTR_DESCRIPTION_MAP[attr.form](attr, die, section_offset)
    # Some attributes warrant further information beyond the raw value.
    extra = _EXTRA_INFO_DESCRIPTION_MAP[attr.name](attr, die, section_offset)
    return str(value_text) + '\t' + extra
def describe_CFI_instructions(entry):
    """ Given a CFI entry (CIE or FDE), return the textual description of its
    instructions, one line per instruction (readelf-style).
    """
    def _assert_FDE_instruction(instr):
        # Some instructions are only meaningful inside an FDE.
        dwarf_assert(
            isinstance(entry, FDE),
            'Unexpected instruction "%s" for a CIE' % instr)

    def _full_reg_name(regnum):
        # 'r<num> (<name>)' when the architecture register name is known,
        # plain 'r<num>' otherwise.
        regname = describe_reg_name(regnum, _MACHINE_ARCH, False)
        if regname:
            return 'r%s (%s)' % (regnum, regname)
        else:
            return 'r%s' % regnum

    if isinstance(entry, CIE):
        cie = entry
    else: # FDE
        cie = entry.cie
        pc = entry['initial_location']

    s = ''
    for instr in entry.instructions:
        name = instruction_name(instr.opcode)

        if name in ('DW_CFA_offset',
                    'DW_CFA_offset_extended', 'DW_CFA_offset_extended_sf',
                    'DW_CFA_val_offset', 'DW_CFA_val_offset_sf'):
            s += ' %s: %s at cfa%+d\n' % (
                name, _full_reg_name(instr.args[0]),
                instr.args[1] * cie['data_alignment_factor'])
        elif name in ( 'DW_CFA_restore', 'DW_CFA_restore_extended',
                       'DW_CFA_undefined', 'DW_CFA_same_value',
                       'DW_CFA_def_cfa_register'):
            s += ' %s: %s\n' % (name, _full_reg_name(instr.args[0]))
        elif name == 'DW_CFA_register':
            # BUGFIX: terminate the line with '\n' like every other branch;
            # previously the following instruction's text ran onto the same
            # output line.
            s += ' %s: %s in %s\n' % (
                name, _full_reg_name(instr.args[0]),
                _full_reg_name(instr.args[1]))
        elif name == 'DW_CFA_set_loc':
            pc = instr.args[0]
            s += ' %s: %08x\n' % (name, pc)
        elif name in ( 'DW_CFA_advance_loc1', 'DW_CFA_advance_loc2',
                       'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
            _assert_FDE_instruction(instr)
            factored_offset = instr.args[0] * cie['code_alignment_factor']
            s += ' %s: %s to %08x\n' % (
                name, factored_offset, factored_offset + pc)
            pc += factored_offset
        elif name in ( 'DW_CFA_remember_state', 'DW_CFA_restore_state',
                       'DW_CFA_nop'):
            s += ' %s\n' % name
        elif name == 'DW_CFA_def_cfa':
            s += ' %s: %s ofs %s\n' % (
                name, _full_reg_name(instr.args[0]), instr.args[1])
        elif name == 'DW_CFA_def_cfa_sf':
            s += ' %s: %s ofs %s\n' % (
                name, _full_reg_name(instr.args[0]),
                instr.args[1] * cie['data_alignment_factor'])
        elif name in ('DW_CFA_def_cfa_offset', 'DW_CFA_GNU_args_size'):
            s += ' %s: %s\n' % (name, instr.args[0])
        elif name == 'DW_CFA_def_cfa_expression':
            expr_dumper = ExprDumper(entry.structs)
            # readelf output is missing a colon for DW_CFA_def_cfa_expression
            s += ' %s (%s)\n' % (name, expr_dumper.dump_expr(instr.args[0]))
        elif name == 'DW_CFA_expression':
            expr_dumper = ExprDumper(entry.structs)
            s += ' %s: %s (%s)\n' % (
                name, _full_reg_name(instr.args[0]),
                expr_dumper.dump_expr(instr.args[1]))
        else:
            s += ' %s: <??>\n' % name

    return s
def describe_CFI_register_rule(rule):
    """ Textual description of a RegisterRule, readelf-style. """
    text = _DESCR_CFI_REGISTER_RULE_TYPE[rule.type]
    if rule.type == 'REGISTER':
        text += describe_reg_name(rule.arg)
    elif rule.type in ('OFFSET', 'VAL_OFFSET'):
        text += '%+d' % rule.arg
    return text
def describe_CFI_CFA_rule(rule):
    """ Textual description of a CFARule: 'exp' for expression-based rules,
    otherwise register name plus signed offset.
    """
    if not rule.expr:
        return '%s%+d' % (describe_reg_name(rule.reg), rule.offset)
    return 'exp'
def describe_DWARF_expr(expr, structs, cu_offset=None):
    """ Textual description of a DWARF expression encoded in 'expr'.

    structs should come from the entity encompassing the expression - it's
    needed to be able to parse it correctly.
    """
    # This function can be called a lot, and creating a fresh ExprDumper per
    # call is expensive, so keep one dumper per distinct structs instance.
    cache_key = id(structs)
    if cache_key not in _DWARF_EXPR_DUMPER_CACHE:
        _DWARF_EXPR_DUMPER_CACHE[cache_key] = ExprDumper(structs)
    dumper = _DWARF_EXPR_DUMPER_CACHE[cache_key]
    return '(' + dumper.dump_expr(expr, cu_offset) + ')'
def describe_reg_name(regnum, machine_arch=None, default=True):
    """ Provide a textual description for a register name, given its serial
    number. Uses the module-global architecture when machine_arch is None.
    Unknown architectures yield 'r<num>' when `default` is true, else None.
    """
    arch = _MACHINE_ARCH if machine_arch is None else machine_arch
    if arch == 'x86':
        return _REG_NAMES_x86[regnum]
    if arch == 'x64':
        return _REG_NAMES_x64[regnum]
    if arch == 'AArch64':
        return _REG_NAMES_AArch64[regnum]
    return 'r%s' % regnum if default else None
def describe_form_class(form):
    """For a given form name, determine its value class.

    For example, given 'DW_FORM_data1' returns 'constant'.

    For some forms, like DW_FORM_indirect and DW_FORM_sec_offset, the class is
    not hard-coded and extra information is required. For these, None is
    returned.
    """
    # Raises KeyError for form names not present in _FORM_CLASS.
    return _FORM_CLASS[form]
#-------------------------------------------------------------------------------

# The machine architecture. Set globally via set_global_machine_arch; read by
# describe_reg_name and the CFI instruction descriptions.
#
_MACHINE_ARCH = None
def _describe_attr_ref(attr, die, section_offset):
    # CU-relative reference, displayed as a section-absolute offset.
    absolute = die.cu.cu_offset + attr.value
    return '<0x%x>' % absolute
def _describe_attr_value_passthrough(attr, die, section_offset):
    # The attribute value is shown as-is, with no decoration.
    return attr.value
def _describe_attr_hex(attr, die, section_offset):
    # Plain hexadecimal rendering of the value.
    return '0x{:x}'.format(attr.value)
def _describe_attr_hex_addr(attr, die, section_offset):
    # Hexadecimal value wrapped in angle brackets, readelf-style.
    return '<0x{:x}>'.format(attr.value)
def _describe_attr_split_64bit(attr, die, section_offset):
    # Render a 64-bit value as two hex words, low word first.
    mask = 0xFFFFFFFF
    return '0x%x 0x%x' % (attr.value & mask, (attr.value >> 32) & mask)
def _describe_attr_strp(attr, die, section_offset):
    # Indirect string: show its offset into .debug_str plus the resolved text.
    text = bytes2str(attr.value)
    return '(indirect string, offset: 0x%x): %s' % (attr.raw_value, text)
def _describe_attr_string(attr, die, section_offset):
    # Inline string attribute: decode the raw bytes to text.
    return bytes2str(attr.value)
def _describe_attr_debool(attr, die, section_offset):
    """ To be consistent with readelf, generate 1 for True flags, 0 for False
    flags.
    """
    if attr.value:
        return '1'
    return '0'
def _describe_attr_present(attr, die, section_offset):
    """ Some forms may simply mean that an attribute is present,
    without providing any value.
    """
    # Rendered as '1' regardless of the (absent) value, matching readelf.
    return '1'
def _describe_attr_block(attr, die, section_offset):
s = '%s byte block: ' % len(attr.value)
s += ' '.join('%x' % item for item in attr.value) + ' '
return s
# Maps a DW_FORM_* name to the function used to render its value for display.
# Any form not listed falls back to showing the translated value unchanged.
_ATTR_DESCRIPTION_MAP = defaultdict(
    lambda: _describe_attr_value_passthrough, # default_factory
    DW_FORM_ref1=_describe_attr_ref,
    DW_FORM_ref2=_describe_attr_ref,
    DW_FORM_ref4=_describe_attr_ref,
    DW_FORM_ref8=_describe_attr_split_64bit,
    DW_FORM_ref_udata=_describe_attr_ref,
    DW_FORM_ref_addr=_describe_attr_hex_addr,
    DW_FORM_data4=_describe_attr_hex,
    DW_FORM_data8=_describe_attr_hex,
    DW_FORM_addr=_describe_attr_hex,
    DW_FORM_sec_offset=_describe_attr_hex,
    DW_FORM_flag=_describe_attr_debool,
    DW_FORM_data1=_describe_attr_value_passthrough,
    DW_FORM_data2=_describe_attr_value_passthrough,
    DW_FORM_sdata=_describe_attr_value_passthrough,
    DW_FORM_udata=_describe_attr_value_passthrough,
    DW_FORM_string=_describe_attr_string,
    DW_FORM_strp=_describe_attr_strp,
    DW_FORM_block1=_describe_attr_block,
    DW_FORM_block2=_describe_attr_block,
    DW_FORM_block4=_describe_attr_block,
    DW_FORM_block=_describe_attr_block,
    DW_FORM_flag_present=_describe_attr_present,
    DW_FORM_exprloc=_describe_attr_block,
    DW_FORM_ref_sig8=_describe_attr_ref,
)
# DW_FORM_* name -> DWARF value class ('constant', 'block', 'reference', ...).
# Forms whose class depends on context (DW_FORM_indirect, DW_FORM_sec_offset)
# map to None; see describe_form_class.
_FORM_CLASS = dict(
    DW_FORM_addr='address',
    DW_FORM_block2='block',
    DW_FORM_block4='block',
    DW_FORM_data2='constant',
    DW_FORM_data4='constant',
    DW_FORM_data8='constant',
    DW_FORM_string='string',
    DW_FORM_block='block',
    DW_FORM_block1='block',
    DW_FORM_data1='constant',
    DW_FORM_flag='flag',
    DW_FORM_sdata='constant',
    DW_FORM_strp='string',
    DW_FORM_udata='constant',
    DW_FORM_ref_addr='reference',
    DW_FORM_ref1='reference',
    DW_FORM_ref2='reference',
    DW_FORM_ref4='reference',
    DW_FORM_ref8='reference',
    DW_FORM_ref_udata='reference',
    DW_FORM_indirect=None,
    DW_FORM_sec_offset=None,
    DW_FORM_exprloc='exprloc',
    DW_FORM_flag_present='flag',
    DW_FORM_ref_sig8='reference',
)
# The _DESCR_* tables below map DWARF attribute constants (DW_INL_*,
# DW_LANG_*, etc.) to the parenthesized "extra" text readelf prints next to
# the raw value.

# DW_AT_inline values.
_DESCR_DW_INL = {
    DW_INL_not_inlined: '(not inlined)',
    DW_INL_inlined: '(inlined)',
    DW_INL_declared_not_inlined: '(declared as inline but ignored)',
    DW_INL_declared_inlined: '(declared as inline and inlined)',
}

# DW_AT_language values.
_DESCR_DW_LANG = {
    DW_LANG_C89: '(ANSI C)',
    DW_LANG_C: '(non-ANSI C)',
    DW_LANG_Ada83: '(Ada)',
    DW_LANG_C_plus_plus: '(C++)',
    DW_LANG_Cobol74: '(Cobol 74)',
    DW_LANG_Cobol85: '(Cobol 85)',
    DW_LANG_Fortran77: '(FORTRAN 77)',
    DW_LANG_Fortran90: '(Fortran 90)',
    DW_LANG_Pascal83: '(ANSI Pascal)',
    DW_LANG_Modula2: '(Modula 2)',
    DW_LANG_Java: '(Java)',
    DW_LANG_C99: '(ANSI C99)',
    DW_LANG_Ada95: '(ADA 95)',
    DW_LANG_Fortran95: '(Fortran 95)',
    DW_LANG_PLI: '(PLI)',
    DW_LANG_ObjC: '(Objective C)',
    DW_LANG_ObjC_plus_plus: '(Objective C++)',
    DW_LANG_UPC: '(Unified Parallel C)',
    DW_LANG_D: '(D)',
    DW_LANG_Python: '(Python)',
    DW_LANG_Mips_Assembler: '(MIPS assembler)',
    DW_LANG_HP_Bliss: '(HP Bliss)',
    DW_LANG_HP_Basic91: '(HP Basic 91)',
    DW_LANG_HP_Pascal91: '(HP Pascal 91)',
    DW_LANG_HP_IMacro: '(HP IMacro)',
    DW_LANG_HP_Assembler: '(HP assembler)',
}

# DW_AT_encoding (base type encoding) values.
_DESCR_DW_ATE = {
    DW_ATE_void: '(void)',
    DW_ATE_address: '(machine address)',
    DW_ATE_boolean: '(boolean)',
    DW_ATE_complex_float: '(complex float)',
    DW_ATE_float: '(float)',
    DW_ATE_signed: '(signed)',
    DW_ATE_signed_char: '(signed char)',
    DW_ATE_unsigned: '(unsigned)',
    DW_ATE_unsigned_char: '(unsigned char)',
    DW_ATE_imaginary_float: '(imaginary float)',
    DW_ATE_decimal_float: '(decimal float)',
    DW_ATE_packed_decimal: '(packed_decimal)',
    DW_ATE_numeric_string: '(numeric_string)',
    DW_ATE_edited: '(edited)',
    DW_ATE_signed_fixed: '(signed_fixed)',
    DW_ATE_unsigned_fixed: '(unsigned_fixed)',
    DW_ATE_UTF: '(unicode string)',
    DW_ATE_HP_float80: '(HP_float80)',
    DW_ATE_HP_complex_float80: '(HP_complex_float80)',
    DW_ATE_HP_float128: '(HP_float128)',
    DW_ATE_HP_complex_float128: '(HP_complex_float128)',
    DW_ATE_HP_floathpintel: '(HP_floathpintel)',
    DW_ATE_HP_imaginary_float80: '(HP_imaginary_float80)',
    DW_ATE_HP_imaginary_float128: '(HP_imaginary_float128)',
}

# DW_AT_accessibility values.
_DESCR_DW_ACCESS = {
    DW_ACCESS_public: '(public)',
    DW_ACCESS_protected: '(protected)',
    DW_ACCESS_private: '(private)',
}

# DW_AT_visibility values.
_DESCR_DW_VIS = {
    DW_VIS_local: '(local)',
    DW_VIS_exported: '(exported)',
    DW_VIS_qualified: '(qualified)',
}

# DW_AT_virtuality values.
_DESCR_DW_VIRTUALITY = {
    DW_VIRTUALITY_none: '(none)',
    DW_VIRTUALITY_virtual: '(virtual)',
    DW_VIRTUALITY_pure_virtual: '(pure virtual)',
}

# DW_AT_identifier_case values.
_DESCR_DW_ID_CASE = {
    DW_ID_case_sensitive: '(case_sensitive)',
    DW_ID_up_case: '(up_case)',
    DW_ID_down_case: '(down_case)',
    DW_ID_case_insensitive: '(case_insensitive)',
}

# DW_AT_calling_convention values.
_DESCR_DW_CC = {
    DW_CC_normal: '(normal)',
    DW_CC_program: '(program)',
    DW_CC_nocall: '(nocall)',
}

# DW_AT_ordering (array ordering) values.
_DESCR_DW_ORD = {
    DW_ORD_row_major: '(row major)',
    DW_ORD_col_major: '(column major)',
}

# Short codes used when dumping CFI register rules, matching readelf output.
_DESCR_CFI_REGISTER_RULE_TYPE = dict(
    UNDEFINED='u',
    SAME_VALUE='s',
    OFFSET='c',
    VAL_OFFSET='v',
    REGISTER='',
    EXPRESSION='exp',
    VAL_EXPRESSION='vexp',
    ARCHITECTURAL='a',
)
def _make_extra_mapper(mapping, default, default_interpolate_value=False):
""" Create a mapping function from attribute parameters to an extra
value that should be displayed.
"""
def mapper(attr, die, section_offset):
if default_interpolate_value:
d = default % attr.value
else:
d = default
return mapping.get(attr.value, d)
return mapper
def _make_extra_string(s=''):
""" Create an extra function that just returns a constant string.
"""
def extra(attr, die, section_offset):
return s
return extra
_DWARF_EXPR_DUMPER_CACHE = {}
def _location_list_extra(attr, die, section_offset):
# According to section 2.6 of the DWARF spec v3, class loclistptr means
# a location list, and class block means a location expression.
# DW_FORM_sec_offset is new in DWARFv4 as a section offset.
if attr.form in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'):
return '(location list)'
else:
return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset)
def _data_member_location_extra(attr, die, section_offset):
# According to section 5.5.6 of the DWARF spec v4, a data member location
# can be an integer offset, or a location description.
#
if attr.form in ('DW_FORM_data1', 'DW_FORM_data2',
'DW_FORM_data4', 'DW_FORM_data8'):
return '' # No extra description needed
elif attr.form == 'DW_FORM_sdata':
return str(attr.value)
else:
return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset)
def _import_extra(attr, die, section_offset):
# For DW_AT_import the value points to a DIE (that can be either in the
# current DIE's CU or in another CU, depending on the FORM). The extra
# information for it is the abbreviation number in this DIE and its tag.
if attr.form == 'DW_FORM_ref_addr':
# Absolute offset value
ref_die_offset = section_offset + attr.value
else:
# Relative offset to the current DIE's CU
ref_die_offset = attr.value + die.cu.cu_offset
# Now find the CU this DIE belongs to (since we have to find its abbrev
# table). This is done by linearly scanning through all CUs, looking for
# one spanning an address space containing the referred DIE's offset.
for cu in die.dwarfinfo.iter_CUs():
if cu['unit_length'] + cu.cu_offset > ref_die_offset >= cu.cu_offset:
# Once we have the CU, we can actually parse this DIE from the
# stream.
with preserve_stream_pos(die.stream):
ref_die = DIE(cu, die.stream, ref_die_offset)
#print '&&& ref_die', ref_die
return '[Abbrev Number: %s (%s)]' % (
ref_die.abbrev_code, ref_die.tag)
return '[unknown]'
# Maps a DW_AT_* attribute name to the function producing the "extra"
# description text shown after the value; attributes not listed get an
# empty-string function via the default_factory.
_EXTRA_INFO_DESCRIPTION_MAP = defaultdict(
    lambda: _make_extra_string(''), # default_factory
    DW_AT_inline=_make_extra_mapper(
        _DESCR_DW_INL, '(Unknown inline attribute value: %x',
        default_interpolate_value=True),
    DW_AT_language=_make_extra_mapper(
        _DESCR_DW_LANG, '(Unknown: %x)', default_interpolate_value=True),
    DW_AT_encoding=_make_extra_mapper(_DESCR_DW_ATE, '(unknown type)'),
    DW_AT_accessibility=_make_extra_mapper(
        _DESCR_DW_ACCESS, '(unknown accessibility)'),
    DW_AT_visibility=_make_extra_mapper(
        _DESCR_DW_VIS, '(unknown visibility)'),
    DW_AT_virtuality=_make_extra_mapper(
        _DESCR_DW_VIRTUALITY, '(unknown virtuality)'),
    DW_AT_identifier_case=_make_extra_mapper(
        _DESCR_DW_ID_CASE, '(unknown case)'),
    DW_AT_calling_convention=_make_extra_mapper(
        _DESCR_DW_CC, '(unknown convention)'),
    DW_AT_ordering=_make_extra_mapper(
        _DESCR_DW_ORD, '(undefined)'),
    DW_AT_frame_base=_location_list_extra,
    DW_AT_location=_location_list_extra,
    DW_AT_string_length=_location_list_extra,
    DW_AT_return_addr=_location_list_extra,
    DW_AT_data_member_location=_data_member_location_extra,
    DW_AT_vtable_elem_location=_location_list_extra,
    DW_AT_segment=_location_list_extra,
    DW_AT_static_link=_location_list_extra,
    DW_AT_use_location=_location_list_extra,
    DW_AT_allocated=_location_list_extra,
    DW_AT_associated=_location_list_extra,
    DW_AT_data_location=_location_list_extra,
    DW_AT_stride=_location_list_extra,
    DW_AT_import=_import_extra,
    DW_AT_GNU_call_site_value=_location_list_extra,
    DW_AT_GNU_call_site_data_value=_location_list_extra,
    DW_AT_GNU_call_site_target=_location_list_extra,
    DW_AT_GNU_call_site_target_clobbered=_location_list_extra,
)
# DWARF register number -> register name tables, indexed by the serial
# number used in DWARF expressions and CFI.  '<none>' marks numbers with no
# assigned register in the respective ABI.
# 8 in a line, for easier counting
_REG_NAMES_x86 = [
    'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi',
    'eip', 'eflags', '<none>', 'st0', 'st1', 'st2', 'st3', 'st4',
    'st5', 'st6', 'st7', '<none>', '<none>', 'xmm0', 'xmm1', 'xmm2',
    'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'mm0', 'mm1', 'mm2',
    'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'fcw', 'fsw', 'mxcsr',
    'es', 'cs', 'ss', 'ds', 'fs', 'gs', '<none>', '<none>', 'tr', 'ldtr'
]

# x86-64 (System V AMD64 ABI) register numbering.
_REG_NAMES_x64 = [
    'rax', 'rdx', 'rcx', 'rbx', 'rsi', 'rdi', 'rbp', 'rsp',
    'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
    'rip', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6',
    'xmm7', 'xmm8', 'xmm9', 'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14',
    'xmm15', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6',
    'st7', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
    'mm7', 'rflags', 'es', 'cs', 'ss', 'ds', 'fs', 'gs',
    '<none>', '<none>', 'fs.base', 'gs.base', '<none>', '<none>', 'tr', 'ldtr',
    'mxcsr', 'fcw', 'fsw'
]

# AArch64 register numbering, per the ARM DWARF ABI document below.
# https://developer.arm.com/documentation/ihi0057/e/?lang=en#dwarf-register-names
_REG_NAMES_AArch64 = [
    'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7',
    'x8', 'x9', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15',
    'x16', 'x17', 'x18', 'x19', 'x20', 'x21', 'x22', 'x23',
    'x24', 'x25', 'x26', 'x27', 'x28', 'x29', 'x30', 'sp',
    '<none>', 'ELR_mode', 'RA_SIGN_STATE', '<none>', '<none>', '<none>', '<none>', '<none>',
    '<none>', '<none>', '<none>', '<none>', '<none>', '<none>', 'VG', 'FFR',
    'p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7',
    'p8', 'p9', 'p10', 'p11', 'p12', 'p13', 'p14', 'p15',
    'v0', 'v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7',
    'v8', 'v9', 'v10', 'v11', 'v12', 'v13', 'v14', 'v15',
    'v16', 'v17', 'v18', 'v19', 'v20', 'v21', 'v22', 'v23',
    'v24', 'v25', 'v26', 'v27', 'v28', 'v29', 'v30', 'v31',
    'z0', 'z1', 'z2', 'z3', 'z4', 'z5', 'z6', 'z7',
    'z8', 'z9', 'z10', 'z11', 'z12', 'z13', 'z14', 'z15',
    'z16', 'z17', 'z18', 'z19', 'z20', 'z21', 'z22', 'z23',
    'z24', 'z25', 'z26', 'z27', 'z28', 'z29', 'z30', 'z31'
]
class ExprDumper(object):
    """ A dumper for DWARF expressions that dumps a textual
        representation of the complete expression.
        Usage: after creation, call dump_expr repeatedly - it's stateless.
    """
    def __init__(self, structs):
        # structs: DWARFStructs instance matching the file being dumped;
        # used to build the expression parser's dispatch table once.
        self.structs = structs
        self.expr_parser = DWARFExprParser(self.structs)
        self._init_lookups()

    def dump_expr(self, expr, cu_offset=None):
        """ Parse and dump a DWARF expression. expr should be a list of
            (integer) byte values. cu_offset is the cu_offset
            value from the CU object where the expression resides.
            Only affects a handful of GNU opcodes, if None is provided,
            that's not a crash condition, only the expression dump will
            not be consistent of that of readelf.
            Returns a string representing the expression.
        """
        parsed = self.expr_parser.parse_expr(expr)
        s = []
        # Each parsed op is rendered independently and joined readelf-style.
        for deo in parsed:
            s.append(self._dump_to_string(deo.op, deo.op_name, deo.args, cu_offset))
        return '; '.join(s)

    def _init_lookups(self):
        # Partition the opcodes by how their arguments are rendered:
        # decimal, two decimals, or hex. Opcodes in none of these sets are
        # handled by dedicated branches in _dump_to_string.
        self._ops_with_decimal_arg = set([
            'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s',
            'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_const8u', 'DW_OP_const8s',
            'DW_OP_constu', 'DW_OP_consts', 'DW_OP_pick', 'DW_OP_plus_uconst',
            'DW_OP_bra', 'DW_OP_skip', 'DW_OP_fbreg', 'DW_OP_piece',
            'DW_OP_deref_size', 'DW_OP_xderef_size', 'DW_OP_regx',])
        for n in range(0, 32):
            self._ops_with_decimal_arg.add('DW_OP_breg%s' % n)
        self._ops_with_two_decimal_args = set(['DW_OP_bregx', 'DW_OP_bit_piece'])
        self._ops_with_hex_arg = set(
            ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref'])

    def _dump_to_string(self, opcode, opcode_name, args, cu_offset=None):
        # Render one parsed expression op (and its args) as readelf does.
        # Some GNU ops contain an offset from the current CU as an argument,
        # but readelf emits those ops with offset from the info section
        # so we need the base offset of the parent CU.
        # If omitted, arguments on some GNU opcodes will be off.
        if cu_offset is None:
            cu_offset = 0
        if len(args) == 0:
            if opcode_name.startswith('DW_OP_reg'):
                # 'DW_OP_reg' is 9 characters; the rest is the register number.
                regnum = int(opcode_name[9:])
                return '%s (%s)' % (
                    opcode_name,
                    describe_reg_name(regnum, _MACHINE_ARCH))
            else:
                return opcode_name
        elif opcode_name in self._ops_with_decimal_arg:
            if opcode_name.startswith('DW_OP_breg'):
                # 'DW_OP_breg' is 10 characters; the rest is the register number.
                regnum = int(opcode_name[10:])
                return '%s (%s): %s' % (
                    opcode_name,
                    describe_reg_name(regnum, _MACHINE_ARCH),
                    args[0])
            elif opcode_name.endswith('regx'):
                # applies to both regx and bregx
                return '%s: %s (%s)' % (
                    opcode_name,
                    args[0],
                    describe_reg_name(args[0], _MACHINE_ARCH))
            else:
                return '%s: %s' % (opcode_name, args[0])
        elif opcode_name in self._ops_with_hex_arg:
            return '%s: %x' % (opcode_name, args[0])
        elif opcode_name in self._ops_with_two_decimal_args:
            return '%s: %s %s' % (opcode_name, args[0], args[1])
        elif opcode_name == 'DW_OP_GNU_entry_value':
            # args[0] is a nested, already-parsed expression; recurse per op.
            return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args) for deo in args[0]]))
        elif opcode_name == 'DW_OP_implicit_value':
            return "%s %s byte block: %s" % (opcode_name, len(args[0]), ''.join(["%x " % b for b in args[0]]))
        elif opcode_name == 'DW_OP_GNU_parameter_ref':
            return "%s: <0x%x>" % (opcode_name, args[0] + cu_offset)
        elif opcode_name == 'DW_OP_GNU_implicit_pointer':
            return "%s: <0x%x> %d" % (opcode_name, args[0], args[1])
        elif opcode_name == 'DW_OP_GNU_convert':
            return "%s <0x%x>" % (opcode_name, args[0] + cu_offset)
        elif opcode_name == 'DW_OP_GNU_deref_type':
            return "%s: %d <0x%x>" % (opcode_name, args[0], args[1] + cu_offset)
        elif opcode_name == 'DW_OP_GNU_const_type':
            return "%s: <0x%x> %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1]))
        elif opcode_name == 'DW_OP_GNU_regval_type':
            return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset)
        else:
            return '<unknown %s>' % opcode_name

View File

@@ -1,279 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/die.py
#
# DWARF Debugging Information Entry
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import namedtuple, OrderedDict
import os
from ..common.exceptions import DWARFError
from ..common.py3compat import bytes2str, iteritems
from ..common.utils import struct_parse, preserve_stream_pos
from .enums import DW_FORM_raw2name
# AttributeValue - describes an attribute value in the DIE:
#
# name:
# The name (DW_AT_*) of this attribute
#
# form:
# The DW_FORM_* name of this attribute
#
# value:
# The value parsed from the section and translated accordingly to the form
# (e.g. for a DW_FORM_strp it's the actual string taken from the string table)
#
# raw_value:
# Raw value as parsed from the section - used for debugging and presentation
# (e.g. for a DW_FORM_strp it's the raw string offset into the table)
#
# offset:
# Offset of this attribute's value in the stream (absolute offset, relative
# the beginning of the whole stream)
#
# Field-by-field semantics are documented in the comment block above.
AttributeValue = namedtuple(
    'AttributeValue', 'name form value raw_value offset')
class DIE(object):
    """ A DWARF debugging information entry. On creation, parses itself from
        the stream. Each DIE is held by a CU.
        Accessible attributes:
            tag:
                The DIE tag
            size:
                The size this DIE occupies in the section
            offset:
                The offset of this DIE in the stream
            attributes:
                An ordered dictionary mapping attribute names to values. It's
                ordered to preserve the order of attributes in the section
            has_children:
                Specifies whether this DIE has children
            abbrev_code:
                The abbreviation code pointing to an abbreviation entry (note
                that this is for informational purposes only - this object
                interacts with its abbreviation table transparently).
        See also the public methods.
    """
    def __init__(self, cu, stream, offset):
        """ cu:
                CompileUnit object this DIE belongs to. Used to obtain context
                information (structs, abbrev table, etc.)
            stream, offset:
                The stream and offset into it where this DIE's data is located
        """
        self.cu = cu
        self.dwarfinfo = self.cu.dwarfinfo # get DWARFInfo context
        self.stream = stream
        self.offset = offset
        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None
        self._parse_DIE()

    def is_null(self):
        """ Is this a null (terminator) entry?  Null entries have no tag.
        """
        return self.tag is None

    def get_DIE_from_attribute(self, name):
        """ Return the DIE referenced by the named attribute of this DIE.
            The attribute must be in the reference attribute class.
            name:
                The name of the attribute in the reference class.
            Raises DWARFError if the attribute is not a reference form;
            NotImplementedError for signature/supplementary references.
        """
        attr = self.attributes[name]
        if attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
                         'DW_FORM_ref8', 'DW_FORM_ref'):
            # CU-relative reference: rebase onto the CU's offset.
            refaddr = self.cu.cu_offset + attr.raw_value
            return self.cu.get_DIE_from_refaddr(refaddr)
        # BUGFIX: these two comparisons previously read
        # `attr.form in ('DW_FORM_...')` -- a *string*, not a 1-tuple
        # (missing trailing comma), turning the check into substring
        # membership. Use equality instead.
        elif attr.form == 'DW_FORM_ref_addr':
            # Section-absolute reference.
            return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
        elif attr.form == 'DW_FORM_ref_sig8':
            # Implement search type units for matching signature
            raise NotImplementedError('%s (type unit by signature)' % attr.form)
        elif attr.form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8'):
            raise NotImplementedError('%s to dwo' % attr.form)
        else:
            raise DWARFError('%s is not a reference class form attribute' % attr)

    def get_parent(self):
        """ Return the parent DIE of this DIE, or None if the DIE has no
            parent (i.e. is a top-level DIE).
        """
        if self._parent is None:
            self._search_ancestor_offspring()
        return self._parent

    def get_full_path(self):
        """ Return the full path filename for the DIE.
            The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name',
            either of which may be missing in practice. Note that its value is
            usually a string taken from the .debug_string section and the
            returned value will be a string.
        """
        comp_dir_attr = self.attributes.get('DW_AT_comp_dir', None)
        comp_dir = bytes2str(comp_dir_attr.value) if comp_dir_attr else ''
        fname_attr = self.attributes.get('DW_AT_name', None)
        fname = bytes2str(fname_attr.value) if fname_attr else ''
        return os.path.join(comp_dir, fname)

    def iter_children(self):
        """ Iterates all children of this DIE
        """
        return self.cu.iter_DIE_children(self)

    def iter_siblings(self):
        """ Yield all siblings of this DIE (excluding this DIE itself).
            A DIE with no parent yields nothing.
        """
        parent = self.get_parent()
        if parent:
            for sibling in parent.iter_children():
                if sibling is not self:
                    yield sibling
        # BUGFIX: previously this branch did `raise StopIteration()`, which
        # PEP 479 (Python 3.7+) converts into a RuntimeError inside a
        # generator. A bare end-of-function return is the correct way to
        # finish the generator with no values.

    # The following methods are used while creating the DIE and should not be
    # interesting to consumers
    #
    def set_parent(self, die):
        self._parent = die

    #------ PRIVATE ------#

    def _search_ancestor_offspring(self):
        """ Search our ancestors identifying their offspring to find our parent.
            DIEs are stored as a flattened tree. The top DIE is the ancestor
            of all DIEs in the unit. Each parent is guaranteed to be at
            an offset less than their children. In each generation of children
            the sibling with the closest offset not greater than our offset is
            our ancestor.
        """
        # This code is called when get_parent notices that the _parent has
        # not been identified. To avoid execution for each sibling record all
        # the children of any parent iterated. Assuming get_parent will also be
        # called for siblings, it is more efficient if siblings references are
        # provided and no worse than a single walk if they are missing, while
        # stopping iteration early could result in O(n^2) walks.
        search = self.cu.get_top_DIE()
        while search.offset < self.offset:
            prev = search
            for child in search.iter_children():
                child.set_parent(search)
                if child.offset <= self.offset:
                    prev = child
            # We also need to check the offset of the terminator DIE
            if search.has_children and search._terminator.offset <= self.offset:
                prev = search._terminator
            # If we didn't find a closer parent, give up, don't loop.
            # Either we mis-parsed an ancestor or someone created a DIE
            # by an offset that was not actually the start of a DIE.
            if prev is search:
                raise ValueError("offset %s not in CU %s DIE tree" %
                    (self.offset, self.cu.cu_offset))
            search = prev

    def __repr__(self):
        s = 'DIE %s, size=%s, has_children=%s\n' % (
            self.tag, self.size, self.has_children)
        for attrname, attrval in iteritems(self.attributes):
            s += ' |%-18s: %s\n' % (attrname, attrval)
        return s

    def __str__(self):
        return self.__repr__()

    def _parse_DIE(self):
        """ Parses the DIE info from the section, based on the abbreviation
            table of the CU
        """
        structs = self.cu.structs
        # A DIE begins with the abbreviation code. Read it and use it to
        # obtain the abbrev declaration for this DIE.
        # Note: here and elsewhere, preserve_stream_pos is used on operations
        # that manipulate the stream by reading data from it.
        self.abbrev_code = struct_parse(
            structs.Dwarf_uleb128(''), self.stream, self.offset)
        # This may be a null entry
        if self.abbrev_code == 0:
            self.size = self.stream.tell() - self.offset
            return
        abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code)
        self.tag = abbrev_decl['tag']
        self.has_children = abbrev_decl.has_children()
        # Guided by the attributes listed in the abbreviation declaration, parse
        # values from the stream.
        for name, form in abbrev_decl.iter_attr_specs():
            attr_offset = self.stream.tell()
            raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)
            value = self._translate_attr_value(form, raw_value)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=value,
                raw_value=raw_value,
                offset=attr_offset)
        self.size = self.stream.tell() - self.offset

    def _translate_attr_value(self, form, raw_value):
        """ Translate a raw attr value according to the form
        """
        value = None
        if form == 'DW_FORM_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_table(raw_value)
        elif form == 'DW_FORM_flag':
            value = not raw_value == 0
        elif form == 'DW_FORM_flag_present':
            value = True
        elif form == 'DW_FORM_indirect':
            try:
                form = DW_FORM_raw2name[raw_value]
            except KeyError:
                raise DWARFError(
                        'Found DW_FORM_indirect with unknown raw_value=' +
                        str(raw_value))
            raw_value = struct_parse(
                self.cu.structs.Dwarf_dw_form[form], self.stream)
            # Let's hope this doesn't get too deep :-)
            return self._translate_attr_value(form, raw_value)
        else:
            value = raw_value
        return value

View File

@@ -1,257 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/dwarf_expr.py
#
# Decoding DWARF expressions
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import namedtuple
from ..common.py3compat import BytesIO, iteritems
from ..common.utils import struct_parse, bytelist2string, read_blob
# DWARF expression opcodes. name -> opcode mapping
# The literal/register/base-register families (DW_OP_lit0..31, DW_OP_reg0..31,
# DW_OP_breg0..31) are filled in dynamically by _generate_dynamic_values below.
DW_OP_name2opcode = dict(
    DW_OP_addr=0x03,
    DW_OP_deref=0x06,
    DW_OP_const1u=0x08,
    DW_OP_const1s=0x09,
    DW_OP_const2u=0x0a,
    DW_OP_const2s=0x0b,
    DW_OP_const4u=0x0c,
    DW_OP_const4s=0x0d,
    DW_OP_const8u=0x0e,
    DW_OP_const8s=0x0f,
    DW_OP_constu=0x10,
    DW_OP_consts=0x11,
    DW_OP_dup=0x12,
    DW_OP_drop=0x13,
    DW_OP_over=0x14,
    DW_OP_pick=0x15,
    DW_OP_swap=0x16,
    DW_OP_rot=0x17,
    DW_OP_xderef=0x18,
    DW_OP_abs=0x19,
    DW_OP_and=0x1a,
    DW_OP_div=0x1b,
    DW_OP_minus=0x1c,
    DW_OP_mod=0x1d,
    DW_OP_mul=0x1e,
    DW_OP_neg=0x1f,
    DW_OP_not=0x20,
    DW_OP_or=0x21,
    DW_OP_plus=0x22,
    DW_OP_plus_uconst=0x23,
    DW_OP_shl=0x24,
    DW_OP_shr=0x25,
    DW_OP_shra=0x26,
    DW_OP_xor=0x27,
    DW_OP_bra=0x28,
    DW_OP_eq=0x29,
    DW_OP_ge=0x2a,
    DW_OP_gt=0x2b,
    DW_OP_le=0x2c,
    DW_OP_lt=0x2d,
    DW_OP_ne=0x2e,
    DW_OP_skip=0x2f,
    DW_OP_regx=0x90,
    DW_OP_fbreg=0x91,
    DW_OP_bregx=0x92,
    DW_OP_piece=0x93,
    DW_OP_deref_size=0x94,
    DW_OP_xderef_size=0x95,
    DW_OP_nop=0x96,
    DW_OP_push_object_address=0x97,
    DW_OP_call2=0x98,
    DW_OP_call4=0x99,
    DW_OP_call_ref=0x9a,
    DW_OP_form_tls_address=0x9b,
    DW_OP_call_frame_cfa=0x9c,
    DW_OP_bit_piece=0x9d,
    DW_OP_implicit_value=0x9e,
    DW_OP_stack_value=0x9f,
    DW_OP_implicit_pointer=0xa0,
    DW_OP_addrx=0xa1,
    DW_OP_constx=0xa2,
    DW_OP_entry_value=0xa3,
    DW_OP_const_type=0xa4,
    DW_OP_regval_type=0xa5,
    DW_OP_deref_type=0xa6,
    DW_OP_xderef_type=0xa7,
    DW_OP_convert=0xa8,
    DW_OP_reinterpret=0xa9,
    DW_OP_lo_user=0xe0,
    DW_OP_GNU_push_tls_address=0xe0,
    DW_OP_GNU_implicit_pointer=0xf2,
    DW_OP_GNU_entry_value=0xf3,
    DW_OP_GNU_const_type=0xf4,
    DW_OP_GNU_regval_type=0xf5,
    DW_OP_GNU_deref_type=0xf6,
    DW_OP_GNU_convert=0xf7,
    DW_OP_GNU_parameter_ref=0xfa,
    DW_OP_hi_user=0xff,
)
def _generate_dynamic_values(map, prefix, index_start, index_end, value_start):
""" Generate values in a map (dict) dynamically. Each key starts with
a (string) prefix, followed by an index in the inclusive range
[index_start, index_end]. The values start at value_start.
"""
for index in range(index_start, index_end + 1):
name = '%s%s' % (prefix, index)
value = value_start + index - index_start
map[name] = value
# Fill in the three 32-entry opcode families that follow a regular pattern.
_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_lit', 0, 31, 0x30)
_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_reg', 0, 31, 0x50)
_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_breg', 0, 31, 0x70)

# opcode -> name mapping
# (inverse of DW_OP_name2opcode; aliases like DW_OP_lo_user/
# DW_OP_GNU_push_tls_address share an opcode, so one name wins per opcode)
DW_OP_opcode2name = dict((v, k) for k, v in iteritems(DW_OP_name2opcode))

# Each parsed DWARF expression is returned as this type with its numeric opcode,
# op name (as a string) and a list of arguments.
DWARFExprOp = namedtuple('DWARFExprOp', 'op op_name args')
class DWARFExprParser(object):
    """DWARF expression parser.
    When initialized, requires structs to cache a dispatch table. After that,
    parse_expr can be called repeatedly - it's stateless.
    """
    def __init__(self, structs):
        self._dispatch_table = _init_dispatch_table(structs)

    def parse_expr(self, expr):
        """ Parses expr (a list of integers) into a list of DWARFExprOp.
            The list can potentially be nested.
        """
        stream = BytesIO(bytelist2string(expr))
        ops = []
        # iter() with a sentinel: pull one opcode byte at a time until the
        # stream is exhausted (read(1) returns b'' at EOF).
        for raw in iter(lambda: stream.read(1), b''):
            opcode = ord(raw)
            name = DW_OP_opcode2name.get(opcode, 'OP:0x%x' % opcode)
            # The dispatch table knows how to pull this opcode's arguments
            # off the stream (advancing it as needed).
            handler = self._dispatch_table[opcode]
            ops.append(DWARFExprOp(op=opcode, op_name=name,
                                   args=handler(stream)))
        return ops
def _init_dispatch_table(structs):
    """Creates a dispatch table for parsing args of an op.
    Returns a dict mapping opcode to a function. The function accepts a stream
    and return a list of parsed arguments for the opcode from the stream;
    the stream is advanced by the function as needed.
    """
    table = {}
    # Register one parser function under the numeric opcode for a name.
    def add(opcode_name, func):
        table[DW_OP_name2opcode[opcode_name]] = func

    # Parser factories: each returns a callable(stream) -> [args].
    def parse_noargs():
        return lambda stream: []

    def parse_op_addr():
        return lambda stream: [struct_parse(structs.Dwarf_target_addr(''),
                                            stream)]

    def parse_arg_struct(arg_struct):
        return lambda stream: [struct_parse(arg_struct, stream)]

    def parse_arg_struct2(arg1_struct, arg2_struct):
        return lambda stream: [struct_parse(arg1_struct, stream),
                               struct_parse(arg2_struct, stream)]

    # ULEB128, then an expression of that length
    def parse_nestedexpr():
        def parse(stream):
            size = struct_parse(structs.Dwarf_uleb128(''), stream)
            nested_expr_blob = read_blob(stream, size)
            return [DWARFExprParser(structs).parse_expr(nested_expr_blob)]
        return parse

    # ULEB128, then a blob of that size
    def parse_blob():
        return lambda stream: [read_blob(stream, struct_parse(structs.Dwarf_uleb128(''), stream))]

    # ULEB128 with datatype DIE offset, then byte, then a blob of that size
    def parse_typedblob():
        return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))]

    add('DW_OP_addr', parse_op_addr())
    add('DW_OP_addrx', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8('')))
    add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8('')))
    add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16('')))
    add('DW_OP_const2s', parse_arg_struct(structs.Dwarf_int16('')))
    add('DW_OP_const4u', parse_arg_struct(structs.Dwarf_uint32('')))
    add('DW_OP_const4s', parse_arg_struct(structs.Dwarf_int32('')))
    add('DW_OP_const8u', parse_arg_struct(structs.Dwarf_uint64('')))
    add('DW_OP_const8s', parse_arg_struct(structs.Dwarf_int64('')))
    add('DW_OP_constu', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_consts', parse_arg_struct(structs.Dwarf_sleb128('')))
    add('DW_OP_pick', parse_arg_struct(structs.Dwarf_uint8('')))
    add('DW_OP_plus_uconst', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_bra', parse_arg_struct(structs.Dwarf_int16('')))
    add('DW_OP_skip', parse_arg_struct(structs.Dwarf_int16('')))
    # Stack-manipulation / arithmetic ops take no arguments.
    # (Note: 'DW_OP_swap' appears twice in this list; the duplicate is
    # harmless since add() just overwrites the same table slot.)
    for opname in [ 'DW_OP_deref', 'DW_OP_dup', 'DW_OP_drop', 'DW_OP_over',
                    'DW_OP_swap', 'DW_OP_swap', 'DW_OP_rot', 'DW_OP_xderef',
                    'DW_OP_abs', 'DW_OP_and', 'DW_OP_div', 'DW_OP_minus',
                    'DW_OP_mod', 'DW_OP_mul', 'DW_OP_neg', 'DW_OP_not',
                    'DW_OP_or', 'DW_OP_plus', 'DW_OP_shl', 'DW_OP_shr',
                    'DW_OP_shra', 'DW_OP_xor', 'DW_OP_eq', 'DW_OP_ge',
                    'DW_OP_gt', 'DW_OP_le', 'DW_OP_lt', 'DW_OP_ne', 'DW_OP_nop',
                    'DW_OP_push_object_address', 'DW_OP_form_tls_address',
                    'DW_OP_call_frame_cfa', 'DW_OP_stack_value',
                    'DW_OP_GNU_push_tls_address']:
        add(opname, parse_noargs())
    for n in range(0, 32):
        add('DW_OP_lit%s' % n, parse_noargs())
        add('DW_OP_reg%s' % n, parse_noargs())
        add('DW_OP_breg%s' % n, parse_arg_struct(structs.Dwarf_sleb128('')))
    add('DW_OP_fbreg', parse_arg_struct(structs.Dwarf_sleb128('')))
    add('DW_OP_regx', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_bregx', parse_arg_struct2(structs.Dwarf_uleb128(''),
                                         structs.Dwarf_sleb128('')))
    add('DW_OP_piece', parse_arg_struct(structs.Dwarf_uleb128('')))
    add('DW_OP_bit_piece', parse_arg_struct2(structs.Dwarf_uleb128(''),
                                             structs.Dwarf_uleb128('')))
    add('DW_OP_deref_size', parse_arg_struct(structs.Dwarf_int8('')))
    add('DW_OP_xderef_size', parse_arg_struct(structs.Dwarf_int8('')))
    add('DW_OP_call2', parse_arg_struct(structs.Dwarf_uint16('')))
    add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32('')))
    add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset('')))
    add('DW_OP_implicit_value', parse_blob())
    add('DW_OP_GNU_entry_value', parse_nestedexpr())
    add('DW_OP_GNU_const_type', parse_typedblob())
    add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''),
                                                   structs.Dwarf_uleb128('')))
    add('DW_OP_GNU_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''),
                                                  structs.Dwarf_uleb128('')))
    add('DW_OP_GNU_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''),
                                                        structs.Dwarf_sleb128('')))
    add('DW_OP_GNU_parameter_ref', parse_arg_struct(structs.Dwarf_offset('')))
    add('DW_OP_GNU_convert', parse_arg_struct(structs.Dwarf_uleb128('')))
    return table

View File

@@ -1,460 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/dwarfinfo.py
#
# DWARFInfo - Main class for accessing DWARF debug information
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import namedtuple
from bisect import bisect_right
from ..common.exceptions import DWARFError
from ..common.utils import (struct_parse, dwarf_assert,
parse_cstring_from_stream)
from .structs import DWARFStructs
from .compileunit import CompileUnit
from .abbrevtable import AbbrevTable
from .lineprogram import LineProgram
from .callframe import CallFrameInfo
from .locationlists import LocationLists
from .ranges import RangeLists
from .aranges import ARanges
from .namelut import NameLUT
# Describes a debug section
#
# stream: a stream object containing the data of this section
# name: section name in the container file
# global_offset: the global offset of the section in its container file
# size: the size of the section's data, in bytes
# address: the virtual address for the section's data
#
# 'name' and 'global_offset' are for descriptive purposes only and
# aren't strictly required for the DWARF parsing to work. 'address' is required
# to properly decode the special '.eh_frame' format.
#
DebugSectionDescriptor = namedtuple('DebugSectionDescriptor',
'stream name global_offset size address')
# Some configuration parameters for the DWARF reader. This exists to allow
# DWARFInfo to be independent from any specific file format/container.
#
# little_endian:
# boolean flag specifying whether the data in the file is little endian
#
# machine_arch:
# Machine architecture as a string. For example 'x86' or 'x64'
#
# default_address_size:
# The default address size for the container file (sizeof pointer, in bytes)
#
DwarfConfig = namedtuple('DwarfConfig',
'little_endian machine_arch default_address_size')
class DWARFInfo(object):
    """ Main entry point to DWARF debug information.

        Acts also as a "context" to other major objects, bridging between
        various parts of the debug information.
    """
    def __init__(self,
            config,
            debug_info_sec,
            debug_aranges_sec,
            debug_abbrev_sec,
            debug_frame_sec,
            eh_frame_sec,
            debug_str_sec,
            debug_loc_sec,
            debug_ranges_sec,
            debug_line_sec,
            debug_pubtypes_sec,
            debug_pubnames_sec,
            debug_addr_sec,
            debug_str_offsets_sec):
        """ config:
                A DwarfConfig object

            debug_*_sec:
                DebugSectionDescriptor for a section. Pass None for sections
                that don't exist. These arguments are best given with
                keyword syntax.
        """
        self.config = config
        self.debug_info_sec = debug_info_sec
        self.debug_aranges_sec = debug_aranges_sec
        self.debug_abbrev_sec = debug_abbrev_sec
        self.debug_frame_sec = debug_frame_sec
        self.eh_frame_sec = eh_frame_sec
        self.debug_str_sec = debug_str_sec
        self.debug_loc_sec = debug_loc_sec
        self.debug_ranges_sec = debug_ranges_sec
        self.debug_line_sec = debug_line_sec
        self.debug_pubtypes_sec = debug_pubtypes_sec
        self.debug_pubnames_sec = debug_pubnames_sec
        # NOTE(review): debug_addr_sec and debug_str_offsets_sec are accepted
        # but not stored by this version of the code -- confirm no caller
        # expects them as attributes before relying on that.

        # This is the DWARFStructs the context uses, so it doesn't depend on
        # DWARF format and address_size (these are determined per CU) - set them
        # to default values.
        self.structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=32,
            address_size=self.config.default_address_size)

        # Cache for abbrev tables: a dict keyed by offset
        self._abbrevtable_cache = {}

        # Cache of compile units and map of their offsets for bisect lookup.
        # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at().
        self._cu_cache = []
        self._cu_offsets_map = []

    @property
    def has_debug_info(self):
        """ Return whether this contains debug information.

            It can be not the case when the ELF only contains .eh_frame, which
            is encoded DWARF but not actually for debugging.
        """
        return bool(self.debug_info_sec)

    def get_DIE_from_lut_entry(self, lut_entry):
        """ Get the DIE from the pubnames or pubtypes lookup table entry.

            lut_entry:
                A NameLUTEntry object from a NameLUT instance (see
                .get_pubnames and .get_pubtypes methods).
        """
        cu = self.get_CU_at(lut_entry.cu_ofs)
        return self.get_DIE_from_refaddr(lut_entry.die_ofs, cu)

    def get_DIE_from_refaddr(self, refaddr, cu=None):
        """ Given a .debug_info section offset of a DIE, return the DIE.

            refaddr:
                The refaddr may come from a DW_FORM_ref_addr attribute.

            cu:
                The compile unit object, if known. If None a search
                from the closest offset less than refaddr will be performed.
        """
        if cu is None:
            cu = self.get_CU_containing(refaddr)
        return cu.get_DIE_from_refaddr(refaddr)

    def get_CU_containing(self, refaddr):
        """ Find the CU that includes the given reference address in the
            .debug_info section.

            refaddr:
                Either a refaddr of a DIE (possibly from a DW_FORM_ref_addr
                attribute) or the section offset of a CU (possibly from an
                aranges table).

            This function will parse and cache CUs until the search criteria
            is met, starting from the closest known offset less than or equal
            to the given address.
        """
        dwarf_assert(
            self.has_debug_info,
            'CU lookup but no debug info section')
        dwarf_assert(
            0 <= refaddr < self.debug_info_sec.size,
            "refaddr %s beyond .debug_info size" % refaddr)

        # The CU containing the DIE we desire will be to the right of the
        # DIE insert point. If we have a CU address, then it will be a
        # match but the right insert minus one will still be the item.
        # The first CU starts at offset 0, so start there if cache is empty.
        i = bisect_right(self._cu_offsets_map, refaddr)
        start = self._cu_offsets_map[i - 1] if i > 0 else 0

        # Parse CUs until we find one containing the desired address.
        for cu in self._parse_CUs_iter(start):
            if cu.cu_offset <= refaddr < cu.cu_offset + cu.size:
                return cu

        raise ValueError("CU for reference address %s not found" % refaddr)

    def get_CU_at(self, offset):
        """ Given a CU header offset, return the parsed CU.

            offset:
                The offset may be from an accelerated access table such as
                the public names, public types, address range table, or
                prior use.

            This function will directly parse the CU doing no validation of
            the offset beyond checking the size of the .debug_info section.
        """
        dwarf_assert(
            self.has_debug_info,
            'CU lookup but no debug info section')
        dwarf_assert(
            0 <= offset < self.debug_info_sec.size,
            "offset %s beyond .debug_info size" % offset)

        return self._cached_CU_at_offset(offset)

    def iter_CUs(self):
        """ Yield all the compile units (CompileUnit objects) in the debug info
        """
        return self._parse_CUs_iter()

    def get_abbrev_table(self, offset):
        """ Get an AbbrevTable from the given offset in the debug_abbrev
            section.

            The only verification done on the offset is that it's within the
            bounds of the section (if not, an exception is raised).
            It is the caller's responsibility to make sure the offset actually
            points to a valid abbreviation table.

            AbbrevTable objects are cached internally (two calls for the same
            offset will return the same object).
        """
        dwarf_assert(
            offset < self.debug_abbrev_sec.size,
            "Offset '0x%x' to abbrev table out of section bounds" % offset)
        if offset not in self._abbrevtable_cache:
            self._abbrevtable_cache[offset] = AbbrevTable(
                structs=self.structs,
                stream=self.debug_abbrev_sec.stream,
                offset=offset)
        return self._abbrevtable_cache[offset]

    def get_string_from_table(self, offset):
        """ Obtain a string from the string table section, given an offset
            relative to the section.
        """
        return parse_cstring_from_stream(self.debug_str_sec.stream, offset)

    def line_program_for_CU(self, CU):
        """ Given a CU object, fetch the line program it points to from the
            .debug_line section.
            If the CU doesn't point to a line program, return None.
        """
        # The line program is pointed to by the DW_AT_stmt_list attribute of
        # the top DIE of a CU.
        top_DIE = CU.get_top_DIE()
        if 'DW_AT_stmt_list' in top_DIE.attributes:
            return self._parse_line_program_at_offset(
                top_DIE.attributes['DW_AT_stmt_list'].value, CU.structs)
        else:
            return None

    def has_CFI(self):
        """ Does this dwarf info have a dwarf_frame CFI section?
        """
        return self.debug_frame_sec is not None

    def CFI_entries(self):
        """ Get a list of dwarf_frame CFI entries from the .debug_frame section.
        """
        cfi = CallFrameInfo(
            stream=self.debug_frame_sec.stream,
            size=self.debug_frame_sec.size,
            address=self.debug_frame_sec.address,
            base_structs=self.structs)
        return cfi.get_entries()

    def has_EH_CFI(self):
        """ Does this dwarf info have a eh_frame CFI section?
        """
        return self.eh_frame_sec is not None

    def EH_CFI_entries(self):
        """ Get a list of eh_frame CFI entries from the .eh_frame section.
        """
        cfi = CallFrameInfo(
            stream=self.eh_frame_sec.stream,
            size=self.eh_frame_sec.size,
            address=self.eh_frame_sec.address,
            base_structs=self.structs,
            for_eh_frame=True)
        return cfi.get_entries()

    def get_pubtypes(self):
        """ Return a NameLUT object with the information read from the
            .debug_pubtypes section, or None if the section is not present.

            NameLUT is essentially a dictionary containing the CU/DIE offsets
            of each symbol. See the NameLUT doc string for more details.
        """
        if self.debug_pubtypes_sec:
            return NameLUT(self.debug_pubtypes_sec.stream,
                           self.debug_pubtypes_sec.size,
                           self.structs)
        else:
            return None

    def get_pubnames(self):
        """ Return a NameLUT object with the information read from the
            .debug_pubnames section, or None if the section is not present.

            NameLUT is essentially a dictionary containing the CU/DIE offsets
            of each symbol. See the NameLUT doc string for more details.
        """
        if self.debug_pubnames_sec:
            return NameLUT(self.debug_pubnames_sec.stream,
                           self.debug_pubnames_sec.size,
                           self.structs)
        else:
            return None

    def get_aranges(self):
        """ Get an ARanges object representing the .debug_aranges section of
            the DWARF data, or None if the section doesn't exist
        """
        if self.debug_aranges_sec:
            return ARanges(self.debug_aranges_sec.stream,
                           self.debug_aranges_sec.size,
                           self.structs)
        else:
            return None

    def location_lists(self):
        """ Get a LocationLists object representing the .debug_loc section of
            the DWARF data, or None if this section doesn't exist.
        """
        if self.debug_loc_sec:
            return LocationLists(self.debug_loc_sec.stream, self.structs)
        else:
            return None

    def range_lists(self):
        """ Get a RangeLists object representing the .debug_ranges section of
            the DWARF data, or None if this section doesn't exist.
        """
        if self.debug_ranges_sec:
            return RangeLists(self.debug_ranges_sec.stream, self.structs)
        else:
            return None

    #------ PRIVATE ------#

    def _parse_CUs_iter(self, offset=0):
        """ Iterate CU objects in order of appearance in the debug_info section.

            offset:
                The offset of the first CU to yield. Additional iterations
                will return the sequential unit objects.

            See .iter_CUs(), .get_CU_containing(), and .get_CU_at().
        """
        if self.debug_info_sec is None:
            return

        while offset < self.debug_info_sec.size:
            cu = self._cached_CU_at_offset(offset)
            # Compute the offset of the next CU in the section. The unit_length
            # field of the CU header contains its size not including the length
            # field itself.
            offset = ( offset +
                       cu['unit_length'] +
                       cu.structs.initial_length_field_size())
            yield cu

    def _cached_CU_at_offset(self, offset):
        """ Return the CU with unit header at the given offset into the
            debug_info section from the cache. If not present, the unit
            header is parsed and the object is installed in the cache.

            offset:
                The offset of the unit header in the .debug_info section
                of the unit to fetch from the cache.

            See get_CU_at().
        """
        # Find the insert point for the requested offset. With bisect_right,
        # if this entry is present in the cache it will be the prior entry.
        i = bisect_right(self._cu_offsets_map, offset)
        if i >= 1 and offset == self._cu_offsets_map[i - 1]:
            return self._cu_cache[i - 1]

        # Parse the CU and insert the offset and object into the cache.
        # The ._cu_offsets_map[] contains just the numeric offsets for the
        # bisect_right search while the parallel indexed ._cu_cache[] holds
        # the object references.
        cu = self._parse_CU_at_offset(offset)
        self._cu_offsets_map.insert(i, offset)
        self._cu_cache.insert(i, cu)
        return cu

    def _parse_CU_at_offset(self, offset):
        """ Parse and return a CU at the given offset in the debug_info stream.
        """
        # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3
        # states that the first 32-bit word of the CU header determines
        # whether the CU is represented with 32-bit or 64-bit DWARF format.
        #
        # So we peek at the first word in the CU header to determine its
        # dwarf format. Based on it, we then create a new DWARFStructs
        # instance suitable for this CU and use it to parse the rest.
        #
        initial_length = struct_parse(
            self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset)
        dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32

        # Temporary structs for parsing the header.
        # The structs for the rest of the CU depend on the header data.
        #
        cu_structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=dwarf_format,
            address_size=4,
            dwarf_version=2)

        cu_header = struct_parse(
            cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset)

        # Structs for the rest of the CU, taking into account bitness and
        # DWARF version.
        cu_structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=dwarf_format,
            address_size=cu_header['address_size'],
            dwarf_version=cu_header['version'])

        cu_die_offset = self.debug_info_sec.stream.tell()
        dwarf_assert(
            self._is_supported_version(cu_header['version']),
            "Expected supported DWARF version. Got '%s'" % cu_header['version'])
        return CompileUnit(
            header=cu_header,
            dwarfinfo=self,
            structs=cu_structs,
            cu_offset=offset,
            cu_die_offset=cu_die_offset)

    def _is_supported_version(self, version):
        """ DWARF version supported by this parser
        """
        return 2 <= version <= 5

    def _parse_line_program_at_offset(self, debug_line_offset, structs):
        """ Given an offset to the .debug_line section, parse the line program
            starting at this offset in the section and return it.
            structs is the DWARFStructs object used to do this parsing.
        """
        lineprog_header = struct_parse(
            structs.Dwarf_lineprog_header,
            self.debug_line_sec.stream,
            debug_line_offset)

        # Calculate the offset to the next line program (see DWARF 6.2.4)
        end_offset = ( debug_line_offset + lineprog_header['unit_length'] +
                       structs.initial_length_field_size())

        return LineProgram(
            header=lineprog_header,
            stream=self.debug_line_sec.stream,
            structs=structs,
            program_start_offset=self.debug_line_sec.stream.tell(),
            program_end_offset=end_offset)

View File

@@ -1,396 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/enums.py
#
# Mappings of enum names to values
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..construct import Pass
from ..common.py3compat import iteritems
# DIE tag name -> numeric encoding. Unlisted values fall through to
# construct's Pass via '_default_'. Note that some names intentionally
# alias the same value (e.g. DW_TAG_namelist_item / DW_TAG_namelist_items).
ENUM_DW_TAG = dict(
DW_TAG_null = 0x00,
DW_TAG_array_type = 0x01,
DW_TAG_class_type = 0x02,
DW_TAG_entry_point = 0x03,
DW_TAG_enumeration_type = 0x04,
DW_TAG_formal_parameter = 0x05,
DW_TAG_global_subroutine = 0x06,
DW_TAG_global_variable = 0x07,
DW_TAG_imported_declaration = 0x08,
DW_TAG_label = 0x0a,
DW_TAG_lexical_block = 0x0b,
DW_TAG_local_variable = 0x0c,
DW_TAG_member = 0x0d,
DW_TAG_pointer_type = 0x0f,
DW_TAG_reference_type = 0x10,
DW_TAG_compile_unit = 0x11,
DW_TAG_string_type = 0x12,
DW_TAG_structure_type = 0x13,
DW_TAG_subroutine = 0x14,
DW_TAG_subroutine_type = 0x15,
DW_TAG_typedef = 0x16,
DW_TAG_union_type = 0x17,
DW_TAG_unspecified_parameters = 0x18,
DW_TAG_variant = 0x19,
DW_TAG_common_block = 0x1a,
DW_TAG_common_inclusion = 0x1b,
DW_TAG_inheritance = 0x1c,
DW_TAG_inlined_subroutine = 0x1d,
DW_TAG_module = 0x1e,
DW_TAG_ptr_to_member_type = 0x1f,
DW_TAG_set_type = 0x20,
DW_TAG_subrange_type = 0x21,
DW_TAG_with_stmt = 0x22,
DW_TAG_access_declaration = 0x23,
DW_TAG_base_type = 0x24,
DW_TAG_catch_block = 0x25,
DW_TAG_const_type = 0x26,
DW_TAG_constant = 0x27,
DW_TAG_enumerator = 0x28,
DW_TAG_file_type = 0x29,
DW_TAG_friend = 0x2a,
DW_TAG_namelist = 0x2b,
DW_TAG_namelist_item = 0x2c,
DW_TAG_namelist_items = 0x2c,
DW_TAG_packed_type = 0x2d,
DW_TAG_subprogram = 0x2e,
# The DWARF standard defines these as _parameter, not _param, but we
# maintain compatibility with readelf.
DW_TAG_template_type_param = 0x2f,
DW_TAG_template_value_param = 0x30,
DW_TAG_thrown_type = 0x31,
DW_TAG_try_block = 0x32,
DW_TAG_variant_part = 0x33,
DW_TAG_variable = 0x34,
DW_TAG_volatile_type = 0x35,
DW_TAG_dwarf_procedure = 0x36,
DW_TAG_restrict_type = 0x37,
DW_TAG_interface_type = 0x38,
DW_TAG_namespace = 0x39,
DW_TAG_imported_module = 0x3a,
DW_TAG_unspecified_type = 0x3b,
DW_TAG_partial_unit = 0x3c,
DW_TAG_imported_unit = 0x3d,
DW_TAG_mutable_type = 0x3e,
DW_TAG_condition = 0x3f,
DW_TAG_shared_type = 0x40,
DW_TAG_type_unit = 0x41,
DW_TAG_rvalue_reference_type = 0x42,
DW_TAG_atomic_type = 0x47,
DW_TAG_call_site = 0x48,
DW_TAG_call_site_parameter = 0x49,
DW_TAG_skeleton_unit = 0x4a,
DW_TAG_immutable_type = 0x4b,
DW_TAG_lo_user = 0x4080,
DW_TAG_GNU_template_template_param = 0x4106,
DW_TAG_GNU_template_parameter_pack = 0x4107,
DW_TAG_GNU_formal_parameter_pack = 0x4108,
DW_TAG_GNU_call_site = 0x4109,
DW_TAG_GNU_call_site_parameter = 0x410a,
DW_TAG_APPLE_property = 0x4200,
DW_TAG_hi_user = 0xffff,
_default_ = Pass,
)
# Abbreviation "has children" flag encodings.
ENUM_DW_CHILDREN = dict(
DW_CHILDREN_no = 0x00,
DW_CHILDREN_yes = 0x01,
)
# Attribute name -> numeric encoding. Unlisted values fall through to
# construct's Pass via '_default_'. Some names alias the same value
# (e.g. DW_AT_bit_stride / DW_AT_stride_size, DW_AT_byte_stride / DW_AT_stride).
ENUM_DW_AT = dict(
DW_AT_null = 0x00,
DW_AT_sibling = 0x01,
DW_AT_location = 0x02,
DW_AT_name = 0x03,
DW_AT_fund_type = 0x05,
DW_AT_mod_fund_type = 0x06,
DW_AT_user_def_type = 0x07,
DW_AT_mod_u_d_type = 0x08,
DW_AT_ordering = 0x09,
DW_AT_subscr_data = 0x0a,
DW_AT_byte_size = 0x0b,
DW_AT_bit_offset = 0x0c,
DW_AT_bit_size = 0x0d,
DW_AT_element_list = 0x0f,
DW_AT_stmt_list = 0x10,
DW_AT_low_pc = 0x11,
DW_AT_high_pc = 0x12,
DW_AT_language = 0x13,
DW_AT_member = 0x14,
DW_AT_discr = 0x15,
DW_AT_discr_value = 0x16,
DW_AT_visibility = 0x17,
DW_AT_import = 0x18,
DW_AT_string_length = 0x19,
DW_AT_common_reference = 0x1a,
DW_AT_comp_dir = 0x1b,
DW_AT_const_value = 0x1c,
DW_AT_containing_type = 0x1d,
DW_AT_default_value = 0x1e,
DW_AT_inline = 0x20,
DW_AT_is_optional = 0x21,
DW_AT_lower_bound = 0x22,
DW_AT_program = 0x23,
DW_AT_private = 0x24,
DW_AT_producer = 0x25,
DW_AT_protected = 0x26,
DW_AT_prototyped = 0x27,
DW_AT_public = 0x28,
DW_AT_return_addr = 0x2a,
DW_AT_start_scope = 0x2c,
DW_AT_bit_stride = 0x2e,
DW_AT_stride_size = 0x2e,
DW_AT_upper_bound = 0x2f,
DW_AT_virtual = 0x30,
DW_AT_abstract_origin = 0x31,
DW_AT_accessibility = 0x32,
DW_AT_address_class = 0x33,
DW_AT_artificial = 0x34,
DW_AT_base_types = 0x35,
DW_AT_calling_convention = 0x36,
DW_AT_count = 0x37,
DW_AT_data_member_location = 0x38,
DW_AT_decl_column = 0x39,
DW_AT_decl_file = 0x3a,
DW_AT_decl_line = 0x3b,
DW_AT_declaration = 0x3c,
DW_AT_discr_list = 0x3d,
DW_AT_encoding = 0x3e,
DW_AT_external = 0x3f,
DW_AT_frame_base = 0x40,
DW_AT_friend = 0x41,
DW_AT_identifier_case = 0x42,
DW_AT_macro_info = 0x43,
DW_AT_namelist_item = 0x44,
DW_AT_priority = 0x45,
DW_AT_segment = 0x46,
DW_AT_specification = 0x47,
DW_AT_static_link = 0x48,
DW_AT_type = 0x49,
DW_AT_use_location = 0x4a,
DW_AT_variable_parameter = 0x4b,
DW_AT_virtuality = 0x4c,
DW_AT_vtable_elem_location = 0x4d,
DW_AT_allocated = 0x4e,
DW_AT_associated = 0x4f,
DW_AT_data_location = 0x50,
DW_AT_byte_stride = 0x51,
DW_AT_stride = 0x51,
DW_AT_entry_pc = 0x52,
DW_AT_use_UTF8 = 0x53,
DW_AT_extension = 0x54,
DW_AT_ranges = 0x55,
DW_AT_trampoline = 0x56,
DW_AT_call_column = 0x57,
DW_AT_call_file = 0x58,
DW_AT_call_line = 0x59,
DW_AT_description = 0x5a,
DW_AT_binary_scale = 0x5b,
DW_AT_decimal_scale = 0x5c,
DW_AT_small = 0x5d,
DW_AT_decimal_sign = 0x5e,
DW_AT_digit_count = 0x5f,
DW_AT_picture_string = 0x60,
DW_AT_mutable = 0x61,
DW_AT_threads_scaled = 0x62,
DW_AT_explicit = 0x63,
DW_AT_object_pointer = 0x64,
DW_AT_endianity = 0x65,
DW_AT_elemental = 0x66,
DW_AT_pure = 0x67,
DW_AT_recursive = 0x68,
DW_AT_signature = 0x69,
DW_AT_main_subprogram = 0x6a,
DW_AT_data_bit_offset = 0x6b,
DW_AT_const_expr = 0x6c,
DW_AT_enum_class = 0x6d,
DW_AT_linkage_name = 0x6e,
DW_AT_string_length_bit_size = 0x6f,
DW_AT_string_length_byte_size = 0x70,
DW_AT_rank = 0x71,
DW_AT_str_offsets_base = 0x72,
DW_AT_addr_base = 0x73,
DW_AT_rnglists_base = 0x74,
DW_AT_dwo_name = 0x76,
DW_AT_reference = 0x77,
DW_AT_rvalue_reference = 0x78,
DW_AT_macros = 0x79,
DW_AT_call_all_calls = 0x7a,
DW_AT_call_all_source_calls = 0x7b,
DW_AT_call_all_tail_calls = 0x7c,
DW_AT_call_return_pc = 0x7d,
DW_AT_call_value = 0x7e,
DW_AT_call_origin = 0x7f,
DW_AT_call_parameter = 0x80,
DW_AT_call_pc = 0x81,
DW_AT_call_tail_call = 0x82,
DW_AT_call_target = 0x83,
DW_AT_call_target_clobbered = 0x84,
DW_AT_call_data_location = 0x85,
DW_AT_call_data_value = 0x86,
DW_AT_noreturn = 0x87,
DW_AT_alignment = 0x88,
DW_AT_export_symbols = 0x89,
DW_AT_deleted = 0x8a,
DW_AT_defaulted = 0x8b,
DW_AT_loclists_base = 0x8c,
DW_AT_MIPS_fde = 0x2001,
DW_AT_MIPS_loop_begin = 0x2002,
DW_AT_MIPS_tail_loop_begin = 0x2003,
DW_AT_MIPS_epilog_begin = 0x2004,
DW_AT_MIPS_loop_unroll_factor = 0x2005,
DW_AT_MIPS_software_pipeline_depth = 0x2006,
DW_AT_MIPS_linkage_name = 0x2007,
DW_AT_MIPS_stride = 0x2008,
DW_AT_MIPS_abstract_name = 0x2009,
DW_AT_MIPS_clone_origin = 0x200a,
DW_AT_MIPS_has_inlines = 0x200b,
DW_AT_MIPS_stride_byte = 0x200c,
DW_AT_MIPS_stride_elem = 0x200d,
DW_AT_MIPS_ptr_dopetype = 0x200e,
DW_AT_MIPS_allocatable_dopetype = 0x200f,
DW_AT_MIPS_assumed_shape_dopetype = 0x2010,
DW_AT_MIPS_assumed_size = 0x2011,
DW_AT_sf_names = 0x2101,
DW_AT_src_info = 0x2102,
DW_AT_mac_info = 0x2103,
DW_AT_src_coords = 0x2104,
DW_AT_body_begin = 0x2105,
DW_AT_body_end = 0x2106,
DW_AT_GNU_vector = 0x2107,
DW_AT_GNU_template_name = 0x2110,
DW_AT_GNU_odr_signature = 0x210f,
DW_AT_GNU_call_site_value = 0x2111,
DW_AT_GNU_call_site_data_value = 0x2112,
DW_AT_GNU_call_site_target = 0x2113,
DW_AT_GNU_call_site_target_clobbered = 0x2114,
DW_AT_GNU_tail_call = 0x2115,
DW_AT_GNU_all_tail_call_sites = 0x2116,
DW_AT_GNU_all_call_sites = 0x2117,
DW_AT_GNU_all_source_call_sites = 0x2118,
DW_AT_GNU_macros = 0x2119,
DW_AT_GNU_deleted = 0x211a,
DW_AT_GNU_dwo_id = 0x2131,
DW_AT_GNU_pubnames = 0x2134,
DW_AT_GNU_pubtypes = 0x2135,
DW_AT_GNU_discriminator = 0x2136,
DW_AT_LLVM_include_path = 0x3e00,
DW_AT_LLVM_config_macros = 0x3e01,
DW_AT_LLVM_isysroot = 0x3e02,
DW_AT_LLVM_tag_offset = 0x3e03,
DW_AT_APPLE_optimized = 0x3fe1,
DW_AT_APPLE_flags = 0x3fe2,
DW_AT_APPLE_isa = 0x3fe3,
DW_AT_APPLE_block = 0x3fe4,
DW_AT_APPLE_major_runtime_vers = 0x3fe5,
DW_AT_APPLE_runtime_class = 0x3fe6,
DW_AT_APPLE_omit_frame_ptr = 0x3fe7,
DW_AT_APPLE_property_name = 0x3fe8,
DW_AT_APPLE_property_getter = 0x3fe9,
DW_AT_APPLE_property_setter = 0x3fea,
DW_AT_APPLE_property_attribute = 0x3feb,
DW_AT_APPLE_objc_complete_type = 0x3fec,
DW_AT_APPLE_property = 0x3fed,
_default_ = Pass,
)
# Attribute form name -> numeric encoding. Unlisted values fall through to
# construct's Pass via '_default_'.
ENUM_DW_FORM = dict(
DW_FORM_null = 0x00,
DW_FORM_addr = 0x01,
DW_FORM_ref = 0x02,
DW_FORM_block2 = 0x03,
DW_FORM_block4 = 0x04,
DW_FORM_data2 = 0x05,
DW_FORM_data4 = 0x06,
DW_FORM_data8 = 0x07,
DW_FORM_string = 0x08,
DW_FORM_block = 0x09,
DW_FORM_block1 = 0x0a,
DW_FORM_data1 = 0x0b,
DW_FORM_flag = 0x0c,
DW_FORM_sdata = 0x0d,
DW_FORM_strp = 0x0e,
DW_FORM_udata = 0x0f,
DW_FORM_ref_addr = 0x10,
DW_FORM_ref1 = 0x11,
DW_FORM_ref2 = 0x12,
DW_FORM_ref4 = 0x13,
DW_FORM_ref8 = 0x14,
DW_FORM_ref_udata = 0x15,
DW_FORM_indirect = 0x16,
DW_FORM_sec_offset = 0x17,
DW_FORM_exprloc = 0x18,
DW_FORM_flag_present = 0x19,
DW_FORM_strx = 0x1a,
DW_FORM_addrx = 0x1b,
DW_FORM_ref_sup4 = 0x1c,
DW_FORM_strp_sup = 0x1d,
DW_FORM_data16 = 0x1e,
DW_FORM_line_strp = 0x1f,
DW_FORM_ref_sig8 = 0x20,
DW_FORM_implicit_const = 0x21,
DW_FORM_loclistx = 0x22,
DW_FORM_rnglistx = 0x23,
DW_FORM_ref_sup8 = 0x24,
DW_FORM_strx1 = 0x25,
DW_FORM_strx2 = 0x26,
DW_FORM_strx3 = 0x27,
DW_FORM_strx4 = 0x28,
DW_FORM_addrx1 = 0x29,
DW_FORM_addrx2 = 0x2a,
DW_FORM_addrx3 = 0x2b,
DW_FORM_addrx4 = 0x2c,
DW_FORM_GNU_addr_index = 0x1f01,
DW_FORM_GNU_str_index = 0x1f02,
DW_FORM_GNU_ref_alt = 0x1f20,
DW_FORM_GNU_strp_alt = 0x1f21,
_default_ = Pass,
)
# Inverse mapping for ENUM_DW_FORM: numeric encoding -> form name.
DW_FORM_raw2name = dict((v, k) for k, v in iteritems(ENUM_DW_FORM))
# .eh_frame pointer-encoding flags; the low nibble is the value format and
# the high nibble the application rule. See http://www.airs.com/blog/archives/460
DW_EH_encoding_flags = dict(
DW_EH_PE_absptr = 0x00,
DW_EH_PE_uleb128 = 0x01,
DW_EH_PE_udata2 = 0x02,
DW_EH_PE_udata4 = 0x03,
DW_EH_PE_udata8 = 0x04,
DW_EH_PE_signed = 0x08,
DW_EH_PE_sleb128 = 0x09,
DW_EH_PE_sdata2 = 0x0a,
DW_EH_PE_sdata4 = 0x0b,
DW_EH_PE_sdata8 = 0x0c,
DW_EH_PE_pcrel = 0x10,
DW_EH_PE_textrel = 0x20,
DW_EH_PE_datarel = 0x30,
DW_EH_PE_funcrel = 0x40,
DW_EH_PE_aligned = 0x50,
DW_EH_PE_indirect = 0x80,
DW_EH_PE_omit = 0xff,
)

View File

@@ -1,262 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/lineprogram.py
#
# DWARF line number program
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
import copy
from collections import namedtuple
from ..common.utils import struct_parse, dwarf_assert
from .constants import *
# LineProgramEntry - an entry in the line program.
# A line program is a sequence of encoded entries. Some of these entries add a
# new LineState (mapping between line and address), and some don't.
#
# command:
# The command/opcode - always numeric. For standard commands - it's the opcode
# that can be matched with one of the DW_LNS_* constants. For extended commands
# it's the extended opcode that can be matched with one of the DW_LNE_*
# constants. For special commands, it's the opcode itself.
#
# args:
# A list of decoded arguments of the command.
#
# is_extended:
# Since extended commands are encoded by a zero followed by an extended
# opcode, and these extended opcodes overlap with other opcodes, this
# flag is needed to mark that the command has an extended opcode.
#
# state:
# For commands that add a new state, it's the relevant LineState object.
# For commands that don't add a new state, it's None.
#
LineProgramEntry = namedtuple(
'LineProgramEntry', 'command is_extended args state')
class LineState(object):
    """ Represents a line program state (or a "row" in the matrix
        describing debug location information for addresses).

        The instance variables of this class are the "state machine registers"
        described in section 6.2.2 of DWARFv3
    """
    def __init__(self, default_is_stmt):
        # Register initial values; is_stmt is seeded from the line program
        # header's default_is_stmt field, everything else is fixed.
        self.address = 0
        self.file = 1
        self.line = 1
        self.column = 0
        self.op_index = 0
        self.is_stmt = default_is_stmt
        self.basic_block = False
        self.end_sequence = False
        self.prologue_end = False
        self.epilogue_begin = False
        self.isa = 0
        self.discriminator = 0

    def __repr__(self):
        # Multi-line dump of all registers, for debugging.
        a = ['<LineState %x:' % id(self)]
        a.append(' address = 0x%x' % self.address)
        for attr in ('file', 'line', 'column', 'is_stmt', 'basic_block',
                     'end_sequence', 'prologue_end', 'epilogue_begin', 'isa',
                     'discriminator'):
            a.append(' %s = %s' % (attr, getattr(self, attr)))
        return '\n'.join(a) + '>\n'
class LineProgram(object):
    """ Builds a "line table", which is essentially the matrix described
        in section 6.2 of DWARFv3. It's a list of LineState objects,
        sorted by increasing address, so it can be used to obtain the
        state information for each address.
    """
    def __init__(self, header, stream, structs,
                 program_start_offset, program_end_offset):
        """
        header:
            The header of this line program. Note: LineProgram may modify
            its header by appending file entries if DW_LNE_define_file
            instructions are encountered.

        stream:
            The stream this program can be read from.

        structs:
            A DWARFStructs instance suitable for this line program

        program_{start|end}_offset:
            Offset in the debug_line section stream where this program
            starts (the actual program, after the header), and where it
            ends.
            The actual range includes start but not end: [start, end - 1]
        """
        self.stream = stream
        self.header = header
        self.structs = structs
        self.program_start_offset = program_start_offset
        self.program_end_offset = program_end_offset
        # Lazily computed by get_entries().
        self._decoded_entries = None

    def get_entries(self):
        """ Get the decoded entries for this line program. Return a list of
            LineProgramEntry objects.
            Note that this contains more information than absolutely required
            for the line table. The line table can be easily extracted from
            the list of entries by looking only at entries with non-None
            state. The extra information is mainly for the purposes of display
            with readelf and debugging.
        """
        if self._decoded_entries is None:
            self._decoded_entries = self._decode_line_program()
        return self._decoded_entries

    #------ PRIVATE ------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _decode_line_program(self):
        """ Decode the whole program, returning a list of LineProgramEntry.
            The state machine registers live in 'state'; entries that produce
            a row in the line table snapshot a copy of it.
        """
        entries = []
        state = LineState(self.header['default_is_stmt'])

        def add_entry_new_state(cmd, args, is_extended=False):
            # Add an entry that sets a new state.
            # After adding, clear some state registers.
            entries.append(LineProgramEntry(
                cmd, is_extended, args, copy.copy(state)))
            state.discriminator = 0
            state.basic_block = False
            state.prologue_end = False
            state.epilogue_begin = False

        def add_entry_old_state(cmd, args, is_extended=False):
            # Add an entry that doesn't visibly set a new state
            entries.append(LineProgramEntry(cmd, is_extended, args, None))

        offset = self.program_start_offset
        while offset < self.program_end_offset:
            opcode = struct_parse(
                self.structs.Dwarf_uint8(''),
                self.stream,
                offset)

            # As an exercise in avoiding premature optimization, if...elif
            # chains are used here for standard and extended opcodes instead
            # of dispatch tables. This keeps the code much cleaner. Besides,
            # the majority of instructions in a typical program are special
            # opcodes anyway.
            if opcode >= self.header['opcode_base']:
                # Special opcode (follow the recipe in 6.2.5.1)
                maximum_operations_per_instruction = self['maximum_operations_per_instruction']
                adjusted_opcode = opcode - self['opcode_base']
                operation_advance = adjusted_opcode // self['line_range']
                address_addend = (
                    self['minimum_instruction_length'] *
                    ((state.op_index + operation_advance) //
                     maximum_operations_per_instruction))
                state.address += address_addend
                state.op_index = (state.op_index + operation_advance) % maximum_operations_per_instruction
                line_addend = self['line_base'] + (adjusted_opcode % self['line_range'])
                state.line += line_addend
                add_entry_new_state(
                    opcode, [line_addend, address_addend, state.op_index])
            elif opcode == 0:
                # Extended opcode: start with a zero byte, followed by
                # instruction size and the instruction itself.
                inst_len = struct_parse(self.structs.Dwarf_uleb128(''),
                                        self.stream)
                ex_opcode = struct_parse(self.structs.Dwarf_uint8(''),
                                         self.stream)

                if ex_opcode == DW_LNE_end_sequence:
                    state.end_sequence = True
                    state.is_stmt = 0
                    add_entry_new_state(ex_opcode, [], is_extended=True)
                    # reset state
                    state = LineState(self.header['default_is_stmt'])
                elif ex_opcode == DW_LNE_set_address:
                    operand = struct_parse(self.structs.Dwarf_target_addr(''),
                                           self.stream)
                    state.address = operand
                    add_entry_old_state(ex_opcode, [operand], is_extended=True)
                elif ex_opcode == DW_LNE_define_file:
                    operand = struct_parse(
                        self.structs.Dwarf_lineprog_file_entry, self.stream)
                    self['file_entry'].append(operand)
                    add_entry_old_state(ex_opcode, [operand], is_extended=True)
                elif ex_opcode == DW_LNE_set_discriminator:
                    operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                           self.stream)
                    state.discriminator = operand
                else:
                    # Unknown, but need to roll forward the stream because the
                    # length is specified. Seek forward inst_len - 1 because
                    # we've already read the extended opcode, which takes part
                    # in the length.
                    self.stream.seek(inst_len - 1, os.SEEK_CUR)
            else: # 0 < opcode < opcode_base
                # Standard opcode
                if opcode == DW_LNS_copy:
                    add_entry_new_state(opcode, [])
                elif opcode == DW_LNS_advance_pc:
                    operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                           self.stream)
                    address_addend = (
                        operand * self.header['minimum_instruction_length'])
                    state.address += address_addend
                    add_entry_old_state(opcode, [address_addend])
                elif opcode == DW_LNS_advance_line:
                    operand = struct_parse(self.structs.Dwarf_sleb128(''),
                                           self.stream)
                    state.line += operand
                elif opcode == DW_LNS_set_file:
                    operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                           self.stream)
                    state.file = operand
                    add_entry_old_state(opcode, [operand])
                elif opcode == DW_LNS_set_column:
                    operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                           self.stream)
                    state.column = operand
                    add_entry_old_state(opcode, [operand])
                elif opcode == DW_LNS_negate_stmt:
                    state.is_stmt = not state.is_stmt
                    add_entry_old_state(opcode, [])
                elif opcode == DW_LNS_set_basic_block:
                    state.basic_block = True
                    add_entry_old_state(opcode, [])
                elif opcode == DW_LNS_const_add_pc:
                    adjusted_opcode = 255 - self['opcode_base']
                    address_addend = ((adjusted_opcode // self['line_range']) *
                                      self['minimum_instruction_length'])
                    state.address += address_addend
                    add_entry_old_state(opcode, [address_addend])
                elif opcode == DW_LNS_fixed_advance_pc:
                    operand = struct_parse(self.structs.Dwarf_uint16(''),
                                           self.stream)
                    state.address += operand
                    add_entry_old_state(opcode, [operand])
                elif opcode == DW_LNS_set_prologue_end:
                    state.prologue_end = True
                    add_entry_old_state(opcode, [])
                elif opcode == DW_LNS_set_epilogue_begin:
                    state.epilogue_begin = True
                    add_entry_old_state(opcode, [])
                elif opcode == DW_LNS_set_isa:
                    operand = struct_parse(self.structs.Dwarf_uleb128(''),
                                           self.stream)
                    state.isa = operand
                    add_entry_old_state(opcode, [operand])
                else:
                    dwarf_assert(False, 'Invalid standard line program opcode: %s' % (
                        opcode,))
            offset = self.stream.tell()
        return entries

View File

@@ -1,130 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/locationlists.py
#
# DWARF location lists section decoding (.debug_loc)
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
from collections import namedtuple
from ..common.utils import struct_parse
# A location expression embedded directly in a DIE attribute's value
# (a list of DWARF expression opcode bytes).
LocationExpr = namedtuple('LocationExpr', 'loc_expr')
# One entry of a location list in .debug_loc: begin/end offsets (relative to
# the applicable base address) and the DWARF expression bytes for that range.
LocationEntry = namedtuple('LocationEntry', 'entry_offset begin_offset end_offset loc_expr')
# Base-address selection entry: sets the base address used by the
# begin/end offsets of the LocationEntry objects that follow it.
BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address')
class LocationLists(object):
    """ Decoder for the .debug_loc section.

        A single location list is a Python list whose elements are
        LocationEntry or BaseAddressEntry objects.
    """
    def __init__(self, stream, structs):
        self.stream = stream
        self.structs = structs
        # An address of all ones marks a base-address selection entry.
        self._max_addr = 2 ** (self.structs.address_size * 8) - 1

    def get_location_list_at_offset(self, offset):
        """ Return the location list starting at the given section offset.
        """
        self.stream.seek(offset, os.SEEK_SET)
        return self._parse_location_list_from_stream()

    def iter_location_lists(self):
        """ Yield every location list found in the section, in file order.
        """
        # Parse lists back-to-back until the end of the section is reached.
        self.stream.seek(0, os.SEEK_END)
        section_end = self.stream.tell()
        self.stream.seek(0, os.SEEK_SET)
        while self.stream.tell() < section_end:
            yield self._parse_location_list_from_stream()

    #------ PRIVATE ------#

    def _parse_location_list_from_stream(self):
        """ Parse one location list from the current stream position. """
        entries = []
        while True:
            pos = self.stream.tell()
            start = struct_parse(
                self.structs.Dwarf_target_addr(''), self.stream)
            end = struct_parse(
                self.structs.Dwarf_target_addr(''), self.stream)
            if start == 0 and end == 0:
                # End-of-list marker: a (0, 0) address pair.
                break
            if start == self._max_addr:
                # Base address selection entry.
                entries.append(BaseAddressEntry(
                    entry_offset=pos, base_address=end))
                continue
            # Ordinary entry: a uint16 length followed by that many
            # DWARF-expression opcode bytes.
            expr_len = struct_parse(
                self.structs.Dwarf_uint16(''), self.stream)
            expr = [struct_parse(self.structs.Dwarf_uint8(''), self.stream)
                    for _ in range(expr_len)]
            entries.append(LocationEntry(
                entry_offset=pos,
                begin_offset=start,
                end_offset=end,
                loc_expr=expr))
        return entries
class LocationParser(object):
    """ Parser for location information found in DIE attributes.

        Location information is either a DWARF expression stored inside the
        attribute itself (returned as a LocationExpr) or an offset into the
        .debug_loc section (returned as a list of LocationEntry /
        BaseAddressEntry objects).
    """
    def __init__(self, location_lists):
        self.location_lists = location_lists

    @staticmethod
    def attribute_has_location(attr, dwarf_version):
        """ True if the given DIE attribute carries location information.
        """
        if not LocationParser._attribute_is_loclistptr_class(attr):
            return False
        return (LocationParser._attribute_has_loc_expr(attr, dwarf_version)
                or LocationParser._attribute_has_loc_list(attr, dwarf_version))

    def parse_from_attribute(self, attr, dwarf_version):
        """ Return the location of a DIE attribute: a LocationExpr for an
            inline expression, or a location list.

            Raises ValueError when the attribute holds no location.
        """
        if not self.attribute_has_location(attr, dwarf_version):
            raise ValueError("Attribute does not have location information")
        if self._attribute_has_loc_expr(attr, dwarf_version):
            return LocationExpr(attr.value)
        # attribute_has_location guarantees this is a loclist reference.
        return self.location_lists.get_location_list_at_offset(attr.value)

    #------ PRIVATE ------#

    @staticmethod
    def _attribute_has_loc_expr(attr, dwarf_version):
        # Pre-DWARFv4, inline expressions use the block forms (except for
        # DW_AT_const_value); from v4 on they use DW_FORM_exprloc.
        pre_v4_block = (dwarf_version < 4
                        and attr.form.startswith('DW_FORM_block')
                        and attr.name != 'DW_AT_const_value')
        return pre_v4_block or attr.form == 'DW_FORM_exprloc'

    @staticmethod
    def _attribute_has_loc_list(attr, dwarf_version):
        # Pre-DWARFv4, loclist offsets use the data4/data8 forms; from v4
        # on they use DW_FORM_sec_offset.
        pre_v4_data = (dwarf_version < 4
                       and attr.form in ('DW_FORM_data4', 'DW_FORM_data8')
                       and attr.name != 'DW_AT_const_value')
        return pre_v4_data or attr.form == 'DW_FORM_sec_offset'

    @staticmethod
    def _attribute_is_loclistptr_class(attr):
        # Attributes whose value may belong to the loclistptr class.
        return attr.name in ('DW_AT_location', 'DW_AT_string_length',
                             'DW_AT_const_value', 'DW_AT_return_addr',
                             'DW_AT_data_member_location',
                             'DW_AT_frame_base', 'DW_AT_segment',
                             'DW_AT_static_link', 'DW_AT_use_location',
                             'DW_AT_vtable_elem_location',
                             'DW_AT_GNU_call_site_value',
                             'DW_AT_GNU_call_site_target',
                             'DW_AT_GNU_call_site_data_value')

View File

@@ -1,198 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/namelut.py
#
# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames)
#
# Vijay Ramasami (rvijayc@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
import collections
from collections import OrderedDict
from ..common.utils import struct_parse
from ..common.py3compat import Mapping
from bisect import bisect_right
import math
from ..construct import CString, Struct, If
NameLUTEntry = collections.namedtuple('NameLUTEntry', 'cu_ofs die_ofs')
class NameLUT(Mapping):
    """
    A "Name LUT" holds any of the tables specified by .debug_pubtypes or
    .debug_pubnames sections. This is basically a dictionary where the key is
    the symbol name (either a public variable, function or a type), and the
    value is the tuple (cu_offset, die_offset) corresponding to the variable.
    The die_offset is an absolute offset (meaning, it can be used to search the
    CU by iterating until a match is obtained).

    An ordered dictionary is used to preserve the CU order (i.e, items are
    stored on a per-CU basis (as it was originally in the .debug_* section).

    Usage:
    The NameLUT walks and talks like a dictionary and hence it can be used as
    such. Some examples below:

    # get the pubnames (a NameLUT from DWARF info).
    pubnames = dwarf_info.get_pubnames()

    # lookup a variable; raises KeyError if absent, like any mapping.
    entry1 = pubnames["var_name1"]
    entry2 = pubnames.get("var_name2", default=<default_var>)
    print(entry2.cu_ofs)
    ...

    # iterate over items.
    for (name, entry) in pubnames.items():
        # do stuff with name, entry.cu_ofs, entry.die_ofs

    # iterate over items on a per-CU basis.
    import itertools
    for cu_ofs, item_list in itertools.groupby(pubnames.items(),
            key = lambda x: x[1].cu_ofs):
        # items are now grouped by cu_ofs; item_list is an iterator
        # yielding NameLUTEntry'ies belonging to cu_ofs.
        for item in item_list:
            # work with item which is part of the CU with cu_ofs.
    """

    def __init__(self, stream, size, structs):
        """ stream:  file-like object covering the .debug_pub* section
            size:    section size in bytes
            structs: DWARFStructs instance for this file
        """
        self._stream = stream
        self._size = size
        self._structs = structs
        # entries are lazily loaded on demand.
        self._entries = None
        # CU headers (for readelf).
        self._cu_headers = None

    def _ensure_loaded(self):
        """ Parse the section on first access; a no-op afterwards. """
        if self._entries is None:
            self._entries, self._cu_headers = self._get_entries()

    def get_entries(self):
        """
        Returns the parsed NameLUT entries. The returned object is a dictionary
        with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as
        the value.

        This is useful when dealing with very large ELF files with millions of
        entries. The returned entries can be pickled to a file and restored by
        calling set_entries on subsequent loads.
        """
        self._ensure_loaded()
        return self._entries

    def set_entries(self, entries, cu_headers):
        """
        Set the NameLUT entries from an external source. The input is a
        dictionary with the symbol name as the key and NameLUTEntry(cu_ofs,
        die_ofs) as the value.

        This option is useful when dealing with very large ELF files with
        millions of entries. The entries can be parsed once and pickled to a
        file and can be restored via this function on subsequent loads.
        """
        self._entries = entries
        self._cu_headers = cu_headers

    def __len__(self):
        """
        Returns the number of entries in the NameLUT.
        """
        self._ensure_loaded()
        return len(self._entries)

    def __getitem__(self, name):
        """
        Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds
        to the given symbol name. Raises KeyError if the name is absent.
        """
        self._ensure_loaded()
        # Raise KeyError on a miss (per the Mapping contract) instead of
        # returning None: Mapping.__contains__ relies on KeyError, so the
        # previous .get(name) made `anything in lut` evaluate to True.
        return self._entries[name]

    def __iter__(self):
        """
        Returns an iterator to the NameLUT dictionary.
        """
        self._ensure_loaded()
        return iter(self._entries)

    def items(self):
        """
        Returns the NameLUT dictionary items.
        """
        self._ensure_loaded()
        return self._entries.items()

    def get(self, name, default=None):
        """
        Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or
        the given default if the symbol does not exist in the corresponding
        section.
        """
        self._ensure_loaded()
        return self._entries.get(name, default)

    def get_cu_headers(self):
        """
        Returns all CU headers. Mainly required for readelf.
        """
        if self._cu_headers is None:
            self._entries, self._cu_headers = self._get_entries()
        return self._cu_headers

    def _get_entries(self):
        """
        Parse the (name, cu_ofs, die_ofs) information from this section and
        store as a dictionary. Returns (entries, cu_headers).
        """
        self._stream.seek(0)
        entries = OrderedDict()
        cu_headers = []
        offset = 0
        # According to 6.1.1. of DWARFv4, each set of names is terminated by
        # an offset field containing zero (and no following string). Because
        # of sequential parsing, every next entry may be that terminator.
        # So, field "name" is conditional.
        entry_struct = Struct("Dwarf_offset_name_pair",
                self._structs.Dwarf_offset('die_ofs'),
                If(lambda ctx: ctx['die_ofs'], CString('name')))

        # each run of this loop will fetch one CU worth of entries.
        while offset < self._size:
            # read the header for this CU.
            namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header,
                    self._stream, offset)
            cu_headers.append(namelut_hdr)
            # compute the next offset.
            offset = (offset + namelut_hdr.unit_length +
                     self._structs.initial_length_field_size())
            # before inner loop, latch data that will be used in the inner
            # loop to avoid attribute access and other computation.
            hdr_cu_ofs = namelut_hdr.debug_info_offset
            # while die_ofs of the entry is non-zero (which indicates the end) ...
            while True:
                entry = struct_parse(entry_struct, self._stream)
                # if it is zero, this is the terminating record.
                if entry.die_ofs == 0:
                    break
                # add this entry to the look-up dictionary.
                entries[entry.name.decode('utf-8')] = NameLUTEntry(
                        cu_ofs = hdr_cu_ofs,
                        die_ofs = hdr_cu_ofs + entry.die_ofs)
        # return the entries parsed so far.
        return (entries, cu_headers)

View File

@@ -1,65 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/ranges.py
#
# DWARF ranges section decoding (.debug_ranges)
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
from collections import namedtuple
from ..common.utils import struct_parse
# One entry of a range list in .debug_ranges: begin/end offsets relative
# to the applicable base address.
RangeEntry = namedtuple('RangeEntry', 'begin_offset end_offset')
# Base-address selection entry: sets the base address used by the
# RangeEntry offsets that follow it.
BaseAddressEntry = namedtuple('BaseAddressEntry', 'base_address')
class RangeLists(object):
    """ Decoder for the .debug_ranges section.

        A single range list is a Python list whose elements are RangeEntry
        or BaseAddressEntry objects.
    """
    def __init__(self, stream, structs):
        self.stream = stream
        self.structs = structs
        # An address of all ones marks a base-address selection entry.
        self._max_addr = 2 ** (self.structs.address_size * 8) - 1

    def get_range_list_at_offset(self, offset):
        """ Return the range list starting at the given section offset.
        """
        self.stream.seek(offset, os.SEEK_SET)
        return self._parse_range_list_from_stream()

    def iter_range_lists(self):
        """ Yield every range list found in the section, in file order.
        """
        # Parse lists back-to-back until the end of the section is reached.
        self.stream.seek(0, os.SEEK_END)
        section_end = self.stream.tell()
        self.stream.seek(0, os.SEEK_SET)
        while self.stream.tell() < section_end:
            yield self._parse_range_list_from_stream()

    #------ PRIVATE ------#

    def _parse_range_list_from_stream(self):
        """ Parse one range list from the current stream position. """
        result = []
        while True:
            start = struct_parse(
                self.structs.Dwarf_target_addr(''), self.stream)
            end = struct_parse(
                self.structs.Dwarf_target_addr(''), self.stream)
            if start == 0 and end == 0:
                # End-of-list marker: a (0, 0) address pair.
                break
            if start == self._max_addr:
                # Base address selection entry.
                result.append(BaseAddressEntry(base_address=end))
            else:
                # Ordinary range entry.
                result.append(RangeEntry(begin_offset=start, end_offset=end))
        return result

View File

@@ -1,354 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: dwarf/structs.py
#
# Encapsulation of Construct structs for parsing DWARF, adjusted for correct
# endianness and word-size.
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..construct import (
UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
CString, Embed, StaticField, IfThenElse
)
from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
from .enums import *
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field.
        """
        # In 64-bit DWARF the field is the 0xFFFFFFFF escape word followed
        # by an 8-byte length, hence 12 bytes total.
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind all the primitive and composite structs, picking the
            endian-appropriate Construct primitives first.
        """
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_target_addr = (
                ULInt32 if self.address_size == 4 else ULInt64)
            self.Dwarf_int8 = SLInt8
            self.Dwarf_int16 = SLInt16
            self.Dwarf_int32 = SLInt32
            self.Dwarf_int64 = SLInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_target_addr = (
                UBInt32 if self.address_size == 4 else UBInt64)
            self.Dwarf_int8 = SBInt8
            self.Dwarf_int16 = SBInt16
            self.Dwarf_int32 = SBInt32
            self.Dwarf_int64 = SBInt64

        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()

    def _create_initial_length(self):
        """ Build the "initial length" field parser (spec section 7.4). """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Bind the variable-length LEB128 parsers. """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Build the compilation unit header parser. """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Build the abbreviation table declaration parser. """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM))))

    def _create_dw_form(self):
        """ Build the DW_FORM_* -> parser dictionary (spec section 7.5.6). """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''), # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''), # TODO
            # Fixed: DWARF5 section 7.5.6 defines DW_FORM_strx4 as a 4-byte
            # unsigned value; it was previously parsed as a uint64, which
            # mis-parses the attribute and desynchronizes the DIE stream.
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present = StaticField('', 0),
            DW_FORM_sec_offset = self.Dwarf_offset(''),
            DW_FORM_exprloc = self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8 = self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            # NOTE(review): a DW_AT_* key in a DW_FORM_* table looks odd —
            # presumably kept for GNU producer compatibility; confirm.
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
        )

    def _create_aranges_header(self):
        """ Build the .debug_aranges set header parser. """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'), # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
            )

    def _create_nameLUT_header(self):
        """ Build the .debug_pubnames/.debug_pubtypes set header parser. """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
            )

    def _create_string_offsets_table_header(self):
        """ Build the .debug_str_offsets header parser. """
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
            )

    def _create_address_table_header(self):
        """ Build the .debug_addr header parser. """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            )

    def _create_lineprog_header(self):
        """ Build the line program header parser. """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # maximum_operations_per_instruction was added in DWARFv4;
            # default to 1 for earlier versions.
            If(lambda ctx: ctx['version'] >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx['opcode_base'] - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            RepeatUntilExcluding(
                lambda obj, ctx: obj == b'',
                CString('include_directory')),
            RepeatUntilExcluding(
                lambda obj, ctx: len(obj.name) == 0,
                self.Dwarf_lineprog_file_entry),
            )

    def _create_callframe_entry_headers(self):
        """ Build the call-frame CIE/FDE header parsers. """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>: a length-prefixed
            array of bytes.
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))
class _InitialLengthAdapter(Adapter):
    """ Decodes a DWARF "initial length" field from a sub-construct holding
        (first, second): values of `first` below 0xFFFFFF00 stand for
        themselves, 0xFFFFFFFF is the escape meaning the real 64-bit length
        is in `second`, and anything in between is reserved.
    """
    def _decode(self, obj, context):
        if obj.first < 0xFFFFFF00:
            return obj.first
        if obj.first == 0xFFFFFFFF:
            return obj.second
        # Reserved range 0xFFFFFF00..0xFFFFFFFE.
        raise ConstructError("Failed decoding initial length for %X" % (
            obj.first))

View File

@@ -1 +0,0 @@
EHABI_INDEX_ENTRY_SIZE = 8

View File

@@ -1,284 +0,0 @@
# -------------------------------------------------------------------------------
# elftools: ehabi/decoder.py
#
# Decode ARM exception handler bytecode.
#
# LeadroyaL (leadroyal@qq.com)
# This code is in the public domain
# -------------------------------------------------------------------------------
from collections import namedtuple
class EHABIBytecodeDecoder(object):
""" Decoder of a sequence of ARM exception handler abi bytecode.
Reference:
https://github.com/llvm/llvm-project/blob/master/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
https://developer.arm.com/documentation/ihi0038/b/
Accessible attributes:
mnemonic_array:
MnemonicItem array.
Parameters:
bytecode_array:
Integer array, raw data of bytecode.
"""
def __init__(self, bytecode_array):
self._bytecode_array = bytecode_array
self._index = None
self.mnemonic_array = None
self._decode()
def _decode(self):
""" Decode bytecode array, put result into mnemonic_array.
"""
self._index = 0
self.mnemonic_array = []
while self._index < len(self._bytecode_array):
for mask, value, handler in self.ring:
if (self._bytecode_array[self._index] & mask) == value:
start_idx = self._index
mnemonic = handler(self)
end_idx = self._index
self.mnemonic_array.append(
MnemonicItem(self._bytecode_array[start_idx: end_idx], mnemonic))
break
def _decode_00xxxxxx(self):
# SW.startLine() << format("0x%02X ; vsp = vsp + %u\n", Opcode,
# ((Opcode & 0x3f) << 2) + 4);
opcode = self._bytecode_array[self._index]
self._index += 1
return 'vsp = vsp + %u' % (((opcode & 0x3f) << 2) + 4)
def _decode_01xxxxxx(self):
# SW.startLine() << format("0x%02X ; vsp = vsp - %u\n", Opcode,
# ((Opcode & 0x3f) << 2) + 4);
opcode = self._bytecode_array[self._index]
self._index += 1
return 'vsp = vsp - %u' % (((opcode & 0x3f) << 2) + 4)
gpr_register_names = ("r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc")
def _calculate_range(self, start, count):
return ((1 << (count + 1)) - 1) << start
def _printGPR(self, gpr_mask):
hits = [self.gpr_register_names[i] for i in range(32) if gpr_mask & (1 << i) != 0]
return '{%s}' % ', '.join(hits)
def _print_registers(self, vfp_mask, prefix):
hits = [prefix + str(i) for i in range(32) if vfp_mask & (1 << i) != 0]
return '{%s}' % ', '.join(hits)
def _decode_1000iiii_iiiiiiii(self):
op0 = self._bytecode_array[self._index]
self._index += 1
op1 = self._bytecode_array[self._index]
self._index += 1
# uint16_t GPRMask = (Opcode1 << 4) | ((Opcode0 & 0x0f) << 12);
# SW.startLine()
# << format("0x%02X 0x%02X ; %s",
# Opcode0, Opcode1, GPRMask ? "pop " : "refuse to unwind");
# if (GPRMask)
# PrintGPR(GPRMask);
gpr_mask = (op1 << 4) | ((op0 & 0x0f) << 12)
if gpr_mask == 0:
return 'refuse to unwind'
else:
return 'pop %s' % self._printGPR(gpr_mask)
def _decode_10011101(self):
self._index += 1
return 'reserved (ARM MOVrr)'
def _decode_10011111(self):
self._index += 1
return 'reserved (WiMMX MOVrr)'
def _decode_1001nnnn(self):
# SW.startLine() << format("0x%02X ; vsp = r%u\n", Opcode, (Opcode & 0x0f));
opcode = self._bytecode_array[self._index]
self._index += 1
return 'vsp = r%u' % (opcode & 0x0f)
def _decode_10100nnn(self):
# SW.startLine() << format("0x%02X ; pop ", Opcode);
# PrintGPR((((1 << ((Opcode & 0x7) + 1)) - 1) << 4));
opcode = self._bytecode_array[self._index]
self._index += 1
return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07))
def _decode_10101nnn(self):
# SW.startLine() << format("0x%02X ; pop ", Opcode);
# PrintGPR((((1 << ((Opcode & 0x7) + 1)) - 1) << 4) | (1 << 14));
opcode = self._bytecode_array[self._index]
self._index += 1
return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07) | (1 << 14))
def _decode_10110000(self):
# SW.startLine() << format("0x%02X ; finish\n", Opcode);
self._index += 1
return 'finish'
def _decode_10110001_0000iiii(self):
# SW.startLine()
# << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1,
# ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? "spare" : "pop ");
# if (((Opcode1 & 0xf0) == 0x00) && Opcode1)
# PrintGPR((Opcode1 & 0x0f));
self._index += 1 # skip constant byte
op1 = self._bytecode_array[self._index]
self._index += 1
if (op1 & 0xf0) != 0 or op1 == 0x00:
return 'spare'
else:
return 'pop %s' % self._printGPR((op1 & 0x0f))
def _decode_10110010_uleb128(self):
# SmallVector<uint8_t, 4> ULEB;
# do { ULEB.push_back(Opcodes[OI ^ 3]); } while (Opcodes[OI++ ^ 3] & 0x80);
# uint64_t Value = 0;
# for (unsigned BI = 0, BE = ULEB.size(); BI != BE; ++BI)
# Value = Value | ((ULEB[BI] & 0x7f) << (7 * BI));
# OS << format("; vsp = vsp + %" PRIu64 "\n", 0x204 + (Value << 2));
self._index += 1 # skip constant byte
uleb_buffer = [self._bytecode_array[self._index]]
self._index += 1
while self._bytecode_array[self._index] & 0x80 == 0:
uleb_buffer.append(self._bytecode_array[self._index])
self._index += 1
value = 0
for b in reversed(uleb_buffer):
value = (value << 7) + (b & 0x7F)
return 'vsp = vsp + %u' % (0x204 + (value << 2))
def _decode_10110011_sssscccc(self):
# these two decoders are equal
return self._decode_11001001_sssscccc()
def _decode_101101nn(self):
return self._spare()
def _decode_10111nnn(self):
# SW.startLine() << format("0x%02X ; pop ", Opcode);
# PrintRegisters((((1 << ((Opcode & 0x07) + 1)) - 1) << 8), "d");
opcode = self._bytecode_array[self._index]
self._index += 1
return 'pop %s' % self._print_registers(self._calculate_range(8, opcode & 0x07), "d")
def _decode_11000110_sssscccc(self):
# SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1);
# uint8_t Start = ((Opcode1 & 0xf0) >> 4);
# uint8_t Count = ((Opcode1 & 0x0f) >> 0);
# PrintRegisters((((1 << (Count + 1)) - 1) << Start), "wR");
self._index += 1 # skip constant byte
op1 = self._bytecode_array[self._index]
self._index += 1
start = ((op1 & 0xf0) >> 4)
count = ((op1 & 0x0f) >> 0)
return 'pop %s' % self._print_registers(self._calculate_range(start, count), "wR")
def _decode_11000111_0000iiii(self):
# SW.startLine()
# << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1,
# ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? "spare" : "pop ");
# if ((Opcode1 & 0xf0) == 0x00 && Opcode1)
# PrintRegisters(Opcode1 & 0x0f, "wCGR");
self._index += 1 # skip constant byte
op1 = self._bytecode_array[self._index]
self._index += 1
if (op1 & 0xf0) != 0 or op1 == 0x00:
return 'spare'
else:
return 'pop %s' % self._print_registers(op1 & 0x0f, "wCGR")
def _decode_11001000_sssscccc(self):
# SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1);
# uint8_t Start = 16 + ((Opcode1 & 0xf0) >> 4);
# uint8_t Count = ((Opcode1 & 0x0f) >> 0);
# PrintRegisters((((1 << (Count + 1)) - 1) << Start), "d");
self._index += 1 # skip constant byte
op1 = self._bytecode_array[self._index]
self._index += 1
start = 16 + ((op1 & 0xf0) >> 4)
count = ((op1 & 0x0f) >> 0)
return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")
def _decode_11001001_sssscccc(self):
# SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1);
# uint8_t Start = ((Opcode1 & 0xf0) >> 4);
# uint8_t Count = ((Opcode1 & 0x0f) >> 0);
# PrintRegisters((((1 << (Count + 1)) - 1) << Start), "d");
self._index += 1 # skip constant byte
op1 = self._bytecode_array[self._index]
self._index += 1
start = ((op1 & 0xf0) >> 4)
count = ((op1 & 0x0f) >> 0)
return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d")
def _decode_11001yyy(self):
return self._spare()
def _decode_11000nnn(self):
# SW.startLine() << format("0x%02X ; pop ", Opcode);
# PrintRegisters((((1 << ((Opcode & 0x07) + 1)) - 1) << 10), "wR");
opcode = self._bytecode_array[self._index]
self._index += 1
return 'pop %s' % self._print_registers(self._calculate_range(10, opcode & 0x07), "wR")
def _decode_11010nnn(self):
# these two decoders are equal
return self._decode_10111nnn()
def _decode_11xxxyyy(self):
return self._spare()
def _spare(self):
self._index += 1
return 'spare'
_DECODE_RECIPE_TYPE = namedtuple('_DECODE_RECIPE_TYPE', 'mask value handler')
ring = (
_DECODE_RECIPE_TYPE(mask=0xc0, value=0x00, handler=_decode_00xxxxxx),
_DECODE_RECIPE_TYPE(mask=0xc0, value=0x40, handler=_decode_01xxxxxx),
_DECODE_RECIPE_TYPE(mask=0xf0, value=0x80, handler=_decode_1000iiii_iiiiiiii),
_DECODE_RECIPE_TYPE(mask=0xff, value=0x9d, handler=_decode_10011101),
_DECODE_RECIPE_TYPE(mask=0xff, value=0x9f, handler=_decode_10011111),
_DECODE_RECIPE_TYPE(mask=0xf0, value=0x90, handler=_decode_1001nnnn),
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xa0, handler=_decode_10100nnn),
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xa8, handler=_decode_10101nnn),
_DECODE_RECIPE_TYPE(mask=0xff, value=0xb0, handler=_decode_10110000),
_DECODE_RECIPE_TYPE(mask=0xff, value=0xb1, handler=_decode_10110001_0000iiii),
_DECODE_RECIPE_TYPE(mask=0xff, value=0xb2, handler=_decode_10110010_uleb128),
_DECODE_RECIPE_TYPE(mask=0xff, value=0xb3, handler=_decode_10110011_sssscccc),
_DECODE_RECIPE_TYPE(mask=0xfc, value=0xb4, handler=_decode_101101nn),
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xb8, handler=_decode_10111nnn),
_DECODE_RECIPE_TYPE(mask=0xff, value=0xc6, handler=_decode_11000110_sssscccc),
_DECODE_RECIPE_TYPE(mask=0xff, value=0xc7, handler=_decode_11000111_0000iiii),
_DECODE_RECIPE_TYPE(mask=0xff, value=0xc8, handler=_decode_11001000_sssscccc),
_DECODE_RECIPE_TYPE(mask=0xff, value=0xc9, handler=_decode_11001001_sssscccc),
_DECODE_RECIPE_TYPE(mask=0xc8, value=0xc8, handler=_decode_11001yyy),
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xc0, handler=_decode_11000nnn),
_DECODE_RECIPE_TYPE(mask=0xf8, value=0xd0, handler=_decode_11010nnn),
_DECODE_RECIPE_TYPE(mask=0xc0, value=0xc0, handler=_decode_11xxxyyy),
)
class MnemonicItem(object):
    """ Single mnemonic item.

        Pairs the raw unwind bytecode bytes with their decoded mnemonic text.
    """
    def __init__(self, bytecode, mnemonic):
        self.bytecode = bytecode
        self.mnemonic = mnemonic

    def __repr__(self):
        hex_bytes = ' '.join('0x%02x' % byte for byte in self.bytecode)
        return '%s ; %s' % (hex_bytes, self.mnemonic)

View File

@@ -1,209 +0,0 @@
# -------------------------------------------------------------------------------
# elftools: ehabi/ehabiinfo.py
#
# Decoder for ARM exception handler bytecode.
#
# LeadroyaL (leadroyal@qq.com)
# This code is in the public domain
# -------------------------------------------------------------------------------
from ..common.utils import struct_parse
from .decoder import EHABIBytecodeDecoder
from .constants import EHABI_INDEX_ENTRY_SIZE
from .structs import EHABIStructs
class EHABIInfo(object):
    """ ARM exception handler abi information class.

        Parameters:

            arm_idx_section:
                elf.sections.Section object, section which type is
                SHT_ARM_EXIDX.

            little_endian:
                bool, endianness of elf file.
    """
    def __init__(self, arm_idx_section, little_endian):
        self._arm_idx_section = arm_idx_section
        self._struct = EHABIStructs(little_endian)
        # Lazily computed by num_entry().
        self._num_entry = None

    def section_name(self):
        # Name of the underlying .ARM.exidx section.
        return self._arm_idx_section.name

    def section_offset(self):
        # File offset of the underlying .ARM.exidx section.
        return self._arm_idx_section['sh_offset']

    def num_entry(self):
        """ Number of exception handler entry in the section.
        """
        if self._num_entry is None:
            self._num_entry = self._arm_idx_section['sh_size'] // EHABI_INDEX_ENTRY_SIZE
        return self._num_entry

    def get_entry(self, n):
        """ Get the exception handler entry at index #n. (EHABIEntry object
            or a subclass)
        """
        if n >= self.num_entry():
            raise IndexError('Invalid entry %d/%d' % (n, self._num_entry))
        # Each index entry is two words: word0 is a prel31 offset to the
        # function, word1's interpretation depends on its bits.
        eh_index_entry_offset = self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE
        eh_index_data = struct_parse(self._struct.EH_index_struct, self._arm_idx_section.stream, eh_index_entry_offset)
        word0, word1 = eh_index_data['word0'], eh_index_data['word1']
        if word0 & 0x80000000 != 0:
            # word0 must be a prel31 value, so its top bit must be clear.
            return CorruptEHABIEntry('Corrupt ARM exception handler table entry: %x' % n)
        function_offset = arm_expand_prel31(word0, self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE)
        if word1 == 1:
            # 0x1 means cannot unwind
            return CannotUnwindEHABIEntry(function_offset)
        elif word1 & 0x80000000 == 0:
            # highest bit is zero, point to .ARM.extab data
            eh_table_offset = arm_expand_prel31(word1, self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE + 4)
            eh_index_data = struct_parse(self._struct.EH_table_struct, self._arm_idx_section.stream, eh_table_offset)
            word0 = eh_index_data['word0']
            if word0 & 0x80000000 == 0:
                # highest bit is ZERO: generic model. (The original comment
                # said "one", contradicting the condition tested above.)
                return GenericEHABIEntry(function_offset, arm_expand_prel31(word0, eh_table_offset))
            else:
                # highest bit is one, arm compact model
                # highest half must be 0b1000 for compact model
                if word0 & 0x70000000 != 0:
                    return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n)
                per_index = (word0 >> 24) & 0x7f
                if per_index == 0:
                    # arm compact model 0: all three bytecode bytes are
                    # inline in this word
                    opcode = [(word0 & 0xFF0000) >> 16, (word0 & 0xFF00) >> 8, word0 & 0xFF]
                    return EHABIEntry(function_offset, per_index, opcode)
                elif per_index == 1 or per_index == 2:
                    # arm compact model 1/2: byte 2 counts the additional
                    # bytecode words that follow in the stream
                    more_word = (word0 >> 16) & 0xff
                    opcode = [(word0 >> 8) & 0xff, (word0 >> 0) & 0xff]
                    self._arm_idx_section.stream.seek(eh_table_offset + 4)
                    for i in range(more_word):
                        r = struct_parse(self._struct.EH_table_struct, self._arm_idx_section.stream)['word0']
                        opcode.append((r >> 24) & 0xFF)
                        opcode.append((r >> 16) & 0xFF)
                        opcode.append((r >> 8) & 0xFF)
                        opcode.append((r >> 0) & 0xFF)
                    return EHABIEntry(function_offset, per_index, opcode, eh_table_offset=eh_table_offset)
                else:
                    return CorruptEHABIEntry('Unknown ARM compact model %d at table entry: %x' % (per_index, n))
        else:
            # highest bit of word1 is one: compact-model entry stored inline
            # in the index itself; its personality index must be 0.
            if word1 & 0x7f000000 != 0:
                return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n)
            opcode = [(word1 & 0xFF0000) >> 16, (word1 & 0xFF00) >> 8, word1 & 0xFF]
            return EHABIEntry(function_offset, 0, opcode)
class EHABIEntry(object):
    """ Exception handler abi entry.

        Accessible attributes:

            function_offset:
                Integer.
                None if corrupt. (Reference: CorruptEHABIEntry)

            personality:
                Integer.
                None if corrupt or unwindable.
                (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry)
                0/1/2 for ARM personality compact format.
                Others for generic personality.

            bytecode_array:
                Integer array.
                None if corrupt or unwindable or generic personality.
                (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry,
                GenericEHABIEntry)

            eh_table_offset:
                Integer.
                Only entries who point to .ARM.extab contains this field,
                otherwise return None.

            unwindable:
                bool. Whether this function is unwindable.

            corrupt:
                bool. Whether this entry is corrupt.
    """

    def __init__(self,
                 function_offset,
                 personality,
                 bytecode_array,
                 eh_table_offset=None,
                 unwindable=True,
                 corrupt=False):
        self.function_offset = function_offset
        self.personality = personality
        self.bytecode_array = bytecode_array
        self.eh_table_offset = eh_table_offset
        self.unwindable = unwindable
        self.corrupt = corrupt

    def mnmemonic_array(self):
        """ Decode bytecode_array into a list of mnemonic items, or None when
            there is no bytecode (corrupt / cannot-unwind / generic entries).

            Name kept for backward compatibility (historical misspelling);
            prefer the mnemonic_array() alias below.
        """
        if self.bytecode_array:
            return EHABIBytecodeDecoder(self.bytecode_array).mnemonic_array
        else:
            return None

    # Correctly-spelled, backward-compatible alias for the misspelled
    # public method name above.
    mnemonic_array = mnmemonic_array

    def __repr__(self):
        return "<EHABIEntry function_offset=0x%x, personality=%d, %sbytecode=%s>" % (
            self.function_offset,
            self.personality,
            "eh_table_offset=0x%x, " % self.eh_table_offset if self.eh_table_offset else "",
            self.bytecode_array)
class CorruptEHABIEntry(EHABIEntry):
    """ Placeholder for a corrupt index/table record. Attribute #corrupt is
        True; every other attribute is None.
    """
    def __init__(self, reason):
        super(CorruptEHABIEntry, self).__init__(
            function_offset=None, personality=None, bytecode_array=None,
            corrupt=True)
        self.reason = reason

    def __repr__(self):
        return "<CorruptEHABIEntry reason=%s>" % self.reason
class CannotUnwindEHABIEntry(EHABIEntry):
    """ Entry for a function that cannot be unwound. Attribute #unwindable
        is False.
    """
    def __init__(self, function_offset):
        super(CannotUnwindEHABIEntry, self).__init__(
            function_offset, personality=None, bytecode_array=None,
            unwindable=False)

    def __repr__(self):
        return "<CannotUnwindEHABIEntry function_offset=0x%x>" % self.function_offset
class GenericEHABIEntry(EHABIEntry):
    """ Entry that uses the generic model rather than the ARM compact model.
        Attribute #bytecode_array is None.
    """
    def __init__(self, function_offset, personality):
        super(GenericEHABIEntry, self).__init__(
            function_offset, personality, bytecode_array=None)

    def __repr__(self):
        return "<GenericEHABIEntry function_offset=0x%x, personality=0x%x>" % (
            self.function_offset, self.personality)
def arm_expand_prel31(address, place):
    """ Expand a 31-bit signed PC-relative (prel31) value and apply it.

        address: uint32 whose low 31 bits hold the prel31 offset
        place: uint32, address where the prel31 word is located
        return: uint64, target address wrapped to 64 bits

        Fixes two defects in the previous implementation:
        * The sign bit of a 31-bit two's-complement value is bit 30
          (0x40000000); the old test of 0x04000000 (bit 26) failed to
          sign-extend negative offsets whose bit 26 happened to be clear.
        * `location + place & mask` parsed as `location + (place & mask)`
          due to operator precedence, so sign-extended sums were not
          wrapped back into 64 bits.
    """
    location = address & 0x7fffffff
    if location & 0x40000000:
        # Negative offset: sign-extend bit 30 through the upper 33 bits.
        location |= 0xffffffff80000000
    return (location + place) & 0xffffffffffffffff

View File

@@ -1,47 +0,0 @@
# -------------------------------------------------------------------------------
# elftools: ehabi/structs.py
#
# Encapsulation of Construct structs for parsing an EHABI, adjusted for
# correct endianness and word-size.
#
# LeadroyaL (leadroyal@qq.com)
# This code is in the public domain
# -------------------------------------------------------------------------------
from ..construct import UBInt32, ULInt32, Struct
class EHABIStructs(object):
    """ Accessible attributes:

            EH_index_struct:
                Struct of item in section .ARM.exidx.

            EH_table_struct:
                Struct of item in section .ARM.extab.
    """
    def __init__(self, little_endian):
        self._little_endian = little_endian
        self._create_structs()

    def _create_structs(self):
        # Select the 32-bit word parser matching the ELF file's byte order.
        self.EHABI_uint32 = ULInt32 if self._little_endian else UBInt32
        self._create_exception_handler_index()
        self._create_exception_handler_table()

    def _create_exception_handler_index(self):
        # One .ARM.exidx entry: two 32-bit words.
        self.EH_index_struct = Struct(
            'EH_index',
            self.EHABI_uint32('word0'),
            self.EHABI_uint32('word1'))

    def _create_exception_handler_table(self):
        # One .ARM.extab word, read sequentially when decoding.
        self.EH_table_struct = Struct(
            'EH_table',
            self.EHABI_uint32('word0'))

View File

@@ -1,151 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/constants.py
#
# Constants and flags, placed into classes for namespacing
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
class E_FLAGS(object):
    """ Flag values for the e_flags field of the ELF header
    """
    # ARM EABI version mask and version values
    EF_ARM_EABIMASK = 0xFF000000
    EF_ARM_EABI_VER1 = 0x01000000
    EF_ARM_EABI_VER2 = 0x02000000
    EF_ARM_EABI_VER3 = 0x03000000
    EF_ARM_EABI_VER4 = 0x04000000
    EF_ARM_EABI_VER5 = 0x05000000
    EF_ARM_GCCMASK = 0x00400FFF
    EF_ARM_RELEXEC = 0x01
    EF_ARM_HASENTRY = 0x02
    EF_ARM_SYMSARESORTED = 0x04
    EF_ARM_DYNSYMSUSESEGIDX = 0x8
    EF_ARM_MAPSYMSFIRST = 0x10
    EF_ARM_LE8 = 0x00400000
    EF_ARM_BE8 = 0x00800000
    EF_ARM_ABI_FLOAT_SOFT = 0x00000200
    EF_ARM_ABI_FLOAT_HARD = 0x00000400

    # PowerPC64 ABI versions
    EF_PPC64_ABI_V0 = 0
    EF_PPC64_ABI_V1 = 1
    EF_PPC64_ABI_V2 = 2

    # MIPS-specific flags
    EF_MIPS_NOREORDER = 1
    EF_MIPS_PIC = 2
    EF_MIPS_CPIC = 4
    EF_MIPS_XGOT = 8
    EF_MIPS_64BIT_WHIRL = 16
    EF_MIPS_ABI2 = 32
    EF_MIPS_ABI_ON32 = 64
    EF_MIPS_32BITMODE = 256
    EF_MIPS_NAN2008 = 1024

    # MIPS architecture-level mask and level values
    EF_MIPS_ARCH = 0xf0000000
    EF_MIPS_ARCH_1 = 0x00000000
    EF_MIPS_ARCH_2 = 0x10000000
    EF_MIPS_ARCH_3 = 0x20000000
    EF_MIPS_ARCH_4 = 0x30000000
    EF_MIPS_ARCH_5 = 0x40000000
    EF_MIPS_ARCH_32 = 0x50000000
    EF_MIPS_ARCH_64 = 0x60000000
    EF_MIPS_ARCH_32R2 = 0x70000000
    EF_MIPS_ARCH_64R2 = 0x80000000
class E_FLAGS_MASKS(object):
    """Masks to be used for convenience when working with E_FLAGS

    This is a simplified approach that is also used by GNU binutils
    readelf
    """
    # MIPS ABI field of e_flags, and the ABIs it can encode
    EFM_MIPS_ABI = 0x0000F000
    EFM_MIPS_ABI_O32 = 0x00001000
    EFM_MIPS_ABI_O64 = 0x00002000
    EFM_MIPS_ABI_EABI32 = 0x00003000
    EFM_MIPS_ABI_EABI64 = 0x00004000
class SHN_INDICES(object):
    """ Special section indices
    """
    SHN_UNDEF = 0
    # Reserved range overlaps: LORESERVE == LOPROC and HIRESERVE == XINDEX
    # by definition in the ELF spec.
    SHN_LORESERVE = 0xff00
    SHN_LOPROC = 0xff00
    SHN_HIPROC = 0xff1f
    SHN_ABS = 0xfff1
    SHN_COMMON = 0xfff2
    SHN_HIRESERVE = 0xffff
    SHN_XINDEX = 0xffff
class SH_FLAGS(object):
    """ Flag values for the sh_flags field of section headers
    """
    SHF_WRITE = 0x1
    SHF_ALLOC = 0x2
    SHF_EXECINSTR = 0x4
    SHF_MERGE = 0x10
    SHF_STRINGS = 0x20
    SHF_INFO_LINK = 0x40
    SHF_LINK_ORDER = 0x80
    SHF_OS_NONCONFORMING = 0x100
    SHF_GROUP = 0x200
    SHF_TLS = 0x400
    SHF_COMPRESSED = 0x800
    # OS- and processor-specific ranges; SHF_EXCLUDE lies inside MASKPROC.
    SHF_MASKOS = 0x0ff00000
    SHF_EXCLUDE = 0x80000000
    SHF_MASKPROC = 0xf0000000
class RH_FLAGS(object):
    """ Flag values for the DT_MIPS_FLAGS dynamic table entries
    """
    RHF_NONE = 0x00000000
    RHF_QUICKSTART = 0x00000001
    RHF_NOTPOT = 0x00000002
    RHF_NO_LIBRARY_REPLACEMENT = 0x00000004
    RHF_NO_MOVE = 0x00000008
    RHF_SGI_ONLY = 0x00000010
    RHF_GUARANTEE_INIT = 0x00000020
    RHF_DELTA_C_PLUS_PLUS = 0x00000040
    RHF_GUARANTEE_START_INIT = 0x00000080
    RHF_PIXIE = 0x00000100
    RHF_DEFAULT_DELAY_LOAD = 0x00000200
    RHF_REQUICKSTART = 0x00000400
    RHF_REQUICKSTARTED = 0x00000800
    RHF_CORD = 0x00001000
    RHF_NO_UNRES_UNDEF = 0x00002000
    RHF_RLD_ORDER_SAFE = 0x00004000
class P_FLAGS(object):
    """ Flag values for the p_flags field of program headers
    """
    # Execute / write / read permission bits
    PF_X = 0x1
    PF_W = 0x2
    PF_R = 0x4
    # OS- and processor-specific reserved ranges
    PF_MASKOS = 0x00FF0000
    PF_MASKPROC = 0xFF000000
# symbol info flags for entries
# in the .SUNW_syminfo section
class SUNW_SYMINFO_FLAGS(object):
    """ Flags for the si_flags field of entries
        in the .SUNW_syminfo section
    """
    SYMINFO_FLG_DIRECT = 0x1
    SYMINFO_FLG_FILTER = 0x2
    SYMINFO_FLG_COPY = 0x4
    SYMINFO_FLG_LAZYLOAD = 0x8
    SYMINFO_FLG_DIRECTBIND = 0x10
    SYMINFO_FLG_NOEXTDIRECT = 0x20
    SYMINFO_FLG_AUXILIARY = 0x40
    SYMINFO_FLG_INTERPOSE = 0x80
    SYMINFO_FLG_CAP = 0x100
    SYMINFO_FLG_DEFERRED = 0x200
class VER_FLAGS(object):
    """ Flag values for version definition/dependency entries. """
    VER_FLG_BASE = 0x1
    VER_FLG_WEAK = 0x2
    VER_FLG_INFO = 0x4

View File

@@ -1,939 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/descriptions.py
#
# Textual descriptions of the various enums and flags of ELF
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from .enums import (
ENUM_D_TAG, ENUM_E_VERSION, ENUM_P_TYPE_BASE, ENUM_SH_TYPE_BASE,
ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64,
ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64,
ENUM_RELOC_TYPE_MIPS, ENUM_ATTR_TAG_ARM, ENUM_DT_FLAGS, ENUM_DT_FLAGS_1)
from .constants import (
P_FLAGS, RH_FLAGS, SH_FLAGS, SUNW_SYMINFO_FLAGS, VER_FLAGS)
from ..common.py3compat import bytes2hex, iteritems
def describe_ei_class(x):
    """ Describe e_ident[EI_CLASS] (ELF32/ELF64). """
    return _DESCR_EI_CLASS.get(x, _unknown)

def describe_ei_data(x):
    """ Describe e_ident[EI_DATA] (byte order). """
    return _DESCR_EI_DATA.get(x, _unknown)

def describe_ei_version(x):
    """ Numeric description of e_ident[EI_VERSION], tagging the current one. """
    s = '%d' % ENUM_E_VERSION[x]
    if x == 'EV_CURRENT':
        s += ' (current)'
    return s

def describe_ei_osabi(x):
    """ Describe e_ident[EI_OSABI] (target OS/ABI). """
    return _DESCR_EI_OSABI.get(x, _unknown)

def describe_e_type(x, elffile=None):
    """ Describe the e_type field; given an elffile, distinguishes a PIE
        executable from a plain shared object via the DF_1_PIE flag.
    """
    if elffile is not None and x == 'ET_DYN':
        # Detect whether this is a normal SO or a PIE executable
        # NOTE(review): get_section_by_name may return None when there is no
        # .dynamic section, which would raise AttributeError below - confirm
        # that ET_DYN files always carry one.
        dynamic = elffile.get_section_by_name('.dynamic')
        for t in dynamic.iter_tags('DT_FLAGS_1'):
            if t.entry.d_val & ENUM_DT_FLAGS_1['DF_1_PIE']:
                return 'DYN (Position-Independent Executable file)'
    return _DESCR_E_TYPE.get(x, _unknown)

def describe_e_machine(x):
    """ Describe the e_machine field (target architecture). """
    return _DESCR_E_MACHINE.get(x, _unknown)

def describe_e_version_numeric(x):
    """ Hexadecimal form of the e_version field. """
    return '0x%x' % ENUM_E_VERSION[x]

def describe_p_type(x):
    """ Describe a program header type, including the OS-specific range. """
    if x in _DESCR_P_TYPE:
        return _DESCR_P_TYPE.get(x)
    elif x >= ENUM_P_TYPE_BASE['PT_LOOS'] and x <= ENUM_P_TYPE_BASE['PT_HIOS']:
        return 'LOOS+%lx' % (x - ENUM_P_TYPE_BASE['PT_LOOS'])
    else:
        return _unknown

def describe_p_flags(x):
    """ Fixed-width 'RWE' rendering of segment permission flags (readelf
        style): unset bits become spaces.
    """
    s = ''
    for flag in (P_FLAGS.PF_R, P_FLAGS.PF_W, P_FLAGS.PF_X):
        s += _DESCR_P_FLAGS[flag] if (x & flag) else ' '
    return s
def describe_rh_flags(x):
    """ Space-joined names of the DT_MIPS_FLAGS bits set in x. """
    return ' '.join(
        _DESCR_RH_FLAGS[flag]
        for flag in (RH_FLAGS.RHF_NONE, RH_FLAGS.RHF_QUICKSTART,
                     RH_FLAGS.RHF_NOTPOT, RH_FLAGS.RHF_NO_LIBRARY_REPLACEMENT,
                     RH_FLAGS.RHF_NO_MOVE, RH_FLAGS.RHF_SGI_ONLY,
                     RH_FLAGS.RHF_GUARANTEE_INIT,
                     RH_FLAGS.RHF_DELTA_C_PLUS_PLUS,
                     RH_FLAGS.RHF_GUARANTEE_START_INIT, RH_FLAGS.RHF_PIXIE,
                     RH_FLAGS.RHF_DEFAULT_DELAY_LOAD,
                     RH_FLAGS.RHF_REQUICKSTART, RH_FLAGS.RHF_REQUICKSTARTED,
                     RH_FLAGS.RHF_CORD, RH_FLAGS.RHF_NO_UNRES_UNDEF,
                     RH_FLAGS.RHF_RLD_ORDER_SAFE)
        if x & flag)

def describe_sh_type(x):
    """ Describe a section header type, including part of the OS range. """
    if x in _DESCR_SH_TYPE:
        return _DESCR_SH_TYPE.get(x)
    elif (x >= ENUM_SH_TYPE_BASE['SHT_LOOS'] and
          x < ENUM_SH_TYPE_BASE['SHT_GNU_versym']):
        return 'loos+0x%lx' % (x - ENUM_SH_TYPE_BASE['SHT_LOOS'])
    else:
        return _unknown

def describe_sh_flags(x):
    """ One-letter-per-flag rendering of section flags (readelf style). """
    s = ''
    for flag in (
            SH_FLAGS.SHF_WRITE, SH_FLAGS.SHF_ALLOC, SH_FLAGS.SHF_EXECINSTR,
            SH_FLAGS.SHF_MERGE, SH_FLAGS.SHF_STRINGS, SH_FLAGS.SHF_INFO_LINK,
            SH_FLAGS.SHF_LINK_ORDER, SH_FLAGS.SHF_OS_NONCONFORMING,
            SH_FLAGS.SHF_GROUP, SH_FLAGS.SHF_TLS, SH_FLAGS.SHF_MASKOS,
            SH_FLAGS.SHF_EXCLUDE):
        s += _DESCR_SH_FLAGS[flag] if (x & flag) else ''
    # SHF_EXCLUDE lies inside the SHF_MASKPROC range, so only emit the
    # generic processor-specific marker 'p' when EXCLUDE is not the cause.
    if not x & SH_FLAGS.SHF_EXCLUDE:
        if x & SH_FLAGS.SHF_MASKPROC:
            s += 'p'
    return s

def describe_symbol_type(x):
    """ Describe a symbol's type (FUNC, OBJECT, ...). """
    return _DESCR_ST_INFO_TYPE.get(x, _unknown)

def describe_symbol_bind(x):
    """ Describe a symbol's binding (LOCAL/GLOBAL/WEAK). """
    return _DESCR_ST_INFO_BIND.get(x, _unknown)

def describe_symbol_visibility(x):
    """ Describe a symbol's visibility (DEFAULT/HIDDEN/...). """
    return _DESCR_ST_VISIBILITY.get(x, _unknown)

def describe_symbol_local(x):
    # Renders 1 << x instructions; presumably the PPC64 st_other
    # localentry encoding - confirm against the caller.
    return '[<localentry>: ' + str(1 << x) + ']'

def describe_symbol_other(x):
    """ Describe the st_other field: visibility plus an optional
        localentry annotation when x['local'] is in 2..6.
    """
    vis = describe_symbol_visibility(x['visibility'])
    if x['local'] > 1 and x['local'] < 7:
        return vis + ' ' + describe_symbol_local(x['local'])
    return vis

def describe_symbol_shndx(x):
    """ Describe a symbol's section index (UND/ABS/COM or the raw number). """
    return _DESCR_ST_SHNDX.get(x, '%3s' % x)
def describe_reloc_type(x, elffile):
    """ Describe a relocation type number according to the file's
        machine architecture.
    """
    arch = elffile.get_machine_arch()
    if arch == 'x86':
        return _DESCR_RELOC_TYPE_i386.get(x, _unknown)
    elif arch == 'x64':
        return _DESCR_RELOC_TYPE_x64.get(x, _unknown)
    elif arch == 'ARM':
        return _DESCR_RELOC_TYPE_ARM.get(x, _unknown)
    elif arch == 'AArch64':
        return _DESCR_RELOC_TYPE_AARCH64.get(x, _unknown)
    elif arch == '64-bit PowerPC':
        return _DESCR_RELOC_TYPE_PPC64.get(x, _unknown)
    elif arch == 'MIPS':
        return _DESCR_RELOC_TYPE_MIPS.get(x, _unknown)
    else:
        return 'unrecognized: %-7x' % (x & 0xFFFFFFFF)

def describe_dyn_tag(x):
    """ Describe a dynamic-section tag (DT_*). """
    return _DESCR_D_TAG.get(x, _unknown)

def describe_dt_flags(x):
    """ Names (without the 'DT_' prefix) of DT_FLAGS bits set in x,
        ordered by flag value.
    """
    return ' '.join(key[3:] for key, val in
        sorted(ENUM_DT_FLAGS.items(), key=lambda t: t[1]) if x & val)

def describe_dt_flags_1(x):
    """ Names (without the 'DF_1_' prefix) of DT_FLAGS_1 bits set in x,
        ordered by flag value.
    """
    return ' '.join(key[5:] for key, val in
        sorted(ENUM_DT_FLAGS_1.items(), key=lambda t: t[1]) if x & val)

def describe_syminfo_flags(x):
    """ One-letter rendering of the .SUNW_syminfo si_flags bits set in x. """
    return ''.join(_DESCR_SYMINFO_FLAGS[flag] for flag in (
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_CAP,
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DIRECT,
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_FILTER,
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_AUXILIARY,
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DIRECTBIND,
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_COPY,
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_LAZYLOAD,
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_NOEXTDIRECT,
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_INTERPOSE,
        SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DEFERRED) if x & flag)

def describe_symbol_boundto(x):
    """ Describe the syminfo 'bound to' field (<self>/<parent>/...). """
    return _DESCR_SYMINFO_BOUNDTO.get(x, '%3s' % x)

def describe_ver_flags(x):
    """ ' | '-joined names of the version-flag bits set in x. """
    return ' | '.join(_DESCR_VER_FLAGS[flag] for flag in (
        VER_FLAGS.VER_FLG_WEAK,
        VER_FLAGS.VER_FLG_BASE,
        VER_FLAGS.VER_FLG_INFO) if x & flag)
def describe_note(x):
    """ Describe a PT_NOTE entry: the note type name plus a type-specific
        rendering of the descriptor payload.
    """
    n_desc = x['n_desc']
    desc = ''
    if x['n_type'] == 'NT_GNU_ABI_TAG':
        # Android reuses the NT_GNU_ABI_TAG number with a raw payload,
        # so it gets a hex dump instead of the OS/ABI decoding.
        if x['n_name'] == 'Android':
            desc = '\n description data: %s ' % bytes2hex(x['n_descdata'])
        else:
            desc = '\n OS: %s, ABI: %d.%d.%d' % (
                _DESCR_NOTE_ABI_TAG_OS.get(n_desc['abi_os'], _unknown),
                n_desc['abi_major'], n_desc['abi_minor'], n_desc['abi_tiny'])
    elif x['n_type'] == 'NT_GNU_BUILD_ID':
        desc = '\n Build ID: %s' % (n_desc)
    elif x['n_type'] == 'NT_GNU_GOLD_VERSION':
        desc = '\n Version: %s' % (n_desc)
    elif x['n_type'] == 'NT_GNU_PROPERTY_TYPE_0':
        desc = '\n Properties: ' + describe_note_gnu_properties(x['n_desc'])
    else:
        desc = '\n description data: {}'.format(bytes2hex(n_desc))
    # Android ABI-tag notes are presented as NT_VERSION, mirroring readelf.
    if x['n_type'] == 'NT_GNU_ABI_TAG' and x['n_name'] == 'Android':
        note_type = 'NT_VERSION'
        note_type_desc = 'version'
    else:
        # n_type is either a mapped name (str) or a raw unmapped int.
        note_type = (x['n_type'] if isinstance(x['n_type'], str)
                     else 'Unknown note type:')
        note_type_desc = ('0x%.8x' % x['n_type']
                          if isinstance(x['n_type'], int) else
                          _DESCR_NOTE_N_TYPE.get(x['n_type'], _unknown))
    return '%s (%s)%s' % (note_type, note_type_desc, desc)
def describe_attr_tag_arm(tag, val, extra):
    """ Describe one ARM build-attribute tag/value pair. """
    # Index into the per-tag value-description tables (1-based tag numbers).
    idx = ENUM_ATTR_TAG_ARM[tag] - 1
    d_entry = _DESCR_ATTR_VAL_ARM[idx]
    if d_entry is None:
        # Tags without a value table get bespoke formatting.
        if tag == 'TAG_COMPATIBILITY':
            return (_DESCR_ATTR_TAG_ARM[tag]
                    + 'flag = %d, vendor = %s' % (val, extra))
        elif tag == 'TAG_ALSO_COMPATIBLE_WITH':
            if val.tag == 'TAG_CPU_ARCH':
                # NOTE(review): d_entry is None on this path (checked just
                # above), so d_entry[val] raises TypeError; this should
                # index the TAG_CPU_ARCH value table instead - confirm the
                # shape of `val` and fix.
                return _DESCR_ATTR_TAG_ARM[tag] + d_entry[val]
            else:
                return _DESCR_ATTR_TAG_ARM[tag] + '??? (%d)' % val.tag
        elif tag == 'TAG_NODEFAULTS':
            return _DESCR_ATTR_TAG_ARM[tag] + 'True'
        # Fallback: tag label, plus the raw value quoted when truthy.
        s = _DESCR_ATTR_TAG_ARM[tag]
        s += '"%s"' % val if val else ''
        return s
    else:
        return _DESCR_ATTR_TAG_ARM[tag] + d_entry[val]
def describe_note_gnu_property_x86_feature_1(value):
    """ Comma-joined names of the GNU_PROPERTY_X86_FEATURE_1_* bits set
        in value, in the table's defined output order.
    """
    descs = []
    for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS:
        if value & mask:
            descs.append(desc)
    return 'x86 feature: ' + ', '.join(descs)
def describe_note_gnu_properties(properties):
    """ Render a list of parsed NT_GNU_PROPERTY_TYPE_0 properties, one
        description per line.
    """
    descriptions = []
    for prop in properties:
        t, d, sz = prop.pr_type, prop.pr_data, prop.pr_datasz
        if t == 'GNU_PROPERTY_STACK_SIZE':
            # A well-formed stack-size property parses to an int; use the
            # idiomatic isinstance() check (was: type(d) is int).
            if isinstance(d, int):
                prop_desc = 'stack size: 0x%x' % d
            else:
                prop_desc = 'stack size: <corrupt length: 0x%x>' % sz
        elif t == 'GNU_PROPERTY_NO_COPY_ON_PROTECTED':
            # This property carries no payload at all.
            if sz != 0:
                prop_desc = ' <corrupt length: 0x%x>' % sz
            else:
                prop_desc = 'no copy on protected'
        elif t == 'GNU_PROPERTY_X86_FEATURE_1_AND':
            # Payload is a single 32-bit feature bitmask.
            if sz != 4:
                prop_desc = ' <corrupt length: 0x%x>' % sz
            else:
                prop_desc = describe_note_gnu_property_x86_feature_1(d)
        elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC:
            prop_desc = '<processor-specific type 0x%x data: %s >' % (t, bytes2hex(d, sep=' '))
        elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER:
            prop_desc = '<application-specific type 0x%x data: %s >' % (t, bytes2hex(d, sep=' '))
        else:
            prop_desc = '<unknown type 0x%x data: %s >' % (t, bytes2hex(d, sep=' '))
        descriptions.append(prop_desc)
    return '\n '.join(descriptions)
#-------------------------------------------------------------------------------
_unknown = '<unknown>'
_DESCR_EI_CLASS = dict(
ELFCLASSNONE='none',
ELFCLASS32='ELF32',
ELFCLASS64='ELF64',
)
_DESCR_EI_DATA = dict(
ELFDATANONE='none',
ELFDATA2LSB="2's complement, little endian",
ELFDATA2MSB="2's complement, big endian",
)
_DESCR_EI_OSABI = dict(
ELFOSABI_SYSV='UNIX - System V',
ELFOSABI_HPUX='UNIX - HP-UX',
ELFOSABI_NETBSD='UNIX - NetBSD',
ELFOSABI_LINUX='UNIX - Linux',
ELFOSABI_HURD='UNIX - GNU/Hurd',
ELFOSABI_SOLARIS='UNIX - Solaris',
ELFOSABI_AIX='UNIX - AIX',
ELFOSABI_IRIX='UNIX - IRIX',
ELFOSABI_FREEBSD='UNIX - FreeBSD',
ELFOSABI_TRU64='UNIX - TRU64',
ELFOSABI_MODESTO='Novell - Modesto',
ELFOSABI_OPENBSD='UNIX - OpenBSD',
ELFOSABI_OPENVMS='VMS - OpenVMS',
ELFOSABI_NSK='HP - Non-Stop Kernel',
ELFOSABI_AROS='AROS',
ELFOSABI_FENIXOS='Fenix OS',
ELFOSABI_CLOUD='Nuxi - CloudABI',
ELFOSABI_SORTIX='Sortix',
ELFOSABI_ARM_AEABI='ARM - EABI',
ELFOSABI_ARM='ARM - ABI',
ELFOSABI_CELL_LV2='CellOS Lv-2',
ELFOSABI_STANDALONE='Standalone App',
)
_DESCR_E_TYPE = dict(
ET_NONE='NONE (None)',
ET_REL='REL (Relocatable file)',
ET_EXEC='EXEC (Executable file)',
ET_DYN='DYN (Shared object file)',
ET_CORE='CORE (Core file)',
PROC_SPECIFIC='Processor Specific',
)
_DESCR_E_MACHINE = dict(
EM_NONE='None',
EM_M32='WE32100',
EM_SPARC='Sparc',
EM_386='Intel 80386',
EM_68K='MC68000',
EM_88K='MC88000',
EM_860='Intel 80860',
EM_MIPS='MIPS R3000',
EM_S370='IBM System/370',
EM_MIPS_RS4_BE='MIPS 4000 big-endian',
EM_IA_64='Intel IA-64',
EM_X86_64='Advanced Micro Devices X86-64',
EM_AVR='Atmel AVR 8-bit microcontroller',
EM_ARM='ARM',
EM_AARCH64='AArch64',
EM_BLACKFIN='Analog Devices Blackfin',
EM_PPC='PowerPC',
EM_PPC64='PowerPC64',
RESERVED='RESERVED',
)
_DESCR_P_TYPE = dict(
PT_NULL='NULL',
PT_LOAD='LOAD',
PT_DYNAMIC='DYNAMIC',
PT_INTERP='INTERP',
PT_NOTE='NOTE',
PT_SHLIB='SHLIB',
PT_PHDR='PHDR',
PT_GNU_EH_FRAME='GNU_EH_FRAME',
PT_GNU_STACK='GNU_STACK',
PT_GNU_RELRO='GNU_RELRO',
PT_GNU_PROPERTY='GNU_PROPERTY',
PT_ARM_ARCHEXT='ARM_ARCHEXT',
PT_ARM_EXIDX='EXIDX', # binutils calls this EXIDX, not ARM_EXIDX
PT_AARCH64_ARCHEXT='AARCH64_ARCHEXT',
PT_AARCH64_UNWIND='AARCH64_UNWIND',
PT_TLS='TLS',
PT_MIPS_ABIFLAGS='ABIFLAGS'
)
_DESCR_P_FLAGS = {
P_FLAGS.PF_X: 'E',
P_FLAGS.PF_R: 'R',
P_FLAGS.PF_W: 'W',
}
_DESCR_SH_TYPE = dict(
SHT_NULL='NULL',
SHT_PROGBITS='PROGBITS',
SHT_SYMTAB='SYMTAB',
SHT_STRTAB='STRTAB',
SHT_RELA='RELA',
SHT_HASH='HASH',
SHT_DYNAMIC='DYNAMIC',
SHT_NOTE='NOTE',
SHT_NOBITS='NOBITS',
SHT_REL='REL',
SHT_SHLIB='SHLIB',
SHT_DYNSYM='DYNSYM',
SHT_INIT_ARRAY='INIT_ARRAY',
SHT_FINI_ARRAY='FINI_ARRAY',
SHT_PREINIT_ARRAY='PREINIT_ARRAY',
SHT_GNU_ATTRIBUTES='GNU_ATTRIBUTES',
SHT_GNU_HASH='GNU_HASH',
SHT_GROUP='GROUP',
SHT_SYMTAB_SHNDX='SYMTAB SECTION INDICIES',
SHT_GNU_verdef='VERDEF',
SHT_GNU_verneed='VERNEED',
SHT_GNU_versym='VERSYM',
SHT_GNU_LIBLIST='GNU_LIBLIST',
SHT_ARM_EXIDX='ARM_EXIDX',
SHT_ARM_PREEMPTMAP='ARM_PREEMPTMAP',
SHT_ARM_ATTRIBUTES='ARM_ATTRIBUTES',
SHT_ARM_DEBUGOVERLAY='ARM_DEBUGOVERLAY',
SHT_MIPS_LIBLIST='MIPS_LIBLIST',
SHT_MIPS_DEBUG='MIPS_DEBUG',
SHT_MIPS_REGINFO='MIPS_REGINFO',
SHT_MIPS_PACKAGE='MIPS_PACKAGE',
SHT_MIPS_PACKSYM='MIPS_PACKSYM',
SHT_MIPS_RELD='MIPS_RELD',
SHT_MIPS_IFACE='MIPS_IFACE',
SHT_MIPS_CONTENT='MIPS_CONTENT',
SHT_MIPS_OPTIONS='MIPS_OPTIONS',
SHT_MIPS_SHDR='MIPS_SHDR',
SHT_MIPS_FDESC='MIPS_FDESC',
SHT_MIPS_EXTSYM='MIPS_EXTSYM',
SHT_MIPS_DENSE='MIPS_DENSE',
SHT_MIPS_PDESC='MIPS_PDESC',
SHT_MIPS_LOCSYM='MIPS_LOCSYM',
SHT_MIPS_AUXSYM='MIPS_AUXSYM',
SHT_MIPS_OPTSYM='MIPS_OPTSYM',
SHT_MIPS_LOCSTR='MIPS_LOCSTR',
SHT_MIPS_LINE='MIPS_LINE',
SHT_MIPS_RFDESC='MIPS_RFDESC',
SHT_MIPS_DELTASYM='MIPS_DELTASYM',
SHT_MIPS_DELTAINST='MIPS_DELTAINST',
SHT_MIPS_DELTACLASS='MIPS_DELTACLASS',
SHT_MIPS_DWARF='MIPS_DWARF',
SHT_MIPS_DELTADECL='MIPS_DELTADECL',
SHT_MIPS_SYMBOL_LIB='MIPS_SYMBOL_LIB',
SHT_MIPS_EVENTS='MIPS_EVENTS',
SHT_MIPS_TRANSLATE='MIPS_TRANSLATE',
SHT_MIPS_PIXIE='MIPS_PIXIE',
SHT_MIPS_XLATE='MIPS_XLATE',
SHT_MIPS_XLATE_DEBUG='MIPS_XLATE_DEBUG',
SHT_MIPS_WHIRL='MIPS_WHIRL',
SHT_MIPS_EH_REGION='MIPS_EH_REGION',
SHT_MIPS_XLATE_OLD='MIPS_XLATE_OLD',
SHT_MIPS_PDR_EXCEPTION='MIPS_PDR_EXCEPTION',
SHT_MIPS_ABIFLAGS='MIPS_ABIFLAGS',
)
_DESCR_SH_FLAGS = {
SH_FLAGS.SHF_WRITE: 'W',
SH_FLAGS.SHF_ALLOC: 'A',
SH_FLAGS.SHF_EXECINSTR: 'X',
SH_FLAGS.SHF_MERGE: 'M',
SH_FLAGS.SHF_STRINGS: 'S',
SH_FLAGS.SHF_INFO_LINK: 'I',
SH_FLAGS.SHF_LINK_ORDER: 'L',
SH_FLAGS.SHF_OS_NONCONFORMING: 'O',
SH_FLAGS.SHF_GROUP: 'G',
SH_FLAGS.SHF_TLS: 'T',
SH_FLAGS.SHF_MASKOS: 'o',
SH_FLAGS.SHF_EXCLUDE: 'E',
}
_DESCR_RH_FLAGS = {
RH_FLAGS.RHF_NONE: 'NONE',
RH_FLAGS.RHF_QUICKSTART: 'QUICKSTART',
RH_FLAGS.RHF_NOTPOT: 'NOTPOT',
RH_FLAGS.RHF_NO_LIBRARY_REPLACEMENT: 'NO_LIBRARY_REPLACEMENT',
RH_FLAGS.RHF_NO_MOVE: 'NO_MOVE',
RH_FLAGS.RHF_SGI_ONLY: 'SGI_ONLY',
RH_FLAGS.RHF_GUARANTEE_INIT: 'GUARANTEE_INIT',
RH_FLAGS.RHF_DELTA_C_PLUS_PLUS: 'DELTA_C_PLUS_PLUS',
RH_FLAGS.RHF_GUARANTEE_START_INIT: 'GUARANTEE_START_INIT',
RH_FLAGS.RHF_PIXIE: 'PIXIE',
RH_FLAGS.RHF_DEFAULT_DELAY_LOAD: 'DEFAULT_DELAY_LOAD',
RH_FLAGS.RHF_REQUICKSTART: 'REQUICKSTART',
RH_FLAGS.RHF_REQUICKSTARTED: 'REQUICKSTARTED',
RH_FLAGS.RHF_CORD: 'CORD',
RH_FLAGS.RHF_NO_UNRES_UNDEF: 'NO_UNRES_UNDEF',
RH_FLAGS.RHF_RLD_ORDER_SAFE: 'RLD_ORDER_SAFE',
}
_DESCR_ST_INFO_TYPE = dict(
STT_NOTYPE='NOTYPE',
STT_OBJECT='OBJECT',
STT_FUNC='FUNC',
STT_SECTION='SECTION',
STT_FILE='FILE',
STT_COMMON='COMMON',
STT_TLS='TLS',
STT_NUM='NUM',
STT_RELC='RELC',
STT_SRELC='SRELC',
)
_DESCR_ST_INFO_BIND = dict(
STB_LOCAL='LOCAL',
STB_GLOBAL='GLOBAL',
STB_WEAK='WEAK',
)
_DESCR_ST_VISIBILITY = dict(
STV_DEFAULT='DEFAULT',
STV_INTERNAL='INTERNAL',
STV_HIDDEN='HIDDEN',
STV_PROTECTED='PROTECTED',
STV_EXPORTED='EXPORTED',
STV_SINGLETON='SINGLETON',
STV_ELIMINATE='ELIMINATE',
)
_DESCR_ST_SHNDX = dict(
SHN_UNDEF='UND',
SHN_ABS='ABS',
SHN_COMMON='COM',
)
_DESCR_SYMINFO_FLAGS = {
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DIRECT: 'D',
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DIRECTBIND: 'B',
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_COPY: 'C',
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_LAZYLOAD: 'L',
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_NOEXTDIRECT: 'N',
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_AUXILIARY: 'A',
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_FILTER: 'F',
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_INTERPOSE: 'I',
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_CAP: 'S',
SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DEFERRED: 'P',
}
_DESCR_SYMINFO_BOUNDTO = dict(
SYMINFO_BT_SELF='<self>',
SYMINFO_BT_PARENT='<parent>',
SYMINFO_BT_NONE='',
SYMINFO_BT_EXTERN='<extern>',
)
_DESCR_VER_FLAGS = {
0: '',
VER_FLAGS.VER_FLG_BASE: 'BASE',
VER_FLAGS.VER_FLG_WEAK: 'WEAK',
VER_FLAGS.VER_FLG_INFO: 'INFO',
}
# PT_NOTE section types
_DESCR_NOTE_N_TYPE = dict(
NT_GNU_ABI_TAG='ABI version tag',
NT_GNU_HWCAP='DSO-supplied software HWCAP info',
NT_GNU_BUILD_ID='unique build ID bitstring',
NT_GNU_GOLD_VERSION='gold version',
NT_GNU_PROPERTY_TYPE_0='program properties'
)
# Values in GNU .note.ABI-tag notes (n_type=='NT_GNU_ABI_TAG')
_DESCR_NOTE_ABI_TAG_OS = dict(
ELF_NOTE_OS_LINUX='Linux',
ELF_NOTE_OS_GNU='GNU',
ELF_NOTE_OS_SOLARIS2='Solaris 2',
ELF_NOTE_OS_FREEBSD='FreeBSD',
ELF_NOTE_OS_NETBSD='NetBSD',
ELF_NOTE_OS_SYLLABLE='Syllable',
)
# Values in GNU .note.gnu.property notes (n_type=='NT_GNU_PROPERTY_TYPE_0') have
# different formats which need to be parsed/described differently
_DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC=0xc0000000
_DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC=0xdfffffff
_DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER=0xe0000000
_DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER=0xffffffff
# Bit masks for GNU_PROPERTY_X86_FEATURE_1_xxx flags in the form
# (mask, flag_description) in the desired output order
_DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS = (
(1, 'IBT'),
(2, 'SHSTK'),
(4, 'LAM_U48'),
(8, 'LAM_U57'),
)
def _reverse_dict(d, low_priority=()):
    """ Build the inverse mapping of *d*: {k: v} becomes {v: k}.

        Keys listed in *low_priority* never overwrite an inverse entry that
        another key has already claimed for the same value.
    """
    inverse = {}
    for key, value in iteritems(d):
        if value in inverse and key in low_priority:
            continue
        inverse[value] = key
    return inverse
_DESCR_RELOC_TYPE_i386 = _reverse_dict(ENUM_RELOC_TYPE_i386)
_DESCR_RELOC_TYPE_x64 = _reverse_dict(ENUM_RELOC_TYPE_x64)
_DESCR_RELOC_TYPE_ARM = _reverse_dict(ENUM_RELOC_TYPE_ARM)
_DESCR_RELOC_TYPE_AARCH64 = _reverse_dict(ENUM_RELOC_TYPE_AARCH64)
_DESCR_RELOC_TYPE_PPC64 = _reverse_dict(ENUM_RELOC_TYPE_PPC64)
_DESCR_RELOC_TYPE_MIPS = _reverse_dict(ENUM_RELOC_TYPE_MIPS)
_low_priority_D_TAG = (
# these are 'meta-tags' marking semantics of numeric ranges of the enum
# they should not override other tags with the same numbers
# see https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-42444.html
'DT_LOOS',
'DT_HIOS',
'DT_LOPROC',
'DT_HIPROC',
'DT_ENCODING',
)
_DESCR_D_TAG = _reverse_dict(ENUM_D_TAG, low_priority=_low_priority_D_TAG)
_DESCR_ATTR_TAG_ARM = dict(
TAG_FILE='File Attributes',
TAG_SECTION='Section Attributes:',
TAG_SYMBOL='Symbol Attributes:',
TAG_CPU_RAW_NAME='Tag_CPU_raw_name: ',
TAG_CPU_NAME='Tag_CPU_name: ',
TAG_CPU_ARCH='Tag_CPU_arch: ',
TAG_CPU_ARCH_PROFILE='Tag_CPU_arch_profile: ',
TAG_ARM_ISA_USE='Tag_ARM_ISA_use: ',
TAG_THUMB_ISA_USE='Tag_Thumb_ISA_use: ',
TAG_FP_ARCH='Tag_FP_arch: ',
TAG_WMMX_ARCH='Tag_WMMX_arch: ',
TAG_ADVANCED_SIMD_ARCH='Tag_Advanced_SIMD_arch: ',
TAG_PCS_CONFIG='Tag_PCS_config: ',
TAG_ABI_PCS_R9_USE='Tag_ABI_PCS_R9_use: ',
TAG_ABI_PCS_RW_DATA='Tag_ABI_PCS_RW_use: ',
TAG_ABI_PCS_RO_DATA='Tag_ABI_PCS_RO_use: ',
TAG_ABI_PCS_GOT_USE='Tag_ABI_PCS_GOT_use: ',
TAG_ABI_PCS_WCHAR_T='Tag_ABI_PCS_wchar_t: ',
TAG_ABI_FP_ROUNDING='Tag_ABI_FP_rounding: ',
TAG_ABI_FP_DENORMAL='Tag_ABI_FP_denormal: ',
TAG_ABI_FP_EXCEPTIONS='Tag_ABI_FP_exceptions: ',
TAG_ABI_FP_USER_EXCEPTIONS='Tag_ABI_FP_user_exceptions: ',
TAG_ABI_FP_NUMBER_MODEL='Tag_ABI_FP_number_model: ',
TAG_ABI_ALIGN_NEEDED='Tag_ABI_align_needed: ',
TAG_ABI_ALIGN_PRESERVED='Tag_ABI_align_preserved: ',
TAG_ABI_ENUM_SIZE='Tag_ABI_enum_size: ',
TAG_ABI_HARDFP_USE='Tag_ABI_HardFP_use: ',
TAG_ABI_VFP_ARGS='Tag_ABI_VFP_args: ',
TAG_ABI_WMMX_ARGS='Tag_ABI_WMMX_args: ',
TAG_ABI_OPTIMIZATION_GOALS='Tag_ABI_optimization_goals: ',
TAG_ABI_FP_OPTIMIZATION_GOALS='Tag_ABI_FP_optimization_goals: ',
TAG_COMPATIBILITY='Tag_compatibility: ',
TAG_CPU_UNALIGNED_ACCESS='Tag_CPU_unaligned_access: ',
TAG_FP_HP_EXTENSION='Tag_FP_HP_extension: ',
TAG_ABI_FP_16BIT_FORMAT='Tag_ABI_FP_16bit_format: ',
TAG_MPEXTENSION_USE='Tag_MPextension_use: ',
TAG_DIV_USE='Tag_DIV_use: ',
TAG_NODEFAULTS='Tag_nodefaults: ',
TAG_ALSO_COMPATIBLE_WITH='Tag_also_compatible_with: ',
TAG_T2EE_USE='Tag_T2EE_use: ',
TAG_CONFORMANCE='Tag_conformance: ',
TAG_VIRTUALIZATION_USE='Tag_Virtualization_use: ',
TAG_MPEXTENSION_USE_OLD='Tag_MPextension_use_old: ',
)
_DESCR_ATTR_VAL_ARM = [
None, #1
None, #2
None, #3
None, #4
None, #5
{ #6 TAG_CPU_ARCH
0 : 'Pre-v4',
1 : 'v4',
2 : 'v4T',
3 : 'v5T',
4 : 'v5TE',
5 : 'v5TEJ',
6 : 'v6',
7 : 'v6KZ',
8 : 'v6T2',
9 : 'v6K',
10: 'v7',
11: 'v6-M',
12: 'v6S-M',
13: 'v7E-M',
14: 'v8',
15: 'v8-R',
16: 'v8-M.baseline',
17: 'v8-M.mainline',
},
{ #7 TAG_CPU_ARCH_PROFILE
0x00: 'None',
0x41: 'Application',
0x52: 'Realtime',
0x4D: 'Microcontroller',
0x53: 'Application or Realtime',
},
{ #8 TAG_ARM_ISA
0: 'No',
1: 'Yes',
},
{ #9 TAG_THUMB_ISA
0: 'No',
1: 'Thumb-1',
2: 'Thumb-2',
3: 'Yes',
},
{ #10 TAG_FP_ARCH
0: 'No',
1: 'VFPv1',
2: 'VFPv2 ',
3: 'VFPv3',
4: 'VFPv3-D16',
5: 'VFPv4',
6: 'VFPv4-D16',
7: 'FP ARM v8',
8: 'FPv5/FP-D16 for ARMv8',
},
{ #11 TAG_WMMX_ARCH
0: 'No',
1: 'WMMXv1',
2: 'WMMXv2',
},
{ #12 TAG_ADVANCED_SIMD_ARCH
0: 'No',
1: 'NEONv1',
2: 'NEONv1 with Fused-MAC',
3: 'NEON for ARMv8',
4: 'NEON for ARMv8.1',
},
{ #13 TAG_PCS_CONFIG
0: 'None',
1: 'Bare platform',
2: 'Linux application',
3: 'Linux DSO',
4: 'PalmOS 2004',
5: 'PalmOS (reserved)',
6: 'SymbianOS 2004',
7: 'SymbianOS (reserved)',
},
{ #14 TAG_ABI_PCS_R9_USE
0: 'v6',
1: 'SB',
2: 'TLS',
3: 'Unused',
},
{ #15 TAG_ABI_PCS_RW_DATA
0: 'Absolute',
1: 'PC-relative',
2: 'SB-relative',
3: 'None',
},
{ #16 TAG_ABI_PCS_RO_DATA
0: 'Absolute',
1: 'PC-relative',
2: 'None',
},
{ #17 TAG_ABI_PCS_GOT_USE
0: 'None',
1: 'direct',
2: 'GOT-indirect',
},
{ #18 TAG_ABI_PCS_WCHAR_T
0: 'None',
1: '??? 1',
2: '2',
3: '??? 3',
4: '4',
},
{ #19 TAG_ABI_FP_ROUNDING
0: 'Unused',
1: 'Needed',
},
{ #20 TAG_ABI_FP_DENORMAL
0: 'Unused',
1: 'Needed',
2: 'Sign only',
},
{ #21 TAG_ABI_FP_EXCEPTIONS
0: 'Unused',
1: 'Needed',
},
{ #22 TAG_ABI_FP_USER_EXCEPTIONS
0: 'Unused',
1: 'Needed',
},
{ #23 TAG_ABI_FP_NUMBER_MODEL
0: 'Unused',
1: 'Finite',
2: 'RTABI',
3: 'IEEE 754',
},
{ #24 TAG_ABI_ALIGN_NEEDED
0: 'None',
1: '8-byte',
2: '4-byte',
3: '??? 3',
},
{ #25 TAG_ABI_ALIGN_PRESERVED
0: 'None',
1: '8-byte, except leaf SP',
2: '8-byte',
3: '??? 3',
},
{ #26 TAG_ABI_ENUM_SIZE
0: 'Unused',
1: 'small',
2: 'int',
3: 'forced to int',
},
{ #27 TAG_ABI_HARDFP_USE
0: 'As Tag_FP_arch',
1: 'SP only',
2: 'Reserved',
3: 'Deprecated',
},
{ #28 TAG_ABI_VFP_ARGS
0: 'AAPCS',
1: 'VFP registers',
2: 'custom',
3: 'compatible',
},
{ #29 TAG_ABI_WMMX_ARGS
0: 'AAPCS',
1: 'WMMX registers',
2: 'custom',
},
{ #30 TAG_ABI_OPTIMIZATION_GOALS
0: 'None',
1: 'Prefer Speed',
2: 'Aggressive Speed',
3: 'Prefer Size',
4: 'Aggressive Size',
5: 'Prefer Debug',
6: 'Aggressive Debug',
},
{ #31 TAG_ABI_FP_OPTIMIZATION_GOALS
0: 'None',
1: 'Prefer Speed',
2: 'Aggressive Speed',
3: 'Prefer Size',
4: 'Aggressive Size',
5: 'Prefer Accuracy',
6: 'Aggressive Accuracy',
},
{ #32 TAG_COMPATIBILITY
0: 'No',
1: 'Yes',
},
None, #33
{ #34 TAG_CPU_UNALIGNED_ACCESS
0: 'None',
1: 'v6',
},
None, #35
{ #36 TAG_FP_HP_EXTENSION
0: 'Not Allowed',
1: 'Allowed',
},
None, #37
{ #38 TAG_ABI_FP_16BIT_FORMAT
0: 'None',
1: 'IEEE 754',
2: 'Alternative Format',
},
None, #39
None, #40
None, #41
{ #42 TAG_MPEXTENSION_USE
0: 'Not Allowed',
1: 'Allowed',
},
None, #43
{ #44 TAG_DIV_USE
0: 'Allowed in Thumb-ISA, v7-R or v7-M',
1: 'Not allowed',
2: 'Allowed in v7-A with integer division extension',
},
None, #45
None, #46
None, #47
None, #48
None, #49
None, #50
None, #51
None, #52
None, #53
None, #54
None, #55
None, #56
None, #57
None, #58
None, #59
None, #60
None, #61
None, #62
None, #63
None, #64
None, #65
{ #66 TAG_FP_HP_EXTENSION
0: 'Not Allowed',
1: 'Allowed',
},
None, #67
{ #68 TAG_VIRTUALIZATION_USE
0: 'Not Allowed',
1: 'TrustZone',
2: 'Virtualization Extensions',
3: 'TrustZone and Virtualization Extensions',
},
None, #69
{ #70 TAG_MPEXTENSION_USE_OLD
0: 'Not Allowed',
1: 'Allowed',
},
]

View File

@@ -1,352 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/dynamic.py
#
# ELF Dynamic Tags
#
# Mike Frysinger (vapier@gentoo.org)
# This code is in the public domain
#-------------------------------------------------------------------------------
import itertools
from collections import defaultdict
from .hash import ELFHashTable, GNUHashTable
from .sections import Section, Symbol
from .enums import ENUM_D_TAG
from .segments import Segment
from .relocation import RelocationTable
from ..common.exceptions import ELFError
from ..common.utils import elf_assert, struct_parse, parse_cstring_from_stream
class _DynamicStringTable(object):
    """ Minimal string table located purely through ELF dynamic tags and
        loadable segments. Supports get_string() and nothing more.
    """
    def __init__(self, stream, table_offset):
        self._stream = stream
        self._table_offset = table_offset

    def get_string(self, offset):
        """ Fetch the NUL-terminated string at |offset| within this table.
        """
        raw = parse_cstring_from_stream(
            self._stream, self._table_offset + offset)
        return raw.decode('utf-8') if raw else ''
class DynamicTag(object):
    """ Dynamic Tag object - representing a single dynamic tag entry from a
        dynamic section.

        Allows dictionary-like access to the dynamic structure. For special
        tags (those listed in the _HANDLED_TAGS set below), creates additional
        attributes for convenience. For example, .soname will contain the
        actual value of DT_SONAME (fetched from the dynamic symbol table).
    """
    # Tags whose d_val is an index into the dynamic string table; each one
    # gets a convenience attribute holding the resolved string.
    _HANDLED_TAGS = frozenset(
        ['DT_NEEDED', 'DT_RPATH', 'DT_RUNPATH', 'DT_SONAME',
         'DT_SUNW_FILTER'])

    def __init__(self, entry, stringtable):
        if stringtable is None:
            raise ELFError('Creating DynamicTag without string table')
        self.entry = entry
        tag_name = entry.d_tag
        if tag_name in self._HANDLED_TAGS:
            # e.g. DT_SONAME -> .soname, resolved through the string table.
            setattr(self, tag_name[3:].lower(),
                    stringtable.get_string(entry.d_val))

    def __getitem__(self, name):
        """ Dict-like access to the underlying entry's fields.
        """
        return self.entry[name]

    def __repr__(self):
        return '<DynamicTag (%s): %r>' % (self.entry.d_tag, self.entry)

    def __str__(self):
        tag = self.entry.d_tag
        if tag not in self._HANDLED_TAGS:
            value = '%#x' % self.entry.d_ptr
        else:
            value = '"%s"' % getattr(self, tag[3:].lower())
        return '<DynamicTag (%s) %s>' % (tag, value)
class Dynamic(object):
    """ Shared functionality between dynamic sections and segments.
    """
    def __init__(self, stream, elffile, stringtable, position, empty):
        """
        stream:
            The file-like object from which to load data
        elffile:
            The parent elffile object
        stringtable:
            A stringtable reference to use for parsing string references in
            entries
        position:
            The file offset of the dynamic segment/section
        empty:
            Whether this is a degenerate case with zero entries. Normally, every
            dynamic table will have at least one entry, the DT_NULL terminator.
        """
        self.elffile = elffile
        self.elfstructs = elffile.structs
        self._stream = stream
        # -1 means "not counted yet"; num_tags() computes and caches it.
        self._num_tags = -1 if not empty else 0
        self._offset = position
        self._tagsize = self.elfstructs.Elf_Dyn.sizeof()
        self._empty = empty

        # Do not access this directly yourself; use _get_stringtable() instead.
        self._stringtable = stringtable

    def get_table_offset(self, tag_name):
        """ Return the virtual address and file offset of a dynamic table
            named by |tag_name| (e.g. 'DT_STRTAB'). Either element of the
            returned (ptr, offset) pair may be None if not found.
        """
        # Take d_ptr from the first matching tag only.
        ptr = None
        for tag in self._iter_tags(type=tag_name):
            ptr = tag['d_ptr']
            break

        # If we found a virtual address, locate the offset in the file
        # by using the program headers.
        # NOTE(review): a table at virtual address 0 is treated as absent
        # here (`if ptr:`) — presumably intentional; confirm.
        offset = None
        if ptr:
            offset = next(self.elffile.address_offsets(ptr), None)
        return ptr, offset

    def _get_stringtable(self):
        """ Return a string table for looking up dynamic tag related strings.
            This won't be a "full" string table object, but will at least
            support the get_string() function.
        """
        if self._stringtable:
            return self._stringtable

        # If the ELF has stripped its section table (which is unusual, but
        # perfectly valid), we need to use the dynamic tags to locate the
        # dynamic string table.
        _, table_offset = self.get_table_offset('DT_STRTAB')
        if table_offset is not None:
            self._stringtable = _DynamicStringTable(self._stream, table_offset)
            return self._stringtable

        # That didn't work for some reason. Let's use the section header
        # even though this ELF is super weird.
        self._stringtable = self.elffile.get_section_by_name('.dynstr')
        return self._stringtable

    def _iter_tags(self, type=None):
        """ Yield all raw tags (limit to |type| if specified)
        """
        if self._empty:
            return
        # Iterate until (and including) the DT_NULL terminator.
        for n in itertools.count():
            tag = self._get_tag(n)
            if type is None or tag['d_tag'] == type:
                yield tag
            if tag['d_tag'] == 'DT_NULL':
                break

    def iter_tags(self, type=None):
        """ Yield all tags (limit to |type| if specified), wrapped as
            DynamicTag objects.
        """
        for tag in self._iter_tags(type=type):
            yield DynamicTag(tag, self._get_stringtable())

    def _get_tag(self, n):
        """ Get the raw tag at index #n from the file
        """
        # Bounds check only once the count is known (num_tags() caches it).
        if self._num_tags != -1 and n >= self._num_tags:
            raise IndexError(n)
        offset = self._offset + n * self._tagsize
        return struct_parse(
            self.elfstructs.Elf_Dyn,
            self._stream,
            stream_pos=offset)

    def get_tag(self, n):
        """ Get the tag at index #n from the file (DynamicTag object)
        """
        return DynamicTag(self._get_tag(n), self._get_stringtable())

    def num_tags(self):
        """ Number of dynamic tags in the file, including the DT_NULL tag
        """
        if self._num_tags != -1:
            return self._num_tags

        # Scan forward until DT_NULL; cache the count for later calls.
        for n in itertools.count():
            tag = self.get_tag(n)
            if tag.entry.d_tag == 'DT_NULL':
                self._num_tags = n + 1
                return self._num_tags

    def get_relocation_tables(self):
        """ Load all available relocation tables from DYNAMIC tags.

            Returns a dictionary mapping found table types (REL, RELA,
            JMPREL) to RelocationTable objects.
        """
        result = {}

        if list(self.iter_tags('DT_REL')):
            result['REL'] = RelocationTable(self.elffile,
                self.get_table_offset('DT_REL')[1],
                next(self.iter_tags('DT_RELSZ'))['d_val'], False)

            # Sanity check: DT_RELENT must match the struct size we parse with.
            relentsz = next(self.iter_tags('DT_RELENT'))['d_val']
            elf_assert(result['REL'].entry_size == relentsz,
                'Expected DT_RELENT to be %s' % relentsz)

        if list(self.iter_tags('DT_RELA')):
            result['RELA'] = RelocationTable(self.elffile,
                self.get_table_offset('DT_RELA')[1],
                next(self.iter_tags('DT_RELASZ'))['d_val'], True)

            relentsz = next(self.iter_tags('DT_RELAENT'))['d_val']
            elf_assert(result['RELA'].entry_size == relentsz,
                'Expected DT_RELAENT to be %s' % relentsz)

        if list(self.iter_tags('DT_JMPREL')):
            # DT_PLTREL tells whether the PLT relocs are RELA or REL.
            result['JMPREL'] = RelocationTable(self.elffile,
                self.get_table_offset('DT_JMPREL')[1],
                next(self.iter_tags('DT_PLTRELSZ'))['d_val'],
                next(self.iter_tags('DT_PLTREL'))['d_val'] == ENUM_D_TAG['DT_RELA'])

        return result
class DynamicSection(Section, Dynamic):
    """ ELF dynamic table section. Knows how to process the list of tags.
    """
    def __init__(self, header, name, elffile):
        # The linked string table is named by the sh_link header field.
        strtab = elffile.get_section(header['sh_link'])
        Section.__init__(self, header, name, elffile)
        Dynamic.__init__(self, self.stream, self.elffile, strtab,
                         self['sh_offset'], self['sh_type'] == 'SHT_NOBITS')
class DynamicSegment(Segment, Dynamic):
    """ ELF dynamic table segment. Knows how to process the list of tags.
    """
    def __init__(self, header, stream, elffile):
        # The string table section to be used to resolve string names in
        # the dynamic tag array is the one pointed at by the sh_link field
        # of the dynamic section header.
        # So we must look for the dynamic section contained in the dynamic
        # segment, we do so by searching for the dynamic section whose content
        # is located at the same offset as the dynamic segment
        stringtable = None
        for section in elffile.iter_sections():
            if (isinstance(section, DynamicSection) and
                    section['sh_offset'] == header['p_offset']):
                stringtable = elffile.get_section(section['sh_link'])
                break
        Segment.__init__(self, header, stream)
        Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'],
                         self['p_filesz'] == 0)
        self._symbol_size = self.elfstructs.Elf_Sym.sizeof()
        # Lazily-computed caches; filled in by num_symbols() and
        # get_symbol_by_name() respectively.
        self._num_symbols = None
        self._symbol_name_map = None

    def num_symbols(self):
        """ Number of symbols in the table recovered from DT_SYMTAB.

            Tries, in order: DT_GNU_HASH, DT_HASH, and finally bounding
            DT_SYMTAB by the nearest higher table pointer (or the end of
            its containing segment).
        """
        if self._num_symbols is not None:
            return self._num_symbols

        # Check if a DT_GNU_HASH tag exists and recover the number of symbols
        # from the corresponding hash table
        _, gnu_hash_offset = self.get_table_offset('DT_GNU_HASH')
        if gnu_hash_offset is not None:
            hash_section = GNUHashTable(self.elffile, gnu_hash_offset, self)
            self._num_symbols = hash_section.get_number_of_symbols()

        # If DT_GNU_HASH did not exist, maybe we can use DT_HASH
        if self._num_symbols is None:
            _, hash_offset = self.get_table_offset('DT_HASH')
            if hash_offset is not None:
                # Get the hash table from the DT_HASH offset
                hash_section = ELFHashTable(self.elffile, hash_offset, self)
                self._num_symbols = hash_section.get_number_of_symbols()

        if self._num_symbols is None:
            # Find closest higher pointer than tab_ptr. We'll use that to mark
            # the end of the symbol table.
            tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB')
            if tab_ptr is None or tab_offset is None:
                raise ELFError('Segment does not contain DT_SYMTAB.')
            nearest_ptr = None
            for tag in self.iter_tags():
                tag_ptr = tag['d_ptr']
                if tag['d_tag'] == 'DT_SYMENT':
                    if self._symbol_size != tag['d_val']:
                        # DT_SYMENT is the size of one symbol entry. It must be
                        # the same as returned by Elf_Sym.sizeof.
                        raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' %
                                       (tag['d_val'], self._symbol_size))
                if (tag_ptr > tab_ptr and
                        (nearest_ptr is None or nearest_ptr > tag_ptr)):
                    nearest_ptr = tag_ptr

            if nearest_ptr is None:
                # Use the end of segment that contains DT_SYMTAB.
                for segment in self.elffile.iter_segments():
                    if (segment['p_vaddr'] <= tab_ptr and
                            tab_ptr <= (segment['p_vaddr'] + segment['p_filesz'])):
                        nearest_ptr = segment['p_vaddr'] + segment['p_filesz']

            end_ptr = nearest_ptr
            self._num_symbols = (end_ptr - tab_ptr) // self._symbol_size

        if self._num_symbols is None:
            raise ELFError('Cannot determine the end of DT_SYMTAB.')

        return self._num_symbols

    def get_symbol(self, index):
        """ Get the symbol at index #index from the table (Symbol object)
        """
        tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB')
        if tab_ptr is None or tab_offset is None:
            raise ELFError('Segment does not contain DT_SYMTAB.')

        symbol = struct_parse(
            self.elfstructs.Elf_Sym,
            self._stream,
            stream_pos=tab_offset + index * self._symbol_size)

        string_table = self._get_stringtable()
        symbol_name = string_table.get_string(symbol["st_name"])

        return Symbol(symbol, symbol_name)

    def get_symbol_by_name(self, name):
        """ Get a symbol(s) by name. Return None if no symbol by the given name
            exists.
        """
        # The first time this method is called, construct a name to number
        # mapping
        #
        if self._symbol_name_map is None:
            self._symbol_name_map = defaultdict(list)
            for i, sym in enumerate(self.iter_symbols()):
                self._symbol_name_map[sym.name].append(i)
        symnums = self._symbol_name_map.get(name)
        return [self.get_symbol(i) for i in symnums] if symnums else None

    def iter_symbols(self):
        """ Yield all symbols in this dynamic segment. The symbols are usually
            the same as returned by SymbolTableSection.iter_symbols. However,
            in stripped binaries, SymbolTableSection might have been removed.
            This method reads from the mandatory dynamic tag DT_SYMTAB.
        """
        for i in range(self.num_symbols()):
            yield(self.get_symbol(i))

View File

@@ -1,757 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/elffile.py
#
# ELFFile - main class for accessing ELF files
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import io
import struct
import zlib
try:
import resource
PAGESIZE = resource.getpagesize()
except ImportError:
try:
# Windows system
import mmap
PAGESIZE = mmap.PAGESIZE
except ImportError:
# Jython
PAGESIZE = 4096
from ..common.py3compat import BytesIO
from ..common.exceptions import ELFError
from ..common.utils import struct_parse, elf_assert
from .structs import ELFStructs
from .sections import (
Section, StringTableSection, SymbolTableSection,
SymbolTableIndexSection, SUNWSyminfoTableSection, NullSection,
NoteSection, StabSection, ARMAttributesSection)
from .dynamic import DynamicSection, DynamicSegment
from .relocation import RelocationSection, RelocationHandler
from .gnuversions import (
GNUVerNeedSection, GNUVerDefSection,
GNUVerSymSection)
from .segments import Segment, InterpSegment, NoteSegment
from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
from ..ehabi.ehabiinfo import EHABIInfo
from .hash import ELFHashSection, GNUHashSection
from .constants import SHN_INDICES
class ELFFile(object):
""" Creation: the constructor accepts a stream (file-like object) with the
contents of an ELF file.
Accessible attributes:
stream:
The stream holding the data of the file - must be a binary
stream (bytes, not string).
elfclass:
32 or 64 - specifies the word size of the target machine
little_endian:
boolean - specifies the target machine's endianness
elftype:
string or int, either known value of E_TYPE enum defining ELF
type (e.g. executable, dynamic library or core dump) or integral
unparsed value
header:
the complete ELF file header
e_ident_raw:
the raw e_ident field of the header
"""
def __init__(self, stream):
    """ Parse the ELF header from |stream| and set up the structs used
        to parse the rest of the file. The stream must be binary.
    """
    self.stream = stream
    # Determine elfclass/endianness from e_ident before any struct parsing.
    self._identify_file()
    self.structs = ELFStructs(
        little_endian=self.little_endian,
        elfclass=self.elfclass)
    self.structs.create_basic_structs()
    self.header = self._parse_elf_header()
    # Machine/OS-specific structs depend on fields of the just-parsed header.
    self.structs.create_advanced_structs(
        self['e_type'],
        self['e_machine'],
        self['e_ident']['EI_OSABI'])
    self.stream.seek(0)
    self.e_ident_raw = self.stream.read(16)
    self._section_header_stringtable = \
        self._get_section_header_stringtable()
    # Lazily-built name -> index cache; see _make_section_name_map().
    self._section_name_map = None
def num_sections(self):
    """ Number of sections in the file
    """
    if self['e_shoff'] == 0:
        return 0
    # From the ELF ABI documentation at
    # https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html:
    # when the real count is >= SHN_LORESERVE (0xff00), e_shnum is set to
    # SHN_UNDEF (0) and the actual count is stored in the sh_size field of
    # the section header at index 0 (whose sh_size is otherwise 0).
    shnum = self['e_shnum']
    if shnum != 0:
        return shnum
    return self._get_section_header(0)['sh_size']
def get_section(self, n):
    """ Return the section at index #n as a Section object (or subclass).
    """
    return self._make_section(self._get_section_header(n))
def get_section_by_name(self, name):
    """ Get a section from the file, by name. Return None if no such
        section exists.
    """
    # Build the name -> index cache lazily on first use.
    if self._section_name_map is None:
        self._make_section_name_map()
    secnum = self._section_name_map.get(name)
    if secnum is None:
        return None
    return self.get_section(secnum)
def get_section_index(self, section_name):
    """ Get the index of the section named |section_name|, or None if no
        such section exists.
    """
    # Build the name -> index cache lazily on first use.
    if self._section_name_map is None:
        self._make_section_name_map()
    return self._section_name_map.get(section_name)
def iter_sections(self, type=None):
    """ Yield all the sections in the file. If |type| is given, only yield
        sections whose sh_type matches it; the value must be a string with
        the type's name as in the ELF spec, e.g. 'SHT_SYMTAB'.
    """
    for idx in range(self.num_sections()):
        sec = self.get_section(idx)
        if type is not None and sec['sh_type'] != type:
            continue
        yield sec
def num_segments(self):
    """ Number of segments in the file
    """
    # From: https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI
    # Section: 4.1.2 Number of Program Headers
    # When the real count is >= PN_XNUM (0xffff), e_phnum holds PN_XNUM
    # and the actual count is stored in the sh_info field of the section
    # header at index 0.
    phnum = self['e_phnum']
    if phnum >= 0xffff:
        return self.get_section(0)['sh_info']
    return phnum
def get_segment(self, n):
    """ Return the segment at index #n as a Segment object.
    """
    return self._make_segment(self._get_segment_header(n))
def iter_segments(self, type=None):
    """ Yield all the segments in the file. If |type| is given, only yield
        segments whose p_type matches it; the value must be a string with
        the type's name as in the ELF spec, e.g. 'PT_LOAD'.
    """
    for idx in range(self.num_segments()):
        seg = self.get_segment(idx)
        if type is not None and seg['p_type'] != type:
            continue
        yield seg
def address_offsets(self, start, size=1):
    """ Yield a file offset for each ELF segment fully containing the
        memory region [start...start+size).
    """
    end = start + size
    # consider LOAD only to prevent same address being yielded twice
    for seg in self.iter_segments(type='PT_LOAD'):
        vaddr = seg['p_vaddr']
        if vaddr <= start and end <= vaddr + seg['p_filesz']:
            yield start - vaddr + seg['p_offset']
def has_dwarf_info(self):
    """ Check whether this file appears to have debugging information.
        We assume that if it has the .debug_info or .zdebug_info section, it
        has all the other required sections as well.
    """
    debug_markers = ('.debug_info', '.zdebug_info', '.eh_frame')
    return any(self.get_section_by_name(s) for s in debug_markers)
def get_dwarf_info(self, relocate_dwarf_sections=True):
    """ Return a DWARFInfo object representing the debugging information in
        this file.

        If relocate_dwarf_sections is True, relocations for DWARF sections
        are looked up and applied.
    """
    # Expect that has_dwarf_info was called, so at least .debug_info is
    # present.
    # Sections that aren't found will be passed as None to DWARFInfo.
    section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev',
                     '.debug_str', '.debug_line', '.debug_frame',
                     '.debug_loc', '.debug_ranges', '.debug_pubtypes',
                     '.debug_pubnames', '.debug_addr', '.debug_str_offsets')

    # zlib-compressed debug sections are named .zdebug_* instead of .debug_*.
    compressed = bool(self.get_section_by_name('.zdebug_info'))
    if compressed:
        section_names = tuple(map(lambda x: '.z' + x[1:], section_names))

    # As it is loaded in the process image, .eh_frame cannot be compressed
    section_names += ('.eh_frame', )

    # NOTE: this unpack is positional — it must stay in the exact order of
    # the section_names tuple above.
    (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name,
     debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name,
     debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name,
     debug_pubnames_name, debug_addr_name, debug_str_offsets_name,
     eh_frame_sec_name) = section_names

    debug_sections = {}
    for secname in section_names:
        section = self.get_section_by_name(secname)
        if section is None:
            debug_sections[secname] = None
        else:
            dwarf_section = self._read_dwarf_section(
                section,
                relocate_dwarf_sections)
            if compressed and secname.startswith('.z'):
                dwarf_section = self._decompress_dwarf_section(dwarf_section)
            debug_sections[secname] = dwarf_section

    return DWARFInfo(
            config=DwarfConfig(
                little_endian=self.little_endian,
                default_address_size=self.elfclass // 8,
                machine_arch=self.get_machine_arch()),
            debug_info_sec=debug_sections[debug_info_sec_name],
            debug_aranges_sec=debug_sections[debug_aranges_sec_name],
            debug_abbrev_sec=debug_sections[debug_abbrev_sec_name],
            debug_frame_sec=debug_sections[debug_frame_sec_name],
            eh_frame_sec=debug_sections[eh_frame_sec_name],
            debug_str_sec=debug_sections[debug_str_sec_name],
            debug_loc_sec=debug_sections[debug_loc_sec_name],
            debug_ranges_sec=debug_sections[debug_ranges_sec_name],
            debug_line_sec=debug_sections[debug_line_sec_name],
            debug_pubtypes_sec=debug_sections[debug_pubtypes_name],
            debug_pubnames_sec=debug_sections[debug_pubnames_name],
            debug_addr_sec=debug_sections[debug_addr_name],
            debug_str_offsets_sec=debug_sections[debug_str_offsets_name],
            )
def has_ehabi_info(self):
    """ Check whether this file appears to have an ARM exception handler
        index table (.ARM.exidx).
    """
    exidx_sections = self.iter_sections(type='SHT_ARM_EXIDX')
    return any(exidx_sections)
def get_ehabi_infos(self):
    """ Return a list of EHABIInfo objects, one per SHT_ARM_EXIDX section,
        or None if the file has none.

        Generally, a shared library or executable contains one .ARM.exidx
        section, while an object file may contain many.

        Raises ELFError for relocatable (ET_REL) files, which are not
        supported yet.
    """
    if self['e_type'] == 'ET_REL':
        # TODO: support relocatable file
        # Was `assert False, ...`: asserts are stripped under `python -O`,
        # silently disabling this guard. Raise the package's error instead.
        raise ELFError(
            "Current version of pyelftools doesn't support relocatable file.")
    infos = [EHABIInfo(section, self.little_endian)
             for section in self.iter_sections(type='SHT_ARM_EXIDX')]
    return infos if infos else None
def get_machine_arch(self):
    """ Return the machine architecture, as detected from the ELF header.

        Maps the e_machine enum name to a human-readable architecture
        string; unknown values yield '<unknown>'.
    """
    architectures = {
        'EM_M32'           : 'AT&T WE 32100',
        'EM_SPARC'         : 'SPARC',
        'EM_386'           : 'x86',
        'EM_68K'           : 'Motorola 68000',
        'EM_88K'           : 'Motorola 88000',
        'EM_IAMCU'         : 'Intel MCU',
        'EM_860'           : 'Intel 80860',
        'EM_MIPS'          : 'MIPS',
        'EM_S370'          : 'IBM System/370',
        'EM_MIPS_RS3_LE'   : 'MIPS RS3000 Little-endian',
        'EM_PARISC'        : 'Hewlett-Packard PA-RISC',
        'EM_VPP500'        : 'Fujitsu VPP500',
        'EM_SPARC32PLUS'   : 'Enhanced SPARC',
        'EM_960'           : 'Intel 80960',
        'EM_PPC'           : 'PowerPC',
        'EM_PPC64'         : '64-bit PowerPC',
        'EM_S390'          : 'IBM System/390',
        'EM_SPU'           : 'IBM SPU/SPC',
        'EM_V800'          : 'NEC V800',
        'EM_FR20'          : 'Fujitsu FR20',
        'EM_RH32'          : 'TRW RH-32',
        'EM_RCE'           : 'Motorola RCE',
        'EM_ARM'           : 'ARM',
        'EM_ALPHA'         : 'Digital Alpha',
        'EM_SH'            : 'Hitachi SH',
        'EM_SPARCV9'       : 'SPARC Version 9',
        'EM_TRICORE'       : 'Siemens TriCore embedded processor',
        'EM_ARC'           : 'Argonaut RISC Core, Argonaut Technologies Inc.',
        'EM_H8_300'        : 'Hitachi H8/300',
        'EM_H8_300H'       : 'Hitachi H8/300H',
        'EM_H8S'           : 'Hitachi H8S',
        'EM_H8_500'        : 'Hitachi H8/500',
        'EM_IA_64'         : 'Intel IA-64',
        'EM_MIPS_X'        : 'MIPS-X',
        'EM_COLDFIRE'      : 'Motorola ColdFire',
        'EM_68HC12'        : 'Motorola M68HC12',
        'EM_MMA'           : 'Fujitsu MMA',
        'EM_PCP'           : 'Siemens PCP',
        'EM_NCPU'          : 'Sony nCPU',
        'EM_NDR1'          : 'Denso NDR1',
        'EM_STARCORE'      : 'Motorola Star*Core',
        'EM_ME16'          : 'Toyota ME16',
        'EM_ST100'         : 'STMicroelectronics ST100',
        'EM_TINYJ'         : 'Advanced Logic TinyJ',
        'EM_X86_64'        : 'x64',
        'EM_PDSP'          : 'Sony DSP',
        'EM_PDP10'         : 'Digital Equipment PDP-10',
        'EM_PDP11'         : 'Digital Equipment PDP-11',
        'EM_FX66'          : 'Siemens FX66',
        'EM_ST9PLUS'       : 'STMicroelectronics ST9+ 8/16 bit',
        'EM_ST7'           : 'STMicroelectronics ST7 8-bit',
        'EM_68HC16'        : 'Motorola MC68HC16',
        'EM_68HC11'        : 'Motorola MC68HC11',
        'EM_68HC08'        : 'Motorola MC68HC08',
        'EM_68HC05'        : 'Motorola MC68HC05',
        'EM_SVX'           : 'Silicon Graphics SVx',
        'EM_ST19'          : 'STMicroelectronics ST19 8-bit',
        'EM_VAX'           : 'Digital VAX',
        'EM_CRIS'          : 'Axis Communications 32-bit',
        'EM_JAVELIN'       : 'Infineon Technologies 32-bit',
        'EM_FIREPATH'      : 'Element 14 64-bit DSP',
        'EM_ZSP'           : 'LSI Logic 16-bit DSP',
        'EM_MMIX'          : 'Donald Knuth\'s educational 64-bit',
        'EM_HUANY'         : 'Harvard University machine-independent object files',
        'EM_PRISM'         : 'SiTera Prism',
        'EM_AVR'           : 'Atmel AVR 8-bit',
        'EM_FR30'          : 'Fujitsu FR30',
        'EM_D10V'          : 'Mitsubishi D10V',
        'EM_D30V'          : 'Mitsubishi D30V',
        'EM_V850'          : 'NEC v850',
        'EM_M32R'          : 'Mitsubishi M32R',
        'EM_MN10300'       : 'Matsushita MN10300',
        'EM_MN10200'       : 'Matsushita MN10200',
        'EM_PJ'            : 'picoJava',
        'EM_OPENRISC'      : 'OpenRISC 32-bit',
        'EM_ARC_COMPACT'   : 'ARC International ARCompact',
        'EM_XTENSA'        : 'Tensilica Xtensa',
        'EM_VIDEOCORE'     : 'Alphamosaic VideoCore',
        'EM_TMM_GPP'       : 'Thompson Multimedia',
        'EM_NS32K'         : 'National Semiconductor 32000 series',
        'EM_TPC'           : 'Tenor Network TPC',
        'EM_SNP1K'         : 'Trebia SNP 1000',
        'EM_ST200'         : 'STMicroelectronics ST200',
        'EM_IP2K'          : 'Ubicom IP2xxx',
        'EM_MAX'           : 'MAX',
        'EM_CR'            : 'National Semiconductor CompactRISC',
        'EM_F2MC16'        : 'Fujitsu F2MC16',
        'EM_MSP430'        : 'Texas Instruments msp430',
        'EM_BLACKFIN'      : 'Analog Devices Blackfin',
        'EM_SE_C33'        : 'Seiko Epson S1C33',
        'EM_SEP'           : 'Sharp',
        'EM_ARCA'          : 'Arca RISC',
        'EM_UNICORE'       : 'PKU-Unity MPRC',
        'EM_EXCESS'        : 'eXcess',
        'EM_DXP'           : 'Icera Semiconductor Deep Execution Processor',
        'EM_ALTERA_NIOS2'  : 'Altera Nios II',
        'EM_CRX'           : 'National Semiconductor CompactRISC CRX',
        'EM_XGATE'         : 'Motorola XGATE',
        'EM_C166'          : 'Infineon C16x/XC16x',
        'EM_M16C'          : 'Renesas M16C',
        'EM_DSPIC30F'      : 'Microchip Technology dsPIC30F',
        'EM_CE'            : 'Freescale Communication Engine RISC core',
        'EM_M32C'          : 'Renesas M32C',
        'EM_TSK3000'       : 'Altium TSK3000',
        'EM_RS08'          : 'Freescale RS08',
        'EM_SHARC'         : 'Analog Devices SHARC',
        'EM_ECOG2'         : 'Cyan Technology eCOG2',
        'EM_SCORE7'        : 'Sunplus S+core7 RISC',
        'EM_DSP24'         : 'New Japan Radio (NJR) 24-bit DSP',
        'EM_VIDEOCORE3'    : 'Broadcom VideoCore III',
        'EM_LATTICEMICO32' : 'Lattice FPGA RISC',
        'EM_SE_C17'        : 'Seiko Epson C17',
        'EM_TI_C6000'      : 'TI TMS320C6000',
        'EM_TI_C2000'      : 'TI TMS320C2000',
        'EM_TI_C5500'      : 'TI TMS320C55x',
        'EM_TI_ARP32'      : 'TI Application Specific RISC, 32bit',
        'EM_TI_PRU'        : 'TI Programmable Realtime Unit',
        'EM_MMDSP_PLUS'    : 'STMicroelectronics 64bit VLIW',
        'EM_CYPRESS_M8C'   : 'Cypress M8C',
        'EM_R32C'          : 'Renesas R32C',
        'EM_TRIMEDIA'      : 'NXP Semiconductors TriMedia',
        'EM_QDSP6'         : 'QUALCOMM DSP6',
        'EM_8051'          : 'Intel 8051',
        'EM_STXP7X'        : 'STMicroelectronics STxP7x',
        'EM_NDS32'         : 'Andes Technology RISC',
        'EM_ECOG1'         : 'Cyan Technology eCOG1X',
        'EM_ECOG1X'        : 'Cyan Technology eCOG1X',
        'EM_MAXQ30'        : 'Dallas Semiconductor MAXQ30',
        'EM_XIMO16'        : 'New Japan Radio (NJR) 16-bit',
        'EM_MANIK'         : 'M2000 Reconfigurable RISC',
        'EM_CRAYNV2'       : 'Cray Inc. NV2',
        'EM_RX'            : 'Renesas RX',
        'EM_METAG'         : 'Imagination Technologies META',
        'EM_MCST_ELBRUS'   : 'MCST Elbrus',
        'EM_ECOG16'        : 'Cyan Technology eCOG16',
        'EM_CR16'          : 'National Semiconductor CompactRISC CR16 16-bit',
        'EM_ETPU'          : 'Freescale',
        'EM_SLE9X'         : 'Infineon Technologies SLE9X',
        'EM_L10M'          : 'Intel L10M',
        'EM_K10M'          : 'Intel K10M',
        'EM_AARCH64'       : 'AArch64',
        'EM_AVR32'         : 'Atmel 32-bit',
        'EM_STM8'          : 'STMicroeletronics STM8 8-bit',
        'EM_TILE64'        : 'Tilera TILE64',
        'EM_TILEPRO'       : 'Tilera TILEPro',
        'EM_MICROBLAZE'    : 'Xilinx MicroBlaze 32-bit RISC',
        'EM_CUDA'          : 'NVIDIA CUDA',
        'EM_TILEGX'        : 'Tilera TILE-Gx',
        'EM_CLOUDSHIELD'   : 'CloudShield',
        'EM_COREA_1ST'     : 'KIPO-KAIST Core-A 1st generation',
        'EM_COREA_2ND'     : 'KIPO-KAIST Core-A 2nd generation',
        'EM_ARC_COMPACT2'  : 'Synopsys ARCompact V2',
        'EM_OPEN8'         : 'Open8 8-bit RISC',
        'EM_RL78'          : 'Renesas RL78',
        'EM_VIDEOCORE5'    : 'Broadcom VideoCore V',
        'EM_78KOR'         : 'Renesas 78KOR',
        'EM_56800EX'       : 'Freescale 56800EX',
        'EM_BA1'           : 'Beyond BA1',
        'EM_BA2'           : 'Beyond BA2',
        'EM_XCORE'         : 'XMOS xCORE',
        'EM_MCHP_PIC'      : 'Microchip 8-bit PIC',
        'EM_INTEL205'      : 'Reserved by Intel',
        'EM_INTEL206'      : 'Reserved by Intel',
        'EM_INTEL207'      : 'Reserved by Intel',
        'EM_INTEL208'      : 'Reserved by Intel',
        'EM_INTEL209'      : 'Reserved by Intel',
        'EM_KM32'          : 'KM211 KM32 32-bit',
        'EM_KMX32'         : 'KM211 KMX32 32-bit',
        'EM_KMX16'         : 'KM211 KMX16 16-bit',
        'EM_KMX8'          : 'KM211 KMX8 8-bit',
        'EM_KVARC'         : 'KM211 KVARC',
        'EM_CDP'           : 'Paneve CDP',
        'EM_COGE'          : 'Cognitive',
        'EM_COOL'          : 'Bluechip Systems CoolEngine',
        'EM_NORC'          : 'Nanoradio Optimized RISC',
        'EM_CSR_KALIMBA'   : 'CSR Kalimba',
        'EM_Z80'           : 'Zilog Z80',
        'EM_VISIUM'        : 'VISIUMcore',
        'EM_FT32'          : 'FTDI Chip FT32 32-bit RISC',
        'EM_MOXIE'         : 'Moxie',
        'EM_AMDGPU'        : 'AMD GPU',
        'EM_RISCV'         : 'RISC-V',
        'EM_BPF'           : 'Linux BPF - in-kernel virtual machine',
        'EM_CSKY'          : 'C-SKY',
        'EM_FRV'           : 'Fujitsu FR-V'
    }
    return architectures.get(self['e_machine'], '<unknown>')
def get_shstrndx(self):
    """ Find the string table section index for the section header table
    """
    # From https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html:
    # when the real index is >= SHN_LORESERVE (0xff00), e_shstrndx holds
    # SHN_XINDEX (0xffff) and the actual index is stored in the sh_link
    # field of the section header at index 0.
    shstrndx = self['e_shstrndx']
    if shstrndx == SHN_INDICES.SHN_XINDEX:
        return self._get_section_header(0)['sh_link']
    return shstrndx
#-------------------------------- PRIVATE --------------------------------#
def __getitem__(self, name):
    """ Implement dict-like access to header entries, e.g.
        elffile['e_machine'].
    """
    return self.header[name]
def _identify_file(self):
    """ Verify the ELF file and identify its class and endianness.
    """
    # Note: this code reads the stream directly, without using ELFStructs,
    # since we don't yet know its exact format. ELF was designed to be
    # read like this - its e_ident field is word-size and endian agnostic.
    self.stream.seek(0)
    magic = self.stream.read(4)
    elf_assert(magic == b'\x7fELF', 'Magic number does not match')

    ei_class = self.stream.read(1)
    if ei_class not in (b'\x01', b'\x02'):
        raise ELFError('Invalid EI_CLASS %s' % repr(ei_class))
    self.elfclass = 32 if ei_class == b'\x01' else 64

    ei_data = self.stream.read(1)
    if ei_data not in (b'\x01', b'\x02'):
        raise ELFError('Invalid EI_DATA %s' % repr(ei_data))
    self.little_endian = (ei_data == b'\x01')
def _section_offset(self, n):
    """ Compute the file offset of section header #n.
    """
    table_start = self['e_shoff']
    return table_start + n * self['e_shentsize']
def _segment_offset(self, n):
    """ Compute the file offset of program header #n.
    """
    table_start = self['e_phoff']
    return table_start + n * self['e_phentsize']
def _make_segment(self, segment_header):
    """ Instantiate the Segment subclass matching the header's p_type.
    """
    p_type = segment_header['p_type']
    if p_type == 'PT_INTERP':
        return InterpSegment(segment_header, self.stream)
    if p_type == 'PT_DYNAMIC':
        return DynamicSegment(segment_header, self.stream, self)
    if p_type == 'PT_NOTE':
        return NoteSegment(segment_header, self.stream, self)
    return Segment(segment_header, self.stream)
def _get_section_header(self, n):
    """ Parse and return the Elf_Shdr struct for section #n.
    """
    pos = self._section_offset(n)
    return struct_parse(self.structs.Elf_Shdr, self.stream, stream_pos=pos)
def _get_section_name(self, section_header):
    """ Resolve a section's name through the section-header string table;
        sh_name is an offset into that table.
    """
    return self._section_header_stringtable.get_string(
        section_header['sh_name'])
def _make_section(self, section_header):
    """ Create a section object of the appropriate type for the header.
    """
    name = self._get_section_name(section_header)
    sh_type = section_header['sh_type']

    # Section kinds that need extra wiring (linked string/symbol tables)
    # go through dedicated factory helpers.
    if sh_type in ('SHT_SYMTAB', 'SHT_DYNSYM', 'SHT_SUNW_LDYNSYM'):
        return self._make_symbol_table_section(section_header, name)
    if sh_type == 'SHT_SYMTAB_SHNDX':
        return self._make_symbol_table_index_section(section_header, name)
    if sh_type == 'SHT_SUNW_syminfo':
        return self._make_sunwsyminfo_table_section(section_header, name)
    if sh_type == 'SHT_GNU_verneed':
        return self._make_gnu_verneed_section(section_header, name)
    if sh_type == 'SHT_GNU_verdef':
        return self._make_gnu_verdef_section(section_header, name)
    if sh_type == 'SHT_GNU_versym':
        return self._make_gnu_versym_section(section_header, name)
    if sh_type == 'SHT_HASH':
        return self._make_elf_hash_section(section_header, name)
    if sh_type == 'SHT_GNU_HASH':
        return self._make_gnu_hash_section(section_header, name)

    # .stab is a plain PROGBITS section identified by name.
    if sh_type == 'SHT_PROGBITS' and name == '.stab':
        return StabSection(section_header, name, self)

    # Remaining kinds all share the (header, name, elffile) constructor.
    simple_ctors = {
        'SHT_STRTAB': StringTableSection,
        'SHT_NULL': NullSection,
        'SHT_REL': RelocationSection,
        'SHT_RELA': RelocationSection,
        'SHT_DYNAMIC': DynamicSection,
        'SHT_NOTE': NoteSection,
        'SHT_ARM_ATTRIBUTES': ARMAttributesSection,
    }
    ctor = simple_ctors.get(sh_type, Section)
    return ctor(section_header, name, self)
def _make_section_name_map(self):
    """ Build the name -> index cache used by get_section_by_name /
        get_section_index. Later duplicates overwrite earlier ones, same
        as the original insertion loop.
    """
    self._section_name_map = {
        sec.name: i for i, sec in enumerate(self.iter_sections())}
def _make_symbol_table_section(self, section_header, name):
    """ Build a SymbolTableSection wired to its linked string table
        (named by sh_link).
    """
    strtab = self.get_section(section_header['sh_link'])
    return SymbolTableSection(
        section_header, name, elffile=self, stringtable=strtab)
def _make_symbol_table_index_section(self, section_header, name):
""" Create a SymbolTableIndexSection object
"""
linked_symtab_index = section_header['sh_link']
return SymbolTableIndexSection(
section_header, name, elffile=self,
symboltable=linked_symtab_index)
def _make_sunwsyminfo_table_section(self, section_header, name):
""" Create a SUNWSyminfoTableSection
"""
linked_strtab_index = section_header['sh_link']
strtab_section = self.get_section(linked_strtab_index)
return SUNWSyminfoTableSection(
section_header, name,
elffile=self,
symboltable=strtab_section)
def _make_gnu_verneed_section(self, section_header, name):
""" Create a GNUVerNeedSection
"""
linked_strtab_index = section_header['sh_link']
strtab_section = self.get_section(linked_strtab_index)
return GNUVerNeedSection(
section_header, name,
elffile=self,
stringtable=strtab_section)
def _make_gnu_verdef_section(self, section_header, name):
""" Create a GNUVerDefSection
"""
linked_strtab_index = section_header['sh_link']
strtab_section = self.get_section(linked_strtab_index)
return GNUVerDefSection(
section_header, name,
elffile=self,
stringtable=strtab_section)
def _make_gnu_versym_section(self, section_header, name):
""" Create a GNUVerSymSection
"""
linked_strtab_index = section_header['sh_link']
strtab_section = self.get_section(linked_strtab_index)
return GNUVerSymSection(
section_header, name,
elffile=self,
symboltable=strtab_section)
def _make_elf_hash_section(self, section_header, name):
linked_symtab_index = section_header['sh_link']
symtab_section = self.get_section(linked_symtab_index)
return ELFHashSection(
section_header, name, self, symtab_section
)
def _make_gnu_hash_section(self, section_header, name):
linked_symtab_index = section_header['sh_link']
symtab_section = self.get_section(linked_symtab_index)
return GNUHashSection(
section_header, name, self, symtab_section
)
    def _get_segment_header(self, n):
        """ Find the header of segment #n, parse it and return the struct.

            n: zero-based index into the program header table.
        """
        # _segment_offset (defined elsewhere in this class) converts the
        # index into an absolute file offset into the program header table.
        return struct_parse(
            self.structs.Elf_Phdr,
            self.stream,
            stream_pos=self._segment_offset(n))
    def _get_section_header_stringtable(self):
        """ Get the string table section corresponding to the section header
            table.
        """
        # get_shstrndx yields the e_shstrndx index: the section holding all
        # section names. It is wrapped as a StringTableSection with an empty
        # name, since its own name cannot be resolved before it exists.
        stringtable_section_num = self.get_shstrndx()
        return StringTableSection(
            header=self._get_section_header(stringtable_section_num),
            name='',
            elffile=self)
    def _parse_elf_header(self):
        """ Parse the ELF file header (Elf_Ehdr, at file offset 0) and return
            the resulting struct.

            NOTE(review): the original docstring claimed the result is
            assigned to attributes of this object; the method actually just
            returns it — the caller stores it.
        """
        return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0)
    def _read_dwarf_section(self, section, relocate_dwarf_sections):
        """ Read the contents of a DWARF section from the stream and return a
            DebugSectionDescriptor. Apply relocations if asked to.

            section: a Section object to read.
            relocate_dwarf_sections: when True, find this section's
                REL/RELA section (if any) and patch the copied data in place.
        """
        # The section data is read into a new stream, for processing
        section_stream = BytesIO()
        section_stream.write(section.data())
        if relocate_dwarf_sections:
            reloc_handler = RelocationHandler(self)
            reloc_section = reloc_handler.find_relocations_for_section(section)
            if reloc_section is not None:
                # Relocations are applied to the in-memory copy only; the
                # underlying file stream is never modified.
                reloc_handler.apply_section_relocations(
                    section_stream, reloc_section)
        return DebugSectionDescriptor(
            stream=section_stream,
            name=section.name,
            global_offset=section['sh_offset'],
            # data_size (not sh_size) so compressed sections report their
            # decompressed length.
            size=section.data_size,
            address=section['sh_addr'])
    @staticmethod
    def _decompress_dwarf_section(section):
        """ Returns the uncompressed contents of the provided DWARF section,
            as a new DebugSectionDescriptor (namedtuple ``_replace``) whose
            stream and size describe the inflated data.

            Raises AssertionError on malformed input (note: asserts are
            stripped under ``python -O``).
        """
        # TODO: support other compression formats from readelf.c
        # 12 = 4-byte "ZLIB" magic + 8-byte size field; anything shorter
        # cannot be a valid zlib-compressed debug section.
        assert section.size > 12, 'Unsupported compression format.'
        section.stream.seek(0)
        # According to readelf.c the content should contain "ZLIB"
        # followed by the uncompressed section size - 8 bytes in
        # big-endian order
        compression_type = section.stream.read(4)
        assert compression_type == b'ZLIB', \
            'Invalid compression type: %r' % (compression_type)
        uncompressed_size = struct.unpack('>Q', section.stream.read(8))[0]
        decompressor = zlib.decompressobj()
        uncompressed_stream = BytesIO()
        # Inflate in PAGESIZE chunks to bound peak memory on large sections.
        while True:
            chunk = section.stream.read(PAGESIZE)
            if not chunk:
                break
            uncompressed_stream.write(decompressor.decompress(chunk))
        uncompressed_stream.write(decompressor.flush())
        # Verify the inflated length against the declared size field.
        uncompressed_stream.seek(0, io.SEEK_END)
        size = uncompressed_stream.tell()
        assert uncompressed_size == size, \
            'Wrong uncompressed size: expected %r, but got %r' % (
                uncompressed_size, size,
            )
        return section._replace(stream=uncompressed_stream, size=size)

View File

@@ -1,225 +0,0 @@
#------------------------------------------------------------------------------
# elftools: elf/gnuversions.py
#
# ELF sections
#
# Yann Rouillard (yann@pleiades.fr.eu.org)
# This code is in the public domain
#------------------------------------------------------------------------------
from ..construct import CString
from ..common.utils import struct_parse, elf_assert
from .sections import Section, Symbol
class Version(object):
    """ A single entry of a "Version Needed" / "Version Definition" table
        section (verneed/verdef).

        The entry points at an array of auxiliary records; those are not held
        here — retrieve them via the owning section, which yields
        VersionAuxiliary objects. Header fields are exposed dict-style,
        mirroring Section objects.
    """
    def __init__(self, entry, name=None):
        # Raw parsed struct; ``name`` may be filled in later by the section.
        self.entry = entry
        self.name = name
    def __getitem__(self, key):
        """ Dict-like access to the underlying entry's fields. """
        return self.entry[key]
class VersionAuxiliary(object):
    """ An auxiliary record (verdaux/vernaux) attached to a version
        definition or dependency entry; carries the resolved version name.
        Fields are exposed dict-style, mirroring Section objects.
    """
    def __init__(self, entry, name):
        self.entry = entry
        self.name = name
    def __getitem__(self, key):
        """ Dict-like access to the underlying entry's fields. """
        return self.entry[key]
class GNUVersionSection(Section):
    """ Common ancestor class for ELF SUNW|GNU Version Needed/Dependency
        sections class which contains shareable code.

        field_prefix: 'vn' or 'vd'; struct field names are built from it
        (e.g. 'vn_next', 'vda_name').
    """
    def __init__(self, header, name, elffile, stringtable,
                 field_prefix, version_struct, version_auxiliaries_struct):
        super(GNUVersionSection, self).__init__(header, name, elffile)
        self.stringtable = stringtable
        self.field_prefix = field_prefix
        self.version_struct = version_struct
        self.version_auxiliaries_struct = version_auxiliaries_struct
    def num_versions(self):
        """ Number of version entries in the section (stored in sh_info).
        """
        return self['sh_info']
    def _field_name(self, name, auxiliary=False):
        """ Return the real field's name of version or a version auxiliary
            entry, e.g. ('next', auxiliary=True) -> 'vna_next' for 'vn'.
        """
        middle = 'a_' if auxiliary else '_'
        return self.field_prefix + middle + name
    def _iter_version_auxiliaries(self, entry_offset, count):
        """ Yield ``count`` VersionAuxiliary objects starting at
            ``entry_offset``; entries are chained by their '*a_next' offsets.
        """
        name_field = self._field_name('name', auxiliary=True)
        next_field = self._field_name('next', auxiliary=True)
        for _ in range(count):
            entry = struct_parse(
                self.version_auxiliaries_struct,
                self.stream,
                stream_pos=entry_offset)
            # The name field holds a string-table offset, not the string.
            name = self.stringtable.get_string(entry[name_field])
            version_aux = VersionAuxiliary(entry, name)
            yield version_aux
            # '*a_next' is relative to the current auxiliary entry.
            entry_offset += entry[next_field]
    def iter_versions(self):
        """ Yield all the version entries in the section
            Each time it returns the main version structure
            and an iterator to walk through its auxiliaries entries.

            NOTE: the auxiliary iterator is lazy and reads from the shared
            stream — consume it before advancing to the next version if
            ordering of stream reads matters to the caller.
        """
        aux_field = self._field_name('aux')
        count_field = self._field_name('cnt')
        next_field = self._field_name('next')
        entry_offset = self['sh_offset']
        for _ in range(self.num_versions()):
            entry = struct_parse(
                self.version_struct,
                self.stream,
                stream_pos=entry_offset)
            # NOTE(review): the two message fragments below concatenate
            # without a space between "> 0" and "for".
            elf_assert(entry[count_field] > 0,
                'Expected number of version auxiliary entries (%s) to be > 0'
                'for the following version entry: %s' % (
                    count_field, str(entry)))
            version = Version(entry)
            # '*_aux' is the offset of the first auxiliary entry, relative
            # to this version entry.
            aux_entries_offset = entry_offset + entry[aux_field]
            version_auxiliaries_iter = self._iter_version_auxiliaries(
                aux_entries_offset, entry[count_field])
            yield version, version_auxiliaries_iter
            entry_offset += entry[next_field]
class GNUVerNeedSection(GNUVersionSection):
    """ ELF SUNW or GNU Version Needed table section.
        Has an associated StringTableSection that's passed in the constructor.
    """
    def __init__(self, header, name, elffile, stringtable):
        super(GNUVerNeedSection, self).__init__(
            header, name, elffile, stringtable, 'vn',
            elffile.structs.Elf_Verneed, elffile.structs.Elf_Vernaux)
        # Lazily computed by has_indexes(); None means "not determined yet".
        self._has_indexes = None
    def has_indexes(self):
        """ Return True if at least one version definition entry has an index
            that is stored in the vna_other field.
            This information is used for symbol versioning.
        """
        if self._has_indexes is None:
            self._has_indexes = False
            for _, vernaux_iter in self.iter_versions():
                for vernaux in vernaux_iter:
                    if vernaux['vna_other']:
                        self._has_indexes = True
                        break
                # BUGFIX: the original only broke out of the inner loop and
                # then kept parsing every remaining verneed entry; stop the
                # scan as soon as one indexed auxiliary has been found.
                if self._has_indexes:
                    break
        return self._has_indexes
    def iter_versions(self):
        """ Yield (verneed, vernaux_iter) pairs, filling in each verneed's
            name from the vn_file string-table offset.
        """
        for verneed, vernaux in super(GNUVerNeedSection, self).iter_versions():
            verneed.name = self.stringtable.get_string(verneed['vn_file'])
            yield verneed, vernaux
    def get_version(self, index):
        """ Get the version information located at index #n in the table
            Return boths the verneed structure and the vernaux structure
            that contains the name of the version; None if not found.
        """
        for verneed, vernaux_iter in self.iter_versions():
            for vernaux in vernaux_iter:
                if vernaux['vna_other'] == index:
                    return verneed, vernaux
        return None
class GNUVerDefSection(GNUVersionSection):
    """ ELF SUNW or GNU Version Definition table section.
        Has an associated StringTableSection that's passed in the constructor.
    """
    def __init__(self, header, name, elffile, stringtable):
        super(GNUVerDefSection, self).__init__(
            header, name, elffile, stringtable, 'vd',
            elffile.structs.Elf_Verdef, elffile.structs.Elf_Verdaux)
    def get_version(self, index):
        """ Get the version information located at index #n in the table
            Return boths the verdef structure and an iterator to retrieve
            both the version names and dependencies in the form of
            verdaux entries; None if no entry matches.
        """
        # First pair whose verdef carries the requested vd_ndx, else None.
        return next(
            (pair for pair in self.iter_versions()
             if pair[0]['vd_ndx'] == index),
            None)
class GNUVerSymSection(Section):
    """ ELF SUNW or GNU Versym table section.
        Has an associated SymbolTableSection that's passed in the constructor.
    """
    def __init__(self, header, name, elffile, symboltable):
        super(GNUVerSymSection, self).__init__(header, name, elffile)
        self.symboltable = symboltable
    def num_symbols(self):
        """ Number of symbols in the table. """
        return self['sh_size'] // self['sh_entsize']
    def get_symbol(self, n):
        """ Get the symbol at index #n from the table (Symbol object)
            It begins at 1 and not 0 since the first entry is used to
            store the current version of the syminfo table.
        """
        # Parse the versym entry directly from the stream...
        pos = self['sh_offset'] + n * self['sh_entsize']
        entry = struct_parse(
            self.structs.Elf_Versym, self.stream, stream_pos=pos)
        # ...and borrow the symbol's name from the linked symbol table.
        return Symbol(entry, self.symboltable.get_symbol(n).name)
    def iter_symbols(self):
        """ Yield all the symbols in the table. """
        for idx in range(self.num_symbols()):
            yield self.get_symbol(idx)

View File

@@ -1,186 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/hash.py
#
# ELF hash table sections
#
# Andreas Ziegler (andreas.ziegler@fau.de)
# This code is in the public domain
#-------------------------------------------------------------------------------
import struct
from ..common.utils import struct_parse
from .sections import Section
class ELFHashTable(object):
    """ SysV ELF hash table (DT_HASH), used to find symbols in the symbol
        table - useful for super-stripped binaries without section headers
        where only the start of the symbol table is known from the dynamic
        segment. Layout described at
        https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/.

        The symboltable argument needs to implement a get_symbol() method -
        in a regular ELF file this is the linked symbol table section
        (sh_link); for super-stripped binaries pass the DynamicSegment,
        which supports lookup without a symbol table section.
    """
    def __init__(self, elffile, start_offset, symboltable):
        self.elffile = elffile
        self._symboltable = symboltable
        # Parse nbuckets/nchains plus the two arrays in one go.
        self.params = struct_parse(
            elffile.structs.Elf_Hash, elffile.stream, start_offset)
    def get_number_of_symbols(self):
        """ Get the number of symbols from the hash table parameters: the
            chain array has exactly one entry per symbol.
        """
        return self.params['nchains']
    def get_symbol(self, name):
        """ Look up a symbol from this hash table with the given name.
            Returns None when the table is empty or the name is absent.
        """
        nbuckets = self.params['nbuckets']
        if nbuckets == 0:
            return None
        # Start at the bucket for this name's hash, then follow the chain
        # (index 0 terminates it) until the names match.
        symndx = self.params['buckets'][self.elf_hash(name) % nbuckets]
        while symndx != 0:
            candidate = self._symboltable.get_symbol(symndx)
            if candidate.name == name:
                return candidate
            symndx = self.params['chains'][symndx]
        return None
    @staticmethod
    def elf_hash(name):
        """ Compute the classic SysV ELF hash value for a symbol name
            (str input is UTF-8 encoded first).
        """
        h = 0
        if not isinstance(name, bytes):
            name = name.encode('utf-8')
        for byte in bytearray(name):
            h = (h << 4) + byte
            x = h & 0xF0000000
            if x != 0:
                h ^= (x >> 24)
            h &= ~x
        return h
class ELFHashSection(Section, ELFHashTable):
    """ Section representation of an ELF hash table. In regular ELF files, this
        allows us to use the common functions defined on Section objects when
        dealing with the hash table.
    """
    def __init__(self, header, name, elffile, symboltable):
        # Section.__init__ must run first: ELFHashTable.__init__ reads
        # self['sh_offset'], which Section's header storage provides.
        Section.__init__(self, header, name, elffile)
        ELFHashTable.__init__(self, elffile, self['sh_offset'], symboltable)
class GNUHashTable(object):
    """ Representation of a GNU hash table to find symbols in the
        symbol table - useful for super-stripped binaries without section
        headers where only the start of the symbol table is known from the
        dynamic segment. The layout and contents are nicely described at
        https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/.
        The symboltable argument needs to implement a get_symbol() method -
        in a regular ELF file, this will be the linked symbol table section
        as indicated by the sh_link attribute. For super-stripped binaries,
        one should use the DynamicSegment object as the symboltable as it
        supports symbol lookup without access to a symbol table section.
    """
    def __init__(self, elffile, start_offset, symboltable):
        self.elffile = elffile
        self._symboltable = symboltable
        self.params = struct_parse(self.elffile.structs.Gnu_Hash,
                                   self.elffile.stream,
                                   start_offset)
        # Element sizes in the hash table
        self._wordsize = self.elffile.structs.Elf_word('').sizeof()
        self._xwordsize = self.elffile.structs.Elf_xword('').sizeof()
        # File offset of the chain array: header (4 words) + bloom filter
        # (bloom_size xwords) + bucket array (nbuckets words).
        self._chain_pos = start_offset + 4 * self._wordsize + \
            self.params['bloom_size'] * self._xwordsize + \
            self.params['nbuckets'] * self._wordsize
    def get_number_of_symbols(self):
        """ Get the number of symbols in the hash table by finding the bucket
            with the highest symbol index and walking to the end of its chain.
        """
        # Find highest index in buckets array
        max_idx = max(self.params['buckets'])
        if max_idx < self.params['symoffset']:
            # No bucket maps into the hashed part of the symbol table.
            return self.params['symoffset']
        # Position the stream at the start of the corresponding chain
        max_chain_pos = self._chain_pos + \
            (max_idx - self.params['symoffset']) * self._wordsize
        self.elffile.stream.seek(max_chain_pos)
        hash_format = '<I' if self.elffile.little_endian else '>I'
        # Walk the chain to its end (lowest bit is set)
        while True:
            cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
            if cur_hash & 1:
                return max_idx + 1
            max_idx += 1
    def _matches_bloom(self, H1):
        """ Helper function to check if the given hash could be in the hash
            table by testing it against the bloom filter.
        """
        # Two probe bits per hash, derived from H1 and H1 >> bloom_shift.
        arch_bits = self.elffile.elfclass
        H2 = H1 >> self.params['bloom_shift']
        word_idx = int(H1 / arch_bits) % self.params['bloom_size']
        BITMASK = (1 << (H1 % arch_bits)) | (1 << (H2 % arch_bits))
        return (self.params['bloom'][word_idx] & BITMASK) == BITMASK
    def get_symbol(self, name):
        """ Look up a symbol from this hash table with the given name.
            Returns None when the bloom filter or chain walk rules it out.
        """
        namehash = self.gnu_hash(name)
        if not self._matches_bloom(namehash):
            return None
        symidx = self.params['buckets'][namehash % self.params['nbuckets']]
        if symidx < self.params['symoffset']:
            return None
        self.elffile.stream.seek(self._chain_pos + (symidx - self.params['symoffset']) * self._wordsize)
        hash_format = '<I' if self.elffile.little_endian else '>I'
        while True:
            cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
            # Compare ignoring the low bit, which marks the end of a chain.
            if cur_hash | 1 == namehash | 1:
                symbol = self._symboltable.get_symbol(symidx)
                if name == symbol.name:
                    return symbol
            if cur_hash & 1:
                break
            symidx += 1
        return None
    @staticmethod
    def gnu_hash(key):
        """ Compute the GNU-style (djb2-derived) hash value for a given
            symbol name, truncated to 32 bits.
        """
        if not isinstance(key, bytes):
            key = key.encode('utf-8')
        h = 5381
        for c in bytearray(key):
            h = h * 33 + c
        return h & 0xFFFFFFFF
class GNUHashSection(Section, GNUHashTable):
    """ Section representation of a GNU hash table. In regular ELF files, this
        allows us to use the common functions defined on Section objects when
        dealing with the hash table.
    """
    def __init__(self, header, name, elffile, symboltable):
        # Section.__init__ must run first: GNUHashTable.__init__ reads
        # self['sh_offset'], which Section's header storage provides.
        Section.__init__(self, header, name, elffile)
        GNUHashTable.__init__(self, elffile, self['sh_offset'], symboltable)

View File

@@ -1,62 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/notes.py
#
# ELF notes
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..common.py3compat import bytes2hex, bytes2str
from ..common.utils import struct_parse, roundup
from ..construct import CString
def iter_notes(elffile, offset, size):
    """ Yield all the notes in a section or segment.

        elffile: the owning ELFFile (supplies structs and the stream).
        offset: file offset where the note area begins.
        size: byte length of the note area.

        Each yielded note dict gains synthetic keys: n_offset (start of the
        note), n_name, n_descdata (raw), n_desc (type-specific parse) and
        n_size (total consumed bytes).
    """
    end = offset + size
    while offset < end:
        note = struct_parse(
            elffile.structs.Elf_Nhdr,
            elffile.stream,
            stream_pos=offset)
        note['n_offset'] = offset
        offset += elffile.structs.Elf_Nhdr.sizeof()
        elffile.stream.seek(offset)
        # n_namesz is 4-byte aligned.
        # NOTE: roundup's second argument appears to be log2 of the
        # alignment (2 -> 4 bytes) — consistent with the 'align to 4'
        # comment above.
        disk_namesz = roundup(note['n_namesz'], 2)
        note['n_name'] = bytes2str(
            CString('').parse(elffile.stream.read(disk_namesz)))
        offset += disk_namesz
        desc_data = elffile.stream.read(note['n_descsz'])
        note['n_descdata'] = desc_data
        # Decode the descriptor according to the note type; unknown types
        # fall through with the raw bytes.
        if note['n_type'] == 'NT_GNU_ABI_TAG':
            note['n_desc'] = struct_parse(elffile.structs.Elf_abi,
                                          elffile.stream,
                                          offset)
        elif note['n_type'] == 'NT_GNU_BUILD_ID':
            note['n_desc'] = bytes2hex(desc_data)
        elif note['n_type'] == 'NT_GNU_GOLD_VERSION':
            note['n_desc'] = bytes2str(desc_data)
        elif note['n_type'] == 'NT_PRPSINFO':
            note['n_desc'] = struct_parse(elffile.structs.Elf_Prpsinfo,
                                          elffile.stream,
                                          offset)
        elif note['n_type'] == 'NT_FILE':
            note['n_desc'] = struct_parse(elffile.structs.Elf_Nt_File,
                                          elffile.stream,
                                          offset)
        elif note['n_type'] == 'NT_GNU_PROPERTY_TYPE_0':
            off = offset
            props = []
            # Properties are padded to 4 (ELF32) or 8 (ELF64) bytes after
            # the 8-byte pr_type/pr_datasz header.
            while off < end:
                p = struct_parse(elffile.structs.Elf_Prop, elffile.stream, off)
                off += roundup(p.pr_datasz + 8, 2 if elffile.elfclass == 32 else 3)
                props.append(p)
            note['n_desc'] = props
        else:
            note['n_desc'] = desc_data
        # Descriptor is 4-byte aligned on disk as well.
        offset += roundup(note['n_descsz'], 2)
        note['n_size'] = offset - note['n_offset']
        yield note

View File

@@ -1,309 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/relocation.py
#
# ELF relocations
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import namedtuple
from ..common.exceptions import ELFRelocationError
from ..common.utils import elf_assert, struct_parse
from .sections import Section
from .enums import (
ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS,
ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64,
ENUM_D_TAG)
class Relocation(object):
    """ A single relocation entry, REL or RELA, with dict-like access to
        its fields.
    """
    def __init__(self, entry, elffile):
        self.entry = entry
        self.elffile = elffile
    def is_RELA(self):
        """ True for RELA entries (explicit addend), False for REL. """
        return 'r_addend' in self.entry
    def __getitem__(self, field):
        """ Dict-like access to the entry's fields. """
        return self.entry[field]
    def __repr__(self):
        kind = 'RELA' if self.is_RELA() else 'REL'
        return '<Relocation (%s): %s>' % (kind, self.entry)
    def __str__(self):
        return self.__repr__()
class RelocationTable(object):
    """ Shared functionality between relocation sections and relocation
        tables: fixed-size entries read sequentially from the stream.
    """
    def __init__(self, elffile, offset, size, is_rela):
        self._elffile = elffile
        self._stream = elffile.stream
        self._elfstructs = elffile.structs
        self._offset = offset
        self._size = size
        self._is_rela = is_rela
        # RELA entries carry an explicit addend and use a larger struct.
        self.entry_struct = (self._elfstructs.Elf_Rela if is_rela
                             else self._elfstructs.Elf_Rel)
        self.entry_size = self.entry_struct.sizeof()
    def is_RELA(self):
        """ Is this a RELA relocation table? If not, it's REL. """
        return self._is_rela
    def num_relocations(self):
        """ Number of whole entries that fit in the table. """
        return self._size // self.entry_size
    def get_relocation(self, n):
        """ Get the relocation at index #n (Relocation object). """
        pos = self._offset + n * self.entry_size
        parsed = struct_parse(self.entry_struct, self._stream, stream_pos=pos)
        return Relocation(parsed, self._elffile)
    def iter_relocations(self):
        """ Yield all the relocations in the table, in index order. """
        for idx in range(self.num_relocations()):
            yield self.get_relocation(idx)
class RelocationSection(Section, RelocationTable):
    """ ELF relocation section. Serves as a collection of Relocation entries.
    """
    def __init__(self, header, name, elffile):
        # Section.__init__ must run first so sh_offset/sh_size are readable.
        Section.__init__(self, header, name, elffile)
        RelocationTable.__init__(self, self.elffile,
            self['sh_offset'], self['sh_size'], header['sh_type'] == 'SHT_RELA')
        # Sanity-check the header after entry_size is known.
        elf_assert(header['sh_type'] in ('SHT_REL', 'SHT_RELA'),
            'Unknown relocation type section')
        elf_assert(header['sh_entsize'] == self.entry_size,
            'Expected sh_entsize of %s section to be %s' % (
                header['sh_type'], self.entry_size))
class RelocationHandler(object):
    """ Handles the logic of relocations in ELF files: locating the
        REL/RELA section for a given section and applying its entries to an
        in-memory copy of the section data.
    """
    def __init__(self, elffile):
        self.elffile = elffile
    def find_relocations_for_section(self, section):
        """ Given a section, find the relocation section for it in the ELF
            file. Return a RelocationSection object, or None if none was
            found.
        """
        reloc_section_names = (
            '.rel' + section.name,
            '.rela' + section.name)
        # Find the relocation section aimed at this one. Currently assume
        # that either .rel or .rela section exists for this section, but
        # not both.
        for relsection in self.elffile.iter_sections():
            if ( isinstance(relsection, RelocationSection) and
                 relsection.name in reloc_section_names):
                return relsection
        return None
    def apply_section_relocations(self, stream, reloc_section):
        """ Apply all relocations in reloc_section (a RelocationSection object)
            to the given stream, that contains the data of the section that is
            being relocated. The stream is modified as a result.
        """
        # The symbol table associated with this relocation section
        symtab = self.elffile.get_section(reloc_section['sh_link'])
        for reloc in reloc_section.iter_relocations():
            self._do_apply_relocation(stream, reloc, symtab)
    def _do_apply_relocation(self, stream, reloc, symtab):
        """ Apply a single relocation entry to ``stream`` in place.
            Raises ELFRelocationError on out-of-range symbols, unexpected
            REL/RELA mixes, or unsupported relocation types.
        """
        # Preparations for performing the relocation: obtain the value of
        # the symbol mentioned in the relocation, as well as the relocation
        # recipe which tells us how to actually perform it.
        # All peppered with some sanity checking.
        if reloc['r_info_sym'] >= symtab.num_symbols():
            raise ELFRelocationError(
                'Invalid symbol reference in relocation: index %s' % (
                    reloc['r_info_sym']))
        sym_value = symtab.get_symbol(reloc['r_info_sym'])['st_value']
        reloc_type = reloc['r_info_type']
        recipe = None
        # Per-architecture dispatch; each arch also enforces its expected
        # REL vs. RELA flavor where that is fixed by the ABI.
        if self.elffile.get_machine_arch() == 'x86':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for x86: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X86.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == 'x64':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for x64: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X64.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == 'MIPS':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for MIPS: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_MIPS.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == 'ARM':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for ARM: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_ARM.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == 'AArch64':
            recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type, None)
        elif self.elffile.get_machine_arch() == '64-bit PowerPC':
            recipe = self._RELOCATION_RECIPES_PPC64.get(reloc_type, None)
        if recipe is None:
            raise ELFRelocationError(
                'Unsupported relocation type: %s' % reloc_type)
        # So now we have everything we need to actually perform the relocation.
        # Let's get to it:
        # 0. Find out which struct we're going to be using to read this value
        #    from the stream and write it back.
        if recipe.bytesize == 4:
            value_struct = self.elffile.structs.Elf_word('')
        elif recipe.bytesize == 8:
            value_struct = self.elffile.structs.Elf_word64('')
        else:
            raise ELFRelocationError('Invalid bytesize %s for relocation' %
                recipe.bytesize)
        # 1. Read the value from the stream (with correct size and endianness)
        original_value = struct_parse(
            value_struct,
            stream,
            stream_pos=reloc['r_offset'])
        # 2. Apply the relocation to the value, acting according to the recipe
        relocated_value = recipe.calc_func(
            value=original_value,
            sym_value=sym_value,
            offset=reloc['r_offset'],
            addend=reloc['r_addend'] if recipe.has_addend else 0)
        # 3. Write the relocated value back into the stream
        stream.seek(reloc['r_offset'])
        # Make sure the relocated value fits back by wrapping it around. This
        # looks like a problem, but it seems to be the way this is done in
        # binutils too.
        relocated_value = relocated_value % (2 ** (recipe.bytesize * 8))
        value_struct.build_stream(relocated_value, stream)
    # Relocations are represented by "recipes". Each recipe specifies:
    #  bytesize: The number of bytes to read (and write back) to the section.
    #            This is the unit of data on which relocation is performed.
    #  has_addend: Does this relocation have an extra addend?
    #  calc_func: A function that performs the relocation on an extracted
    #             value, and returns the updated value.
    #
    _RELOCATION_RECIPE_TYPE = namedtuple('_RELOCATION_RECIPE_TYPE',
        'bytesize has_addend calc_func')
    # The _reloc_calc_* helpers below are plain functions (not methods);
    # they are only ever called through a recipe's calc_func.
    def _reloc_calc_identity(value, sym_value, offset, addend=0):
        return value
    def _reloc_calc_sym_plus_value(value, sym_value, offset, addend=0):
        return sym_value + value
    def _reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        return sym_value + value - offset
    def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        return sym_value + addend
    def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0):
        return sym_value + addend - offset
    def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        return sym_value // 4 + value - offset // 4
    _RELOCATION_RECIPES_ARM = {
        ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_ARM['R_ARM_CALL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_arm_reloc_calc_sym_plus_value_pcrel),
    }
    _RELOCATION_RECIPES_AARCH64 = {
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_PREL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }
    # https://dmz-portal.mips.com/wiki/MIPS_relocation_types
    _RELOCATION_RECIPES_MIPS = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
    }
    _RELOCATION_RECIPES_PPC64 = {
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_REL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }
    _RELOCATION_RECIPES_X86 = {
        ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_i386['R_386_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_i386['R_386_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value_pcrel),
    }
    _RELOCATION_RECIPES_X64 = {
        ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }

View File

@@ -1,507 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/sections.py
#
# ELF sections
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..common.exceptions import ELFCompressionError
from ..common.utils import struct_parse, elf_assert, parse_cstring_from_stream
from collections import defaultdict
from .constants import SH_FLAGS
from .notes import iter_notes
import zlib
class Section(object):
    """ Base class for ELF sections. Also used for all sections types that have
        no special functionality.
        Allows dictionary-like access to the section header. For example:
        > sec = Section(...)
        > sec['sh_type']  # section type
    """
    def __init__(self, header, name, elffile):
        self.header = header
        self.name = name
        self.elffile = elffile
        self.stream = self.elffile.stream
        self.structs = self.elffile.structs
        # Non-zero when SHF_COMPRESSED is set in sh_flags.
        self._compressed = header['sh_flags'] & SH_FLAGS.SHF_COMPRESSED
        if self.compressed:
            # Read the compression header now to know about the size/alignment
            # of the decompressed data.
            # NOTE: 'header' is deliberately rebound to the Elf_Chdr here;
            # the section header itself remains available as self.header.
            header = struct_parse(self.structs.Elf_Chdr,
                                  self.stream,
                                  stream_pos=self['sh_offset'])
            self._compression_type = header['ch_type']
            self._decompressed_size = header['ch_size']
            self._decompressed_align = header['ch_addralign']
        else:
            self._decompressed_size = header['sh_size']
            self._decompressed_align = header['sh_addralign']
    @property
    def compressed(self):
        """ Is this section compressed?
        """
        return self._compressed
    @property
    def data_size(self):
        """ Return the logical size for this section's data.
        This can be different from the .sh_size header field when the section
        is compressed.
        """
        return self._decompressed_size
    @property
    def data_alignment(self):
        """ Return the logical alignment for this section's data.
        This can be different from the .sh_addralign header field when the
        section is compressed.
        """
        return self._decompressed_align
    def data(self):
        """ The section data from the file.
        Note that data is decompressed if the stored section data is
        compressed.
        Raises ELFCompressionError on unknown compression types or a size
        mismatch after decompression.
        """
        # If this section is NOBITS, there is no data. provide a dummy answer
        if self.header['sh_type'] == 'SHT_NOBITS':
            return b'\0'*self.data_size
        # If this section is compressed, deflate it
        if self.compressed:
            c_type = self._compression_type
            if c_type == 'ELFCOMPRESS_ZLIB':
                # Read the data to decompress starting right after the
                # compression header until the end of the section.
                hdr_size = self.structs.Elf_Chdr.sizeof()
                self.stream.seek(self['sh_offset'] + hdr_size)
                compressed = self.stream.read(self['sh_size'] - hdr_size)
                decomp = zlib.decompressobj()
                # max_length caps the output at the declared ch_size.
                result = decomp.decompress(compressed, self.data_size)
            else:
                raise ELFCompressionError(
                    'Unknown compression type: {:#0x}'.format(c_type)
                )
            if len(result) != self._decompressed_size:
                raise ELFCompressionError(
                    'Decompressed data is {} bytes long, should be {} bytes'
                    ' long'.format(len(result), self._decompressed_size)
                )
        else:
            self.stream.seek(self['sh_offset'])
            result = self.stream.read(self._decompressed_size)
        return result
    def is_null(self):
        """ Is this a null section?
        """
        return False
    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]
    def __eq__(self, other):
        # Sections compare equal when their headers do; non-section
        # operands (no .header) compare unequal rather than raising.
        try:
            return self.header == other.header
        except AttributeError:
            return False
    def __hash__(self):
        # Consistent with __eq__: hash is derived from the header only.
        return hash(self.header)
class NullSection(Section):
    """ Section of type SHT_NULL: an unused section header table entry that
        carries no data.
    """
    def is_null(self):
        # A NULL section is, by definition, null.
        return True
class StringTableSection(Section):
    """ ELF string table section. Provides offset-based access to the
        NUL-terminated strings stored inside it.
    """
    def get_string(self, offset):
        """ Return the string located at *offset* within this table, decoded
            as UTF-8. Returns '' for empty/missing entries.
        """
        raw = parse_cstring_from_stream(self.stream,
                                        self['sh_offset'] + offset)
        if not raw:
            return ''
        return raw.decode('utf-8', errors='replace')
class SymbolTableIndexSection(Section):
    """ SHT_SYMTAB_SHNDX section: an array of Elf32_word section header table
        indices, one per symbol of the linked symbol table. Must exist when
        that table contains a symbol whose section header index is SHN_XINDEX
        (0xffff). The format of the section is described at
        https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html
    """
    def __init__(self, header, name, elffile, symboltable):
        super(SymbolTableIndexSection, self).__init__(header, name, elffile)
        self.symboltable = symboltable

    def get_section_index(self, n):
        """ Return the section header table index stored for symbol #n.
        """
        entry_pos = self['sh_offset'] + n * self['sh_entsize']
        return struct_parse(self.elffile.structs.Elf_word(''), self.stream,
                            entry_pos)
class SymbolTableSection(Section):
    """ ELF symbol table section. Symbol names are resolved through the
        StringTableSection supplied at construction time.
    """
    def __init__(self, header, name, elffile, stringtable):
        super(SymbolTableSection, self).__init__(header, name, elffile)
        self.stringtable = stringtable
        elf_assert(self['sh_entsize'] > 0,
            'Expected entry size of section %r to be > 0' % name)
        elf_assert(self['sh_size'] % self['sh_entsize'] == 0,
            'Expected section size to be a multiple of entry size in section %r' % name)
        # name -> [indices] mapping, built lazily on the first
        # get_symbol_by_name() call.
        self._symbol_name_map = None

    def num_symbols(self):
        """ Number of symbols in the table
        """
        return self['sh_size'] // self['sh_entsize']

    def get_symbol(self, n):
        """ Parse and return the Symbol at index #n.
        """
        entry = struct_parse(
            self.structs.Elf_Sym,
            self.stream,
            stream_pos=self['sh_offset'] + n * self['sh_entsize'])
        # Resolve the name through the associated string table.
        return Symbol(entry, self.stringtable.get_string(entry['st_name']))

    def get_symbol_by_name(self, name):
        """ Return the list of symbols carrying the given name, or None when
            no such symbol exists.
        """
        if self._symbol_name_map is None:
            name_map = defaultdict(list)
            for idx, sym in enumerate(self.iter_symbols()):
                name_map[sym.name].append(idx)
            self._symbol_name_map = name_map
        indices = self._symbol_name_map.get(name)
        if not indices:
            return None
        return [self.get_symbol(idx) for idx in indices]

    def iter_symbols(self):
        """ Yield every Symbol in the table, in index order.
        """
        for idx in range(self.num_symbols()):
            yield self.get_symbol(idx)
class Symbol(object):
    """ A single symbol table entry paired with its resolved name.
        Like Section, supports dictionary-style access to the entry fields,
        e.g. sym['st_value'].
    """
    def __init__(self, entry, name):
        # entry: parsed Elf_Sym container; name: resolved symbol name string.
        self.entry, self.name = entry, name

    def __getitem__(self, name):
        """ Dict-like access to the underlying entry's fields.
        """
        return self.entry[name]
class SUNWSyminfoTableSection(Section):
    """ ELF .SUNW Syminfo table section. Symbol names are resolved through
        the SymbolTableSection supplied at construction time.
    """
    def __init__(self, header, name, elffile, symboltable):
        super(SUNWSyminfoTableSection, self).__init__(header, name, elffile)
        self.symboltable = symboltable

    def num_symbols(self):
        """ Number of symbols in the table. Entry 0 stores the table version,
            not a symbol, so it is excluded from the count.
        """
        return self['sh_size'] // self['sh_entsize'] - 1

    def get_symbol(self, n):
        """ Return the Symbol at index #n. Valid indices start at 1, since
            slot 0 holds the current version of the syminfo table.
        """
        entry_pos = self['sh_offset'] + n * self['sh_entsize']
        entry = struct_parse(
            self.structs.Elf_Sunw_Syminfo,
            self.stream,
            stream_pos=entry_pos)
        # The name comes from the associated symbol table.
        return Symbol(entry, self.symboltable.get_symbol(n).name)

    def iter_symbols(self):
        """ Yield every Symbol in the table, starting from index 1.
        """
        for idx in range(1, self.num_symbols() + 1):
            yield self.get_symbol(idx)
class NoteSection(Section):
    """ ELF note section (SHT_NOTE). Provides parsed access to its notes.
    """
    def iter_notes(self):
        """ Yield each note in the section as a dictionary-like object
            exposing "n_name", "n_type", "n_desc" and related fields.
        """
        return iter_notes(self.elffile, self['sh_offset'], self['sh_size'])
class StabSection(Section):
    """ ELF stab (debugging information) section.
    """
    def iter_stabs(self):
        """ Yield every stab entry in the section, as ELFStructs.Elf_Stabs
            containers annotated with their file offset in 'n_offset'.
        """
        pos = self['sh_offset']
        limit = pos + self['sh_size']
        entry_size = self.structs.Elf_Stabs.sizeof()
        while pos < limit:
            entry = struct_parse(
                self.structs.Elf_Stabs,
                self.stream,
                stream_pos=pos)
            # Record where this entry lives in the file.
            entry['n_offset'] = pos
            pos += entry_size
            # Leave the stream positioned at the next entry.
            self.stream.seek(pos)
            yield entry
class ARMAttribute(object):
    """ ARM attribute object - representing a build attribute of ARM ELF files.
    """
    def __init__(self, structs, stream):
        # Parse the attribute tag first; the encoding of the value (and of
        # any extra payload) that follows depends on which tag this is.
        self._tag = struct_parse(structs.Elf_Attribute_Tag, stream)
        self.extra = None

        if self.tag in ('TAG_FILE', 'TAG_SECTION', 'TAG_SYMBOL'):
            self.value = struct_parse(structs.Elf_word('value'), stream)

            if self.tag != 'TAG_FILE':
                # TAG_SECTION / TAG_SYMBOL carry a 0-terminated list of
                # ULEB128 numbers as extra payload.
                self.extra = []
                s_number = struct_parse(structs.Elf_uleb128('s_number'), stream)

                while s_number != 0:
                    self.extra.append(s_number)
                    s_number = struct_parse(structs.Elf_uleb128('s_number'),
                                            stream
                                            )

        elif self.tag in ('TAG_CPU_RAW_NAME', 'TAG_CPU_NAME', 'TAG_CONFORMANCE'):
            # String-valued tags: a NUL-terminated UTF-8 string.
            self.value = struct_parse(structs.Elf_ntbs('value',
                                                       encoding='utf-8'),
                                      stream)

        elif self.tag == 'TAG_COMPATIBILITY':
            self.value = struct_parse(structs.Elf_uleb128('value'), stream)
            self.extra = struct_parse(structs.Elf_ntbs('vendor_name',
                                                       encoding='utf-8'),
                                      stream)

        elif self.tag == 'TAG_ALSO_COMPATIBLE_WITH':
            # The value is itself a nested attribute. A non-string inner
            # value is followed by a terminating NUL byte.
            self.value = ARMAttribute(structs, stream)

            if type(self.value.value) is not str:
                nul = struct_parse(structs.Elf_byte('nul'), stream)
                elf_assert(nul == 0,
                           "Invalid terminating byte %r, expecting NUL." % nul)

        else:
            # Default: a plain ULEB128-encoded value.
            self.value = struct_parse(structs.Elf_uleb128('value'), stream)

    @property
    def tag(self):
        # The tag name as parsed, e.g. 'TAG_FILE'.
        return self._tag['tag']

    def __repr__(self):
        s = '<ARMAttribute (%s): %r>' % (self.tag, self.value)
        s += ' %s' % self.extra if self.extra is not None else ''
        return s
class ARMAttributesSubsubsection(object):
    """ Subsubsection of an ELF .ARM.attributes section's subsection.
    """
    def __init__(self, stream, structs, offset):
        self.stream = stream
        self.offset = offset
        self.structs = structs
        # The first attribute of a subsubsection acts as its header; parsing
        # it leaves the stream positioned at the first real attribute.
        self.header = ARMAttribute(self.structs, self.stream)
        self.attr_start = self.stream.tell()

    def iter_attributes(self, tag=None):
        """ Yield the attributes, optionally restricted to those whose tag
            equals |tag|.
        """
        for attr in self._make_attributes():
            if tag is None or attr.tag == tag:
                yield attr

    @property
    def num_attributes(self):
        """ Number of attributes in the subsubsection (header included).
        """
        return 1 + sum(1 for _ in self.iter_attributes())

    @property
    def attributes(self):
        """ List of all attributes in the subsubsection, header first.
        """
        result = [self.header]
        result.extend(self.iter_attributes())
        return result

    def _make_attributes(self):
        """ Generate every attribute of this subsubsection except the header.
        """
        stop = self.offset + self.header.value
        self.stream.seek(self.attr_start)
        while self.stream.tell() != stop:
            yield ARMAttribute(self.structs, self.stream)

    def __repr__(self):
        return "<ARMAttributesSubsubsection (%s): %d bytes>" % (
            self.header.tag[4:], self.header.value)
class ARMAttributesSubsection(object):
    """ Subsection of an ELF .ARM.attributes section.
    """
    def __init__(self, stream, structs, offset):
        self.stream = stream
        self.offset = offset
        self.structs = structs
        # Parsing the header leaves the stream positioned at the first
        # subsubsection.
        self.header = struct_parse(self.structs.Elf_Attr_Subsection_Header,
                                   self.stream,
                                   self.offset
                                   )
        self.subsubsec_start = self.stream.tell()

    def iter_subsubsections(self, scope=None):
        """ Yield the subsubsections, optionally restricted to those whose
            header tag equals |scope|.
        """
        for sub in self._make_subsubsections():
            if scope is None or sub.header.tag == scope:
                yield sub

    @property
    def num_subsubsections(self):
        """ Number of subsubsections in the subsection.
        """
        return sum(1 for _ in self.iter_subsubsections())

    @property
    def subsubsections(self):
        """ List of all subsubsections in the subsection.
        """
        return list(self.iter_subsubsections())

    def _make_subsubsections(self):
        """ Generate every subsubsection of this subsection.
        """
        stop = self.offset + self['length']
        self.stream.seek(self.subsubsec_start)
        while self.stream.tell() != stop:
            sub = ARMAttributesSubsubsection(self.stream,
                                             self.structs,
                                             self.stream.tell())
            # Skip to the next subsubsection regardless of where parsing
            # left the stream.
            self.stream.seek(self.subsubsec_start + sub.header.value)
            yield sub

    def __getitem__(self, name):
        """ Dict-like access to header entries.
        """
        return self.header[name]

    def __repr__(self):
        return "<ARMAttributesSubsection (%s): %d bytes>" % (
            self.header['vendor_name'], self.header['length'])
class ARMAttributesSection(Section):
    """ ELF .ARM.attributes section.
    """
    def __init__(self, header, name, elffile):
        super(ARMAttributesSection, self).__init__(header, name, elffile)
        # The section starts with a one-byte format version, which must be
        # 'A'; parsing it leaves the stream at the first subsection.
        fv = struct_parse(self.structs.Elf_byte('format_version'),
                          self.stream,
                          self['sh_offset']
                          )
        elf_assert(chr(fv) == 'A',
                   "Unknown attributes version %s, expecting 'A'." % chr(fv)
                   )
        self.subsec_start = self.stream.tell()

    def iter_subsections(self, vendor_name=None):
        """ Yield the subsections, optionally restricted to those whose
            vendor name equals |vendor_name|.
        """
        for sub in self._make_subsections():
            if vendor_name is None or sub['vendor_name'] == vendor_name:
                yield sub

    @property
    def num_subsections(self):
        """ Number of subsections in the section.
        """
        return sum(1 for _ in self.iter_subsections())

    @property
    def subsections(self):
        """ List of all subsections in the section.
        """
        return list(self.iter_subsections())

    def _make_subsections(self):
        """ Generate every subsection of this section.
        """
        stop = self['sh_offset'] + self.data_size
        self.stream.seek(self.subsec_start)
        while self.stream.tell() != stop:
            sub = ARMAttributesSubsection(self.stream,
                                          self.structs,
                                          self.stream.tell())
            # Skip to the next subsection regardless of where parsing left
            # the stream.
            self.stream.seek(self.subsec_start + sub['length'])
            yield sub

View File

@@ -1,121 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/segments.py
#
# ELF segments
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..construct import CString
from ..common.utils import struct_parse
from .constants import SH_FLAGS
from .notes import iter_notes
class Segment(object):
    """ Wrapper over an ELF program header ("segment"). Allows dict-like
        access to the header entries (e.g. seg['p_type']) and reading the
        segment's data from the file stream.
    """
    def __init__(self, header, stream):
        self.header = header
        self.stream = stream

    def data(self):
        """ The segment data from the file.
        """
        self.stream.seek(self['p_offset'])
        return self.stream.read(self['p_filesz'])

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def section_in_segment(self, section):
        """ Is the given section contained in this segment?
        Note: this tries to reproduce the intricate rules of the
        ELF_SECTION_IN_SEGMENT_STRICT macro of the header
        elf/include/internal.h in the source of binutils.
        """
        # Only the 'strict' checks from ELF_SECTION_IN_SEGMENT_1 are included
        segtype = self['p_type']
        sectype = section['sh_type']
        secflags = section['sh_flags']

        # Only PT_LOAD, PT_GNU_RELRO and PT_TLS segments can contain SHF_TLS
        # sections
        if (    secflags & SH_FLAGS.SHF_TLS and
                segtype in ('PT_TLS', 'PT_GNU_RELRO', 'PT_LOAD')):
            pass
        # PT_TLS segment contains only SHF_TLS sections, PT_PHDR no sections
        # at all
        elif (  (secflags & SH_FLAGS.SHF_TLS) == 0 and
                segtype not in ('PT_TLS', 'PT_PHDR')):
            pass
        else:
            return False

        # PT_LOAD and similar segments only have SHF_ALLOC sections.
        if (    (secflags & SH_FLAGS.SHF_ALLOC) == 0 and
                segtype in ('PT_LOAD', 'PT_DYNAMIC', 'PT_GNU_EH_FRAME',
                            'PT_GNU_RELRO', 'PT_GNU_STACK')):
            return False

        # In ELF_SECTION_IN_SEGMENT_STRICT the flag check_vma is on, so if
        # this is an alloc section, check whether its VMA is in bounds.
        if secflags & SH_FLAGS.SHF_ALLOC:
            secaddr = section['sh_addr']
            vaddr = self['p_vaddr']

            # This checks that the section is wholly contained in the segment.
            # The third condition is the 'strict' one - an empty section will
            # not match at the very end of the segment (unless the segment is
            # also zero size, which is handled by the second condition).
            if not (secaddr >= vaddr and
                    secaddr - vaddr + section['sh_size'] <= self['p_memsz'] and
                    secaddr - vaddr <= self['p_memsz'] - 1):
                return False

        # If we've come this far and it's a NOBITS section, it's in the segment
        if sectype == 'SHT_NOBITS':
            return True

        secoffset = section['sh_offset']
        poffset = self['p_offset']

        # Same logic as with secaddr vs. vaddr checks above, just on offsets in
        # the file
        return (secoffset >= poffset and
                secoffset - poffset + section['sh_size'] <= self['p_filesz'] and
                secoffset - poffset <= self['p_filesz'] - 1)
class InterpSegment(Segment):
    """ INTERP segment. Knows how to obtain the path to the interpreter used
        for this ELF file.
    """
    def __init__(self, header, stream):
        super(InterpSegment, self).__init__(header, stream)

    def get_interp_name(self):
        """ Return the interpreter path stored in this segment, decoded as a
            UTF-8 string.
        """
        return struct_parse(
            CString('', encoding='utf-8'),
            self.stream,
            stream_pos=self['p_offset'])
class NoteSegment(Segment):
    """ PT_NOTE segment. Provides parsed access to its notes.
    """
    def __init__(self, header, stream, elffile):
        super(NoteSegment, self).__init__(header, stream)
        # Kept so notes can be parsed with the file's structs.
        self.elffile = elffile

    def iter_notes(self):
        """ Yield each note in the segment as a dictionary-like object
            exposing "n_name", "n_type", "n_desc" and related fields.
        """
        return iter_notes(self.elffile, self['p_offset'], self['p_filesz'])

View File

@@ -1,531 +0,0 @@
#-------------------------------------------------------------------------------
# elftools: elf/structs.py
#
# Encapsulation of Construct structs for parsing an ELF file, adjusted for
# correct endianness and word-size.
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..construct import (
UBInt8, UBInt16, UBInt32, UBInt64,
ULInt8, ULInt16, ULInt32, ULInt64,
SBInt32, SLInt32, SBInt64, SLInt64,
Struct, Array, Enum, Padding, BitStruct, BitField, Value, String, CString,
Switch, Field
)
from ..common.construct_utils import ULEB128
from ..common.utils import roundup
from .enums import *
class ELFStructs(object):
    """ Accessible attributes:
            Elf_{byte|half|word|word64|addr|offset|sword|xword|xsword}:
                Data chunks, as specified by the ELF standard, adjusted for
                correct endianness and word-size.
            Elf_Ehdr:
                ELF file header
            Elf_Phdr:
                Program header
            Elf_Shdr:
                Section header
            Elf_Sym:
                Symbol table entry
            Elf_Rel, Elf_Rela:
                Entries in relocation sections
    """
    def __init__(self, little_endian=True, elfclass=32):
        # elfclass is the word size: 32 or 64. The machine/type/OSABI fields
        # stay None until create_advanced_structs() is called.
        assert elfclass == 32 or elfclass == 64
        self.little_endian = little_endian
        self.elfclass = elfclass
        self.e_type = None
        self.e_machine = None
        self.e_ident_osabi = None

    def __getstate__(self):
        # Pickle only the parameters; the construct objects are rebuilt in
        # __setstate__.
        return self.little_endian, self.elfclass, self.e_type, self.e_machine, self.e_ident_osabi

    def __setstate__(self, state):
        # Recreate all structs from the pickled parameters.
        self.little_endian, self.elfclass, e_type, e_machine, e_osabi = state
        self.create_basic_structs()
        self.create_advanced_structs(e_type, e_machine, e_osabi)

    def create_basic_structs(self):
        """ Create word-size related structs and ehdr struct needed for
            initial determining of ELF type.
        """
        if self.little_endian:
            self.Elf_byte = ULInt8
            self.Elf_half = ULInt16
            self.Elf_word = ULInt32
            self.Elf_word64 = ULInt64
            self.Elf_addr = ULInt32 if self.elfclass == 32 else ULInt64
            self.Elf_offset = self.Elf_addr
            self.Elf_sword = SLInt32
            self.Elf_xword = ULInt32 if self.elfclass == 32 else ULInt64
            self.Elf_sxword = SLInt32 if self.elfclass == 32 else SLInt64
        else:
            self.Elf_byte = UBInt8
            self.Elf_half = UBInt16
            self.Elf_word = UBInt32
            self.Elf_word64 = UBInt64
            self.Elf_addr = UBInt32 if self.elfclass == 32 else UBInt64
            self.Elf_offset = self.Elf_addr
            self.Elf_sword = SBInt32
            self.Elf_xword = UBInt32 if self.elfclass == 32 else UBInt64
            self.Elf_sxword = SBInt32 if self.elfclass == 32 else SBInt64
        self._create_ehdr()
        self._create_leb128()
        self._create_ntbs()

    def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=None):
        """ Create all ELF structs except the ehdr. They may possibly depend
            on provided e_type and/or e_machine parsed from ehdr.
        """
        self.e_type = e_type
        self.e_machine = e_machine
        self.e_ident_osabi = e_ident_osabi

        self._create_phdr()
        self._create_shdr()
        self._create_chdr()
        self._create_sym()
        self._create_rel()
        self._create_dyn()
        self._create_sunw_syminfo()
        self._create_gnu_verneed()
        self._create_gnu_verdef()
        self._create_gnu_versym()
        self._create_gnu_abi()
        self._create_gnu_property()
        self._create_note(e_type)
        self._create_stabs()
        self._create_arm_attributes()
        self._create_elf_hash()
        self._create_gnu_hash()

    #-------------------------------- PRIVATE --------------------------------#

    def _create_ehdr(self):
        # ELF file header.
        self.Elf_Ehdr = Struct('Elf_Ehdr',
            Struct('e_ident',
                Array(4, self.Elf_byte('EI_MAG')),
                Enum(self.Elf_byte('EI_CLASS'), **ENUM_EI_CLASS),
                Enum(self.Elf_byte('EI_DATA'), **ENUM_EI_DATA),
                Enum(self.Elf_byte('EI_VERSION'), **ENUM_E_VERSION),
                Enum(self.Elf_byte('EI_OSABI'), **ENUM_EI_OSABI),
                self.Elf_byte('EI_ABIVERSION'),
                Padding(7)
            ),
            Enum(self.Elf_half('e_type'), **ENUM_E_TYPE),
            Enum(self.Elf_half('e_machine'), **ENUM_E_MACHINE),
            Enum(self.Elf_word('e_version'), **ENUM_E_VERSION),
            self.Elf_addr('e_entry'),
            self.Elf_offset('e_phoff'),
            self.Elf_offset('e_shoff'),
            self.Elf_word('e_flags'),
            self.Elf_half('e_ehsize'),
            self.Elf_half('e_phentsize'),
            self.Elf_half('e_phnum'),
            self.Elf_half('e_shentsize'),
            self.Elf_half('e_shnum'),
            self.Elf_half('e_shstrndx'),
        )

    def _create_leb128(self):
        # Unsigned LEB128 variable-length integer.
        self.Elf_uleb128 = ULEB128

    def _create_ntbs(self):
        # NUL-terminated byte string.
        self.Elf_ntbs = CString

    def _create_phdr(self):
        # Program header. p_type values are machine-specific for some
        # architectures.
        p_type_dict = ENUM_P_TYPE_BASE
        if self.e_machine == 'EM_ARM':
            p_type_dict = ENUM_P_TYPE_ARM
        elif self.e_machine == 'EM_AARCH64':
            p_type_dict = ENUM_P_TYPE_AARCH64
        elif self.e_machine == 'EM_MIPS':
            p_type_dict = ENUM_P_TYPE_MIPS

        if self.elfclass == 32:
            self.Elf_Phdr = Struct('Elf_Phdr',
                Enum(self.Elf_word('p_type'), **p_type_dict),
                self.Elf_offset('p_offset'),
                self.Elf_addr('p_vaddr'),
                self.Elf_addr('p_paddr'),
                self.Elf_word('p_filesz'),
                self.Elf_word('p_memsz'),
                self.Elf_word('p_flags'),
                self.Elf_word('p_align'),
            )
        else: # 64
            # Note the different field order for ELF64: p_flags comes right
            # after p_type.
            self.Elf_Phdr = Struct('Elf_Phdr',
                Enum(self.Elf_word('p_type'), **p_type_dict),
                self.Elf_word('p_flags'),
                self.Elf_offset('p_offset'),
                self.Elf_addr('p_vaddr'),
                self.Elf_addr('p_paddr'),
                self.Elf_xword('p_filesz'),
                self.Elf_xword('p_memsz'),
                self.Elf_xword('p_align'),
            )

    def _create_shdr(self):
        """Section header parsing.
        Depends on e_machine because of machine-specific values in sh_type.
        """
        sh_type_dict = ENUM_SH_TYPE_BASE
        if self.e_machine == 'EM_ARM':
            sh_type_dict = ENUM_SH_TYPE_ARM
        elif self.e_machine == 'EM_X86_64':
            sh_type_dict = ENUM_SH_TYPE_AMD64
        elif self.e_machine == 'EM_MIPS':
            sh_type_dict = ENUM_SH_TYPE_MIPS

        self.Elf_Shdr = Struct('Elf_Shdr',
            self.Elf_word('sh_name'),
            Enum(self.Elf_word('sh_type'), **sh_type_dict),
            self.Elf_xword('sh_flags'),
            self.Elf_addr('sh_addr'),
            self.Elf_offset('sh_offset'),
            self.Elf_xword('sh_size'),
            self.Elf_word('sh_link'),
            self.Elf_word('sh_info'),
            self.Elf_xword('sh_addralign'),
            self.Elf_xword('sh_entsize'),
        )

    def _create_chdr(self):
        # Structure of compressed sections header. It is documented in Oracle
        # "Linker and Libraries Guide", Part IV ELF Application Binary
        # Interface, Chapter 13 Object File Format, Section Compression:
        # https://docs.oracle.com/cd/E53394_01/html/E54813/section_compression.html
        fields = [
            Enum(self.Elf_word('ch_type'), **ENUM_ELFCOMPRESS_TYPE),
            self.Elf_xword('ch_size'),
            self.Elf_xword('ch_addralign'),
        ]
        if self.elfclass == 64:
            # ELF64 adds a reserved word after ch_type.
            fields.insert(1, self.Elf_word('ch_reserved'))
        self.Elf_Chdr = Struct('Elf_Chdr', *fields)

    def _create_rel(self):
        # r_info is also taken apart into r_info_sym and r_info_type. This is
        # done in Value to avoid endianity issues while parsing.
        if self.elfclass == 32:
            fields = [self.Elf_xword('r_info'),
                      Value('r_info_sym',
                            lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF),
                      Value('r_info_type',
                            lambda ctx: ctx['r_info'] & 0xFF)]
        elif self.e_machine == 'EM_MIPS': # ELF64 MIPS
            fields = [
                # The MIPS ELF64 specification
                # (https://www.linux-mips.org/pub/linux/mips/doc/ABI/elf64-2.4.pdf)
                # provides a non-standard relocation structure definition.
                self.Elf_word('r_sym'),
                self.Elf_byte('r_ssym'),
                self.Elf_byte('r_type3'),
                self.Elf_byte('r_type2'),
                self.Elf_byte('r_type'),

                # Synthetize usual fields for compatibility with other
                # architectures. This allows relocation consumers (including
                # our readelf tests) to work without worrying about MIPS64
                # oddities.
                Value('r_info_sym', lambda ctx: ctx['r_sym']),
                Value('r_info_ssym', lambda ctx: ctx['r_ssym']),
                Value('r_info_type', lambda ctx: ctx['r_type']),
                Value('r_info_type2', lambda ctx: ctx['r_type2']),
                Value('r_info_type3', lambda ctx: ctx['r_type3']),
                Value('r_info',
                      lambda ctx: (ctx['r_sym'] << 32)
                                  | (ctx['r_ssym'] << 24)
                                  | (ctx['r_type3'] << 16)
                                  | (ctx['r_type2'] << 8)
                                  | ctx['r_type']),
            ]
        else: # Other 64 ELFs
            fields = [self.Elf_xword('r_info'),
                      Value('r_info_sym',
                            lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF),
                      Value('r_info_type',
                            lambda ctx: ctx['r_info'] & 0xFFFFFFFF)]

        self.Elf_Rel = Struct('Elf_Rel',
            self.Elf_addr('r_offset'),
            *fields)

        # Elf_Rela is Elf_Rel plus an explicit addend.
        fields_and_addend = fields + [self.Elf_sxword('r_addend')]
        self.Elf_Rela = Struct('Elf_Rela',
            self.Elf_addr('r_offset'),
            *fields_and_addend
        )

    def _create_dyn(self):
        # Dynamic section entry. d_tag values can be machine- or OSABI-
        # specific.
        d_tag_dict = dict(ENUM_D_TAG_COMMON)
        if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE:
            d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine])
        elif self.e_ident_osabi == 'ELFOSABI_SOLARIS':
            d_tag_dict.update(ENUM_D_TAG_SOLARIS)

        self.Elf_Dyn = Struct('Elf_Dyn',
            Enum(self.Elf_sxword('d_tag'), **d_tag_dict),
            self.Elf_xword('d_val'),
            Value('d_ptr', lambda ctx: ctx['d_val']),
        )

    def _create_sym(self):
        # st_info is hierarchical. To access the type, use
        # container['st_info']['type']
        st_info_struct = BitStruct('st_info',
            Enum(BitField('bind', 4), **ENUM_ST_INFO_BIND),
            Enum(BitField('type', 4), **ENUM_ST_INFO_TYPE))
        # st_other is hierarchical. To access the visibility,
        # use container['st_other']['visibility']
        st_other_struct = BitStruct('st_other',
            # https://openpowerfoundation.org/wp-content/uploads/2016/03/ABI64BitOpenPOWERv1.1_16July2015_pub4.pdf
            # See 3.4.1 Symbol Values.
            Enum(BitField('local', 3), **ENUM_ST_LOCAL),
            Padding(2),
            Enum(BitField('visibility', 3), **ENUM_ST_VISIBILITY))
        if self.elfclass == 32:
            self.Elf_Sym = Struct('Elf_Sym',
                self.Elf_word('st_name'),
                self.Elf_addr('st_value'),
                self.Elf_word('st_size'),
                st_info_struct,
                st_other_struct,
                Enum(self.Elf_half('st_shndx'), **ENUM_ST_SHNDX),
            )
        else:
            # ELF64 reorders the fields: info/other/shndx precede value/size.
            self.Elf_Sym = Struct('Elf_Sym',
                self.Elf_word('st_name'),
                st_info_struct,
                st_other_struct,
                Enum(self.Elf_half('st_shndx'), **ENUM_ST_SHNDX),
                self.Elf_addr('st_value'),
                self.Elf_xword('st_size'),
            )

    def _create_sunw_syminfo(self):
        self.Elf_Sunw_Syminfo = Struct('Elf_Sunw_Syminfo',
            Enum(self.Elf_half('si_boundto'), **ENUM_SUNW_SYMINFO_BOUNDTO),
            self.Elf_half('si_flags'),
        )

    def _create_gnu_verneed(self):
        # Structure of "version needed" entries is documented in
        # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format
        self.Elf_Verneed = Struct('Elf_Verneed',
            self.Elf_half('vn_version'),
            self.Elf_half('vn_cnt'),
            self.Elf_word('vn_file'),
            self.Elf_word('vn_aux'),
            self.Elf_word('vn_next'),
        )
        self.Elf_Vernaux = Struct('Elf_Vernaux',
            self.Elf_word('vna_hash'),
            self.Elf_half('vna_flags'),
            self.Elf_half('vna_other'),
            self.Elf_word('vna_name'),
            self.Elf_word('vna_next'),
        )

    def _create_gnu_verdef(self):
        # Structure of "version definition" entries are documented in
        # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format
        self.Elf_Verdef = Struct('Elf_Verdef',
            self.Elf_half('vd_version'),
            self.Elf_half('vd_flags'),
            self.Elf_half('vd_ndx'),
            self.Elf_half('vd_cnt'),
            self.Elf_word('vd_hash'),
            self.Elf_word('vd_aux'),
            self.Elf_word('vd_next'),
        )
        self.Elf_Verdaux = Struct('Elf_Verdaux',
            self.Elf_word('vda_name'),
            self.Elf_word('vda_next'),
        )

    def _create_gnu_versym(self):
        # Structure of "version symbol" entries are documented in
        # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format
        self.Elf_Versym = Struct('Elf_Versym',
            Enum(self.Elf_half('ndx'), **ENUM_VERSYM),
        )

    def _create_gnu_abi(self):
        # Structure of GNU ABI notes is documented in
        # https://code.woboq.org/userspace/glibc/csu/abi-note.S.html
        self.Elf_abi = Struct('Elf_abi',
            Enum(self.Elf_word('abi_os'), **ENUM_NOTE_ABI_TAG_OS),
            self.Elf_word('abi_major'),
            self.Elf_word('abi_minor'),
            self.Elf_word('abi_tiny'),
        )

    def _create_gnu_property(self):
        # Structure of GNU property notes is documented in
        # https://github.com/hjl-tools/linux-abi/wiki/linux-abi-draft.pdf
        def roundup_padding(ctx):
            # Properties are padded to 4 bytes (ELF32) or 8 bytes (ELF64).
            if self.elfclass == 32:
                return roundup(ctx.pr_datasz, 2) - ctx.pr_datasz
            return roundup(ctx.pr_datasz, 3) - ctx.pr_datasz

        def classify_pr_data(ctx):
            # Switch key selecting how pr_data is parsed.
            if type(ctx.pr_type) is not str:
                return None
            if ctx.pr_type.startswith('GNU_PROPERTY_X86_'):
                return ('GNU_PROPERTY_X86_*', 4, 0)
            return (ctx.pr_type, ctx.pr_datasz, self.elfclass)

        self.Elf_Prop = Struct('Elf_Prop',
            Enum(self.Elf_word('pr_type'), **ENUM_NOTE_GNU_PROPERTY_TYPE),
            self.Elf_word('pr_datasz'),
            Switch('pr_data', classify_pr_data, {
                ('GNU_PROPERTY_STACK_SIZE', 4, 32): self.Elf_word('pr_data'),
                ('GNU_PROPERTY_STACK_SIZE', 8, 64): self.Elf_word64('pr_data'),
                ('GNU_PROPERTY_X86_*', 4, 0): self.Elf_word('pr_data'),
                },
                default=Field('pr_data', lambda ctx: ctx.pr_datasz)
            ),
            Padding(roundup_padding)
        )

    def _create_note(self, e_type=None):
        # Structure of "PT_NOTE" section
        # Some 32-bit machines store uid/gid in half-words.
        self.Elf_ugid = self.Elf_half if self.elfclass == 32 and self.e_machine in {
            'EM_MN10300',
            'EM_ARM',
            'EM_CRIS',
            'EM_CYGNUS_FRV',
            'EM_386',
            'EM_M32R',
            'EM_68K',
            'EM_S390',
            'EM_SH',
            'EM_SPARC',
        } else self.Elf_word
        self.Elf_Nhdr = Struct('Elf_Nhdr',
            self.Elf_word('n_namesz'),
            self.Elf_word('n_descsz'),
            Enum(self.Elf_word('n_type'),
                 **(ENUM_NOTE_N_TYPE if e_type != "ET_CORE"
                    else ENUM_CORE_NOTE_N_TYPE)),
        )

        # A process psinfo structure according to
        # http://elixir.free-electrons.com/linux/v2.6.35/source/include/linux/elfcore.h#L84
        if self.elfclass == 32:
            self.Elf_Prpsinfo = Struct('Elf_Prpsinfo',
                self.Elf_byte('pr_state'),
                String('pr_sname', 1),
                self.Elf_byte('pr_zomb'),
                self.Elf_byte('pr_nice'),
                self.Elf_xword('pr_flag'),
                self.Elf_ugid('pr_uid'),
                self.Elf_ugid('pr_gid'),
                self.Elf_word('pr_pid'),
                self.Elf_word('pr_ppid'),
                self.Elf_word('pr_pgrp'),
                self.Elf_word('pr_sid'),
                String('pr_fname', 16),
                String('pr_psargs', 80),
            )
        else: # 64
            self.Elf_Prpsinfo = Struct('Elf_Prpsinfo',
                self.Elf_byte('pr_state'),
                String('pr_sname', 1),
                self.Elf_byte('pr_zomb'),
                self.Elf_byte('pr_nice'),
                Padding(4),
                self.Elf_xword('pr_flag'),
                self.Elf_ugid('pr_uid'),
                self.Elf_ugid('pr_gid'),
                self.Elf_word('pr_pid'),
                self.Elf_word('pr_ppid'),
                self.Elf_word('pr_pgrp'),
                self.Elf_word('pr_sid'),
                String('pr_fname', 16),
                String('pr_psargs', 80),
            )

        # A PT_NOTE of type NT_FILE matching the definition in
        # https://chromium.googlesource.com/
        # native_client/nacl-binutils/+/upstream/master/binutils/readelf.c
        # Line 15121
        self.Elf_Nt_File = Struct('Elf_Nt_File',
            self.Elf_xword("num_map_entries"),
            self.Elf_xword("page_size"),
            Array(lambda ctx: ctx.num_map_entries,
                  Struct('Elf_Nt_File_Entry',
                         self.Elf_addr('vm_start'),
                         self.Elf_addr('vm_end'),
                         self.Elf_offset('page_offset'))),
            Array(lambda ctx: ctx.num_map_entries,
                  CString('filename')))

    def _create_stabs(self):
        # Structure of one stabs entry, see binutils/bfd/stabs.c
        # Names taken from https://sourceware.org/gdb/current/onlinedocs/stabs.html#Overview
        self.Elf_Stabs = Struct('Elf_Stabs',
            self.Elf_word('n_strx'),
            self.Elf_byte('n_type'),
            self.Elf_byte('n_other'),
            self.Elf_half('n_desc'),
            self.Elf_word('n_value'),
        )

    def _create_arm_attributes(self):
        # Structure of a build attributes subsection header. A subsection is
        # either public to all tools that process the ELF file or private to
        # the vendor's tools.
        self.Elf_Attr_Subsection_Header = Struct('Elf_Attr_Subsection',
            self.Elf_word('length'),
            self.Elf_ntbs('vendor_name',
                          encoding='utf-8')
        )

        # Structure of a build attribute tag.
        self.Elf_Attribute_Tag = Struct('Elf_Attribute_Tag',
            Enum(self.Elf_uleb128('tag'),
                 **ENUM_ATTR_TAG_ARM)
        )

    def _create_elf_hash(self):
        # Structure of the old SYSV-style hash table header. It is documented
        # in the Oracle "Linker and Libraries Guide", Part IV ELF Application
        # Binary Interface, Chapter 14 Object File Format, Section Hash Table
        # Section:
        # https://docs.oracle.com/cd/E53394_01/html/E54813/chapter6-48031.html
        self.Elf_Hash = Struct('Elf_Hash',
            self.Elf_word('nbuckets'),
            self.Elf_word('nchains'),
            Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')),
            Array(lambda ctx: ctx['nchains'], self.Elf_word('chains')))

    def _create_gnu_hash(self):
        # Structure of the GNU-style hash table header. Documentation for this
        # table is mostly in the GLIBC source code, a good explanation of the
        # format can be found in this blog post:
        # https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/
        self.Gnu_Hash = Struct('Gnu_Hash',
            self.Elf_word('nbuckets'),
            self.Elf_word('symoffset'),
            self.Elf_word('bloom_size'),
            self.Elf_word('bloom_shift'),
            Array(lambda ctx: ctx['bloom_size'], self.Elf_xword('bloom')),
            Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')))

View File

@@ -1,116 +0,0 @@
#-------------------------------------------------------------------------------
# elftools example: dwarf_decode_address.py
#
# Decode an address in an ELF file to find out which function it belongs to
# and from which filename/line it comes in the original source file.
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from __future__ import print_function
import sys
# If pyelftools is not installed, the example can also run from the root or
# examples/ dir of the source distribution.
sys.path[0:0] = ['.', '..']
from elftools.common.py3compat import maxint, bytes2str
from elftools.dwarf.descriptions import describe_form_class
from elftools.elf.elffile import ELFFile
def process_file(filename, address):
    """ Decode *address* in the ELF file *filename* and print the
        function, source file and line it belongs to.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return
        # get_dwarf_info returns a DWARFInfo context object, the starting
        # point for all DWARF-based processing in pyelftools.
        dwarfinfo = elf.get_dwarf_info()
        funcname = decode_funcname(dwarfinfo, address)
        srcfile, line = decode_file_line(dwarfinfo, address)
        print('Function:', bytes2str(funcname))
        print('File:', bytes2str(srcfile))
        print('Line:', line)
def decode_funcname(dwarfinfo, address):
    """ Return the DW_AT_name of the subprogram DIE whose
        [DW_AT_low_pc, high_pc) range contains *address*, or None when no
        subprogram matches. Subprograms with split address ranges are
        deliberately not considered.
    """
    for cu in dwarfinfo.iter_CUs():
        for die in cu.iter_DIEs():
            if die.tag != 'DW_TAG_subprogram':
                continue
            try:
                lowpc = die.attributes['DW_AT_low_pc'].value
                highpc_attr = die.attributes['DW_AT_high_pc']
            except KeyError:
                # Subprogram without a usable address range.
                continue
            # DWARF v4 section 2.17: DW_AT_high_pc is an absolute address
            # when its form class is 'address' (like DW_AT_low_pc), and an
            # offset from DW_AT_low_pc when the class is 'constant'.
            form_class = describe_form_class(highpc_attr.form)
            if form_class == 'address':
                highpc = highpc_attr.value
            elif form_class == 'constant':
                highpc = lowpc + highpc_attr.value
            else:
                print('Error: invalid DW_AT_high_pc class:',
                      form_class)
                continue
            if lowpc <= address < highpc:
                try:
                    return die.attributes['DW_AT_name'].value
                except KeyError:
                    continue
    return None
def decode_file_line(dwarfinfo, address):
    """ Return a (filename, line) pair for *address*, found by walking the
        DWARF line programs, or (None, None) when no program covers it.
    """
    for cu in dwarfinfo.iter_CUs():
        # Look at this CU's line program to find the file/line.
        lineprog = dwarfinfo.line_program_for_CU(cu)
        prevstate = None
        for entry in lineprog.get_entries():
            # Only entries that assign a new state are interesting.
            if entry.state is None:
                continue
            # A match is an address that falls between two consecutive
            # states' addresses.
            if prevstate and prevstate.address <= address < entry.state.address:
                filename = lineprog['file_entry'][prevstate.file - 1].name
                return filename, prevstate.line
            if entry.state.end_sequence:
                # For an `end_sequence` state, `address` is the first byte
                # after the machine instruction sequence and everything
                # else is meaningless, so drop the state rather than carry
                # it into the next iteration's comparison.
                prevstate = None
            else:
                prevstate = entry.state
    return None, None
if __name__ == '__main__':
    # Guard the '--test' check with a length test: indexing sys.argv[1]
    # unconditionally raised IndexError (instead of printing the usage
    # message below) whenever the script was run without arguments.
    if len(sys.argv) > 2 and sys.argv[1] == '--test':
        process_file(sys.argv[2], 0x400503)
        sys.exit(0)
    if len(sys.argv) < 3:
        print('Expected usage: {0} <address> <executable>'.format(sys.argv[0]))
        sys.exit(1)
    addr = int(sys.argv[1], 0)
    process_file(sys.argv[2], addr)

View File

@@ -1,66 +0,0 @@
#-------------------------------------------------------------------------------
# elftools example: dwarf_die_tree.py
#
# In the .debug_info section, Dwarf Information Entries (DIEs) form a tree.
# pyelftools provides easy access to this tree, as demonstrated here.
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from __future__ import print_function
import sys
# If pyelftools is not installed, the example can also run from the root or
# examples/ dir of the source distribution.
sys.path[0:0] = ['.', '..']
from elftools.elf.elffile import ELFFile
def process_file(filename):
    """ Print the DIE tree of every compile unit found in *filename*. """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return
        # get_dwarf_info returns a DWARFInfo context object, the starting
        # point for all DWARF-based processing in pyelftools.
        dwarfinfo = elf.get_dwarf_info()
        for cu in dwarfinfo.iter_CUs():
            # Each CU is a CompileUnit object: computed attributes (such
            # as its section offset) plus a DWARF-standard header whose
            # elements are reached via item-lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                cu.cu_offset, cu['unit_length']))
            # The top DIE is the root of this CU's DIE tree.
            top_die = cu.get_top_DIE()
            print(' Top DIE with tag=%s' % top_die.tag)
            # The root carries the compile unit's file name.
            print(' name=%s' % top_die.get_full_path())
            # Recursively display the whole tree.
            die_info_rec(top_die)
def die_info_rec(die, indent_level=' '):
    """ Recursively print a DIE's tag, then those of all its children,
        indenting one extra level per generation.
    """
    print(indent_level + 'DIE tag=%s' % die.tag)
    deeper = indent_level + ' '
    for kid in die.iter_children():
        die_info_rec(kid, deeper)
if __name__ == '__main__':
    # Check the argument count before indexing: a bare sys.argv[1] raised
    # IndexError when the script was invoked with no arguments.
    if len(sys.argv) > 1 and sys.argv[1] == '--test':
        for filename in sys.argv[2:]:
            process_file(filename)

View File

@@ -1,95 +0,0 @@
#-------------------------------------------------------------------------------
# elftools example: dwarf_lineprogram_filenames.py
#
# In the .debug_line section, the Dwarf line program generates a matrix
# of address-source references. This example demonstrates accessing the state
# of each line program entry to retrieve the underlying filenames.
#
# William Woodruff (william@yossarian.net)
# This code is in the public domain
#-------------------------------------------------------------------------------
from __future__ import print_function
from collections import defaultdict
import os
import sys
# If pyelftools is not installed, the example can also run from the root or
# examples/ dir of the source distribution.
sys.path[0:0] = ['.', '..']
from elftools.elf.elffile import ELFFile
def process_file(filename):
    """ For each compile unit in *filename*, report how many line program
        entries reference each source file.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return
        dwarfinfo = elf.get_dwarf_info()
        for cu in dwarfinfo.iter_CUs():
            print(' Found a compile unit at offset %s, length %s' % (
                cu.cu_offset, cu['unit_length']))
            # A compilation unit may or may not have a corresponding line
            # program in .debug_line.
            line_program = dwarfinfo.line_program_for_CU(cu)
            if line_program is None:
                print(' DWARF info is missing a line program for this CU')
                continue
            # Print the reverse mapping of filename -> #entries.
            line_entry_mapping(line_program)
def line_entry_mapping(line_program):
    """ Decode *line_program* and print, per referenced filename, the
        number of line program entries that point at it.
    """
    filename_map = defaultdict(int)
    # Each decoded line program entry carries a state; use it to build a
    # reverse mapping of filename -> #entries.
    for lpe in line_program.get_entries():
        # Entries without a state, or without an associated file, come
        # from instructions that correspond to no original source file --
        # skip them.
        if not lpe.state or lpe.state.file == 0:
            continue
        filename_map[lpe_filename(line_program, lpe.state.file)] += 1
    for filename, lpe_count in filename_map.items():
        print(" filename=%s -> %d entries" % (filename, lpe_count))
def lpe_filename(line_program, file_index):
    """ Resolve a line program entry's file index to a filename.

        Two indirections are involved: *file_index* selects a file_entry
        from the line program header, and that entry's directory index
        selects a directory name from the same header; the two are joined
        into the full filename.
    """
    header = line_program.header
    # File and directory indices are 1-indexed.
    file_entry = header["file_entry"][file_index - 1]
    dir_index = file_entry["dir_index"]
    if dir_index == 0:
        # dir_index 0 means no absolute directory was recorded during
        # compilation; only the basename is available.
        return file_entry.name.decode()
    directory = header["include_directory"][dir_index - 1]
    return os.path.join(directory, file_entry.name).decode()
if __name__ == '__main__':
    # Check the argument count before indexing: a bare sys.argv[1] raised
    # IndexError when the script was invoked with no arguments.
    if len(sys.argv) > 1 and sys.argv[1] == '--test':
        for filename in sys.argv[2:]:
            process_file(filename)

View File

@@ -1,111 +0,0 @@
#-------------------------------------------------------------------------------
# elftools example: dwarf_location_info.py
#
# Examine DIE entries which have either location list values or location
# expression values and decode that information.
#
# Location information can either be completely contained within a DIE
# (using 'DW_FORM_exprloc' in DWARFv4 or 'DW_FORM_block1' in earlier
# versions) or be a reference to a location list contained within
# the .debug_loc section (using 'DW_FORM_sec_offset' in DWARFv4 or
# 'DW_FORM_data4' / 'DW_FORM_data8' in earlier versions).
#
# The LocationParser object parses the DIE attributes and handles both
# formats.
#
# The directory 'test/testfiles_for_location_info' contains test files with
# location information represented in both DWARFv4 and DWARFv2 forms.
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from __future__ import print_function
import sys
# If pyelftools is not installed, the example can also run from the root or
# examples/ dir of the source distribution.
sys.path[0:0] = ['.', '..']
from elftools.common.py3compat import itervalues
from elftools.elf.elffile import ELFFile
from elftools.dwarf.descriptions import (
describe_DWARF_expr, set_global_machine_arch)
from elftools.dwarf.locationlists import (
LocationEntry, LocationExpr, LocationParser)
def process_file(filename):
    """ For every DIE attribute in *filename* that carries location
        information, print the decoded location expression(s).
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return
        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()
        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()
        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())
        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)
        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object with computed attributes (such as
            # its offset in .debug_info) and a DWARF-standard header whose
            # elements are reached via item-lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))
            # A CU provides a simple API to iterate over all the DIEs in it.
            for DIE in CU.iter_DIEs():
                # Each attribute is an AttributeValue object (from
                # elftools.dwarf.die) that we can examine.
                for attr in itervalues(DIE.attributes):
                    # Whether an attribute holds location information is
                    # version-dependent, so the CU's DWARF version is
                    # passed along with the attribute.
                    if loc_parser.attribute_has_location(attr, CU['version']):
                        print(' DIE %s. attr %s.' % (DIE.tag, attr.name))
                        loc = loc_parser.parse_from_attribute(attr,
                                                              CU['version'])
                        # The parser yields either a LocationExpr (the
                        # attribute itself contained the location
                        # expression) or a list (the attribute referenced
                        # a location list in .debug_loc).
                        if isinstance(loc, LocationExpr):
                            print(' %s' % (
                                describe_DWARF_expr(loc.loc_expr,
                                                    dwarfinfo.structs, CU.cu_offset)))
                        elif isinstance(loc, list):
                            print(show_loclist(loc,
                                               dwarfinfo,
                                               ' ', CU.cu_offset))
def show_loclist(loclist, dwarfinfo, indent, cu_offset):
    """ Render a location list as text, decoding the DWARF expression of
        every LocationEntry; each output line is prefixed with *indent*.
    """
    rendered = []
    for loc_entity in loclist:
        if isinstance(loc_entity, LocationEntry):
            decoded = describe_DWARF_expr(loc_entity.loc_expr,
                                          dwarfinfo.structs, cu_offset)
            rendered.append('%s <<%s>>' % (loc_entity, decoded))
        else:
            rendered.append(str(loc_entity))
    return '\n'.join(indent + s for s in rendered)
if __name__ == '__main__':
    # Check the argument count before indexing: a bare sys.argv[1] raised
    # IndexError when the script was invoked with no arguments.
    if len(sys.argv) > 1 and sys.argv[1] == '--test':
        for filename in sys.argv[2:]:
            process_file(filename)

View File

@@ -1,116 +0,0 @@
#-------------------------------------------------------------------------------
# elftools example: dwarf_pubnames_types.py
#
# Dump the contents of .debug_pubnames and .debug_pubtypes sections from the
# ELF file.
#
# Note: sample_exe64.elf doesn't have a .debug_pubtypes section.
#
# Vijay Ramasami (rvijayc@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from __future__ import print_function
import sys
# If pyelftools is not installed, the example can also run from the root or
# examples/ dir of the source distribution.
sys.path[0:0] = ['.', '..']
from elftools.elf.elffile import ELFFile
from elftools.common.py3compat import bytes2str
def _print_entry_die(dwarfinfo, name, entry, dump_tree):
    # Locate and print the DIE a pubnames/pubtypes entry points at; when
    # dump_tree is True, also dump that DIE's subtree.
    print('Fetching the actual die for %s ...' % name)
    for cu in dwarfinfo.iter_CUs():
        if cu.cu_offset == entry.cu_ofs:
            for die in cu.iter_DIEs():
                if die.offset == entry.die_ofs:
                    print('Die Name: %s' %
                          bytes2str(die.attributes['DW_AT_name'].value))
                    if dump_tree:
                        die_info_rec(die)


def _dump_table(entries, section_name):
    # Dump all (symbol, cu_ofs, die_ofs) rows of a pubnames/pubtypes
    # section as a fixed-width table.
    print('Dumping %s table ...' % section_name)
    print('-' * 66)
    print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS'))
    print('-' * 66)
    for (name, entry) in entries.items():
        print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs))
    print('-' * 66)


def process_file(filename):
    """ Dump the contents of the .debug_pubnames and .debug_pubtypes
        sections of the ELF file *filename*.

        The pubnames and pubtypes branches previously duplicated the
        DIE-fetch and table-dump logic almost verbatim; both now share the
        _print_entry_die and _dump_table helpers. Output is unchanged.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return
        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()
        # .debug_pubnames section.
        pubnames = dwarfinfo.get_pubnames()
        if pubnames is None:
            print('ERROR: No .debug_pubnames section found in ELF.')
        else:
            print('%d entries found in .debug_pubnames' % len(pubnames))
            print('Trying pubnames example ...')
            for name, entry in pubnames.items():
                print('%s: cu_ofs = %d, die_ofs = %d' %
                      (name, entry.cu_ofs, entry.die_ofs))
                _print_entry_die(dwarfinfo, name, entry, dump_tree=False)
            _dump_table(pubnames, '.debug_pubnames')
        # .debug_pubtypes section.
        pubtypes = dwarfinfo.get_pubtypes()
        if pubtypes is None:
            print('ERROR: No .debug_pubtypes section found in ELF')
        else:
            print('%d entries found in .debug_pubtypes' % len(pubtypes))
            for name, entry in pubtypes.items():
                print('%s: cu_ofs = %d, die_ofs = %d' %
                      (name, entry.cu_ofs, entry.die_ofs))
                # pubtypes additionally dumps the DIE tree for each entry.
                _print_entry_die(dwarfinfo, name, entry, dump_tree=True)
            _dump_table(pubtypes, '.debug_pubtypes')
def die_info_rec(die, indent_level=' '):
    """ Recursively print a DIE's tag and attributes, then those of all
        its children, one indent level deeper per generation.
    """
    print(indent_level + 'DIE tag=%s, attrs=' % die.tag)
    for name, val in die.attributes.items():
        print(indent_level + ' %s = %s' % (name, val))
    deeper = indent_level + ' '
    for kid in die.iter_children():
        die_info_rec(kid, deeper)
if __name__ == '__main__':
    # Guard the '--test' check with a length test: indexing sys.argv[1]
    # unconditionally raised IndexError (instead of printing the usage
    # message below) whenever the script was run without arguments.
    if len(sys.argv) > 2 and sys.argv[1] == '--test':
        process_file(sys.argv[2])
        sys.exit(0)
    if len(sys.argv) < 2:
        print('Expected usage: {0} <executable>'.format(sys.argv[0]))
        sys.exit(1)
    process_file(sys.argv[1])

Some files were not shown because too many files have changed in this diff Show More