diff --git a/codecut-gui/ghidra_scripts/ModNamingRun.py b/codecut-gui/ghidra_scripts/ModNamingRun.py index 1a20eb8..57e6bf7 100644 --- a/codecut-gui/ghidra_scripts/ModNamingRun.py +++ b/codecut-gui/ghidra_scripts/ModNamingRun.py @@ -1,5 +1,5 @@ # @category CodeCut -# @menupath CodeCut.DeepCut (Run) +# @menupath CodeCut.ModNaming (Run) # @toolbar codecut.png # @runtime PyGhidra diff --git a/codecut-gui/ghidra_scripts/modnaming.py b/codecut-gui/ghidra_scripts/modnaming.py index c17d736..a2bfc8a 100644 --- a/codecut-gui/ghidra_scripts/modnaming.py +++ b/codecut-gui/ghidra_scripts/modnaming.py @@ -43,33 +43,33 @@ def debug_print(x): #string_range_tokenize(t): #Take a long string and convert it into a list of tokens. If using a separator, this will appear in the token list def string_range_tokenize(t): - - #print "string_range_tokenize: raw text:" - #print t - #remove printf/sprintf format strings - #tc = re.sub("%[0-9A-Za-z]+"," ",t) - #convert dash to underscore - #tc = re.sub("-","_",tc) - #replace _ and / with space - may want to turn this off sometimes - #this will break up snake case and paths - #problem is that if you have a path that is used throughout the binary it will probably dominate results - #tc = re.sub("_"," ",tc) - #replace / and \\ with a space - #tc = re.sub("[/\\\\]"," ",tc) - #remove anything except alphanumeric, spaces, . (for .c, .cpp, etc) and _ - #tc = re.sub("[^A-Za-z0-9_\.\s]"," ",tc) - - #lowercase it - and store this as the original set of tokens to work with - tokens = [tk.lower() for tk in t.split()] - - #remove English stop words - #this is the list from the MIT *bow project - eng_stopw = {"about","all","am","an","and","are","as","at","be","been","but","by","can","cannot","did","do","does","doing","done","for","from","had","has","have","having","if","in","is","it","its","of","on","that","the","these","they","this","those","to","too","want","wants","was","what","which","will","with","would"} - #remove "code" stop words - #e.g. common words in debugging strings - code_sw = {"error","err","errlog","log","return","returned","byte","bytes","status","len","length","size","ok","0x","warning","fail","failed","failure","invalid","illegal","param","parameter","done","complete","assert","assertion","cant","didnt","class","foundation","cdecl","stdcall","thiscall"} - #remove code stop words (from Joxean Koret's "IDAMagicStrings") - jk_sw = {"copyright", "char", "bool", "int", "unsigned", "long", + + #print "string_range_tokenize: raw text:" + #print t + #remove printf/sprintf format strings + #tc = re.sub("%[0-9A-Za-z]+"," ",t) + #convert dash to underscore + #tc = re.sub("-","_",tc) + #replace _ and / with space - may want to turn this off sometimes + #this will break up snake case and paths + #problem is that if you have a path that is used throughout the binary it will probably dominate results + #tc = re.sub("_"," ",tc) + #replace / and \\ with a space + #tc = re.sub("[/\\\\]"," ",tc) + #remove anything except alphanumeric, spaces, . (for .c, .cpp, etc) and _ + #tc = re.sub("[^A-Za-z0-9_\.\s]"," ",tc) + + #lowercase it - and store this as the original set of tokens to work with + tokens = [tk.lower() for tk in t.split()] + + #remove English stop words + #this is the list from the MIT *bow project + eng_stopw = {"about","all","am","an","and","are","as","at","be","been","but","by","can","cannot","did","do","does","doing","done","for","from","had","has","have","having","if","in","is","it","its","of","on","that","the","these","they","this","those","to","too","want","wants","was","what","which","will","with","would"} + #remove "code" stop words + #e.g. common words in debugging strings + code_sw = {"error","err","errlog","log","return","returned","byte","bytes","status","len","length","size","ok","0x","warning","fail","failed","failure","invalid","illegal","param","parameter","done","complete","assert","assertion","cant","didnt","class","foundation","cdecl","stdcall","thiscall"} + #remove code stop words (from Joxean Koret's "IDAMagicStrings") + jk_sw = {"copyright", "char", "bool", "int", "unsigned", "long", "double", "float", "signed", "license", "version", "cannot", "error", "invalid", "null", "warning", "general", "argument", "written", "report", "failed", "assert", "object", "integer", "unknown", "localhost", "native", @@ -85,18 +85,18 @@ def string_range_tokenize(t): "corrupted", "default", "success", "expecting", "missing", "phrase", "unrecognized", "undefined"} - stopw = eng_stopw.union(code_sw) - stopw = stopw.union(jk_sw) + stopw = eng_stopw.union(code_sw) + stopw = stopw.union(jk_sw) - c = 0 - - tokens_f = [] - - for t in tokens: - if t not in stopw: - tokens_f.append(t) - - return tokens_f + c = 0 + + tokens_f = [] + + for t in tokens: + if t not in stopw: + tokens_f.append(t) + + return tokens_f #bracket_strings(t,b_brack,e_brack): #Return the most common string in the text that begins with b_brack and ends with e_brack @@ -105,36 +105,36 @@ def string_range_tokenize(t): #This function is called by guess_module_names() - if you see this format with different brackets #you can edit that call def bracket_strings(t, b_brack,e_brack, sep): - #sep = "tzvlw" - #t = basicutils.CompileTextFromRange(start,end,sep) - tokens = [tk.lower() for tk in t.split(sep)] - #don't want to use tokenize here because it removes brackets - - b=[] - for tk in tokens: - tk = tk.strip() - - if tk.startswith(b_brack) : - b_contents = tk[1:tk.find(e_brack)] - #print("found bracket string, content: %s" % b_contents) - #Hack to get rid of [-],[+],[*] - could also try to remove non alpha - if (len(b_contents) > 3): - #Hack for debug prints that started with [0x%x] - if (b_contents != "0x%x"): - b.append(b_contents) - - debug_print("bracket_strings tokens:") - debug_print(tokens) - debug_print(b) - - u_gram="" - u_gram_score=0 - if (len(b) > 0): - f = nltk.FreqDist(b) - u_gram = f.most_common(1)[0][0] - u_gram_score = f.most_common(1)[0][1] - - return (u_gram,u_gram_score) + #sep = "tzvlw" + #t = basicutils.CompileTextFromRange(start,end,sep) + tokens = [tk.lower() for tk in t.split(sep)] + #don't want to use tokenize here because it removes brackets + + b=[] + for tk in tokens: + tk = tk.strip() + + if tk.startswith(b_brack): + b_contents = tk[1:tk.find(e_brack)] if e_brack in tk else tk[1:] + #print("found bracket string, content: %s" % b_contents) + #Hack to get rid of [-],[+],[*] - could also try to remove non alpha + if (len(b_contents) > 3): + #Hack for debug prints that started with [0x%x] + if (b_contents != "0x%x"): + b.append(b_contents) + + debug_print("bracket_strings tokens:") + debug_print(tokens) + debug_print(b) + + u_gram="" + u_gram_score=0 + if (len(b) > 0): + f = nltk.FreqDist(b) + u_gram = f.most_common(1)[0][0] + u_gram_score = f.most_common(1)[0][1] + + return (u_gram,u_gram_score) #is_source_file_str(f): #return True if the file string ends with one of the source file extensions @@ -159,53 +159,53 @@ def is_source_file_str(f): #Return the most common string that looks like a source file name in the given text string # The count of how many times this string appeared is also returned def source_file_strings(t, sep): - #sep = "tzvlw" - #t = basicutils.CompileTextFromRange(start,end,sep) - #normally would do lower here to normalize but we lose camel case that way - tokens = [tk for tk in t.split(sep)] - - #for each string, remove quotes and commas, then tokenize based on spaces to generate the final list - tokens2=[] - for tk in tokens: - tk = tk.strip() - #strip punctuation, need to leave in _ for filenames and / and \ for paths - tk = re.sub("[\"\',]"," ",tk) - for tk2 in tk.split(" "): - tokens2.append(tk2) + #sep = "tzvlw" + #t = basicutils.CompileTextFromRange(start,end,sep) + #normally would do lower here to normalize but we lose camel case that way + tokens = [tk for tk in t.split(sep)] + + #for each string, remove quotes and commas, then tokenize based on spaces to generate the final list + tokens2=[] + for tk in tokens: + tk = tk.strip() + #strip punctuation, need to leave in _ for filenames and / and \ for paths + tk = re.sub("[\"\',]"," ",tk) + for tk2 in tk.split(" "): + tokens2.append(tk2) - debug_print("source_file_strings tokens2:") - debug_print(tokens2) + debug_print("source_file_strings tokens2:") + debug_print(tokens2) - b=[] - for tk in tokens2: - tk = tk.strip() - if is_source_file_str(tk): - #If there's a dir path, only use the end filename - #This could be tweaked if the directory structure is part of the software architecture - #e.g. if there are multiple source directories with meaningful names - if tk.rfind("/") != -1: - ntk = tk[tk.rfind("/")+1:] - elif tk.rfind("\\") != -1: - ntk = tk[tk.rfind("\\")+1:] - else: - ntk = tk - b.append(ntk) - - debug_print("source_file_strings tokens:") - debug_print(tokens) - debug_print(b) - - #a better way to do this (if there are multiple) - #would be to sort, uniquify, and then make the name foo.c_and_bar.c - u_gram="" - u_gram_score=0 - if (len(b) > 0): - f = nltk.FreqDist(b) - u_gram = f.most_common(1)[0][0] - u_gram_score = f.most_common(1)[0][1] - - return (u_gram,u_gram_score) - + b=[] + for tk in tokens2: + tk = tk.strip() + if is_source_file_str(tk): + #If there's a dir path, only use the end filename + #This could be tweaked if the directory structure is part of the software architecture + #e.g. if there are multiple source directories with meaningful names + if tk.rfind("/") != -1: + ntk = tk[tk.rfind("/")+1:] + elif tk.rfind("\\") != -1: + ntk = tk[tk.rfind("\\")+1:] + else: + ntk = tk + b.append(ntk) + + debug_print("source_file_strings tokens:") + debug_print(tokens) + debug_print(b) + + #a better way to do this (if there are multiple) + #would be to sort, uniquify, and then make the name foo.c_and_bar.c + u_gram="" + u_gram_score=0 + if (len(b) > 0): + f = nltk.FreqDist(b) + u_gram = f.most_common(1)[0][0] + u_gram_score = f.most_common(1)[0][1] + + return (u_gram,u_gram_score) + #common_strings(t, sep): #Return a list of the common strings in the string "t" - lines separated by "sep" #Uses NLTK to generate a list of unigrams, bigrams, and trigrams (1 word, 2 word phrase, 3 word phrase) @@ -213,90 +213,90 @@ def source_file_strings(t, sep): #If the bigram score > 1/2 * unigram score, the most common bigram is used #Otherwise the most common unigram (single word is used) def common_strings(t,sep): - CS_THRESHOLD = 6 - - tokens = string_range_tokenize(t) - - #make a copy since we're going to edit it - u_tokens = tokens - c=0 - while (c 1) and (b_gram_score * 2 >= u_gram_score): - if (t_gram_score > 1) and (t_gram_score * 2 >= b_gram_score): - ret = t_str - ret_s = t_gram_score - else: - ret = b_str - ret_s = b_gram_score - else: - ret = u_gram - ret_s = u_gram_score - - return (ret,ret_s) + CS_THRESHOLD = 6 + + tokens = string_range_tokenize(t) + + #make a copy since we're going to edit it + u_tokens = tokens + c=0 + while (c 1) and (b_gram_score * 2 >= u_gram_score): + if (t_gram_score > 1) and (t_gram_score * 2 >= b_gram_score): + ret = t_str + ret_s = t_gram_score + else: + ret = b_str + ret_s = b_gram_score + else: + ret = u_gram + ret_s = u_gram_score + + return (ret,ret_s) -### End of NLP Section ### +### End of NLP Section ### @@ -308,44 +308,44 @@ def common_strings(t,sep): #You can tweak the switchover thresholds below. def guess_module_names(t,sep): - #idea - make score threshold based on the size of the module - # (e.g. smaller modules should have a smaller threshold - C_SCORE_THRESHOLD = 4 #we need to see at least occurrences of a string set in order to pick that name - S_SCORE_THRESHOLD = 2 #if we see occurrences of foo.c we'll pick "foo.c" - B_SCORE_THRESHOLD = 2 #if we see occurrences of [foo] we'll pick "foo" + #idea - make score threshold based on the size of the module + # (e.g. smaller modules should have a smaller threshold + C_SCORE_THRESHOLD = 4 #we need to see at least occurrences of a string set in order to pick that name + S_SCORE_THRESHOLD = 2 #if we see occurrences of foo.c we'll pick "foo.c" + B_SCORE_THRESHOLD = 2 #if we see occurrences of [foo] we'll pick "foo" - # first look for strings that start with [FOO], (bracket strings) - # then look for strings that contain source files (.c,.cpp,etc.) - # then try common strings - # above thresholds can be tweaked - they represent the number of strings that have to be repeated - # in order to use that string as the module name - (name,scr) = bracket_strings(t,"[","]",sep) - debug_print("bracket name: %s score: %d" %(name, scr)) - #if (True): - if (scr < B_SCORE_THRESHOLD): - (name,scr) = source_file_strings(t,sep) - debug_print("source name: %s score: %d" % (name, scr)) - #if (True):e - if (scr < S_SCORE_THRESHOLD): - (name,scr) = common_strings(t,sep) - debug_print("common name: %s score: %d" % (name, scr)) - if (scr < C_SCORE_THRESHOLD): - #Couldn't come up with a name - name = "unknown" + # first look for strings that start with [FOO], (bracket strings) + # then look for strings that contain source files (.c,.cpp,etc.) + # then try common strings + # above thresholds can be tweaked - they represent the number of strings that have to be repeated + # in order to use that string as the module name + (name,scr) = bracket_strings(t,"[","]",sep) + debug_print("bracket name: %s score: %d" %(name, scr)) + #if (True): + if (scr < B_SCORE_THRESHOLD): + (name,scr) = source_file_strings(t,sep) + debug_print("source name: %s score: %d" % (name, scr)) + #if (True):e + if (scr < S_SCORE_THRESHOLD): + (name,scr) = common_strings(t,sep) + debug_print("common name: %s score: %d" % (name, scr)) + if (scr < C_SCORE_THRESHOLD): + #Couldn't come up with a name + name = "unknown" - return name + return name def main(): - #t="" - sep = "tzvlw" - # java side handles adding sep between strings, - # read all in at once (no newlines between strings) - #t = sys.stdin.readline() - t = input() - #print ("text in: %s" % t) - name = guess_module_names(t,sep) - print(name) + #t="" + sep = "tzvlw" + # java side handles adding sep between strings, + # read all in at once (no newlines between strings) + #t = sys.stdin.readline() + t = input() + #print ("text in: %s" % t) + name = guess_module_names(t,sep) + print(name) if __name__ == "__main__": - main() + main() diff --git a/codecut-gui/src/main/java/codecutguiv2/CodeCutGUIPlugin.java b/codecut-gui/src/main/java/codecutguiv2/CodeCutGUIPlugin.java index 7d6fc01..0c4666c 100644 --- a/codecut-gui/src/main/java/codecutguiv2/CodeCutGUIPlugin.java +++ b/codecut-gui/src/main/java/codecutguiv2/CodeCutGUIPlugin.java @@ -791,6 +791,31 @@ public class CodeCutGUIPlugin extends ProgramPlugin implements DomainObjectListe } + private class ModuleNamerV2 extends GhidraScript{ + Program program = GhidraProgramUtilities.getCurrentProgram(tool); + GhidraState state = new GhidraState(tool, tool.getProject(), program, null, null, null); + String start_addr; + String end_addr; + String path; + + public ModuleNamerV2(String start, String end, File file) { + this.start_addr = start; + this.end_addr = end; + } + @Override + public void run() { + String[] args = {start_addr, end_addr}; + try { + runScript("range.py", args); + + } catch (Exception e) { + e.printStackTrace(); + } + } + + } + + private void createExportActions() { //Need Decompiler extensions /* @@ -1255,6 +1280,7 @@ public class CodeCutGUIPlugin extends ProgramPlugin implements DomainObjectListe } } + private class OFileExporter extends GhidraScript{ GhidraState state; diff --git a/codecut-gui/src/main/java/codecutguiv2/ModNamingAnalyzer.java b/codecut-gui/src/main/java/codecutguiv2/ModNamingAnalyzer.java index f31037c..2bec1be 100644 --- a/codecut-gui/src/main/java/codecutguiv2/ModNamingAnalyzer.java +++ b/codecut-gui/src/main/java/codecutguiv2/ModNamingAnalyzer.java @@ -146,9 +146,6 @@ public class ModNamingAnalyzer { return allStrings; } - private String guessSingleModule(List strList) { - return ""; - } public void guessModuleNames() { Task guessNamesTask = new Task("Guess Module Names", true, true, true) { @@ -186,7 +183,7 @@ public class ModNamingAnalyzer { //if name is "unknown" (e.g. modnaming found no repeated strings) don't bother renaming if (suggestedName.equals("unknown")) { Msg.info(this, "No name guess found for module " + ns.getName() + ", leaving unchanged"); - break; + continue; } suggestedModuleNames.put(ns, suggestedName); @@ -204,16 +201,18 @@ public class ModNamingAnalyzer { num++; } Namespace newNs = null; - int transactionId = currentProgram.startTransaction("ns"); + + int transactionId = currentProgram.startTransaction("CreateNamespace"); + boolean success = false; try { - newNs = currentProgram.getSymbolTable().createNameSpace(ns.getParentNamespace(), newName, SourceType.USER_DEFINED); - Msg.info(this, "Created NS with new name " + newName + " for module " + ns.getName()); + newNs = currentProgram.getSymbolTable() + .createNameSpace(ns.getParentNamespace(), newName, SourceType.USER_DEFINED); + success = true; + } catch (DuplicateNameException ex) { + Msg.error(this, "Failed to create namespace for suggested name " + suggestedName, ex); + } finally { + currentProgram.endTransaction(transactionId, success); } - catch (DuplicateNameException ex) { - Msg.error(this, "Failed when trying to find and set name for suggested name " + suggestedName); - currentProgram.endTransaction(transactionId, false); - } - currentProgram.endTransaction(transactionId, true); try { CodecutUtils.renameNamespace(currentProgram, ns, newNs); diff --git a/deepcut-ghidra/ghidra_scripts/deepcut.py b/deepcut-ghidra/ghidra_scripts/deepcut.py index fb7f4ce..d4a5840 100644 --- a/deepcut-ghidra/ghidra_scripts/deepcut.py +++ b/deepcut-ghidra/ghidra_scripts/deepcut.py @@ -36,6 +36,7 @@ import torch from math import log2, copysign from networkx import DiGraph from scipy.linalg import toeplitz +from scipy.sparse import coo_matrix, diags, csr_matrix import GNN_Net @@ -106,38 +107,44 @@ class Deepcut: def _adjacency_matrix(self): num_funcs = len(self.graph.nodes) - A = np.zeros((num_funcs, num_funcs)) - for e, v in zip(self.graph_connectivity, self.predicted_labels): + # Build sparse adjacency from predicted edge scores + rows = [] + cols = [] + vals = [] + for (e, v) in zip(self.graph_connectivity, self.predicted_labels.flatten()): e0, e1 = e - A[e0, e1] = v + rows.append(e0) + cols.append(e1) + vals.append(float(v)) - A += A.T - A *= 0.5 + A = coo_matrix((vals, (rows, cols)), shape=(num_funcs, num_funcs)) - """ - add a small connection between adjacent nodes, - essentially to break ties in favor of merging communities - """ - x = np.zeros(num_funcs) - x[1] = 0.05 - A += toeplitz(x) + # Symmetrize and average: (A + A^T)/2 + A = (A + A.T).multiply(0.5).tocsr() - return A + # Add small off-diagonal connection (equivalent to toeplitz([0, 0.05, 0, ...])) + off = diags([0.05 * np.ones(num_funcs - 1), 0.05 * np.ones(num_funcs - 1)], + offsets=[-1, 1], shape=(num_funcs, num_funcs), format='csr') + A = (A + off).tocsr() + + return A # CSR sparse matrix def _modularity(self): - adj_matrix = self._adjacency_matrix() - # node degrees - k = np.sum(adj_matrix, axis=0) - - k2 = np.array([k]) - B = k2.T @ k2 - B /= 2 * np.sum(k2) - - Q = adj_matrix - B + A = self._adjacency_matrix() # sparse CSR + # node degrees (as dense 1D array for lightweight vector ops) + k = np.array(A.sum(axis=0)).ravel() + two_m = 2.0 * k.sum() # denominator used in modularity B term def compute_partial_modularity(start, stop): - return np.sum(Q[start:stop, start:stop]) + # Sum of A over the block [start:stop, start:stop] + A_block_sum = A[start:stop, start:stop].sum() + # Sum of degrees in the block + k_block_sum = k[start:stop].sum() + # Sum of B over the block: (sum_k_block)^2 / (2m) + B_block_sum = (k_block_sum * k_block_sum) / two_m if two_m > 0 else 0.0 + # Return sum(Q_block) = sum(A_block) - sum(B_block) + return float(A_block_sum) - float(B_block_sum) scores = [0.0] scores = np.array(scores) @@ -148,15 +155,15 @@ class Deepcut: for index in range(1, len(self.graph.nodes)): update = [compute_partial_modularity(i, index) for i in - range(max(0, index-max_cluster_size), index)] + range(max(0, index - max_cluster_size), index)] if index > max_cluster_size: - update = [0]*(index-max_cluster_size) + update + update = [0] * (index - max_cluster_size) + update updated_scores = scores + update i = np.argmax(updated_scores) if index > max_cluster_size: - i = np.argmax(updated_scores[index-max_cluster_size:])+ (index - max_cluster_size) + i = np.argmax(updated_scores[index - max_cluster_size:]) + (index - max_cluster_size) s = updated_scores[i] c = cuts[i] + [index] diff --git a/deepcut-ghidra/gradle/wrapper/gradle-wrapper.properties b/deepcut-ghidra/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 12d38de..0000000 --- a/deepcut-ghidra/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,5 +0,0 @@ -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-bin.zip -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists diff --git a/deepcut-ghidra/gradlew b/deepcut-ghidra/gradlew deleted file mode 100755 index 4f906e0..0000000 --- a/deepcut-ghidra/gradlew +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env sh - -# -# Copyright 2015 the original author or authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -############################################################################## -## -## Gradle start up script for UN*X -## -############################################################################## - -# Attempt to set APP_HOME -# Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi -done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null - -APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' - -# Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" - -warn () { - echo "$*" -} - -die () { - echo - echo "$*" - echo - exit 1 -} - -# OS specific support (must be 'true' or 'false'). -cygwin=false -msys=false -darwin=false -nonstop=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; - NONSTOP* ) - nonstop=true - ;; -esac - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" - else - JAVACMD="$JAVA_HOME/bin/java" - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD="java" - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." -fi - -# Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? -ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi -fi - -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi - -# For Cygwin or MSYS, switch paths to Windows format before running java -if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi - # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" - fi - i=`expr $i + 1` - done - case $i in - 0) set -- ;; - 1) set -- "$args0" ;; - 2) set -- "$args0" "$args1" ;; - 3) set -- "$args0" "$args1" "$args2" ;; - 4) set -- "$args0" "$args1" "$args2" "$args3" ;; - 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; - esac -fi - -# Escape application args -save () { - for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done - echo " " -} -APP_ARGS=`save "$@"` - -# Collect all arguments for the java command, following the shell quoting and substitution rules -eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" - -exec "$JAVACMD" "$@" diff --git a/deepcut-ghidra/gradlew.bat b/deepcut-ghidra/gradlew.bat deleted file mode 100644 index ac1b06f..0000000 --- a/deepcut-ghidra/gradlew.bat +++ /dev/null @@ -1,89 +0,0 @@ -@rem -@rem Copyright 2015 the original author or authors. -@rem -@rem Licensed under the Apache License, Version 2.0 (the "License"); -@rem you may not use this file except in compliance with the License. -@rem You may obtain a copy of the License at -@rem -@rem https://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, software -@rem distributed under the License is distributed on an "AS IS" BASIS, -@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -@rem See the License for the specific language governing permissions and -@rem limitations under the License. -@rem - -@if "%DEBUG%" == "" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Resolve any "." and ".." in APP_HOME to make it shorter. -for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto execute - -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto execute - -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* - -:end -@rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega diff --git a/deepcut-ghidra/src/main/help/help/TOC_Source.xml b/deepcut-ghidra/src/main/help/help/TOC_Source.xml deleted file mode 100644 index a34f62e..0000000 --- a/deepcut-ghidra/src/main/help/help/TOC_Source.xml +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - - diff --git a/deepcut-ghidra/src/main/help/help/shared/Frontpage.css b/deepcut-ghidra/src/main/help/help/shared/Frontpage.css deleted file mode 100644 index b847166..0000000 --- a/deepcut-ghidra/src/main/help/help/shared/Frontpage.css +++ /dev/null @@ -1,64 +0,0 @@ -/* ### - * IP: GHIDRA - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - WARNING! - This file is copied to all help directories. If you change this file, you must copy it - to each src/main/help/help/shared directory. - - - Java Help Note: JavaHelp does not accept sizes (like in 'margin-top') in anything but - px (pixel) or with no type marking. - -*/ - -body { margin-bottom: 50px; margin-left: 10px; margin-right: 10px; margin-top: 10px; } /* some padding to improve readability */ -li { font-family:times new roman; font-size:14pt; } -h1 { color:#000080; font-family:times new roman; font-size:36pt; font-style:italic; font-weight:bold; text-align:center; } -h2 { margin: 10px; margin-top: 20px; color:#984c4c; font-family:times new roman; font-size:18pt; font-weight:bold; } -h3 { margin-left: 10px; margin-top: 20px; color:#0000ff; font-family:times new roman; `font-size:14pt; font-weight:bold; } -h4 { margin-left: 10px; margin-top: 20px; font-family:times new roman; font-size:14pt; font-style:italic; } - -/* - P tag code. Most of the help files nest P tags inside of blockquote tags (the was the - way it had been done in the beginning). The net effect is that the text is indented. In - modern HTML we would use CSS to do this. We need to support the Ghidra P tags, nested in - blockquote tags, as well as naked P tags. The following two lines accomplish this. Note - that the 'blockquote p' definition will inherit from the first 'p' definition. -*/ -p { margin-left: 40px; font-family:times new roman; font-size:14pt; } -blockquote p { margin-left: 10px; } - -p.providedbyplugin { color:#7f7f7f; margin-left: 10px; font-size:14pt; margin-top:100px } -p.ProvidedByPlugin { color:#7f7f7f; margin-left: 10px; font-size:14pt; margin-top:100px } -p.relatedtopic { color:#800080; margin-left: 10px; font-size:14pt; } -p.RelatedTopic { color:#800080; margin-left: 10px; font-size:14pt; } - -/* - We wish for a tables to have space between it and the preceding element, so that text - is not too close to the top of the table. Also, nest the table a bit so that it is clear - the table relates to the preceding text. -*/ -table { margin-left: 20px; margin-top: 10px; width: 80%;} -td { font-family:times new roman; font-size:14pt; vertical-align: top; } -th { font-family:times new roman; font-size:14pt; font-weight:bold; background-color: #EDF3FE; } - -/* - Code-like formatting for things such as file system paths and proper names of classes, - methods, etc. To apply this to a file path, use this syntax: - ... -*/ -code { color: black; font-weight: bold; font-family: courier new, monospace; font-size: 14pt; white-space: nowrap; } -code.path { color: #4682B4; font-weight: bold; font-family: courier new, monospace; font-size: 14pt; white-space: nowrap; } diff --git a/deepcut-ghidra/src/main/java/deepcut/DeepCutAnalyzer.java b/deepcut-ghidra/src/main/java/deepcut/DeepCutAnalyzer.java index 35f3519..78e2bc5 100644 --- a/deepcut-ghidra/src/main/java/deepcut/DeepCutAnalyzer.java +++ b/deepcut-ghidra/src/main/java/deepcut/DeepCutAnalyzer.java @@ -30,13 +30,11 @@ import java.io.FileNotFoundException; import com.google.gson.Gson; import com.google.gson.GsonBuilder; -import generic.jar.ResourceFile; import ghidra.app.script.GhidraScriptLoadException; import ghidra.app.services.AbstractAnalyzer; import ghidra.app.services.AnalysisPriority; import ghidra.app.services.AnalyzerType; import ghidra.app.util.importer.MessageLog; -import ghidra.framework.Application; import ghidra.framework.options.Options; import ghidra.program.model.address.Address; import ghidra.program.model.address.AddressFactory; @@ -153,6 +151,7 @@ public class DeepCutAnalyzer extends AbstractAnalyzer { private void addNamespace(Program program, String name, Function function) throws DuplicateNameException, InvalidInputException, CircularDependencyException { + SymbolTable symbolTable = program.getSymbolTable(); Namespace ns = symbolTable.getNamespace(name, null); if (ns == null) {