diff --git a/README.md b/README.md index 494758f..af47b5d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Bytecode Simplifier +*(Modified by @abel1502 for EXTENDED_ARG opcode support and minor fixes)* + Bytecode simplifier is a tool to deobfuscate PjOrion protected python scripts. This is a complete rewrite of my older tool [PjOrion Deobfuscator](https://github.com/extremecoders-re/PjOrion-Deobfuscator) diff --git a/assembler.py b/assembler.py index 5e7125f..389a786 100644 --- a/assembler.py +++ b/assembler.py @@ -66,6 +66,7 @@ def assemble(self): # Modify relative jump to absolute jump if ins.mnemonic == 'JUMP_FORWARD': + ins.mnemonic = 'JUMP_ABSOLUTE' ins.opcode = dis.opmap['JUMP_ABSOLUTE'] # If instruction is a relative control transfer instruction @@ -118,7 +119,7 @@ def dfs(self, bb): bb.b_seen = True # Recursively dfs on all out going explicit edges - for o_edge in self.bb_graph.out_edges_iter(bb, data=True): + for o_edge in self.bb_graph.out_edges(bb, data=True): # o_edge is a tuple (edge src, edge dest, edge attrib dict) if o_edge[2]['edge_type'] == 'explicit': self.dfs(o_edge[1]) @@ -130,7 +131,7 @@ def dfs(self, bb): self.dfs(ins.argval) # Recursively dfs on all out going implicit edges - for o_edge in self.bb_graph.out_edges_iter(bb, data=True): + for o_edge in self.bb_graph.out_edges(bb, data=True): # o_edge is a tuple (edge src, edge dest, edge attrib dict) if o_edge[2]['edge_type'] == 'implicit': self.dfs(o_edge[1]) @@ -163,15 +164,12 @@ def calculate_ins_operands(self): if ins.opcode in dis.hasjabs: # ins.argval is a BasicBlock ins.arg = ins.argval.address - # TODO - # We do not generate EXTENDED_ARG opcode at the moment, - # hence size of opcode argument can only be 2 bytes - assert ins.arg <= 0xFFFF + assert ins.arg <= 0xFFFFFFFF elif ins.opcode in dis.hasjrel: ins.arg = ins.argval.address - addr # relative jump can USUALLY go forward assert ins.arg >= 0 - assert ins.arg <= 0xFFFF + assert ins.arg <= 0xFFFFFFFF def emit(self): logger.debug('Generating code...') diff --git a/decoder.py b/decoder.py index 5a55fe0..3a8597b 100644 --- a/decoder.py +++ b/decoder.py @@ -16,8 +16,7 @@ def decode_at(self, offset): opcode = self.insBytes[offset] - if opcode == dis.opmap['EXTENDED_ARG']: - raise Exception('EXTENDED_ARG not yet implemented') + # Invalid instruction if opcode not in dis.opmap.values(): @@ -25,7 +24,12 @@ def decode_at(self, offset): if opcode < dis.HAVE_ARGUMENT: return Instruction(opcode, None, 1) - - if opcode >= dis.HAVE_ARGUMENT: + elif opcode == dis.opmap['EXTENDED_ARG']: + arg = (self.insBytes[offset + 2] << 8) | self.insBytes[offset + 1] + arg = (arg << 16) + ((self.insBytes[offset + 5] << 8) | self.insBytes[offset + 4]) + nextop = self.insBytes[offset + 3] + assert nextop >= dis.HAVE_ARGUMENT + return Instruction(nextop, arg, 6) + else: arg = (self.insBytes[offset + 2] << 8) | self.insBytes[offset + 1] return Instruction(opcode, arg, 3) diff --git a/deobfuscator.py b/deobfuscator.py index 68c142f..aece9d9 100644 --- a/deobfuscator.py +++ b/deobfuscator.py @@ -33,7 +33,7 @@ def find_oep(insBytes): # Second instruction is intentionally invalid, on execution # control transfers to exception handler - assert dec.decode_at(3).is_opcode_valid() == False + assert dec.decode_at(ins.size).is_opcode_valid() == False assert dec.decode_at(exc_handler).mnemonic == 'POP_TOP' assert dec.decode_at(exc_handler + 1).mnemonic == 'POP_TOP' diff --git a/disassembler.py b/disassembler.py index ec8a8a0..2b76231 100644 --- a/disassembler.py +++ b/disassembler.py @@ -241,7 +241,7 @@ def construct_basic_blocks(self): logger.debug('{} basic blocks created'.format(self.bb_graph.number_of_nodes())) def find_bb_by_address(self, address): - for bb in self.bb_graph.nodes_iter(): + for bb in self.bb_graph.nodes.iterkeys(): if bb.address == address: return bb @@ -252,7 +252,7 @@ def build_bb_edges(self): """ logger.debug('Constructing edges between basic blocks...') - for bb in self.bb_graph.nodes_iter(): + for bb in self.bb_graph.nodes.iterkeys(): offset = 0 for idx in xrange(len(bb.instructions)): @@ -313,7 +313,7 @@ def build_bb_edges(self): # RETURN_VALUE elif ins.is_ret(): - nx.set_node_attributes(self.bb_graph, 'isTerminal', {bb: True}) + nx.set_node_attributes(self.bb_graph, {bb: True}, 'isTerminal') # Does not have any sucessors assert len(nextInsAddr) == 0 diff --git a/instruction.py b/instruction.py index 7592210..728c2ea 100644 --- a/instruction.py +++ b/instruction.py @@ -12,7 +12,9 @@ def __init__(self, opcode, arg, size): # Numeric argument to operation(if any), otherwise None self.arg = arg - + + if size == 3 and arg >= 65536: + size = 6 # The size of the instruction including the arguement self.size = size @@ -74,8 +76,10 @@ def has_xref(self): def assemble(self): if self.size == 1: return chr(self.opcode) - else: + elif self.size == 3 and self.arg < 65536: return chr(self.opcode) + chr(self.arg & 0xFF) + chr((self.arg >> 8) & 0xFF) + else: + return chr(dis.opmap["EXTENDED_ARG"]) + chr((self.arg >> 16) & 0xFF) + chr((self.arg >> 24) & 0xFF) + chr(self.opcode) + chr(self.arg & 0xFF) + chr((self.arg >> 8) & 0xFF) def __str__(self): return '{} {} {}'.format(self.opcode, self.mnemonic, self.arg) diff --git a/simplifier.py b/simplifier.py index d02eeb9..24eb734 100644 --- a/simplifier.py +++ b/simplifier.py @@ -27,7 +27,7 @@ def eliminate_forwarders(self): # Loop until no basic block can be eliminated any more while bb_eliminated: bb_eliminated = False - for bb in self.bb_graph.nodes_iter(): + for bb in self.bb_graph.nodes.iterkeys(): # Must have a single instruction if len(bb.instructions) == 1: ins = bb.instructions[0] @@ -36,7 +36,7 @@ def eliminate_forwarders(self): assert self.bb_graph.out_degree(bb) == 1 forwarderBB = bb - forwardedBB = self.bb_graph.successors(bb)[0] + forwardedBB = next(self.bb_graph.successors(bb)) # Check if forwardedBB has atleast one implicit in edge forwardedBB_in_edge_exists = len(filter(lambda edge: edge[2]['edge_type'] == 'implicit', @@ -54,7 +54,7 @@ def eliminate_forwarders(self): self.bb_graph.remove_edge(forwarderBB, forwardedBB) # Iterate over the predecessors of the forwarder - for predecessorBB in self.bb_graph.predecessors(forwarderBB): + for predecessorBB in list(self.bb_graph.predecessors(forwarderBB)): # Get existing edge type e_type = self.bb_graph.get_edge_data(predecessorBB, forwarderBB)['edge_type'] @@ -97,7 +97,7 @@ def eliminate_forwarders(self): def merge_basic_blocks(self): """ - Merges a basic block into its predecessor iff the basic block has exactly one predecessor + Merges a basic block into its predecessor if the basic block has exactly one predecessor and the predecessor has this basic block as its lone successor :param bb_graph: A graph of basic blocks @@ -113,14 +113,13 @@ def merge_basic_blocks(self): # Loop until no basic block can be eliminated any more while bb_merged: bb_merged = False - for bb in self.bb_graph.nodes_iter(): + for bb in self.bb_graph.nodes.iterkeys(): # The basic block should not have any xrefs and must have exactly one predecessor if not bb.has_xrefs_to and self.bb_graph.in_degree(bb) == 1: - predecessorBB = self.bb_graph.predecessors(bb)[0] + predecessorBB = self.bb_graph.predecessors(bb).next() # Predecessor basic block must have exactly one successor - if self.bb_graph.out_degree(predecessorBB) == 1 and self.bb_graph.successors(predecessorBB)[ - 0] == bb: + if self.bb_graph.out_degree(predecessorBB) == 1 and self.bb_graph.successors(predecessorBB).next() == bb: # The predecessor block will be the merged block mergedBB = predecessorBB @@ -138,12 +137,12 @@ def merge_basic_blocks(self): # If bb is a terminal node, mark the mergedBB as terminal too if bb in nx.get_node_attributes(self.bb_graph, 'isTerminal').keys(): - nx.set_node_attributes(self.bb_graph, 'isTerminal', {mergedBB: True}) + nx.set_node_attributes(self.bb_graph, {mergedBB: True}, 'isTerminal') # Remove the edge self.bb_graph.remove_edge(mergedBB, bb) - for successorBB in self.bb_graph.successors(bb): + for successorBB in list(self.bb_graph.successors(bb)): # Get existing type e_type = self.bb_graph.get_edge_data(bb, successorBB)['edge_type'] diff --git a/utils/rendergraph.py b/utils/rendergraph.py index 58a6360..a980061 100644 --- a/utils/rendergraph.py +++ b/utils/rendergraph.py @@ -36,7 +36,7 @@ def render_graph(bb_graph, filename): nodedict = {} - for bb in bb_graph.nodes_iter(): + for bb in bb_graph.nodes.iterkeys(): node = render_bb(bb, bb == entryblock, bb in returnblocks) if bb == entryblock: sub = pydotplus.Subgraph('sub', rank='source') @@ -46,7 +46,8 @@ def render_graph(bb_graph, filename): graph.add_node(node) nodedict[bb] = node - for edge in bb_graph.edges_iter(data=True): + for edge in bb_graph.edges.iteritems(): + edge = (edge[0][0], edge[0][1], edge[1]) src = nodedict[edge[0]] dest = nodedict[edge[1]] e_style = 'dashed' if edge[2]['edge_type'] == 'implicit' else 'solid' diff --git a/verifier.py b/verifier.py index 1913181..530b70a 100644 --- a/verifier.py +++ b/verifier.py @@ -25,7 +25,7 @@ def verify_graph(bb_graph): logger.error('The entry point basic block has an in degree of {}'.format(i_degree_entry)) raise Exception - for bb in bb_graph.nodes_iter(): + for bb in bb_graph.nodes.iterkeys(): o_degree = bb_graph.out_degree(bb) # A basic block can have 0,1 or 2 successors if o_degree > 2: @@ -41,11 +41,14 @@ def verify_graph(bb_graph): # A basic block having out degree of 2, cannot have both out edge as of explicit type or implicit type if o_degree == 2: - o_edges = bb_graph.out_edges(bb, data=True) - if o_edges[0][2]['edge_type'] == 'explicit' and o_edges[1][2]['edge_type'] == 'explicit': + o_edges = bb_graph.out_edges(bb, data=True).__iter__() + o_edges_zero = o_edges.next() + o_edges_one = o_edges.next() + print o_edges + if o_edges_zero[2]['edge_type'] == 'explicit' and o_edges_one[2]['edge_type'] == 'explicit': logger.error('Basic block {} has both out edges of explicit type'.format(hex(id(bb)))) raise Exception - if o_edges[0][2]['edge_type'] == 'implicit' and o_edges[1][2]['edge_type'] == 'implicit': + if o_edges_zero[2]['edge_type'] == 'implicit' and o_edges_one[2]['edge_type'] == 'implicit': logger.error('Basic block {} has both out edges of implicit type'.format(hex(id(bb)))) raise Exception @@ -54,7 +57,7 @@ def verify_graph(bb_graph): # If in degree is greater than zero if i_degree > 0: numImplicitEdges = 0 - for edge in bb_graph.in_edges_iter(bb, data=True): + for edge in bb_graph.in_edges(bb, data=True): if edge[2]['edge_type'] == 'implicit': numImplicitEdges += 1 @@ -64,7 +67,6 @@ def verify_graph(bb_graph): if i_degree == o_degree == 0: logger.error('Orphaned block {} has no edges'.format(hex(id(bb)))) - except Exception as ex: print ex return False