While looking for malicious samples to analyze, I had a look at Pony version 1.9, whose source code was leaked a couple of years ago.
The malware makes use of a simple but effective obfuscation method which makes analysis with disassemblers and debuggers cumbersome.
The obfuscation method is simple:
jmp instructions are replaced with:
push address //address that will be jumped to
clc //clear carry flag
jb 0xxx //never taken because of clc instruction, but still makes disassemblers confused
retn //results in eip set to pushed value from above.
An example:
So we want a script that finds all the cases matching the construction above and replaces each of them with a jmp instruction.
It was solved with the Python API for BinaryNinja:
from binaryninja import *
from collections import deque
import time
# x86 architecture object, shared module-wide; checkIfMatchAndPatch uses it to
# assemble the replacement jmp instructions.
arch = Architecture['x86']
def checkIfMatchAndPatch(bv, ilS):
    """Patch one obfuscated jump if the five-instruction window matches.

    The window is expected to hold five consecutive low-level IL
    instructions.  It matches when it looks like the obfuscated jump:

        push <constant>      ; real jump target
        clc                  ; seen as "set flag c"
        jb   <somewhere>     ; seen as "if (flag c)", never taken
        [jump <const ptr>]   ; fall-through jump emitted for the branch
        retn                 ; "return" to the pushed constant

    The fourth instruction may already be the return, in which case the
    fifth instruction is not inspected at all.

    On a match, a ``jmp <constant>`` is assembled with the module-level
    ``arch`` and written over the address of the ``push``.

    Args:
        bv: the view the patch is written to (only ``bv.write`` is used).
        ilS: indexable sequence of low-level IL instructions.

    Returns:
        True if a jmp was written, False if the window did not match or
        the jmp could not be assembled.
    """
    if len(ilS) != 5:
        return False

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Now analyzing from address %x to address %x"
          % (ilS[0].address, ilS[4].address))

    ops = enums.LowLevelILOperation
    push, flagSet, branch, fourth, fifth = ilS

    # push immediate
    if push.operation != ops.LLIL_PUSH or \
            push.prefix_operands[1].operation != ops.LLIL_CONST:
        return False

    # clear c flag
    # NOTE(review): only the flag name is checked, not the value written to
    # it -- confirm a "set carry" can never precede the jb in these samples.
    if flagSet.operation != ops.LLIL_SET_FLAG or \
            flagSet.prefix_operands[1].name != "c":
        return False

    # conditional branch on the c flag
    if branch.operation != ops.LLIL_IF or \
            branch.prefix_operands[1].operation != ops.LLIL_FLAG or \
            branch.prefix_operands[2].name != "c":
        return False

    # Either the fourth instruction is already the return (then the fifth
    # is irrelevant), or it must be a jump to a constant pointer followed
    # by a return that pops its target from the stack.
    if fourth.operation != ops.LLIL_RET:
        if fourth.operation != ops.LLIL_JUMP or \
                fourth.prefix_operands[1].operation != ops.LLIL_CONST_PTR:
            return False
        if fifth.operation != ops.LLIL_RET or \
                fifth.prefix_operands[1].operation != ops.LLIL_POP:
            return False

    # Address the new instruction will be written at
    instrLocation = push.address
    # Address to jump to
    jmpLocation = push.src.value.value

    code, err = arch.assemble("jmp %d" % jmpLocation, instrLocation)
    if err != '':
        print("Error when generating code for jmp to location %d" % jmpLocation)
        return False

    print("Writing jmp 0x%x to address 0x%x" % (jmpLocation, instrLocation))
    bv.write(instrLocation, code)
    return True
def blockAdr(block):
    """Sort key for IL blocks: the address of the block's first instruction."""
    firstInstr = block[0]
    return firstInstr.address
def goOn(bv,function):
    """Scan every function in ``bv`` and patch all obfuscated jumps found.

    Each function's low-level IL is walked block by block in address
    order, and a sliding window of the last five instructions is handed
    to checkIfMatchAndPatch after every instruction.  Once the work list
    is empty, analysis is refreshed and any functions not yet visited are
    queued, so the loop ends only when no unvisited functions remain.

    Args:
        bv: the view whose functions are scanned and patched.
        function: unused; accepted to fit the plugin callback signature.
    """
    pending = bv.functions
    visited = []
    while pending != []:
        current = pending.pop()
        # Bounded deque: appending a sixth element drops the oldest one.
        window = deque([], 5)
        for ilBlock in sorted(current.low_level_il, key=blockAdr):
            for instr in ilBlock:
                window.append(instr)
                checkIfMatchAndPatch(bv, window)
        visited.append(current)
        if pending != []:
            continue
        # Work list exhausted: refresh analysis and queue functions that
        # have not been visited yet.
        # NOTE(review): the fixed one-second sleep presumably gives the
        # analysis time to finish -- confirm that is long enough.
        bv.update_analysis()
        time.sleep(1.0)
        pending = list(set(bv.functions) - set(visited))
def go(bv,function):
    """Plugin command entry point; hands both arguments straight to goOn."""
    goOn(bv, function)
# Register go() as a function-level plugin command; "Patch Pony" is passed as
# both of the string arguments.
PluginCommand.register_for_function("Patch Pony", "Patch Pony", go)
Running it results in a normal disassembly; for example, the function in the screenshot above now looks like this:
And the number of identified functions has gone from 4 to 155 =)
Obfuscation schemes as simple as this one are probably not common these days, but it was nevertheless a good opportunity to use the excellent API provided by Vector35.