Part 2 - Control Flow Flattening

Flattening Control Flow

In part 1 we successfully revealed all of the strings in the script. In this part, we are going to tackle the control flow obfuscation. Control flow obfuscation is a technique to jumble up the order of code in order to make it harder to follow. It jumps around and is usually found in a switch block. I’ve included a visual representation of what that might look like below.

[Image: tweet by Virus Bulletin: "Sophos' @hackingump1 writes an introduction to control flow flattening in Emotet. Control flow flattening is an obfuscation technique that hides program flow by putting all function blocks next…"]

We can see examples of this in the script we’ve partially deobfuscated so far:

    var _0x3100a2 = "4|2|3|1|6|5|7|0|9|8"["split"]("|"),
      _0x6753ae = 0x0;
    while (!![]) {
      switch (_0x3100a2[_0x6753ae++]) {
        case "0":
          _0x4e96ba["Pgl"](_0x14d4e9, _0x4e96ba["uLq"](_0x2bcd36, _0x4bc649));
          continue;
        case "1":
          _0x4bc649["push"](["'v8b33affa616d7e2343cc7cd58fb6cd20c99ad6b16413b7c5af014cfde3a957ad'.toString()", "value"]);
          continue;
        case "2":
          if (!_0x530384["btoa"]) _0x530384["btoa"] = _0x48c673;
          continue;
        case "3":
          _0x4e96ba["gGv"](_0x3abf6a);
          continue;
        case "4":
          if (_0x1e92fd) {
            try {
              _0x2e9a92["log"] = _0x4e96ba["uLq"](_0x19195e, _0x1e92fd);
            } catch (_0x3fc75d) {}
          }
          continue;
        case "5":
          var _0x290d1c = _0x20a23a["substr"](0x0, 0x2);
          continue;
        case "6":
          var _0x20a23a = "bO+/vQxkbu+/vXQgXu+/ve+/vUHvv73vv71d77+9";
          continue;
        case "7":
          var _0x5a434c = _0x20a23a["substr"](0x2);
          continue;
        case "8":
          _0x4fd281["createElement"]("img")["src"] = _0x4e96ba["LZO"]("/_Incapsula_Resource?SWKMTFSR=1&e=", _0x530384["Math"]["random"]());
          continue;
        case "9":
          if (_0x163515) {
            _0x4bc649["push"]([_0x163515, "value"]);
            _0x4e96ba["uLq"](_0x14d4e9, _0x4e96ba["FRS"](_0x2bcd36, _0x4bc649));
          }
          continue;
      }
      break;
    }

We can see that _0x3100a2 is a variable holding a string literal that is split on | into an array of case labels. The while loop then steps through that array, and the switch statement runs the code under the matching case for each element. The end goal here is to simplify the code to something like this:

//Code from switch "4"
if (_0x1e92fd) {
try {
  _0x2e9a92["log"] = _0x4e96ba["uLq"](_0x19195e, _0x1e92fd);
} catch (_0x3fc75d) {}
}

//Code from switch "2"
if (!_0x530384["btoa"]) _0x530384["btoa"] = _0x48c673;

//Code from switch "3"
_0x4e96ba["gGv"](_0x3abf6a);

//Code from switch "1"
_0x4bc649["push"](["'v8b33affa616d7e2343cc7cd58fb6cd20c99ad6b16413b7c5af014cfde3a957ad'.toString()", "value"]);
... etc

And here is our resulting Babel plugin:

const t = require("@babel/types");
const parser = require("@babel/parser");
const traverse = require("@babel/traverse").default;

const flattenControlFlowVisitor = {
    /**
     * Unrolls one control-flow-flattening dispatcher of the form
     *
     *     var order = "4|2|3"["split"]("|"), counter = 0x0;
     *     while (!![]) { switch (order[counter++]) { case "4": ...; continue; } break; }
     *
     * by replacing the loop with the case bodies laid out in execution order.
     * Anything that does not match this shape exactly is left untouched
     * instead of throwing or producing a partial result.
     *
     * @param path Babel NodePath of the visited SwitchStatement.
     */
    SwitchStatement(path){
        const { node } = path;
        const { discriminant } = node;

        //Only handle switches whose discriminant is order[counter++]
        if (!t.isMemberExpression(discriminant) ||
            !t.isIdentifier(discriminant.object) ||
            !t.isUpdateExpression(discriminant.property) ||
            discriminant.property.operator !== "++" ||
            discriminant.property.prefix !== false)
        {
            return;
        }

        //Get the switch order variable name
        //e.g.     var _0x48d663 = "3|6|2|0|5|1|4"["split"]("|"), ---> _0x48d663
        const switchOrderVar = discriminant.object.name;

        //Resolve the variable and make sure it is initialised with "<order>"[...](...)
        //before reading the order string out of it
        const binding = path.scope.getBinding(switchOrderVar);
        const init = binding ? binding.path.node.init : null;
        if (!t.isCallExpression(init) ||
            !t.isMemberExpression(init.callee) ||
            !t.isStringLiteral(init.callee.object))
        {
            return;
        }
        const switchOrder = init.callee.object.value.split("|");

        //Locate the loop that drives the switch: either the switch is the loop
        //body itself, or it sits inside the loop's block
        let loopPath = path.parentPath;
        if (loopPath && t.isBlockStatement(loopPath.node)) {
            loopPath = loopPath.parentPath;
        }
        if (!loopPath || !t.isLoop(loopPath.node)) {
            return;
        }

        const orderedNodes = [];

        //Loop through the switch order
        for (const sw of switchOrder) {
            //Get the switch case that belongs to this step
            //(a `default:` case has a null test, so guard before reading .value)
            const switchCase = node.cases.find(c => c.test && c.test.value === sw);
            if (!switchCase) {
                //The order references a case that does not exist: bail out
                return;
            }

            //Get the nodes under the case excluding the continue statement
            const nodesInSwitchCase = switchCase.consequent.filter(c => !t.isContinueStatement(c));

            //Drop them into an array
            //cloneDeepWithoutLoc to avoid issues!
            orderedNodes.push(...nodesInSwitchCase.map(n => t.cloneDeepWithoutLoc(n)));
        }

        //Replace the dispatcher loop with the ordered statements
        loopPath.replaceWithMultiple(orderedNodes);
    }
}

//Apply the visitor to the AST (`ast` is created outside this snippet).
traverse(ast, flattenControlFlowVisitor);
traverse(ast, flattenControlFlowVisitor); //Second pass: needed because there are nested switch statements

You’ll notice that we need to run this visitor twice; this is because there are nested switch statements. One improvement that could be made to this visitor is to make it recursive. This would make it more efficient and mean that we don’t have to traverse the AST twice. We’ve now simplified the ugly control flow code into this:

function _0x4d748b(_0x1b013d) {
var _0x3a900f = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
var _0x2026be, _0x30a9be, _0x105a8e;
var _0x337f59, _0x32ce20, _0x52c47a;
_0x105a8e = _0x1b013d["length"];
_0x30a9be = 0x0;
_0x2026be = "";
while (_0x388c13["wag"](_0x30a9be, _0x105a8e)) {
  var _0x4a9695 = "6|8|7|2|0|1|4|3|5"["split"]("|"),
    _0x1a9326 = 0x0;
  _0x337f59 = _0x388c13["YnZ"](_0x1b013d["charCodeAt"](_0x30a9be++), 0xff);
  if (_0x388c13["PNJ"](_0x30a9be, _0x105a8e)) {
    _0x2026be += _0x3a900f["charAt"](_0x337f59 >> 0x2);
    _0x2026be += _0x3a900f["charAt"](_0x388c13["fza"](_0x388c13["yuG"](_0x337f59, 0x3), 0x4));
    _0x2026be += "==";
    break;
  }
  _0x32ce20 = _0x1b013d["charCodeAt"](_0x30a9be++);
  if (_0x388c13["PNJ"](_0x30a9be, _0x105a8e)) {
    _0x2026be += _0x3a900f["charAt"](_0x388c13["ydV"](_0x337f59, 0x2));
    _0x2026be += _0x3a900f["charAt"](_0x388c13["AfV"](_0x388c13["XCm"](_0x337f59, 0x3) << 0x4, (_0x32ce20 & 0xf0) >> 0x4));
    _0x2026be += _0x3a900f["charAt"](_0x388c13["pYD"](_0x32ce20, 0xf) << 0x2);
    _0x2026be += "=";
    break;
  }
  _0x52c47a = _0x1b013d["charCodeAt"](_0x30a9be++);
  _0x2026be += _0x3a900f["charAt"](_0x388c13["Oed"](_0x337f59, 0x2));
  _0x2026be += _0x3a900f["charAt"](_0x388c13["VuS"](_0x388c13["jpt"](_0x337f59, 0x3) << 0x4, _0x388c13["ydV"](_0x388c13["EML"](_0x32ce20, 0xf0), 0x4)));
  _0x2026be += _0x3a900f["charAt"](_0x388c13["sWH"](_0x32ce20 & 0xf, 0x2) | _0x388c13["ydV"](_0x388c13["uQC"](_0x52c47a, 0xc0), 0x6));
  _0x2026be += _0x3a900f["charAt"](_0x388c13["EML"](_0x52c47a, 0x3f));
}
return _0x2026be;
}

Removing Proxy References

The next obfuscation technique we’re going to tackle is proxy references. Proxy references are calls to functions that execute another function. The end goal here is to replace any calls to proxy functions with their intended function calls.

We can see examples of proxy calls in the code, e.g:

"Pjt": function _0x1068f3(_0x1dcc0d, _0x4fccf2) {
  return _0x1dcc0d + _0x4fccf2;
},

Here we have a function that is defined under the key Pjt, and all it does is return the sum of its two parameters.

First thing we need to do is traverse through the script and identify any objects that define the proxy functions. You’ll see that the proxy references are contained in an object like so:

var _0x388c13 = {
"wag": function _0x67d457(_0x2f20be, _0x4a591a) {
  return _0x2f20be < _0x4a591a;
},

The goal is to build a lookup table of the object variable name (_0x388c13) and all the proxy functions it contains. We can achieve this with this visitor:

//Lookup table: proxy object variable name -> list of [property name, function node].
//Kept in a local as well so the nested visitor does not depend on what `this`
//is bound to when Babel invokes it.
const proxyFuncVars = {};
this.proxyFuncVars = proxyFuncVars;
path.traverse({
    ObjectProperty(path){
        const { node } = path;

        //A proxy function is a function expression whose first statement is a return
        if (!t.isFunctionExpression(node.value) || !t.isReturnStatement(node.value.body.body[0])) {
            return;
        }

        //Only accept objects assigned directly in a declaration: var name = { ... }
        //(property -> object expression -> variable declarator)
        const objectPath = path.parentPath;
        const declarator = objectPath ? objectPath.parentPath : null;
        if (!declarator ||
            !t.isVariableDeclarator(declarator.node) ||
            declarator.node.init !== objectPath.node ||
            !t.isIdentifier(declarator.node.id))
        {
            return;
        }

        //Support both quoted ("wag": ...) and unquoted (wag: ...) keys
        let funcName;
        if (t.isStringLiteral(node.key)) {
            funcName = node.key.value;
        } else if (!node.computed && t.isIdentifier(node.key)) {
            funcName = node.key.name;
        } else {
            return;
        }

        //Found a proxy expression
        const varName = declarator.node.id.name;
        if (!Object.prototype.hasOwnProperty.call(proxyFuncVars, varName)) {
            proxyFuncVars[varName] = [];
        }

        proxyFuncVars[varName].push([funcName, node.value]);
    }
})

We now have a lookup table that looks like this:

image-20230306151919010

The next thing that we need to do is traverse every CallExpression and see if it belongs in our lookup table. If it does, we can simply replace the CallExpression with the body of the proxy function. Here is a visitor that does just that:

/**
 * Looks up a proxy function in the lookup table.
 *
 * @param {Object} lookupTable Maps a proxy object variable name to a list of
 *     [property name, function node] pairs.
 * @param {string} varName Name of the proxy object variable.
 * @param {string} funcName Property name of the proxy function on that object.
 * @returns The stored function node of the first matching pair, or null when
 *     the variable or the property is not in the table.
 */
function findProxyFunction(lookupTable, varName, funcName){
    const knownObjects = Object.keys(lookupTable);
    if (!knownObjects.includes(varName)) {
        return null;
    }

    for (const entry of lookupTable[varName]) {
        const name = entry[0];
        if (name === funcName) {
            return entry[1];
        }
    }

    return null;
}

CallExpression(path){
    const { node } = path;
    if (
        t.isMemberExpression(node.callee) &&
        t.isIdentifier(node.callee.object) &&
        findProxyFunction(this.proxyFuncVars, node.callee.object.name, node.callee.property.name)
    ){
        const varName = node.callee.object.name;
        const funcName =  node.callee.property.name;

        const proxyFunc = findProxyFunction(this.proxyFuncVars, varName, funcName);
        if (proxyFunc){
            //We found a proxy function, so do a replacement
            if (t.isBinaryExpression(proxyFunc.body.body[0].argument)){
                const funcBinaryExpression = proxyFunc.body.body[0].argument;
                path.replaceWith(t.binaryExpression(funcBinaryExpression.operator, node.arguments[0], node.arguments[1]))
            } else if (t.isCallExpression(proxyFunc.body.body[0].argument)){
                const funcName = node.arguments.slice(1);
                path.replaceWith(t.callExpression(node.arguments[0], funcName))
            }
        }
    }
},

Caution: The above code doesn’t consider the ordering of the arguments. For the utmvc script it’s not a problem since they’re always in order, however you should consider the argument positioning for other obfuscated code

We’ve now transformed the code and removed all the proxy references!

Before:

_0x25c494[_0x25c494.length] = _0x5cf4da.bfY(_0x4ce1f7, _0x5cf4da.szJ(_0x412285, "=undefined"));

After:

_0x25c494[_0x25c494.length] = _0x4ce1f7(_0x412285 + "=undefined");