From 013943fbe67d733d2bf31574f6aba7fb683b5af8 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Tue, 21 Nov 2023 12:05:43 +0100 Subject: [PATCH] Support strings originating from `ConstantExpr` expressions --- .../strings-encoding/StringEncoding.cpp | 75 +++++++++++++++++-- src/test/lit.cfg.py | 2 +- .../strings-encoding/basic-ios-swift.ll | 18 +++++ .../passes/strings-encoding/config_replace.py | 2 + 4 files changed, 88 insertions(+), 9 deletions(-) create mode 100644 src/test/passes/strings-encoding/basic-ios-swift.ll diff --git a/src/passes/strings-encoding/StringEncoding.cpp b/src/passes/strings-encoding/StringEncoding.cpp index 10093b4d..49a25b92 100644 --- a/src/passes/strings-encoding/StringEncoding.cpp +++ b/src/passes/strings-encoding/StringEncoding.cpp @@ -14,10 +14,13 @@ #include #include #include +#include #include #include #include #include +#include +#include #include #include #include @@ -88,6 +91,22 @@ inline bool isSkip(const StringEncodingOpt& encInfo) { return std::get_if(&encInfo) != nullptr; } +GlobalVariable *extractGlobalVariable(ConstantExpr *Expr) { + while (Expr) { + if (Expr->getOpcode() == llvm::Instruction::IntToPtr || + llvm::Instruction::isBinaryOp(Expr->getOpcode())) { + Expr = dyn_cast(Expr->getOperand(0)); + } else if (Expr->getOpcode() == llvm::Instruction::PtrToInt || + Expr->getOpcode() == llvm::Instruction::GetElementPtr) { + return dyn_cast(Expr->getOperand(0)); + } else { + break; + } + } + + return nullptr; +} + bool StringEncoding::runOnBasicBlock(Module& M, Function& F, BasicBlock& BB, ObfuscationConfig& userConfig) { @@ -103,10 +122,14 @@ bool StringEncoding::runOnBasicBlock(Module& M, Function& F, BasicBlock& BB, for (Use& Op : I.operands()) { GlobalVariable *G = dyn_cast(Op->stripPointerCasts()); - if (G == nullptr) { - continue; + if (!G) { + if (auto *CE = dyn_cast(Op)) + G = extractGlobalVariable(CE); } + if (!G) + continue; + if (!isEligible(*G)) { continue; } @@ -269,6 +292,24 @@ bool StringEncoding::injectOnStack(BasicBlock& BB, Instruction& I, Use& Op, Glob return true; } +std::pair +materializeConstantExpression(Instruction *Point, ConstantExpr *CE) { + auto *Inst = CE->getAsInstruction(); + auto *Prev = Inst; + Inst->insertBefore(Point); + + Value *Expr = Inst->getOperand(0); + while (isa(Expr)) { + auto *NewInst = cast(Expr)->getAsInstruction(); + NewInst->insertBefore(Prev); + Prev->setOperand(0, NewInst); + Expr = NewInst->getOperand(0); + Prev = NewInst; + } + + return {Inst, Prev}; +} + bool StringEncoding::injectOnStackLoop(BasicBlock& BB, Instruction& I, Use& Op, GlobalVariable& G, ConstantDataSequential& data, const StringEncoding::EncodingInfo& info) { @@ -285,8 +326,9 @@ bool StringEncoding::injectOnStackLoop(BasicBlock& BB, Instruction& I, Use& Op, IRBuilder IRB(&BB); IRB.SetInsertPoint(&I); - AllocaInst* clearBuffer = IRB.CreateAlloca(IRB.getInt8Ty(), - IRB.getInt32(str.size())); + AllocaInst *ClearBuffer = + IRB.CreateAlloca(ArrayType::get(IRB.getInt8Ty(), str.size())); + auto *CastClearBuffer = IRB.CreateBitCast(ClearBuffer, IRB.getInt8PtrTy()); AllocaInst* Key = IRB.CreateAlloca(IRB.getInt64Ty()); AllocaInst* StrSize = IRB.CreateAlloca(IRB.getInt32Ty()); @@ -311,6 +353,14 @@ bool StringEncoding::injectOnStackLoop(BasicBlock& BB, Instruction& I, Use& Op, fatalError("Can't find the 'decode' routine"); } + // TODO: support ObjC strings as well + Value *CastEncPtr = nullptr; + if (auto *CE = dyn_cast(EncPtr)) { + assert(extractGlobalVariable(CE) == &G && + "Previously extracted global variable need to match"); + CastEncPtr = IRB.CreateBitCast(&G, IRB.getInt8PtrTy()); + } + auto NewF = Function::Create(FDecode->getFunctionType(), llvm::GlobalValue::PrivateLinkage, "__omvll_decode", BB.getModule()); @@ -323,9 +373,8 @@ bool StringEncoding::injectOnStackLoop(BasicBlock& BB, Instruction& I, Use& Op, SmallVector Returns; CloneFunctionInto(NewF, FDecode, VMap, CloneFunctionChangeType::DifferentModule, Returns); - std::vector Args = { - clearBuffer, EncPtr, KeyVal, VStrSize - }; + std::vector Args = { + CastClearBuffer, CastEncPtr ? CastEncPtr : EncPtr, KeyVal, VStrSize}; if (NewF->arg_size() != 4) { fatalError(fmt::format("Expecting 4 arguments ({} provided)", NewF->arg_size())); @@ -344,7 +393,17 @@ bool StringEncoding::injectOnStackLoop(BasicBlock& BB, Instruction& I, Use& Op, CallInst* callee = IRB.CreateCall(NewF->getFunctionType(), NewF, Args); inline_wlist_.push_back(callee); - I.setOperand(Op.getOperandNo(), clearBuffer); + if (auto *CE = dyn_cast(EncPtr)) { + auto [First, Last] = materializeConstantExpression(&I, CE); + assert(((First != Last) || + (isa(First) || isa(First))) && + "Nested constantexpr in getelementptr/ptrtoint should not appear?"); + Last->setOperand(0, ClearBuffer); + I.setOperand(Op.getOperandNo(), First); + } else { + I.setOperand(Op.getOperandNo(), ClearBuffer); + } + return true; } diff --git a/src/test/lit.cfg.py b/src/test/lit.cfg.py index fc627f08..bc9d3105 100644 --- a/src/test/lit.cfg.py +++ b/src/test/lit.cfg.py @@ -6,7 +6,7 @@ from lit.llvm import llvm_config config.name = "O-MVLL Tests" -config.suffixes = ['.c', '.cpp'] +config.suffixes = ['.c', '.cpp', '.ll'] config.test_format = lit.formats.ShTest(True) config.test_source_root = os.path.dirname(__file__) diff --git a/src/test/passes/strings-encoding/basic-ios-swift.ll b/src/test/passes/strings-encoding/basic-ios-swift.ll new file mode 100644 index 00000000..8f1ea807 --- /dev/null +++ b/src/test/passes/strings-encoding/basic-ios-swift.ll @@ -0,0 +1,18 @@ +; REQUIRES: aarch64-registered-target + +; The 'replace' configuration encodes the string and adds logic that decodes it at load-time: +; RUN: env OMVLL_CONFIG=%S/config_replace.py clang++ -fpass-plugin=%libOMVLL \ +; RUN: -target arm64-apple-ios -fno-legacy-pass-manager -O1 -c %s -o - | strings | FileCheck %s +; +; CHECK-NOT: {{.*Hello, Swift.*}} + +%swift.bridge = type opaque + +declare swiftcc { i64, %swift.bridge*, i1, %swift.bridge* } @"foo"(i64 %0, %swift.bridge* %1) + +@.str.4 = private constant [13 x i8] c"Hello, Swift\00" + +define { i64, %swift.bridge*, i1, %swift.bridge* } @test_function() { + %1 = call swiftcc { i64, %swift.bridge*, i1, %swift.bridge* } @"foo"(i64 -3458764513820540911, %swift.bridge* inttoptr (i64 or (i64 sub (i64 ptrtoint ([13 x i8]* @.str.4 to i64), i64 32), i64 -9223372036854775808) to %swift.bridge*)) + ret { i64, %swift.bridge*, i1, %swift.bridge* } %1 +} diff --git a/src/test/passes/strings-encoding/config_replace.py b/src/test/passes/strings-encoding/config_replace.py index 04264c29..23321741 100644 --- a/src/test/passes/strings-encoding/config_replace.py +++ b/src/test/passes/strings-encoding/config_replace.py @@ -7,6 +7,8 @@ def __init__(self): def obfuscate_string(self, _, __, string: bytes): if string.endswith(b".cpp"): return omvll.StringEncOptGlobal() + if string.endswith(b"Swift"): + return omvll.StringEncOptStack() @lru_cache(maxsize=1) def omvll_get_config() -> omvll.ObfuscationConfig: