woa

0.1.0-SNAPSHOT


dependencies

org.clojure/clojure
1.6.0
org.clojure/tools.cli
0.3.1
org.clojure/tools.nrepl
0.2.5
org.clojure/data.json
0.2.5
org.clojure/core.async
0.1.346.0-17112a-alpha
asmdex/asmdex
1.0
soot/soot
1.0
pandect
0.4.1
commons-io/commons-io
2.4
incanter/incanter-core
1.5.6
me.raynes/fs
1.4.6
com.taoensso/nippy
2.7.1
clojurewerkz/neocons
3.0.0



(this space intentionally left almost blank)
 
(ns woa.apk.aapt.parse
  ;; internal libs
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])  
  ;; special libs
  (:require [clojure.java.shell :as shell :refer [sh]]))
(def manifest "AndroidManifest.xml")

porcelain

(declare get-badging get-manifest get-layout-callbacks)
(declare decompile-xml get-manifest-xml)

plumbing

(declare parse-aapt-xmltree
         get-nodes-from-parsed-xmltree)
(declare aapt-dump-xmltree
         aapt-dump-badging
         aapt-dump-manifest
         aapt-dump-resources)

get badging in Clojure data structure

(defn get-badging
  [apk]
  (let [result (atom {})
        get-string-in-single-quotes #(if-let [[_ meat] (re-find #"^'([^']+)'$" %)]
                                       meat
                                       %)]
    ;; first pass
    (doseq [line (str/split-lines (aapt-dump-badging apk))]
      ;; only consider lines that have values
      (when-let [[_ label content] (re-find #"^([^:]+):([^:]+)$" line)]
        (let [label (keyword label)]
          (swap! result update-in [label]
                 conj content))))
    ;; second pass
    (doseq [k (keys @result)]
      (swap! result update-in [k]
             (fn [content]
               (when-let [first-item (first content)]
                 (cond
                  ;; strings
                  (re-matches #"'[^']+'" first-item)
                  (into #{}
                        (map get-string-in-single-quotes
                             content))
                  ;; map
                  (re-matches #"(?:\s[^\s'][^\s=]+='[^']+')+" first-item)
                  (into {}
                        (for [[_ k v] (re-seq #"\s([^\s=]+)='([^']+)'" first-item)]
                          [(keyword k)
                           v]))
                  ;; set
                  (re-matches #"(?:\s'[^']+')+" first-item)
                  (into #{}
                        (for [[_ v] (re-seq #"\s'([^']+)'" first-item)]
                          v))
                  ;; sequence
                  (re-matches #"'[^']+',.+" first-item)
                  (into #{}
                        (map #(into []
                                    (map (fn [[_ meat]]
                                           meat)
                                         (re-seq #"'([^']+)',?" %)))
                             content)))))))
    @result))

get manifest in Clojure data structure

reference: https://developer.android.com/guide/topics/manifest/manifest-intro.html

(defn get-manifest
  [apk]
  (let [parsed-manifest (parse-aapt-xmltree (aapt-dump-manifest apk))
        result (atom {})
        get-node-android-name (fn [node package]
                                (-> node
                                  (get-in [:attrs :android:name])
                                  str
                                  (#(if (.startsWith ^String % ".")
                                      (str package %)
                                      %))
                                  keyword))]
    ;; <manifest> attrs
    (let [node (first (get-nodes-from-parsed-xmltree parsed-manifest
                                                     [:manifest]))]
      (doseq [attr [:android:versionCode
                    :android:versionName
                    :package]]
        (swap! result assoc-in [attr]
               (get-in node [:attrs attr]))))
    (let [package (get-in @result [:package])]
      ;; <manifest> level
      (doseq [node [:uses-permission
                    :permission]]
        (swap! result assoc-in [node]
               (set (map #(get-node-android-name % package)
                         (get-nodes-from-parsed-xmltree parsed-manifest
                                                        [:manifest node])))))
      ;; <application> level
      (doseq [node [:activity
                    :activity-alias
                    :service
                    :receiver]]
        (swap! result assoc-in [node]
               (into {}
                     (map (fn [node]
                            {(get-node-android-name node package)
                             (into {}
                                   (map (fn [intent-filter-tag]
                                          [(keyword (str "intent-filter-"
                                                         (name intent-filter-tag)))
                                           (set (map #(get-node-android-name % package)
                                                     (get-nodes-from-parsed-xmltree (:content node)
                                                                                    [:intent-filter
                                                                                     intent-filter-tag])))])
                                        [:action :category]))})
                          (get-nodes-from-parsed-xmltree parsed-manifest
                                                         [:manifest :application node]))))))
    @result))

return layout id and their callbacks' class.method

(defn get-layout-callbacks
  [apk]
  (->> (for [[_ layout-id _1 layout-name]
             (re-seq #"(?x)
spec\s+resource\s+
0x([0-9a-fA-F]+)\s+
([^:]+):layout/([^:]+):
"
                     (aapt-dump-resources apk))]
         [(Long/parseLong layout-id 16)
          (let [xml-res-name (str "res/layout/"
                                  layout-name
                                  ".xml")
                the-xml (parse-aapt-xmltree (aapt-dump-xmltree apk
                                                               xml-res-name))
                callbacks (atom #{})]
            (loop [worklist the-xml]
              (when (and worklist
                         (not (empty? worklist)))
                (let [new-worklist (atom #{})]
                  (doseq [{:keys [tag attrs content]
                           :as work} worklist]
                    (let [attr-keys (keys attrs)
                          callback-keys (filter #(let [key-name (name %)]
                                                   (re-matches #"android:on.+"
                                                               key-name))
                                                attr-keys)
                          info (select-keys attrs
                                            (set/difference (set attr-keys)
                                                            (set callback-keys)))]
                      (doseq [callback-key callback-keys]
                        (swap! callbacks conj
                               (merge info
                                      {:view-type (name tag)
                                       :method (get attrs callback-key)}))))
                    (swap! new-worklist into content))
                  (recur @new-worklist))))
            @callbacks)])
       (into {})))

get manifest in XML format

(defn get-manifest-xml
  [apk]
  (decompile-xml apk manifest))

decompile the binary xml on PATH in APK

(defn decompile-xml
  [apk path]
  (let [xmltree (parse-aapt-xmltree (aapt-dump-xmltree apk path))
        xmltree-to-xml (fn xmltree-to-xml [indent nodes]
                         (when (not-empty nodes)
                           (doseq [node nodes]
                             (let [tag (:tag node)
                                   attrs (:attrs node)
                                   content (:content node)
                                   indent-str (apply str (repeat indent " "))]
                               (printf "%s<%s%s"
                                       indent-str
                                       (name tag)
                                       (if-not (empty? attrs)
                                         (str " "
                                              (str/join " "
                                                        (map (fn [[k v]]
                                                               (if (and k v)
                                                                 (format "%s=\"%s\""
                                                                         (name k) v)
                                                                 ""))
                                                             attrs)))
                                         ""))
                               (if (not-empty content)
                                 (do
                                   (println ">")
                                   (xmltree-to-xml (+ indent 2)
                                                   content)
                                   (printf "%s</%s>\n"
                                           indent-str
                                           (name tag)))
                                 (println " />"))))))]
    (with-out-str
      (println "<?xml version=\"1.0\" encoding=\"utf-8\"?>")
      (xmltree-to-xml 0 xmltree))))

parse aapt xmltree dump into Clojure data structure

(defn parse-aapt-xmltree
  [xmltree-dump]
  (let [lines (vec (map #(let [[_ spaces type raw]
                               (re-find #"^(\s*)(\S):\s(.+)$"
                                        %)]
                           {:indent (count spaces)
                            :type type
                            :raw raw})
                        (str/split-lines xmltree-dump)))
        ;; first pass build: from lines to a tree        
        build (fn build [lines]
                (when-let [lines (vec lines)]
                  (when (not (empty? lines))
                    (let [start-indent (:indent (first lines))
                          segment-indexes (vec (concat (keep-indexed #(when (<= (:indent %2)
                                                                                start-indent)
                                                                        %1)
                                                                     lines)
                                                       [(count lines)]))
                          segments (map #(subvec lines
                                                 (get segment-indexes %)
                                                 (get segment-indexes (inc %)))
                                        (range (dec (count segment-indexes))))]
                      (->> segments
                           (map (fn [lines]
                                  (let [line (first lines)
                                        lines (rest lines)
                                        type (:type line)
                                        raw (:raw line)]
                                    (case type
                                      ;; Namespace
                                      "N"
                                      (let [[_ n v] (re-find #"^([^=]+)=([^=]+)$" raw)]
                                        {:type :namespace
                                         :name (str "xmlns:" n)
                                         :value v
                                         :children (build lines)}) 
                                      ;; Element
                                      "E"
                                      (let [[_ name line] (re-find #"^(\S+)\s+\(line=(\d+)\)$"
                                                                   raw)]
                                        {:type :element
                                         :name name
                                         :line line
                                         :children (build lines)})
                                      ;; Attribute
                                      "A"
                                      (let [[_
                                             encoded-name bare-name
                                             quoted-value encoded-value bare-value] (re-find
                                                                                     #"(?x)
^(?:
  ([^=(]+)\([^)]+\)| # encoded name
  ([^=(]+) # bare name)
=
(?:
  \"([^\"]+)\"| # quoted value
  \([^)]+\)(\S+)|  # encoded value
  ([^\"(]\S*) # bare value)
"
                                                                                     raw)]
                                        {:type :attribute
                                         :name (or bare-name encoded-name)
                                         :value (or quoted-value encoded-value bare-value)})
                                      ;; falls through
                                      nil))))
                           (keep identity)
                           vec)))))
        pass (build lines)]
    (let [;; second pass: merge namespace/attributes into elements
          build (fn build [node & [immediate-namespace]]
                  (case (:type node)
                    ;; element
                    :element
                    (let [[attrs elems] (split-with #(= (:type %) :attribute)
                                                    (:children node))]
                      {:tag (keyword (:name node))
                       :attrs (let [attrs (into {} (mapcat #(let [the-key (keyword (:name %))
                                                                  the-value (:value %)]
                                                              (when (and the-key the-value)
                                                                [[the-key
                                                                  the-value]]))
                                                        attrs))]
                                (if immediate-namespace
                                  (assoc attrs (keyword (:name immediate-namespace))
                                         (:value immediate-namespace))
                                  attrs))
                       :content (set (map build elems))})                    
                    ;; namespace
                    :namespace
                    (build (first (:children node))
                           ;; pass the immediate-namespace to its children
                           (select-keys node [:name :value]))))
          pass (set (map build pass))]
      pass)))

get nodes from parsed xmltree

(defn get-nodes-from-parsed-xmltree
  [parsed-xmltree [tag & more-tags]]
  (->> parsed-xmltree
       (filter #(= (:tag %) tag))
       ((fn [nodes]
          (if more-tags
            (mapcat #(get-nodes-from-parsed-xmltree (:content %)
                                                    more-tags)
                    nodes)
            nodes)))
       set))

aapt dump xmltree

(defn aapt-dump-xmltree
  [apk asset]
  (:out (sh "aapt" "dump" "xmltree"
            apk asset)))

aapt dump badging

(defn aapt-dump-badging
  [apk]
  (:out (sh "aapt" "dump" "badging"
            apk)))

aapt dump xmltree

(defn aapt-dump-manifest
  [apk]
  (aapt-dump-xmltree apk manifest))

aapt dump resources

(defn aapt-dump-resources
  [apk]
  (:out (sh "aapt" "dump" "resources"
            apk)))
 
(ns woa.apk.dex.asmdex.opcodes
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])  
  ;; special libs
  (:require [clojure.reflect :refer [reflect]])
  ;; imports
  (:import (org.ow2.asmdex Opcodes)))
(declare decode-opcode encode-opcode)

decode or-ed opcode from 'int' to 'set of keywords'encode or-ed opcode from 'seq of keywords' to 'int'deocde exclusive opcode from 'int' to 'keyword'encode exclusive opcode from 'keyword' to 'int'do encode/decode on opcodedecode opcodeencode opcode

(let [opcode-map {:access "ACC_"
                  :debug "DBG_"
                  :instruction "INSN_"
                  :type "TYPE_"
                  :value "VALUE_"
                  :visibility "VISIBILITY_"}
      opcodes (into {}
                    (map (fn [[tag name-prefix]]
                           [tag
                            (into {}
                                  (->>  Opcodes
                                    reflect
                                    :members
                                    (map (comp str :name))
                                    (filter #(.startsWith % name-prefix))
                                    (map (fn [field-name]
                                           [(keyword (let [prettify-opcode-name
                                                           (fn [name]
                                                             (let [[_ name]
                                                                   (re-find #"^[^_]+_(.+)$" name)]
                                                               (-> name
                                                                 str/lower-case
                                                                 (str/replace "_" "-"))))]
                                                       (prettify-opcode-name field-name)))
                                            (eval `(. Opcodes
                                                      ~(symbol field-name)))]))))])
                         opcode-map))
      opcodes-invert (into {}
                           (map (fn [[tag opcodes]]
                                  [tag
                                   (set/map-invert opcodes)])
                                opcodes))]
  (defn- decode-ored-opcode
    [opcode-type code]
    (let [opcode-type (keyword opcode-type)
          opcodes (opcode-type opcodes)]
      (set (filter #(not= 0
                          (bit-and code
                                   (get opcodes % 0)))
                   (keys opcodes)))))
  (defn- encode-ored-opcode
    [opcode-type code]
    (let [opcode-type (keyword opcode-type)
          opcodes (opcode-type opcodes)]
      (reduce bit-or 0
              (map opcodes
                   (set/intersection (set code)
                                     (set (keys opcodes)))))))
  (defn- decode-exclusive-opcode
    [opcode-type code]
    (get ((keyword opcode-type) opcodes-invert)
         code))
  (defn- encode-exclusive-opcode
    [opcode-type code]
    (get ((keyword opcode-type) opcodes)
         code))
  (let [impl {:encode {:ored encode-ored-opcode
                       :exclusive encode-exclusive-opcode}
              :decode {:ored decode-ored-opcode
                       :exclusive decode-exclusive-opcode}}
        opcode-type-map {:access :ored
                         :debug :exclusive
                         :instruction :exclusive
                         :type :exclusive
                         :value :exclusive
                         :visibility :exclusive}]
    (defn- do-opcode
      [dowhat opcode-type code]
      (let [dowhat (keyword dowhat)
            opcode-type (keyword opcode-type)]
        ((get (dowhat impl)
              (opcode-type opcode-type-map))
         opcode-type code)))
    (defn decode-opcode
      [opcode-type code]
      (do-opcode :decode opcode-type code))
    (defn encode-opcode
      [opcode-type code]
      (do-opcode :encode opcode-type code))))
 
(ns woa.apk.dex.asmdex.parse
  ;; internal libs
  (:require [woa.apk.dex.parse
             :refer [the-dex]]
            [woa.apk.util
             :refer [get-apk-file-input-stream]]           
            [woa.apk.dex.asmdex.opcodes
             :refer [decode-opcode
                     encode-opcode]])  
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])  
  ;; special libs
  ;; imports
  ;; http://asm.ow2.org/doc/tutorial-asmdex.html
  (:import (org.ow2.asmdex ApplicationReader
                           ApplicationVisitor
                           ClassVisitor
                           AnnotationVisitor
                           FieldVisitor
                           MethodVisitor
                           Opcodes)))

declaration

(declare parse-the-dex-in-apk)

implementation

parse the dex in apk

(defn parse-the-dex-in-apk
  [apk & {:keys []
          :as args}]
  (let [api Opcodes/ASM4
        app-reader (ApplicationReader. api
                                       (get-apk-file-input-stream apk
                                                                  the-dex))
        the-structure (atom {})]
    (let [app-visitor (proxy [ApplicationVisitor] [api]
                        (visitClass [access name signature super-name interfaces]
                          (let [access (decode-opcode :access access)
                                signature (set signature)
                                interfaces (set interfaces)]
                            (swap! the-structure assoc-in [name]
                                   {:access access
                                    :name name
                                    :signature signature
                                    :super-name super-name
                                    :interfaces interfaces
                                    ;; placeholders
                                    :fields {}
                                    :methods {}})
                            (let [class-name name]
                              (proxy [ClassVisitor] [api]
                                (visitField [access name desc signature value]
                                  (let [access (decode-opcode :access access)
                                        signature (set signature)]
                                    (swap! the-structure assoc-in [class-name :fields name]
                                           {:access access
                                            :name name
                                            :desc desc
                                            :signature signature
                                            :value value})
                                    nil))
                                (visitMethod [access name desc signature exceptions]
                                  (let [access (decode-opcode :access access)
                                        signature (set signature)
                                        exceptions (set exceptions)
                                        ;; code to be appended here
                                        code (atom [])]
                                    (swap! the-structure assoc-in
                                           [class-name :methods name]
                                           {:access access
                                            :name name
                                            :desc desc
                                            :signature signature
                                            :exceptions exceptions
                                            ;; to be filled later at the visitEnd for MethodVisitor
                                            :code nil})
                                    (let [method-name name
                                          conj-code (fn [insn]
                                                      (swap! code conj insn))]
                                      (proxy [MethodVisitor] [api]
                                        (visitArrayLengthInsn [value-reg array-reg]
                                          (conj-code {:tag :array-length-insn
                                                      :instruction :array-length
                                                      :array array-reg
                                                      :value value-reg}))
                                        (visitArrayOperationInsn [opcode value-reg array-reg idx-reg]
                                          (conj-code {:tag :array-op-insn
                                                      :instruction (decode-opcode :instruction
                                                                                  opcode)
                                                      :array array-reg
                                                      :index idx-reg
                                                      :value value-reg}))
                                        (visitFieldInsn [opcode owner name desc value-reg obj-reg]
                                          (conj-code {:tag :field-insn
                                                      :instruction (decode-opcode :instruction
                                                                                  opcode)
                                                      :owner owner
                                                      :name name
                                                      :desc desc
                                                      :value value-reg
                                                      :object obj-reg}))
                                        (visitFillArrayDataInsn [array-reg array-data]
                                          (conj-code {:tag :fill-array-data-insn
                                                      :instruction :fill-array-data
                                                      :array array-reg
                                                      :data array-data}))
                                        (visitInsn [opcode]
                                          (conj-code {:tag :nullary-insn
                                                      :instruction (decode-opcode :instruction
                                                                                  opcode)}))
                                        (visitIntInsn [opcode reg]
                                          (conj-code {:tag :unary-insn
                                                      :instruction (decode-opcode :instruction
                                                                                  opcode)
                                                      :reg reg}))
                                        (visitJumpInsn [opcode label reg-a reg-b]
                                          (conj-code {:tag :jump-insn
                                                      :instruction (decode-opcode :instruction
                                                                                  opcode)
                                                      :label label
                                                      :reg-a reg-a
                                                      :reg-b reg-b}))
                                        (visitLabel [label]
                                          (conj-code {:tag :label
                                                      :label label}))
                                        (visitLineNumber [line start]
                                          (conj-code {:tag :line-number
                                                      :line line
                                                      :start start}))
                                        (visitLocalVariable [name desc signature start end index]
                                          (conj-code {:tag :local-variable
                                                      :name name
                                                      :desc desc
                                                      :signature signature
                                                      :start start
                                                      :end end
                                                      :index index}))
                                        (visitLookupSwitchInsn [reg default switch-keys labels]
                                          (let [switch-keys (vec switch-keys)
                                                labels (vec labels)]
                                            (conj-code {:tag :lookup-switch-insn
                                                        :instruction :lookup-switch
                                                        :reg reg
                                                        :default default
                                                        :switch-keys switch-keys
                                                        :labels labels})))
                                        (visitMaxs [max-stack _]
                                          ;; local vars + param vars (last ones; "this" implicit for instance method)
                                          (swap! the-structure assoc-in
                                                 [class-name :methods method-name :vars]
                                                 max-stack))
                                        (visitMethodInsn [opcode owner name desc arguments]
                                          (let [arguments (vec arguments)]
                                            (conj-code {:tag :method-insn
                                                        :instruction (decode-opcode :instruction
                                                                                    opcode)
                                                        :owner owner
                                                        :name name
                                                        :desc desc
                                                        :arguments arguments})))
                                        (visitMultiANewArrayInsn [desc regs]
                                          (let [regs (vec regs)]
                                            (conj-code {:tag :multi-a-newarray-insn
                                                        :instruction :multi-a-newarray
                                                        :desc desc
                                                        :reg regs})))
                                        (visitOperationInsn [opcode dest-reg src-reg-1 src-reg-2 value]
                                          (conj-code {:tag :op-insn
                                                      :instruction (decode-opcode :instruction
                                                                                  opcode)
                                                      :dest-reg dest-reg
                                                      :src-reg-1 src-reg-1
                                                      :src-reg-2 src-reg-2
                                                      :value value}))
                                        (visitParameters [params]
                                          (let [params (vec params)]
                                            (swap! the-structure assoc-in
                                                   [class-name :methods method-name :params]
                                                   params)))
                                        (visitStringInsn [opcode dest-reg string]
                                          (conj-code {:tag :string-insn
                                                      :instruction (decode-opcode :instruction
                                                                                  opcode)
                                                      :dest-reg dest-reg
                                                      :string string}))
                                        (visitTableSwitchInsn [reg min max default labels]
                                          (let [labels (vec labels)]
                                            (conj-code {:tag :table-switch-insn
                                                        :instruction :table-switch
                                                        :reg reg
                                                        :min min
                                                        :max max
                                                        :default default
                                                        :labels labels})))
                                        (visitTryCatchBlock [start end handler type]
                                          (conj-code {:tag :try-catch-block
                                                      :start start
                                                      :end end
                                                      :handler handler
                                                      :type type}))
                                        (visitTypeInsn [opcode dest-reg ref-reg size-reg type]
                                          (conj-code {:tag :type-insn
                                                      :instruction (decode-opcode :instruction
                                                                                  opcode)
                                                      :ref-reg ref-reg
                                                      :size-reg size-reg
                                                      :type type}))
                                        (visitVarInsn [opcode dest-reg var]
                                          (conj-code {:tag :var-insn
                                                      :instruction (decode-opcode :instruction
                                                                                  opcode)
                                                      :dest-reg dest-reg
                                                      :var var}))
                                        (visitEnd []
                                          ;; now save the code
                                          (swap! the-structure assoc-in
                                                 [class-name :methods method-name :code]
                                                 @code)))))))))))]
      (.accept app-reader
               app-visitor
               (bit-or 0
                       ApplicationReader/SKIP_DEBUG))
      @the-structure)))
 
(ns woa.apk.dex.asmdex.util
  ;; internal libs
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])  
  ;; special libs
  ;; imports)

declaration

var

func

(declare
 get-component-callback-method-all-external-invokes
 expand-invokes extract-dex-method-invokes
 comp-name-2-class-name class-name-2-comp-name)

implementation

for each component callback method (i.e., on*), get its external invokes

(defn get-component-callback-method-all-external-invokes
  [apk]
  (let [expanded-invokes (expand-invokes (extract-dex-method-invokes apk))]
    (into {}
          (map (fn [comp-type]
                 [comp-type
                  (into {}
                        (map (fn [comp-name]
                               [comp-name
                                (->> (get expanded-invokes
                                          ;; internal class name
                                          (comp-name-2-class-name comp-name))
                                     ;; filter event callbacks 
                                     (filter #(re-find #"^on[A-Z]"
                                                       (first %)))
                                     ;; filter external invokes
                                     (map (fn [[k methods]]
                                            [k
                                             (set (filter (fn [{:keys [class]}]
                                                            ;; external invokes
                                                            (not (get expanded-invokes
                                                                      class)))
                                                          methods))]))
                                     (into {}))])
                             (->> apk :manifest comp-type keys (map name))))])
               [:receiver :service :activity]))))

expand invokes to include transitive/indirect ones

(defn expand-invokes
  [invokes]
  (let [expand (fn expand [method visited]
                 (if-let [method-invokes (get invokes method)]
                   ;; internal method - further expansion if not visited
                   (if-not (contains? visited method)
                     ;; not visited
                     (mapcat #(expand %
                                      (conj (set visited)
                                            method))
                             method-invokes)
                     ;; already visited - return method
                     #{method})
                   ;; external method - no further expansion
                   #{method}))]
    (let [result (atom {})
          tmp (->> invokes
                   (map (fn [[k v]]
                          [k (set (mapcat #(expand %
                                                   #{%})
                                          v))]))
                   (into {}))]
      (doseq [[{:keys [class method]} all-invokes] tmp]
        (swap! result assoc-in [class method] all-invokes))
      @result)))

extract invokes in each method

(defn extract-dex-method-invokes
  [apk]
  (let [dex (:dex apk)]
    (->> (mapcat (fn [[class-name {:keys [methods]}]]
                   (map (fn [[method-name {:keys [code]}]]
                          {{:class class-name :method method-name}
                           (->> code
                                (filter #(= (:tag %)
                                            :method-insn))
                                (map #(do {:class (:owner %)
                                           :method (:name %)
                                           ;;:instruction (:instruction %)  ; not interesting to us
                                           }))
                                set)})
                        methods))
                 dex)
         (reduce merge))))

a.b.c -> La/b/c;

(defn comp-name-2-class-name
  [comp-name]
  (str "L" (str/replace comp-name "." "/") ";"))

La/b/c; -> a.b.c

(defn class-name-2-comp-name
  [class-name]
  (let [[_ class-name] (re-find #"^L([^;]+);$" class-name)]
    (str/replace class-name "/" ".")))
 
(ns woa.apk.dex.parse
  (:require [woa.apk.util
             :refer [get-apk-file-bytes get-apk-file-input-stream
                     extract-apk-file
                     get-apk-file-sha256-digest
                     get-file-sha256-digest]])  
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]]))

declaration

var

(declare the-dex)

func

(declare extract-the-dex-in-apk get-the-dex-sha256-digest)

implementation

(def the-dex "classes.dex")

extract the dex in apk to output-file-name

(defn extract-the-dex-in-apk
  [apk output-file-name]
  (extract-apk-file apk the-dex output-file-name))

get sha256 digest of the dex in apk

(defn get-the-dex-sha256-digest
  [apk]
  (get-apk-file-sha256-digest apk the-dex))
 
(ns woa.apk.dex.soot.parse
  ;; internal libs
  (:require [woa.util
             :refer [print-stack-trace-if-verbose]])
  (:require [woa.apk.dex.soot.util
             :as util
             :refer :all])
  (:require [woa.apk.dex.soot.simulator
             :as simulator
             :refer :all])  
  (:require [woa.apk.parse
             :as apk-parse])
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])  
  ;; special lib
  (:require [me.raynes.fs :as fs])  
  ;; imports
  (:import (java.util.concurrent Executors
                                 TimeUnit))
  (:import (soot Unit
                 SootField
                 SootClass
                 SootMethod
                 SootMethodRef)
           (soot.jimple Stmt)
           (soot.options Options)))

declaration

func

public

(declare parse-apk get-apk-interesting-invokes)

private

(declare prettify-args)

implementation

parse apk with soot

(defn parse-apk
  [apk-name options]
  (merge (apk-parse/parse-apk apk-name)
         {:dex (get-apk-interesting-invokes apk-name {}
                                            options)}))

get App components and their (transitive) interesting invokes

(defn get-apk-interesting-invokes
  [apk-name
   {:keys [exclusion-name-patterns
           exclusion-name-pattern-exceptions]
    :as params
    :or {exclusion-name-patterns [#"^java\."
                                  #"^javax\."
                                  #"^junit\."
                                  #"^org\.json"
                                  #"^org\.w3c\."
                                  #"^org\.xmlpull\."]
         exclusion-name-pattern-exceptions [#"^android\."
                                            #"^com\.android\."
                                            #"^dalvik\."
                                            #"^java\.lang\.System"
                                            #"^java\.lang\.Class"
                                            #"^java\.lang\.ClassLoader"
                                            #"^java\.lang\.reflect"
                                            #"^java\.security"]}}
   {:keys [soot-android-jar-path
           soot-show-result
           soot-result-exclude-app-methods
           soot-parallel-jobs
           verbose]
    :as options}]
  (when (and apk-name (fs/readable? apk-name))
    (let [apk-path (.getPath (io/file apk-name))
          get-android-jar-path #(let [res-name "EMPTY"
                                      ;; hack to get "woa.jar" dir
                                      [_ path] (re-find (re-pattern (str "^file:(.*)/[^/]+!/"
                                                                         res-name "$"))
                                                        (.getPath (io/resource res-name)))]
                                  (str/join (System/getProperty "file.separator")
                                            [path "android.jar"]))
          android-jar-path (if soot-android-jar-path
                             soot-android-jar-path
                             (get-android-jar-path))
          result (atom {})
          ;; the current thread's Soot context
          g-objgetter (new-g-objgetter)]
      ;; unfortunately, Singleton is so deeply embedded in Soot's implementation, we have to work in critical Section altogether
      (with-soot
        ;; use the current thread's Soot context
        g-objgetter
        ;; reset at the end to release the Soot Objects built up during the analysis
        true
        ;; the real work begins from here
        (when (or (not verbose)
                  (<= verbose 1))
          (mute))
        (try
          (doto soot-options
            (.set_src_prec (Options/src_prec_apk))
            (.set_process_dir [apk-path])
            (.set_force_android_jar android-jar-path)
            (.set_allow_phantom_refs true)
            (.set_no_bodies_for_excluded true)
            (.set_ignore_resolution_errors true)
            (.set_whole_program true)
            (.set_output_format (Options/output_format_none)))
          (doto soot-phase-options)
          ;; do it manually --- barebone
          (run-body-packs :scene soot-scene
                          :pack-manager soot-pack-manager
                          :body-packs ["jb"]
                          :verbose verbose)
          (when (and verbose (> verbose 3))
            (println "body pack finished"))
          ;; start working on the bodies
          (let [step1 (fn []
                        (let [application-classes (get-application-classes soot-scene)
                              android-api-descendants
                              (->> application-classes
                                   (filter (fn [class]
                                             (->> (get-interesting-transitive-super-class-and-interface
                                                   class android-api?)
                                                  not-empty))))
                              android-api-descendant-callbacks
                              (->> android-api-descendants
                                   (remove #(.. ^SootClass % isPhantom))
                                   (mapcat #(->> (.. ^SootClass % getMethods)
                                                 (filter (fn [method]
                                                           (and (.hasActiveBody method)
                                                                (re-find #"^on[A-Z]"
                                                                         (.getName method)))))))
                                   set)]
                          ;; descendant relations
                          (doseq [descendant android-api-descendants]
                            (when (and verbose (> verbose 3))
                              (println "android API descendants:" descendant))
                            (swap! result assoc-in
                                   [(.. descendant getPackageName) (.. descendant getName)]
                                   {:android-api-ancestors
                                    (->> (for [super (get-interesting-transitive-super-class-and-interface
                                                      descendant android-api?)]
                                           {:class (.. super getName)
                                            :package (.. super getPackageName)})
                                         set)}))
                          ;; cg will only see parts reachable from these entry points
                          (.. soot-scene
                              (setEntryPoints (seq android-api-descendant-callbacks)))
                          ;; return the result
                          {:android-api-descendants android-api-descendants}))
                step1-result (step1)
                step2 (fn [{:keys [android-api-descendants]
                            :as prev-step-result}]
                        (let [application-classes (get-application-classes soot-scene)
                              application-methods (get-application-methods soot-scene)
                              interesting-method?
                              (memoize
                               (fn [method]
                                 (let [method-name (-> method get-soot-name)
                                       class (-> method get-soot-class)
                                       ;;super (-> class get-transitive-super-class-and-interface)
                                       ]
                                   ;; interestingness criteria
                                   (and true
                                        (if soot-result-exclude-app-methods
                                          ;; external
                                          (not (contains? application-methods method))
                                          true)
                                        ;; not in exclusion-name-patterns
                                        (or (->> [class]
                                                 (filter
                                                  (fn [x]
                                                    (some #(re-find % (-> x get-soot-class-name))
                                                          exclusion-name-patterns)))
                                                 empty?)
                                            ;; ... unless in exclusion-name-pattern-exceptions
                                            (->> [class]
                                                 (filter
                                                  (fn [x]
                                                    (some #(re-find % (-> x get-soot-class-name))
                                                          exclusion-name-pattern-exceptions)))
                                                 not-empty))
                                        ;; not <init> or <clinit>
                                        (not (re-find #"<[^>]+>" method-name))))))]
                          (let [pool (Executors/newFixedThreadPool soot-parallel-jobs)]
                            (doseq [descendant android-api-descendants]
                              (.. pool
                                  (execute
                                   (fn []
                                     ;; impose lifecycle order on callbacks
                                     ;; https://developer.android.com/images/activity_lifecycle.png
                                     (let [callbacks
                                           (->> (.. ^SootClass descendant getMethods)
                                                (filter (fn [method]
                                                          (and (.hasActiveBody method)
                                                               (re-find #"^on[A-Z]"
                                                                        (.getName method)))))
                                                (sort-by #(.getName %)
                                                         (fn [x y]
                                                           (let [order {"onCreate" 1
                                                                        "onStart" 2
                                                                        "onResume" 3
                                                                        ;; others
                                                                        "onPause" 5
                                                                        "onStop" 6
                                                                        "onRestart" 7
                                                                        "onDestroy" 8}
                                                                 ox (get order x 4)
                                                                 oy (get order y 4)]
                                                             (compare ox oy)))))]
                                       (try
                                         (doseq [callback callbacks]
                                           (let [callback-class (.. callback getDeclaringClass)]
                                             (when (and verbose (> verbose 3))
                                               (println "app component callback:" callback))
                                             (let [{:keys [explicit-invokes
                                                           implicit-invokes
                                                           component-invokes
                                                           invoke-paths]}
                                                   (with-simulator
                                                     (initialize-classes {:classes application-classes
                                                                          :circumscription application-methods}
                                                                         options)                                                 
                                                     (get-all-interesting-invokes callback
                                                                                  interesting-method?
                                                                                  application-methods
                                                                                  options))]
                                               (doseq [[type invokes] [[:explicit explicit-invokes]
                                                                       [:implicit implicit-invokes]
                                                                       [:component component-invokes]]]
                                                 (swap! result assoc-in
                                                        [(.. callback-class getPackageName)
                                                         (.. callback-class getName)
                                                         :callbacks
                                                         (.. callback getName)
                                                         type]
                                                        (->> invokes
                                                             (filter #(let [{:keys [method args]} %]
                                                                        (soot-queryable? method)))
                                                             (map #(let [{:keys [method args]} %
                                                                         class (-> method
                                                                                   get-soot-class)]
                                                                     {:method (-> method get-soot-name)
                                                                      :class (-> method get-soot-class-name)
                                                                      :package (.. class getPackageName)
                                                                      :args (->> args prettify-args str)}))
                                                             set)))
                                               ;; add explicit link between invokes and their Android API ancestor
                                               (let [path [(.. callback-class getPackageName)
                                                           (.. callback-class getName)
                                                           :callbacks
                                                           (.. callback getName)
                                                           :descend]]
                                                 (doseq [invoke (set/union explicit-invokes implicit-invokes)]
                                                   (let [method (:method invoke)]
                                                     (when (soot-queryable? method)
                                                       (when-not (android-api? method)
                                                         (let [method-name (-> method get-soot-name)
                                                               method-class (-> method get-soot-class)
                                                               v {:method method-name
                                                                  :class (-> method get-soot-class-name)
                                                                  :package (.. method-class getPackageName)}
                                                               ;; Android API supers
                                                               supers (->> method-class
                                                                           get-transitive-super-class-and-interface
                                                                           (filter android-api?))]
                                                           (when-let [super (some #(try
                                                                                     (if (.. ^soot.SootClass %
                                                                                             (getMethodByNameUnsafe
                                                                                              method-name))
                                                                                       %
                                                                                       false)
                                                                                     ;; Soot implementation: Ambiguious 
                                                                                     (catch RuntimeException e
                                                                                       %))
                                                                                  supers)]
                                                             (let [k {:method method-name
                                                                      :class (-> super get-soot-class-name)
                                                                      :package (.. super getPackageName)}]
                                                               (swap! result update-in (conj path k)
                                                                      #(conj (set %1) %2) v)))))))))
                                               (let [path [(.. callback-class getPackageName)
                                                           (.. callback-class getName)
                                                           :callbacks
                                                           (.. callback getName)
                                                           :invoke-paths]]
                                                 (swap! result assoc-in path
                                                        invoke-paths)))))
                                         (catch Exception e
                                           (print-stack-trace-if-verbose e verbose))
                                         (catch Error e
                                           ;; any error in processing; skip this sample
                                           (.. pool shutdownNow))))))))
                            (.. pool shutdown)
                            (try
                              (when-not (.. pool (awaitTermination Integer/MAX_VALUE
                                                                   TimeUnit/SECONDS))
                                (.. pool shutdownNow))
                              (catch InterruptedException e
                                (.. pool shutdownNow)
                                (.. Thread currentThread interrupt))))
                          ;; must be in Soot body to ensure content/arguments can be printed
                          (when soot-show-result
                            (pprint @result))))
                step2-result (step2 step1-result)])
          ;; catch Exception to prevent disrupting outer threads
          (catch Exception e
            (print-stack-trace-if-verbose e verbose))
          (finally
            (unmute))))
      @result)))

prettify results

(defn- prettify-args
  [args]
  (try
    (cond
      (instance? woa.apk.dex.soot.sexp.ErrorSexp args)
      (list (prettify-args (:type args)) (prettify-args (:info args)))
      (instance? woa.apk.dex.soot.sexp.ExternalSexp args)
      (list :instance (prettify-args (:type args)))
      (or (instance? woa.apk.dex.soot.sexp.BinaryOperationSexp args)
          (instance? woa.apk.dex.soot.sexp.UnaryOperationSexp args))
      (list* (:operator args)
             (map prettify-args (:operands args)))
      (instance? woa.apk.dex.soot.sexp.InvokeSexp args)
      (list :invoke
            (prettify-args (:method args))
            (prettify-args (:base args))
            (prettify-args (:args args)))
      (instance? woa.apk.dex.soot.sexp.InstanceSexp args)
      (list :instance (prettify-args (:instance args)))
      (instance? woa.apk.dex.soot.sexp.MethodSexp args)
      (list :method (prettify-args (:method args)))
      (instance? woa.apk.dex.soot.sexp.FieldSexp args)
      (list :field (prettify-args (:field args)))
      (instance? woa.apk.dex.soot.sexp.InstanceOfSexp args)
      (list :instance-of
            (prettify-args (:class args))
            (prettify-args (:instance args)))
      (instance? woa.apk.dex.soot.sexp.NewArraySexp args)
      (list :new-array
            (prettify-args (:base-type args))
            (prettify-args (:size args)))
      (instance? woa.apk.dex.soot.sexp.NewMultiArraySexp args)
      (list :new-multi-array
            (prettify-args (:base-type args))
            (prettify-args (:sizes args)))
      (instance? woa.apk.dex.soot.sexp.ArrayRefSexp args)
      (list :array-ref
            (prettify-args (:base args))
            (prettify-args (:index args)))
      (instance? woa.apk.dex.soot.sexp.ConstantSexp args)
      (list :constant
            (prettify-args (:const args)))
      (or (instance? soot.SootClass args))
      (list :class (get-soot-name args))
      (or (instance? soot.SootMethod args)
          (instance? soot.SootMethodRef args))
      (list :method (str (get-soot-class-name args)
                         "."
                         (get-soot-name args)))
      (or (instance? soot.SootField args)
          (instance? soot.SootMethodRef args))
      (list :field (str (get-soot-class-name args)
                        "."
                        (get-soot-name args)))
      (soot-queryable? args)
      (->> args get-soot-name)
      (and (not (instance? woa.apk.dex.soot.sexp.Sexp args))
           (coll? args))
      (->> args
           (map prettify-args)
           (into (empty args)))    
      :otherwise
      args)
    (catch Exception e
      args)))
 

define symbolic expression (sexp)

(ns woa.apk.dex.soot.sexp
  ;; internal libs
  (:use woa.apk.dex.soot.util)
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])
  (:import (clojure.lang IHashEq)))

declaration

implementation

(defprotocol Sexp)
(defrecord ErrorSexp [type info]
  Sexp)
(defn make-error-sexp
  [type info]
  (ErrorSexp. type info))
(defrecord ExternalSexp [type]
  Sexp)
(defn make-external-sexp
  [type]
  (ExternalSexp. type))
(defrecord BinaryOperationSexp [operator operands]
  Sexp)
(defn make-binary-operator-sexp
  [operator operands]
  (BinaryOperationSexp. operator operands))
(defrecord UnaryOperationSexp [operator operands]
  Sexp)
(defn make-unary-operator-sexp
  [operator operands]
  (UnaryOperationSexp. operator operands))
(defrecord InvokeSexp [invoke-type method base args]
  Sexp
  SootQuery
  (get-soot-class [this]
    (case invoke-type
      :static-invoke (->> (:method this) get-soot-class)
      (try
        (->> (:base this) get-soot-class)
        (catch Exception e
          (->> (:method this) get-soot-class)))))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (->> (:method this) get-soot-name))
  (soot-resolve [this]
    (->> (:method this) soot-resolve)))
(defn make-invoke-sexp
  [invoke-type method base args]
  (InvokeSexp. invoke-type method base args))
(defrecord InstanceSexp [class instance]
  Sexp
  Object
  SootQuery
  (get-soot-class [this]
    (->> (:class this) get-soot-class))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (->> this get-soot-class-name))
  (soot-resolve [this]
    (->> this get-soot-class soot-resolve)))
(defn make-instance-sexp
  [class instance]
  (InstanceSexp. class instance))
(defrecord ClassSexp [class]
  Sexp
  Object
  SootQuery
  (get-soot-class [this]
    (->> (:class this) get-soot-class))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (->> this get-soot-class-name))
  (soot-resolve [this]
    (->> this get-soot-class soot-resolve)))
(defn make-class-sexp
  [class]
  (ClassSexp. class))
(defrecord MethodSexp [instance method]
  Sexp
  SootQuery
  (get-soot-class [this]
    (->> (:instance this) get-soot-class))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (->> (:method this) get-soot-name))
  (soot-resolve [this]
    (->> (:method this) soot-resolve)))
(defn make-method-sexp
  [instance method]
  (MethodSexp. instance method))
(defrecord FieldSexp [instance field]
  Sexp
  SootQuery
  (get-soot-class [this]
    (->> (:instance this) get-soot-class))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (->> (:field this) get-soot-name))
  (soot-resolve [this]
    (->> (:field this) soot-resolve)))
(defn make-field-sexp
  [instance field]
  (FieldSexp. instance field))
(defrecord LocalSexp [local]
  Sexp)
(defn make-local-sexp
  [local]
  (LocalSexp. local))
(defrecord InstanceOfSexp [class instance]
  Sexp)
(defn make-instance-of-sexp
  [class instance]
  (InstanceOfSexp. class instance))
(defrecord NewArraySexp [base-type size]
  Sexp)
(defn make-new-array-sexp
  [base-type size]
  (NewArraySexp. base-type size))
(defrecord NewMultiArraySexp [base-type sizes]
  Sexp)
(defn make-new-multi-array-sexp
  [base-type sizes]
  (NewMultiArraySexp. base-type sizes))
(defrecord ArrayRefSexp [base index]
  Sexp)
(defn make-array-ref-sexp
  [base index]
  (ArrayRefSexp. base index))
(defrecord ConstantSexp [const]
  Sexp)
(defn make-constant-sexp
  [const]
  (ConstantSexp. const))
(defrecord CastSexp [value cast-type]
  Sexp)
(defn make-cast-sexp
  [value cast-type]
  (CastSexp. value cast-type))
 
(ns woa.apk.dex.soot.simulator
  ;; internal libs
  (:use woa.util)   
  (:use woa.apk.dex.soot.util)
  (:use woa.apk.dex.soot.sexp)
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])
  ;; imports
  (:import (soot Unit
                 SootField
                 SootClass
                 SootMethod
                 SootMethodRef
                 Scene)
           (soot.jimple Stmt
                        StmtSwitch
                        JimpleValueSwitch)))

declaration

public

(declare with-simulator)
(declare initialize-classes get-all-interesting-invokes)
(declare ^:dynamic *init-instances* ^:dynamic *simulator-global-state*)

private

(declare simulate-method simulate-basic-block)
(declare create-simulator 
         simulator-evaluate
         simulator-new-instance
         simulator-get-field simulator-set-field
         simulator-get-this simulator-get-param
         simulator-set-local simulator-get-local
         simulator-add-returns simulator-get-returns simulator-clear-returns
         simulator-add-explicit-invokes simulator-get-explicit-invokes simulator-clear-explicit-invokes
         simulator-add-implicit-invokes simulator-get-implicit-invokes simulator-clear-implicit-invokes
         simulator-add-component-invokes simulator-get-component-invokes simulator-clear-component-invokes
         simulator-add-invoke-paths simulator-get-invoke-paths simulator-clear-invoke-paths)
(declare filter-implicit-cf-invoke-methods
         implicit-cf-class? implicit-cf? implicit-cf-task? implicit-cf-component? 
         get-transitive-implicit-cf-super-class-and-interface get-implicit-cf-root-class-names)
(declare implicit-cf-marker implicit-cf-marker-task implicit-cf-marker-component)
(declare safe-invokes)

implementation

value resolver protocol

(defprotocol SimulatorValueResolver
  (simulator-resolve-value [obj simulator]))

the default case

(extend-type nil
  SimulatorValueResolver
  (simulator-resolve-value [object simulator]
    nil))
(extend-type Object
  SimulatorValueResolver
  (simulator-resolve-value [object simulator]
    object))
(extend-type soot.Local
  SimulatorValueResolver
  (simulator-resolve-value [local simulator]
    (let [value (simulator-get-local simulator local)]
      (if (= value :nil)
        (make-local-sexp local)
        (simulator-resolve-value value simulator)))))
(extend-type soot.SootField
  SimulatorValueResolver
  (simulator-resolve-value [field simulator]
    (let [instance (simulator-get-this simulator)
          value (simulator-get-field instance field)]
      (if (= value :nil)
        (make-field-sexp instance field)
        (simulator-resolve-value value simulator)))))
(extend-type soot.SootFieldRef
  SimulatorValueResolver
  (simulator-resolve-value [field simulator]
    (let [instance (simulator-get-this simulator)
          value (simulator-get-field instance field)]
      (if (= value :nil)
        (make-field-sexp instance field)
        (simulator-resolve-value value simulator)))))
(extend-type soot.jimple.InstanceFieldRef
  SimulatorValueResolver
  (simulator-resolve-value [field simulator]
    (let [instance (simulator-resolve-value (.. field getBase)
                                            simulator)
          field (.. field getFieldRef)
          value (simulator-get-field instance field)]
      (if (= value :nil)
        (make-field-sexp instance field)
        (simulator-resolve-value value simulator)))))
(extend-type soot.jimple.StaticFieldRef
  SimulatorValueResolver
  (simulator-resolve-value [field simulator]
    (let [value (simulator-get-field nil field)]
      (if (= value :nil)
        (make-field-sexp nil field)
        (simulator-resolve-value value simulator)))))
(extend-type soot.jimple.NullConstant
  SimulatorValueResolver
  (simulator-resolve-value [_ simulator]
    nil))
(extend-type soot.jimple.ClassConstant
  SimulatorValueResolver
  (simulator-resolve-value [const simulator]
    (let [value (make-class-sexp (get-soot-class const))]
      value)))
(extend-type soot.jimple.Constant
  SimulatorValueResolver
  (simulator-resolve-value [const simulator]
    (let [value (try
                  (.. const value)
                  (catch Exception e
                    (make-constant-sexp const)))]
      value)))

simulator assignment protocol

simulator should be an atom to have persistent effect

(defprotocol SimulatorAssignment
  (simulator-assign [target value simulator]))
(extend-type nil
  SimulatorAssignment
  (simulator-assign [local value simulator]
    nil))
(extend-type soot.Local
  SimulatorAssignment
  (simulator-assign [local value simulator]
    (swap! simulator simulator-set-local
           local value)))
(extend-type soot.SootField
  SimulatorAssignment
  (simulator-assign [field value simulator]
    (simulator-set-field (simulator-get-this @simulator)
                         field value)))
(extend-type soot.SootFieldRef
  SimulatorAssignment
  (simulator-assign [field value simulator]
    (simulator-set-field (simulator-get-this @simulator)
                         field value)))
(extend-type soot.jimple.FieldRef
  SimulatorAssignment
  (simulator-assign [field value simulator]
    (simulator-assign (.. field getFieldRef) value simulator)))
(extend-type soot.jimple.ArrayRef
  SimulatorAssignment
  (simulator-assign [field value simulator]
    (let [base (.. field getBase)
          base-value (-> base (simulator-resolve-value @simulator))
          index-value (-> (.. field getIndex)
                          (simulator-resolve-value @simulator))]
      (aset base-value index-value value))))

get method interesting invokes and helpers

initial instance of classes within circumscription

simulator's global state

(def ^:dynamic *init-instances*
  nil)
(def ^:dynamic *simulator-global-state*
  nil)
(defmacro with-simulator
  [& body]
  ;; initialized here to avoid unintended retention across different runs
  `(binding [*init-instances* (atom nil)
             *simulator-global-state* (atom nil)]
     ~@body))

initialize class by invoking

(defn initialize-classes
  [{:keys [classes circumscription]
    :or {circumscription :all}
    :as initialize-params}
   {:keys [verbose
           soot-debug-show-exceptions]
    :as options}]
  (reset! *simulator-global-state*
          {:fields {:static {}
                    :instance {}}})
  (let [;; soot.SootMethod cannot be reliably compared for value (as in a set)
        circumscription (if (= circumscription :all)
                          circumscription
                          (try
                            (->> circumscription
                                 (map #(.. % getSignature))
                                 set)
                            (catch Exception e
                              (set circumscription))))]
    (doseq [^SootClass class classes]
      (swap! *init-instances* assoc-in [(->> class get-soot-class-name)]
             (simulator-new-instance class))
      (doseq [^SootMethod clinit (.. (soot.EntryPoints/v) (clinitsOf class))]
        (try
          (simulate-method {:method clinit
                            :this nil
                            :params nil}
                           (assoc-in options [:circumscription]
                                     circumscription))
          (catch Exception e
            (when soot-debug-show-exceptions
              (print-stack-trace e))))))))

get both explicit and implicit interesting invokes

(defn get-all-interesting-invokes
  [^SootMethod root-method
   interesting-method?
   circumscription
   {:keys [verbose
           soot-debug-show-exceptions]
    :as options}]
  (let [all-explicit-invokes (atom #{})
        all-implicit-invokes (atom #{})
        all-component-invokes (atom #{})
        all-invoke-paths (atom nil)
        ;; soot.SootMethod cannot be reliably compared for value (as in a set)
        circumscription (if (= circumscription :all)
                          circumscription
                          (try
                            (->> circumscription
                                 (map #(.. % getSignature))
                                 set)
                            (catch Exception e
                              (set circumscription))))]
    (try
      (let [{:keys [returns
                    explicit-invokes
                    implicit-invokes
                    component-invokes
                    invoke-paths]}
            ;; full simulation
            (simulate-method {:method
                              root-method
                              :this
                              ;; use initial instance if available
                              (let [root-method-class (->> root-method get-soot-class)
                                    instance (get-in @*init-instances*
                                                     [(->> root-method get-soot-class-name)])]
                                (if instance
                                  instance
                                  (simulator-new-instance root-method-class)))
                              :params
                              (->> (.. root-method getParameterTypes)
                                   (map #(make-external-sexp %)))
                              :interesting-method?
                              interesting-method?}
                             (assoc-in options [:circumscription]
                                       circumscription))]
        ;; interesting invokes can be explicit or implicit
        (swap! all-explicit-invokes into
               explicit-invokes)
        (swap! all-implicit-invokes into
               implicit-invokes)
        (swap! all-component-invokes into
               component-invokes)
        (reset! all-invoke-paths invoke-paths))
      (catch Exception e
        (when soot-debug-show-exceptions
          (print-stack-trace e))))
    ;; return result
    {:explicit-invokes @all-explicit-invokes
     :implicit-invokes @all-implicit-invokes
     :component-invokes @all-component-invokes
     :invoke-paths @all-invoke-paths}))

simulate method

(defn- simulate-method
  [{:keys [method this params interesting-method?]
    :or {interesting-method? (constantly true)}
    :as simulation-params}
   {:keys [circumscription
           soot-basic-block-simulation-budget
           soot-method-simulation-depth-budget
           soot-debug-show-exceptions]
    :or {circumscription :all}
    :as options}]
  (let [method (try (soot-resolve method)
                    (catch Exception e method))
        method-name (try
                      (.. method getSignature)
                      (catch Exception e))
        default-return #{(make-invoke-sexp :invoke method this params)}]
    (cond
      (not (instance? soot.SootMethod method))
      {:returns default-return
       :explicit-invokes #{}
       :implicit-invokes #{}
       :component-invokes #{}
       :invoke-paths method-name}      
      ;; only simulate method within circumscription
      (and (not= circumscription :all)
           (not (contains? circumscription
                           (.. method getSignature))))
      (do
        {:returns default-return
         :explicit-invokes #{}
         :implicit-invokes #{}
         :component-invokes #{}
         :invoke-paths method-name})
      (< soot-method-simulation-depth-budget 0)
      (do
        {:returns #{(make-error-sexp :no-budget
                                     {:method method
                                      :this this
                                      :params params})}
         :explicit-invokes #{}
         :implicit-invokes #{}
         :component-invokes #{}
         :invoke-paths method-name})
      ;; no method body, cannot proceed
      (try
        (.. method retrieveActiveBody)
        false
        (catch Exception e
          true))
      (do
        {:returns #{(make-error-sexp :no-method-body
                                     {:method method
                                      :this this
                                      :params params})}
         :explicit-invokes #{}
         :implicit-invokes #{}
         :component-invokes #{}
         :invoke-paths method-name})
      :otherwise
      (let [all-returns (atom #{})
            all-explicit-invokes (atom #{})
            all-implicit-invokes (atom #{})
            all-component-invokes (atom #{})
            all-invoke-paths (atom (when method-name
                                     {method-name #{}}))
            body (.. method getActiveBody)
            stmt-info
            (let [stmts (->> (.. body getUnits snapshotIterator) iterator-seq vec)
                  stmt-2-index (->> stmts
                                    (map-indexed #(vector %2 %1))
                                    (into {}))]
              {:stmts stmts
               :stmt-2-index stmt-2-index})
            bb-budget (atom soot-basic-block-simulation-budget)]
        (process-worklist
         ;; the initial worklist
         #{{:simulator (create-simulator this params)
            :start-stmt (first (:stmts stmt-info))}}
         ;; the process
         (fn [worklist]
           ;; width-first search to prevent malicious code exhausting bb-budget
           (->> worklist
                (mapcat (fn [{:keys [simulator start-stmt]}]
                          (when (and @bb-budget
                                     (> @bb-budget 0))
                            (let [{:keys [simulator next-start-stmts]}
                                  (simulate-basic-block {:simulator simulator
                                                         :stmt-info stmt-info
                                                         :start-stmt start-stmt
                                                         :method method
                                                         :interesting-method?
                                                         interesting-method?}
                                                        options)]
                              (swap! bb-budget dec)
                              (swap! all-returns into
                                     (-> simulator
                                         simulator-get-returns))
                              (swap! all-explicit-invokes into
                                     (-> simulator
                                         simulator-get-explicit-invokes))
                              (swap! all-implicit-invokes into
                                     (-> simulator
                                         simulator-get-implicit-invokes))
                              (swap! all-component-invokes into
                                     (-> simulator
                                         simulator-get-component-invokes))
                              (when method-name
                                (swap! all-invoke-paths update-in [method-name]
                                       into
                                       (-> simulator
                                           simulator-get-invoke-paths)))
                              ;; add the following to worklist
                              (for [start-stmt next-start-stmts]
                                ;; control flow sensitive!
                                {:simulator (-> simulator
                                                simulator-clear-returns
                                                simulator-clear-explicit-invokes
                                                simulator-clear-implicit-invokes
                                                simulator-clear-component-invokes
                                                simulator-clear-invoke-paths)
                                 :start-stmt start-stmt}))))))))
        {:returns @all-returns
         :explicit-invokes @all-explicit-invokes
         :implicit-invokes @all-implicit-invokes
         :component-invokes @all-component-invokes
         :invoke-paths (if (empty? (get-in @all-invoke-paths [method-name]))
                         method-name
                         @all-invoke-paths)}))))

simulate a basic block

(defn- simulate-basic-block
  [{:keys [simulator stmt-info start-stmt method interesting-method?]
    :as simulation-params}
   {:keys [soot-method-simulation-depth-budget
           soot-simulation-conservative-branching
           soot-simulation-linear-scan
           soot-debug-show-each-statement
           soot-debug-show-locals-per-statement
           soot-debug-show-all-per-statement
           soot-debug-show-exceptions
           verbose]
    :as options}]
  (let [simulator (atom simulator)
        [basic-block residue]
        (split-with (if soot-simulation-linear-scan
                      ;; linear scan do not split at branching
                      (constantly true)
                      ;; otherwise, split at first branch or return
                      #(let [^Stmt stmt %]
                         (and (.. stmt fallsThrough)
                              (not (.. stmt branches)))))
                    (subvec (:stmts stmt-info)
                            (get (:stmt-2-index stmt-info)
                                 start-stmt)))]
    ;; simulate statements in the basic block
    (doseq [^Stmt stmt basic-block]
      (try
        (.. stmt
            (apply (proxy [StmtSwitch] []
                     (caseAssignStmt [stmt]
                       (let [target (.. stmt getLeftOp)
                             value (-> (.. stmt getRightOp)
                                       (simulator-evaluate
                                        {:simulator simulator
                                         :interesting-method?
                                         interesting-method?}
                                        options))]
                         (simulator-assign target value simulator)))
                     (caseBreakpointStmt [stmt])
                     (caseEnterMonitorStmt [stmt])
                     (caseExitMonitorStmt [stmt])
                     (caseGotoStmt [stmt])
                     (caseIdentityStmt [stmt]
                       (let [target (.. stmt getLeftOp)
                             value (-> (.. stmt getRightOp)
                                       (simulator-evaluate
                                        {:simulator simulator
                                         :interesting-method?
                                         interesting-method?}
                                        options))]
                         (simulator-assign target value simulator)))
                     (caseIfStmt [stmt])
                     (caseInvokeStmt [stmt]
                       (-> (.. stmt getInvokeExpr)
                           (simulator-evaluate {:simulator simulator
                                                :interesting-method?
                                                interesting-method?}
                                               options)))
                     (caseLookupSwitchStmt [stmt])
                     (caseNopStmt [stmt])
                     (caseRetStmt [stmt])
                     (caseReturnStmt [stmt])
                     (caseReturnVoidStmt [stmt])
                     (caseTableSwitchStmt [stmt])
                     (caseThrowStmt [stmt])
                     (defaultCase [stmt]))))
        (catch Exception e
          (when soot-debug-show-exceptions
            (print-stack-trace e))))
      (when (or soot-debug-show-each-statement
                soot-debug-show-locals-per-statement
                soot-debug-show-all-per-statement)
        (println stmt)
        (when (or soot-debug-show-locals-per-statement
                  soot-debug-show-all-per-statement)
          (println "- locals -")
          (pprint (:locals @simulator))
          (when soot-debug-show-all-per-statement
            (println "- globals -")
            (pprint @*simulator-global-state*))
          (println "----------"))))
    (let [return (atom {:simulator nil ; to be filled at the end
                        :next-start-stmts nil})
          ;; the first stmt of residue, if existed, is a brancher
          stmt (first residue)]
      (when stmt
        (.. stmt
            (apply (proxy [StmtSwitch] []
                     (caseAssignStmt [stmt])
                     (caseBreakpointStmt [stmt])
                     (caseEnterMonitorStmt [stmt])
                     (caseExitMonitorStmt [stmt])
                     (caseGotoStmt [^soot.jimple.internal.JGotoStmt stmt]
                       (swap! return update-in [:next-start-stmts]
                              conj (.. stmt getTarget)))                     
                     (caseIdentityStmt [stmt])
                     (caseIfStmt [^soot.jimple.internal.JIfStmt stmt]
                       (let [condition (.. stmt getCondition)
                             value (-> condition
                                       (simulator-evaluate {:simulator simulator
                                                            :interesting-method?
                                                            interesting-method?}
                                                           options))                             
                             target-stmt (.. stmt getTarget)
                             next-stmt (second residue)]
                         (if soot-simulation-conservative-branching
                           ;; conservative branching
                           ;; senstive to value
                           ;; good: exact, eliminate dead branch
                           ;; bad: may not cover enough branches when budget depelete
                           (if-not (extends? Sexp (class value))
                             (if value
                               ;; if value is true, take target-stmt
                               (when target-stmt
                                 (swap! return update-in [:next-start-stmts]
                                        conj target-stmt))
                               ;; if value is false, take next-stmt
                               (when next-stmt
                                 (swap! return update-in [:next-start-stmts]
                                        conj next-stmt)))
                             ;; otherwise, take both stmts
                             (doseq [stmt [next-stmt target-stmt]
                                     :when stmt]
                               (swap! return update-in [:next-start-stmts]
                                      conj stmt)))
                           ;; aggresive branching
                           ;; insensitive to value
                           ;; good: cover as much branches as possible
                           ;; bad: not exact, may get into dead branch
                           (doseq [stmt [next-stmt target-stmt]
                                   :when stmt]
                             (swap! return update-in [:next-start-stmts]
                                    conj stmt)))))                     
                     (caseInvokeStmt [stmt])
                     (caseLookupSwitchStmt [stmt])
                     (caseNopStmt [stmt])
                     (caseRetStmt [stmt])
                     (caseReturnStmt [stmt]
                       (doto simulator
                         (swap! simulator-add-returns
                                [(-> (.. stmt getOp)
                                     (simulator-evaluate {:simulator simulator
                                                          :interesting-method?
                                                          interesting-method?}
                                                         options))])))
                     (caseReturnVoidStmt [stmt]
                       ;; nothing to do)
                     (caseTableSwitchStmt [stmt])
                     (caseThrowStmt [stmt])
                     (defaultCase [stmt])))))
      (swap! return assoc-in [:simulator]
             @simulator)
      @return)))

frame simulator manipulators

(defrecord ^:private Simulator
  [;; for a method frame
   this params locals returns
   ;; during simulation
   explicit-invokes implicit-invokes component-invokes invoke-paths])
(defn- create-simulator
  [this params]
  (map->Simulator {:this this
                   :params (vec params)
                   :locals {}
                   :returns #{}
                   :explicit-invokes #{}
                   :implicit-invokes #{}
                   :component-invokes #{}
                   :invoke-paths #{}}))
(defn- simulator-new-instance
  [& [class]]
  (let [instance (gensym (str "instance"
                              (when-let [class-name (get-soot-class-name class)]
                                (str "-" class-name "-"))))]
    (make-instance-sexp class instance)))

evaluate expr in simulator (simulator should be an Clojure atom to allow updates)

(defn- simulator-evaluate
  [expr
   {:keys [simulator interesting-method?]
    :as simulation-params}
   {:keys [soot-method-simulation-depth-budget
           soot-simulation-collection-size-budget
           soot-no-implicit-cf
           soot-dump-all-invokes
           soot-debug-show-implicit-cf
           soot-debug-show-safe-invokes
           soot-debug-show-exceptions
           layout-callbacks
           verbose]
    :as options}]
  (let [result (atom nil)
        ;; binary operation
        binary-operator-expr
        (fn [expr operator operator-name]
          (let [op1 (-> (.. expr getOp1) (simulator-resolve-value @simulator) int)
                op2 (-> (.. expr getOp2) (simulator-resolve-value @simulator) int)
                default-return (make-binary-operator-sexp operator-name
                                                          [op1 op2])]
            (try
              (operator op1 op2)
              (catch Exception e
                (when soot-debug-show-exceptions
                  (print-stack-trace e))
                default-return))))
        ;; unary operation
        unary-operator-expr
        (fn [expr operator operator-name]
          (let [op (-> (.. expr getOp) (simulator-resolve-value @simulator) int)
                default-return (make-unary-operator-sexp operator-name
                                                         [op])]
            (try
              (operator op)
              (catch Exception e
                (when soot-debug-show-exceptions
                  (print-stack-trace e))
                default-return)))) 
        ;; invoke operation
        invoke-expr
        (fn [invoke-type ^SootMethodRef method base args]
          (let [base-value (simulator-resolve-value base @simulator)
                args (->> args
                          (map #(simulator-resolve-value % @simulator))
                          vec) 
                default-return (make-invoke-sexp invoke-type
                                                 method
                                                 base-value
                                                 args)
                method-class (-> method get-soot-class)
                class-name (-> method get-soot-class-name)
                method-name (-> method get-soot-name)]
            (try
              ;; only add interesting methods
              (when (or soot-dump-all-invokes
                        (interesting-method? method))
                (doto simulator
                  (swap! simulator-add-explicit-invokes
                         [{:method method
                           :args args}])))
              (let [invoke-method (fn [method this params & [implicit?]]
                                    (try
                                      ;; try resolve method
                                      (soot-resolve method)
                                      (let [{:keys [returns
                                                    explicit-invokes
                                                    implicit-invokes
                                                    component-invokes
                                                    invoke-paths]}
                                            (simulate-method {:method method
                                                              :this this
                                                              :params params
                                                              :interesting-method?
                                                              interesting-method?}
                                                             (update-in options
                                                                        [:soot-method-simulation-depth-budget]
                                                                        dec))]
                                        (do
                                          (doto simulator
                                            ;; implicit is contagious
                                            (swap! (if implicit?
                                                     simulator-add-implicit-invokes
                                                     simulator-add-explicit-invokes)
                                                   explicit-invokes)
                                            (swap! simulator-add-implicit-invokes
                                                   implicit-invokes)
                                            (swap! simulator-add-component-invokes
                                                   component-invokes)
                                            (swap! simulator-add-invoke-paths
                                                   #{invoke-paths}))
                                          ;; if the result is unique, extract it
                                          (if (== 1 (count returns))
                                            (first returns)
                                            returns)))                                      
                                      (catch Exception e)
                                      (finally
                                        (when (or soot-dump-all-invokes
                                                  (try
                                                    (interesting-method? method)
                                                    (catch Exception e)))
                                          (doto simulator
                                            (swap! (if implicit?
                                                     simulator-add-implicit-invokes
                                                     simulator-add-explicit-invokes)
                                                   [{:method method
                                                     :args params}]))))))]
                (cond
                  ;; safe invokes
                  (let [t (get safe-invokes class-name)]
                    (or (= t :all)
                        (contains? t method-name)))
                  (try
                    (when soot-debug-show-safe-invokes
                      (println "safe invoke:"
                               class-name base-value method-name args))                    
                    (let [result (case invoke-type
                                   :special-invoke
                                   (simulator-assign
                                    base
                                    (clojure.lang.Reflector/invokeConstructor (Class/forName class-name)
                                                                              (object-array args))
                                    simulator)
                                   :static-invoke
                                   (clojure.lang.Reflector/invokeStaticMethod class-name
                                                                              method-name
                                                                              (object-array args))
                                   ;; otherwise
                                   (clojure.lang.Reflector/invokeInstanceMethod base-value
                                                                                method-name
                                                                                (object-array args)))]
                      (when soot-debug-show-safe-invokes
                        (println "safe invoke result:"
                                 result))
                      (if (and (instance? java.util.Collection result)
                               (> (.size result) soot-simulation-collection-size-budget))
                        default-return
                        result))
                    (catch Exception e
                      default-return))
                  ;; setContentView
                  (#{"setContentView"} method-name)
                  (let [layout-id (first args)]
                    (cond
                      (number? layout-id)
                      (doseq [{:keys [method]
                               :as layout-callback}
                              (get layout-callbacks layout-id)]
                        (when layout-callback
                          (let [info (dissoc layout-callback :method)]
                            (try
                              (doseq [the-method (find-method-candidates method-class
                                                                         method
                                                                         [info])]
                                (invoke-method the-method base-value [info])) 
                              (catch Exception e
                                default-return)))))
                      :otherwise
                      default-return))
                  ;; special-invokes
                  (= invoke-type :special-invoke)
                  (try
                    (cond
                      ;; Runnable is the one to be run
                      (and (transitive-ancestor? "java.lang.Thread" method-class)
                           (first args))
                      (simulator-assign base (first args) simulator)
                      :otherwise
                      (simulator-assign base
                                        (simulator-new-instance method-class)
                                        simulator))
                    default-return
                    (catch Exception e
                      default-return))
                  ;; implicit cf: task
                  (and (not soot-no-implicit-cf)
                       (implicit-cf-task? method))
                  (try
                    (let [root-class-name (->> method
                                               get-implicit-cf-root-class-names
                                               first)
                          x [root-class-name method-name]]
                      (when soot-debug-show-implicit-cf
                        (println "implicit cf:" x base-value args))
                      (cond
                        (#{["java.lang.Thread" "start"]
                           ["java.lang.Runnable" "run"]}
                         x)
                        (do
                          (doseq [implicit-target
                                  (find-method-candidates (get-soot-class base-value)
                                                          "run"
                                                          [])]
                            (when soot-debug-show-implicit-cf
                              (println (format "implicit cf to: %1$s.%2$s:"
                                               root-class-name method-name)
                                       method-class
                                       base-value
                                       implicit-target))
                            (invoke-method implicit-target base-value [] true)))
                        (#{["java.util.concurrent.Callable" "call"]}
                         x)
                        (doseq [implicit-target (find-method-candidates method-class
                                                                        "call"
                                                                        [])]
                          (when soot-debug-show-implicit-cf
                            (println (format "implicit cf to: %1$s.%2$s:"
                                             root-class-name method-name)
                                     method-class
                                     base-value
                                     implicit-target))                          
                          (invoke-method implicit-target base-value [] true))
                        (#{["java.util.concurrent.Executor" "execute"]
                           ["java.util.concurrent.ExecutorService" "execute"]}
                         x)
                        (let [target-obj (first args)]
                          (doseq [implicit-target
                                  (find-method-candidates (get-soot-class target-obj)
                                                          "run"
                                                          [])]
                            (when soot-debug-show-implicit-cf
                              (println (format "implicit cf to: %1$s.%2$s:"
                                               root-class-name method-name)
                                       method-class
                                       base-value
                                       implicit-target))                          
                            (invoke-method implicit-target target-obj [] true)))
                        (#{["java.util.concurrent.ExecutorService" "submit"]}
                         x)
                        (let [target-obj (first args)]
                          (doseq [implicit-target
                                  (find-method-candidates (get-soot-class target-obj)
                                                          "run"
                                                          [])]
                            (when soot-debug-show-implicit-cf
                              (println (format "implicit cf to: %1$s.%2$s:"
                                               root-class-name method-name)
                                       method-class
                                       base-value
                                       implicit-target))                            
                            (invoke-method implicit-target target-obj [] true))
                          (doseq [implicit-target
                                  (find-method-candidates (get-soot-class target-obj)
                                                          "call"
                                                          [])]
                            (when soot-debug-show-implicit-cf
                              (println (format "implicit cf to: %1$s.%2$s:"
                                               root-class-name method-name)
                                       method-class
                                       base-value
                                       implicit-target))                            
                            (invoke-method implicit-target target-obj [] true)))
                        (#{["android.os.Handler" "post"]
                           ["android.os.Handler" "postAtFrontOfQueue"]
                           ["android.os.Handler" "postAtTime"]
                           ["android.os.Handler" "postDelayed"]}
                         x)
                        (let [target-obj (first args)]
                          (doseq [implicit-target
                                  (find-method-candidates (get-soot-class target-obj)
                                                          "run"
                                                          [])]
                            (when soot-debug-show-implicit-cf
                              (println (format "implicit cf to: %1$s.%2$s:"
                                               root-class-name method-name)
                                       method-class
                                       base-value
                                       implicit-target))
                            (invoke-method implicit-target target-obj [] true)))
                        (#{["java.lang.Class" "forName"]}
                         x)
                        (let [target-obj (first args)]
                          (try
                            (-> target-obj get-soot-class)
                            (catch Exception e
                              (make-class-sexp target-obj))))
                        (#{["java.lang.Class" "getMethod"]}
                         x)
                        (let [target-obj (first args)]
                          (try
                            ;; there could be more than one such method
                            (let [candidates
                                  (find-method-candidates (get-soot-class base-value)
                                                          (str target-obj)
                                                          (count (second args)))]
                              (if-not (empty? candidates)
                                candidates
                                (make-method-sexp base-value target-obj)))
                            (catch Exception e
                              (make-method-sexp base-value target-obj))))
                        (#{["java.lang.reflect.Method" "invoke"]}
                         x)
                        (try
                          (let [result (atom #{})]
                            (if-not (instance? woa.apk.dex.soot.sexp.Sexp
                                               base-value)
                              ;; try candidates
                              (doseq [method base-value]
                                (let [invoke-instance (first args)
                                      invoke-args (second args)]
                                  (when (= (count invoke-args)
                                           (.. method getParameterCount))
                                    (when soot-debug-show-implicit-cf
                                      (println (format "implicit cf to: %1$s.%2$s:"
                                                       root-class-name method-name)
                                               method
                                               invoke-instance
                                               invoke-args))
                                    (when-let [r (try
                                                   (invoke-method method
                                                                  invoke-instance
                                                                  invoke-args
                                                                  true)
                                                   (catch Exception e))]
                                      (swap! result conj r)))))
                              ;; otherwise, MethodSexp
                              (do
                                (doto simulator
                                  (swap! simulator-add-implicit-invokes
                                         [{:method base-value
                                           :args (second args)}]))
                                (try
                                  (doto simulator
                                    (swap! simulator-add-invoke-paths
                                           #{(format "%1$s.%2$s[%3$d args]"
                                                     (get-soot-class-name base-value)
                                                     (get-soot-name base-value)
                                                     (count (second args)))}))
                                  (catch Exception e))
                                (swap! result conj
                                       (make-invoke-sexp :reflect base-value
                                                         (first args) (vec (second args))))))
                            (first result))
                          (catch Exception e
                            (make-invoke-sexp :reflect base-value
                                              (first args) (vec (second args)))))
                        (#{["java.lang.Class" "getField"]}
                         x)
                        (let [target-obj (first args)]
                          (try
                            (.. (-> base-value get-soot-class)
                                (getFieldByNameUnsafe (str target-obj)))
                            (catch Exception e
                              (make-field-sexp base-value target-obj))))
                        (and (= "java.lang.reflect.Field" root-class-name)
                             (#{"get" "getBoolean" "getByte" "getChar"
                                "getDouble" "getFloat" "getInt" "getLong" "getShort"}
                              method-name))
                        (try
                          (let [field base-value
                                value (simulator-get-field nil base-value)]
                            value)
                          (catch Exception e
                            (make-field-sexp (simulator-get-this @simulator) base-value)))
                        (and (= "java.lang.reflect.Field" root-class-name)
                             (#{"equals"}) method-name)
                        (try
                          (let [field base-value
                                value (simulator-get-field nil field)]
                            (= value (first args)))
                          (catch Exception e
                            (make-field-sexp (simulator-get-this @simulator)
                                             base-value)))
                        (and (= "java.lang.reflect.Field" root-class-name)
                             (#{"set" "setBoolean" "setByte" "setChar"
                                "setDouble" "setFloat" "setInt" "setLong" "setShort"}
                              method-name))
                        (try
                          (let [field base-value
                                value (first args)]
                            (simulator-set-field nil field value)
                            value)
                          (catch Exception e
                            (make-field-sexp (simulator-get-this @simulator) base-value)))
                        :default default-return))
                    (catch Exception e
                      default-return))
                  ;; implicit cf: component
                  (and (not soot-no-implicit-cf)
                       (implicit-cf-component? method))
                  (try
                    (let [root-class-name (->> method
                                               get-implicit-cf-root-class-names
                                               first)
                          x [root-class-name method-name]]
                      (cond
                        ;;                   (#{["android.content.Context" "startActivity"]
                        ;;                      ["android.content.Context" "startActivities"]}
                        ;;                    x)
                        ;;                   (update-result :category :component
                        ;;                                  :type "android.app.Activity"
                        ;;                                  :instance (with-out-str (pr (first args))))
                        ;;                   (#{["android.content.Context" "startService"]
                        ;;                      ["android.content.Context" "stopService"]
                        ;;                      ["android.content.Context" "bindService"]
                        ;;                      ["android.content.Context" "unbindService"]}
                        ;;                    x)
                        ;;                   (update-result :category :component
                        ;;                                  :type "android.app.Service"
                        ;;                                  :instance (with-out-str (pr (first args))))
                        ;;                   (#{["android.content.Context" "sendBroadcast"]
                        ;;                      ["android.content.Context" "sendBrocastAsUser"]
                        ;;                      ["android.content.Context" "sendOrderedBroadcast"]
                        ;;                      ["android.content.Context" "sendOrderedBroadcastAsUser"]
                        ;;                      ["android.content.Context" "sendStickyBroadcast"]
                        ;;                      ["android.content.Context" "sendStickyBroadcastAsUser"]}
                        ;;                    x)
                        ;;                   (update-result :category :component
                        ;;                                  :type "android.content.BroadcastReceiver"
                        ;;                                  :instance (with-out-str (pr (first args))))
                        ;;                   (#{["android.content.Context" "registerComponentCallbacks"]}
                        ;;                    x)
                        ;;                   (update-result :category :component
                        ;;                                  :type "android.content.ComponentCallbacks"
                        ;;                                  :instance (with-out-str (pr (first args))))
                        ;;                   (#{["android.content.Context" "registerReceiver"]}
                        ;;                    x)
                        ;;                   (update-result :category :component
                        ;;                                  :type "android.content.BroadcastReceiver"
                        ;;                                  :instance (with-out-str (pr args))))))))
                        :default default-return))
                    (catch Exception e
                      default-return))
                  :default
                  (invoke-method method base-value args)))
              (catch Exception e
                default-return)
              (finally
                (try
                  (doto simulator
                    (swap! simulator-add-invoke-paths
                           #{(.. method getSignature)}))
                  (catch Exception e))))))
        cast-expr
        (fn [expr]
          (let [value (-> (.. expr getOp) (simulator-resolve-value @simulator))
                cast-type (.. expr getCastType)
                default-return (make-cast-sexp value cast-type)]
            (try
              (let [result ((cond
                              (instance? soot.BooleanType cast-type)
                              boolean
                              (instance? soot.ByteType cast-type)
                              byte
                              (instance? soot.CharType cast-type)
                              char
                              (instance? soot.ShortType cast-type)
                              short
                              (instance? soot.IntType cast-type)
                              int
                              (instance? soot.LongType cast-type)
                              long
                              (instance? soot.FloatType cast-type)
                              float
                              (instance? soot.DoubleType cast-type)
                              double
                              :otherwise
                              identity)
                            value)]
                result)
              (catch Exception e default-return))))
        new-array-expr
        (fn [expr]
          (let [base-type (.. expr getBaseType)
                size (-> (.. expr getSize) (simulator-resolve-value @simulator))
                default-return (make-new-array-sexp base-type size)]
            (try
              (let [result
                    (if (< size soot-simulation-collection-size-budget)
                      ((cond
                         (instance? soot.BooleanType base-type)
                         boolean-array
                         (instance? soot.ByteType base-type)
                         byte-array
                         (instance? soot.CharType base-type)
                         char-array
                         (instance? soot.ShortType base-type)
                         short-array
                         (instance? soot.IntType base-type)
                         int-array
                         (instance? soot.LongType base-type)
                         long-array
                         (instance? soot.FloatType base-type)
                         float-array
                         (instance? soot.DoubleType base-type)
                         double-array
                         :otherwise
                         object-array)
                       size)                      
                      default-return)]
                result)
              (catch Exception e default-return))))
        new-multi-array-expr
        (fn [expr]
          (let [base-type (.. expr getBaseType)
                sizes (->> (.. expr getSizes)
                           (map #(simulator-resolve-value % @simulator)))
                size (reduce * sizes)
                default-return (make-new-array-sexp base-type sizes)]
            (try
              (let [result
                    (if (< size
                           soot-simulation-collection-size-budget)
                      (apply make-array
                             (cond
                               (instance? soot.BooleanType base-type)
                               Boolean/TYPE
                               (instance? soot.ByteType base-type)
                               Byte/TYPE
                               (instance? soot.CharType base-type)
                               Character/TYPE
                               (instance? soot.ShortType base-type)
                               Short/TYPE
                               (instance? soot.IntType base-type)
                               Integer/TYPE
                               (instance? soot.LongType base-type)
                               Long/TYPE
                               (instance? soot.FloatType base-type)
                               Float/TYPE
                               (instance? soot.DoubleType base-type)
                               Double/TYPE
                               :otherwise
                               Object)
                             sizes)                      
                      default-return)]
                result)
              (catch Exception e default-return))))]
    (try
      (.. expr
          (apply
           (proxy [JimpleValueSwitch] []
             ;; case local
             (caseLocal [local]
               (reset! result
                       (simulator-resolve-value local @simulator)))
             ;; ConstantSwitch
             (caseClassConstant [const]
               (reset! result
                       (simulator-resolve-value const @simulator)))
             (caseDoubleConstant [const]
               (reset! result
                       (simulator-resolve-value const @simulator)))
             (caseFloatConstant [const]
               (reset! result
                       (simulator-resolve-value const @simulator)))
             (caseIntConstant [const]
               (reset! result
                       (simulator-resolve-value const @simulator)))
             (caseLongConstant [const]
               (reset! result
                       (simulator-resolve-value const @simulator)))
             (caseMethodHandle [const]
               (reset! result
                       (simulator-resolve-value const @simulator)))
             (caseNullConstant [const]
               (reset! result
                       (simulator-resolve-value const @simulator)))
             (caseStringConstant [const]
               (reset! result
                       (simulator-resolve-value const @simulator)))
             ;; ExprSwitch
             (caseAddExpr [expr]
               (reset! result
                       (binary-operator-expr expr + :add)))
             (caseAndExpr [expr]
               (reset! result
                       (binary-operator-expr expr bit-and :and)))
             (caseCastExpr [expr]
               (reset! result
                       (cast-expr expr)))
             (caseCmpExpr [expr]
               (reset! result
                       (binary-operator-expr expr compare :cmp)))
             (caseCmpgExpr [expr]
               ;; JVM-specific artifacts; N/A on Dalvik
               (reset! result
                       (binary-operator-expr expr compare :cmpg)))
             (caseCmplExpr [expr]
               ;; JVM-specific artifacts; N/A on Dalvik
               (reset! result
                       (binary-operator-expr expr compare :cmpl)))
             (caseDivExpr [expr]
               (reset! result
                       (binary-operator-expr expr / :div)))
             (caseDynamicInvokeExpr [expr]
               ;; JVM8 specific; N/A on Dalvik
               (reset! result
                       (invoke-expr :dynamic-invoke
                                    (.. expr getBootstrapMethodRef)
                                    nil
                                    (.. expr getBootstrapArgs))))
             (caseEqExpr [expr]
               (reset! result
                       ;; only non-sexp can be meaningfully compared
                       (binary-operator-expr
                        expr
                        (fn [op1 op2]
                          (if (and (not (extends? Sexp (class op1)))
                                   (not (extends? Sexp (class op2))))
                            (== op1 op2)
                            (make-binary-operator-sexp == [op1 op2])))
                        :eq)))
             (caseGeExpr [expr]
               (reset! result
                       (binary-operator-expr expr >= :ge)))
             (caseGtExpr [expr]
               (reset! result
                       (binary-operator-expr expr > :gt)))
             (caseInstanceOfExpr [expr]
               (reset! result
                       (let [check-type (-> (.. expr getCheckType) (simulator-resolve-value @simulator))
                             op (-> (.. expr getOp) (simulator-resolve-value @simulator))
                             default-return (make-instance-of-sexp check-type op)]
                         (try
                           (let [check-type-class (-> check-type get-soot-class)
                                 check-type-name (-> check-type get-soot-class-name)]
                             (cond
                               (instance? woa.apk.dex.soot.sexp.InvokeSexp op)
                               (let [method (:method op)
                                     return-type (cond
                                                   (instance? soot.SootMethodRef method)
                                                   (.. method returnType)
                                                   (instance? soot.SootMethod method)
                                                   (.. method getReturnType))
                                     type-class (-> return-type get-soot-class)]
                                 (if (transitive-ancestor? check-type-class
                                                           type-class)
                                   ;; only positive answer is certain
                                   1
                                   default-return))
                               :default default-return))
                           (catch Exception e
                             (when soot-debug-show-exceptions
                               (print-stack-trace e))
                             default-return)))))
             (caseInterfaceInvokeExpr [expr]
               (reset! result
                       (invoke-expr :interface-invoke
                                    (.. expr getMethodRef)
                                    (.. expr getBase)
                                    (.. expr getArgs))))
             (caseLeExpr [expr]
               (reset! result
                       (binary-operator-expr expr <= :le)))
             (caseLengthExpr [expr]
               (reset! result
                       (unary-operator-expr expr count :length)))
             (caseLtExpr [expr]
               (reset! result
                       (binary-operator-expr expr < :lt)))
             (caseMulExpr [expr]
               (reset! result
                       (binary-operator-expr expr * :mul)))
             (caseNeExpr [expr]
               (reset! result
                       ;; only non-sexp can be meaningfully compared                       
                       (binary-operator-expr
                        expr
                        (fn [op1 op2]
                          (if (and (not (extends? Sexp (class op1)))
                                   (not (extends? Sexp (class op2))))
                            (not= op1 op2)
                            (make-binary-operator-sexp not= [op1 op2])))
                        :ne)))
             (caseNegExpr [expr]
               (reset! result
                       (unary-operator-expr expr - :neg)))
             (caseNewArrayExpr [expr]
               (reset! result
                       (new-array-expr expr)))
             (caseNewExpr [expr]
               ;; will be evaluated in caseSpecialInvokeExpr where the arguments are ready)
             (caseNewMultiArrayExpr [expr]
               (reset! result
                       (new-multi-array-expr expr)))
             (caseOrExpr [expr]
               (reset! result
                       (binary-operator-expr expr bit-or :or)))
             (caseRemExpr [expr]
               (reset! result
                       (binary-operator-expr expr rem :rem)))
             (caseShlExpr [expr]
               (reset! result
                       (binary-operator-expr expr not= :shl)))
             (caseShrExpr [expr]
               (reset! result
                       (binary-operator-expr expr bit-shift-right :shr)))
             (caseSpecialInvokeExpr [expr]
               (reset! result
                       (invoke-expr :special-invoke
                                    (.. expr getMethodRef)
                                    (.. expr getBase)
                                    (.. expr getArgs))))
             (caseStaticInvokeExpr [expr]
               (reset! result
                       (invoke-expr :static-invoke
                                    (.. expr getMethodRef)
                                    nil
                                    (.. expr getArgs))))
             (caseSubExpr [expr]
               (reset! result
                       (binary-operator-expr expr - :sub)))
             (caseUshrExpr [expr]
               (reset! result
                       (binary-operator-expr expr unsigned-bit-shift-right :ushr)))
             (caseVirtualInvokeExpr [expr]
               (reset! result
                       (invoke-expr :virtual-invoke
                                    (.. expr getMethodRef)
                                    (.. expr getBase)
                                    (.. expr getArgs))))
             (caseXorExpr [expr]
               (reset! result
                       (binary-operator-expr expr bit-xor :xor)))
             ;; RefSwitch
             (caseArrayRef [ref]
               (reset! result
                       (let [base (-> (.. ref getBase) (simulator-resolve-value @simulator))
                             index (-> (.. ref getIndex) (simulator-resolve-value @simulator))
                             default-return (make-array-ref-sexp base index)]
                         (try
                           (aget base index)
                           (catch Exception e
                             (when soot-debug-show-exceptions
                               (print-stack-trace e))
                             default-return)))))
             (caseCaughtExceptionRef [ref]
               ;; irrelevant)
             (caseInstanceFieldRef [ref]
               (reset! result
                       (simulator-resolve-value ref @simulator)))
             (caseParameterRef [ref]
               (reset! result
                       (simulator-get-param @simulator (.. ref getIndex))))
             (caseStaticFieldRef [ref]
               (reset! result
                       (simulator-resolve-value ref @simulator)))
             (caseThisRef [ref]
               (reset! result
                       (simulator-get-this @simulator)))  
             ;; default case
             (defaultCase [expr]))))
      (catch Exception e
        (when soot-debug-show-exceptions
          (print-stack-trace e))))
    @result))

:nil signify N/A

(defn- simulator-get-field
  [instance field]
  (let [field (-> field soot-resolve)
        class-name (-> field get-soot-class-name)
        field-name (-> field get-soot-name)
        field-id [class-name field-name]
        instance (cond
                   (instance? woa.apk.dex.soot.sexp.InstanceSexp instance)
                   (:instance instance)
                   :otherwise instance)]
    (if (.. field isStatic)
      (get-in @*simulator-global-state* [:fields :static field-id] :nil)
      (get-in @*simulator-global-state* [:fields :instance instance field-id] :nil))))
(defn- simulator-set-field
  [instance field value]
  (let [field (-> field soot-resolve)
        class-name (-> field get-soot-class-name)
        field-name (-> field get-soot-name)
        field-id [class-name field-name]
        instance (cond
                   (instance? woa.apk.dex.soot.sexp.InstanceSexp instance)
                   (:instance instance)
                   :otherwise instance)]
    (if (.. field isStatic)
      (swap! *simulator-global-state* assoc-in [:fields :static field-id] value)
      (swap! *simulator-global-state* assoc-in [:fields :instance instance field-id] value))))

:nil signify N/A

(defn- simulator-get-this
  [simulator]
  (get-in simulator [:this] :nil))

:nil signify N/A

(defn- simulator-get-param
  [simulator param]
  (get-in simulator [:params param] :nil))
(defn- simulator-set-local
  [simulator local val]
  (assoc-in simulator [:locals local]
            val))

:nil signify N/A

(defn- simulator-get-local
  [simulator local]
  (get-in simulator [:locals local] :nil))
(defn- simulator-add-returns
  [simulator invokes]
  (update-in simulator [:returns] into
             invokes))
(defn- simulator-get-returns
  [simulator]
  (get-in simulator [:returns]))
(defn- simulator-clear-returns
  [simulator]
  (assoc-in simulator [:returns] #{}))
(defn- simulator-add-explicit-invokes
  [simulator invokes]
  (update-in simulator [:explicit-invokes] into
             invokes))
(defn- simulator-get-explicit-invokes
  [simulator]
  (get-in simulator [:explicit-invokes]))
(defn- simulator-clear-explicit-invokes
  [simulator]
  (assoc-in simulator [:explicit-invokes] #{}))
(defn- simulator-add-implicit-invokes
  [simulator invokes]
  (update-in simulator [:implicit-invokes] into
             invokes))
(defn- simulator-get-implicit-invokes
  [simulator]
  (get-in simulator [:implicit-invokes]))
(defn- simulator-clear-implicit-invokes
  [simulator]
  (assoc-in simulator [:implicit-invokes] #{}))
(defn- simulator-add-component-invokes
  [simulator invokes]
  (update-in simulator [:component-invokes] into
             invokes))
(defn- simulator-get-component-invokes
  [simulator]
  (get-in simulator [:component-invokes]))
(defn- simulator-clear-component-invokes
  [simulator]
  (assoc-in simulator [:component-invokes] #{}))
(defn- simulator-add-invoke-paths
  [simulator invokes]
  (update-in simulator [:invoke-paths] into
             invokes))
(defn- simulator-get-invoke-paths
  [simulator]
  (get-in simulator [:invoke-paths]))
(defn- simulator-clear-invoke-paths
  [simulator]
  (assoc-in simulator [:invoke-paths] #{}))

implicit control flow helpers

filter methods that contain implicit control flow invokes

(defn filter-implicit-cf-invoke-methods
  [methods]
  (->> methods
       (filter
        (fn [^SootMethod method]
          (->> [method]
               mapcat-invoke-methodrefs
               (filter implicit-cf?)
               not-empty)))))

test whether class possibly contains implicit cf

(defn implicit-cf-class?
  [class]
  (->> class get-transitive-implicit-cf-super-class-and-interface not-empty))

test whether methodref is possibly an implicit cf

(def implicit-cf?
  get-implicit-cf-root-class-names)
(defn implicit-cf-task?
  [method]
  (set/intersection (->> method get-implicit-cf-root-class-names)
                    (set (->> implicit-cf-marker-task keys))))
(defn implicit-cf-component?
  [method]
  (set/intersection (->> method get-implicit-cf-root-class-names)
                    (set (->> implicit-cf-marker-component keys))))
(defn get-transitive-implicit-cf-super-class-and-interface
  [class]
  (set/intersection (set (keys implicit-cf-marker))
                    (->> class
                         get-transitive-super-class-and-interface
                         (map get-soot-class-name)
                         set)))
(defn get-implicit-cf-root-class-names
  [method]
  (let [class (->> method get-soot-class)
        name (->> method get-soot-name)]
    (->> (get-transitive-implicit-cf-super-class-and-interface class)
         (filter #(let [t (get implicit-cf-marker %)]
                    (or (= t :all)
                        (contains? t name))))
         not-empty)))

domain knowledge

(def ^:private implicit-cf-marker-task
  {"java.lang.Thread" #{"start"}
   "java.lang.Runnable" #{"run"}
   "java.util.concurrent.Callable" #{"call"}
   "java.util.concurrent.Executor" #{"execute"}
   "java.util.concurrent.ExecutorService" #{"submit"
                                            "execute"}
   "java.lang.Class" #{"forName"
                       "getMethod"
                       "getField"}
   "java.lang.reflect.Method" #{"invoke"}
   "java.lang.reflect.Field" :all
   "android.os.Handler" #{"post" "postAtFrontOfQueue"
                          "postAtTime" "postDelayed"}})
(def ^:private implicit-cf-marker-component
  {"android.content.Context" #{"startActivity" "startActivities"
                               "startService" "stopService"
                               "bindService" "unbindService"
                               "sendBroadcast" "sendBrocastAsUser"
                               "sendOrderedBroadcast" "sendOrderedBroadcastAsUser"
                               "sendStickyBroadcast" "sendStickyBroadcastAsUser"
                               "registerComponentCallbacks"
                               "registerReceiver"}})

these methods mark implicit control flows

(def ^:private implicit-cf-marker
  (merge implicit-cf-marker-task
         implicit-cf-marker-component))

safe classes are the ones that can be simulated in Clojure

(def ^:private safe-invokes
  {;;; java.lang
   ;; interface
   "java.lang.Iterable" :all
   ;; classes
   "java.lang.String" :all
   "java.lang.StringBuilder" :all
   "java.lang.StringBuffer" :all
   "java.lang.Math" :all
   "java.lang.StrictMath" :all
   "java.lang.Integer" :all
   "java.lang.Long" :all
   "java.lang.Double" :all
   "java.lang.Float" :all
   "java.lang.Byte" :all
   "java.lang.Character" :all
   "java.lang.Short" :all
   "java.lang.Boolean" :all
   "java.lang.Void" :all
   "java.lang.System" #{"nanoTime"
                        "currentTimeMillis"}
   ;;; java.util
   ;; interface
   "java.util.Collection" :all
   "java.util.Comparator" :all
   "java.util.Deque" :all
   "java.util.Enumeration" :all
   "java.util.Formattable" :all
   "java.util.Iterator" :all
   "java.util.List" :all
   "java.util.ListIterator" :all
   "java.util.Map" :all
   "java.util.Map$Entry" :all
   "java.util.NavigableMap" :all
   "java.util.NavigableSet" :all
   "java.util.Queue" :all
   "java.util.RandomAccess" :all
   "java.util.Set" :all
   "java.util.SortedMap" :all
   "java.util.SortedSet" :all
   ;; classes
   "java.util.ArrayList" :all
   "java.util.ArrayDeque" :all
   "java.util.Arrays" :all
   "java.util.BitSet" :all
   "java.util.Calendar" :all
   "java.util.Collections" :all
   "java.util.Currency" :all
   "java.util.Date" :all
   "java.util.Dictionary" :all
   "java.util.EnumMap" :all
   "java.util.EnumSet" :all
   "java.util.Formatter" :all
   "java.util.GregorianCalendar" :all
   "java.util.HashMap" :all
   "java.util.HashSet" :all
   "java.util.Hashtable" :all
   "java.util.IdentityHashMap" :all
   "java.util.LinkedHashMap" :all
   "java.util.LinkedHashSet" :all
   "java.util.LinkedList" :all
   "java.util.Locale" :all
   "java.util.Locale$Builder" :all
   "java.util.Objects" :all
   "java.util.PriorityQueue" :all
   "java.util.Properties" :all
   "java.util.Random" :all
   "java.util.SimpleTimeZone" :all
   "java.util.Stack" :all
   "java.util.StringTokenizer" :all
   "java.util.TreeMap" :all
   "java.util.TreeSet" :all
   "java.util.UUID" :all
   "java.util.Vector" :all
   "java.util.WeakHashMap" :all})
 
(ns woa.apk.dex.soot.util
  ;; internal libs
  (:use woa.util)
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])
  ;; import
  (:import
   (soot G
         G$GlobalObjectGetter
         PhaseOptions
         PackManager
         Scene
         Pack
         Unit
         SootClass
         SootMethod
         SootMethodRef)
   (soot.options Options)
   (soot.jimple Stmt)))

declaration

(declare soot-queryable?)
(declare find-method-candidates)
(declare get-application-classes get-application-methods)
(declare get-method-body map-class-bodies run-body-packs)
(declare mapcat-invoke-methodrefs resolve-methodrefs mapcat-invoke-methods)
(declare get-transitive-super-class-and-interface
         get-interesting-transitive-super-class-and-interface
         transitive-ancestor?)
(declare filter-interesting-methods)
(declare get-cg mapcat-edgeout-methods)
(declare android-api?)
(declare with-soot new-g-objgetter)
(declare mute unmute with-silence)

implementation

SootQuery

(defprotocol SootQuery
  (get-soot-class [this])
  (get-soot-class-name [this])
  (get-soot-name [this])
  (soot-resolve [this]))

test whether SottQuery can be applied on cand without Exception

(defn soot-queryable?
  [cand]
  (try
    (let [class (-> cand get-soot-class)]
      (-> cand get-soot-name)
      (-> cand get-soot-class-name)
      (.. class getPackageName)
      true)
    (catch Exception e
      false)))
(extend-type nil
  SootQuery
  (get-soot-class [this]
    nil)
  (get-soot-class-name [this]
    nil)
  (get-soot-name [this]
    nil)
  (soot-resolve [this]
    nil))
(extend-type soot.SootClass
  SootQuery
  (get-soot-class [this]
    this)
  (get-soot-class-name [this]
    (get-soot-name this))
  (get-soot-name [this]
    (.. this getName))
  (soot-resolve [this]
    this))
(extend-type soot.SootMethod
  SootQuery
  (get-soot-class [this]
    (.. this getDeclaringClass))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (.. this getName))
  (soot-resolve [this]
    this))
(extend-type soot.SootMethodRef
  SootQuery
  (get-soot-class [this]
    (.. this declaringClass))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (.. this name))
  (soot-resolve [this]
    (.. this resolve)))
(extend-type soot.SootField
  SootQuery
  (get-soot-class [this]
    (.. this getDeclaringClass))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (.. this getName))
  (soot-resolve [this]
    this))
(extend-type soot.SootFieldRef
  SootQuery
  (get-soot-class [this]
    (.. this declaringClass))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (.. this name))
  (soot-resolve [this]
    (.. this resolve)))
(extend-type String
  SootQuery
  (get-soot-class [this]
    (.. (Scene/v) (getSootClass this)))
  (get-soot-class-name [this]
    this)
  (get-soot-name [this]
    this)
  (soot-resolve [this]
    ;; only Class string can be reasonably resolved
    (get-soot-class this)))
(extend-type soot.jimple.ClassConstant
  SootQuery
  (get-soot-class [this]
    (->> (.. this getValue) get-soot-class))
  (get-soot-class-name [this]
    (.. this getValue))
  (get-soot-name [this]
    (->> this get-soot-class-name))
  (soot-resolve [this]
    (->> this get-soot-class)))
(extend-type soot.RefType
  SootQuery
  (get-soot-class [this]
    (.. this getSootClass))
  (get-soot-class-name [this]
    (.. this getClassName))
  (get-soot-name [this]
    (->> this get-soot-class-name))
  (soot-resolve [this]
    (->> this get-soot-class)))
(extend-type soot.ArrayType
  SootQuery
  (get-soot-class [this]
    (->> (.. this getArrayElementType) get-soot-class))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (->> this get-soot-class-name))
  (soot-resolve [this]
    (->> this get-soot-class)))
(extend-type soot.jimple.FieldRef
  SootQuery
  (get-soot-class [this]
    (->> (.. this getFieldRef) get-soot-class))
  (get-soot-class-name [this]
    (->> this get-soot-class get-soot-name))
  (get-soot-name [this]
    (->> (.. this getFieldRef) get-soot-name))
  (soot-resolve [this]
    (.. this getField)))

Soot Method helper

args=nil: all method of the-class with method-name; otherwise: match by argument numbers

(defn find-method-candidates
  [the-class method-name args]
  (when-let [methods (not-empty
                      (->> (.. the-class getMethods)
                           (filter #(= (->> % get-soot-name)
                                       method-name))))]
    (cond
      (nil? args)
      methods
      :otherwise
      (->> methods
           (filter #(= (if (number? args) args (count args))
                       (.. ^SootMethod % getParameterCount)))))))

Soot Class helpers

get application classes in scene

(defn get-application-classes
  [scene]
  (->> (.. scene getApplicationClasses snapshotIterator)
       iterator-seq
       set))

get application methods in scene

(defn get-application-methods
  [scene]
  (->> scene
       get-application-classes
       (mapcat #(try
                  (.. ^SootClass % getMethods)
                  (catch Exception e nil)))
       set))

Soot Body helpers

get method body

(defn get-method-body
  [^SootMethod method]
  (if (.. method hasActiveBody)
    (.. method getActiveBody)
    (when (and (not (.. method isPhantom))
               ;; method must have backing source
               (.. method getSource))
      (.. method retrieveActiveBody))))

map classes to their method bodies

(defn map-class-bodies
  [classes]
  (->> classes
       (remove #(.. ^SootClass % isPhantom))
       (mapcat #(->> (.. ^SootClass % getMethods)
                     seq
                     (map get-method-body)
                     (filter identity)))))

run body packs over application classes

(defn run-body-packs
  [& {:keys [scene pack-manager body-packs verbose]}]
  (doto scene
    (.loadNecessaryClasses))
  ;; force application class bodies to be mapped at least once 
  (let [bodies (->> scene get-application-classes map-class-bodies)
        packs (->> body-packs (map #(.. ^PackManager pack-manager (getPack ^String %))))]
    (doseq [^Pack pack packs]
      (when pack
        (doseq [^SootBody body bodies]
          (try
            (.. pack (apply body))
            ;; catch Exception to prevent it destroys outer threads
            (catch Exception e
              (print-stack-trace-if-verbose e verbose))))))))

invoker-invokee relationship helpers

phantom SootClass has SootMethodRef but not SootMethod

mapcat methods to their invoked methodrefs

(defn mapcat-invoke-methodrefs
  [methods]
  (->> methods
       (remove #(.. ^SootMethod % isPhantom))
       ;; try retrieveActiveBody
       (filter #(try
                  (.. ^SootMethod % retrieveActiveBody)
                  true
                  (catch Exception e
                    false)))
       (mapcat #(iterator-seq (.. ^SootMethod % retrieveActiveBody getUnits snapshotIterator)))
       (filter #(.. ^Stmt % containsInvokeExpr))
       (map #(.. ^Stmt % getInvokeExpr getMethodRef))))

mapcat methods to their invoked methods

(defn mapcat-invoke-methods
  [methods]
  (->> methods
       mapcat-invoke-methodrefs
       ;; deduplication early
       set
       resolve-methodrefs
       set))

resolve methodrefs

(defn resolve-methodrefs
  [methodrefs]
  (->> methodrefs
       (remove #(.. ^SootMethodRef % declaringClass isPhantom))
       (filter #(try
                  (.. ^SootMethodRef % resolve)
                  true
                  (catch Exception e
                    false)))
       (map #(.. ^SootMethodRef % resolve))))

interesting method helpers

filter interesting methodrefs

(defn filter-interesting-methods
  [interesting-method? methods]
  (->> methods
       (filter interesting-method?)))

Soot callgraph helpers

get Call Graph from scene

(defn get-cg
  [scene]
  (when (.. scene hasCallGraph)
    (.. scene getCallGraph)))

mapcat methods to their edgeout methods on cg

(defn mapcat-edgeout-methods
  [methods cg]
  (when cg
    (->> methods
         (mapcat #(iterator-seq (.. ^soot.jimple.toolkits.callgraph.CallGraph cg (edgesOutOf %))))
         (map #(.. ^soot.jimple.toolkits.callgraph.Edge % getTgt))
         set)))

helpers

test see if obj is Android API

(defn android-api?
  [obj]
  (re-find #"^(android\.|com\.android\.|dalvik\.)"
           (-> obj get-soot-class-name)))

Soot body wrapper

Soot mutex: Soot is unfortunately Singleton

(def soot-mutex
  (Object.))

System's exsiting security manager

(def system-security-manager
  (System/getSecurityManager))

prevent Soot brining down the system with System.exit

(def noexit-security-manager
  ;; http://stackoverflow.com/questions/21029651/security-manager-in-clojure/21033599#21033599
  (proxy [SecurityManager] []
    (checkPermission
      ([^java.security.Permission perm]
       (when (.startsWith (.getName perm) "exitVM")
         (throw (SecurityException. "no exit for Soot"))))
      ([^java.security.Permission perm ^Object context]
       (when (.startsWith (.getName perm) "exitVM")
         (throw (SecurityException. "no exit for Soot")))))))

get transitive super class and interfaces known to Soot

this memoized function is initilized in with-soot

(def get-transitive-super-class-and-interface
  nil)

get interesting transitive super class and interfaces known to Soot

this memoized function is initilized in with-soot

(def get-interesting-transitive-super-class-and-interface
  nil)

name-or-class-a is a transitive ancestor (super class/interface) of class-b

this memoized function is initilized in with-soot

(def transitive-ancestor?
  nil)

create a new Soot context (G$GlobalObjectGetter)

(defn new-g-objgetter
  []
  (let [g (new G)]
    (reify G$GlobalObjectGetter
      (getG [this] g)
      (reset [this]))))

wrap body with major Soot refs at the call time: g, scene, pack-manager, options, phase-options; g can be (optionally) provided with g-objgetter (nil to ask fetch the G at the call time); (G/reset) at the end if "reset?" is true

(defmacro with-soot
  [g-objgetter reset? & body]
  `(locking soot-mutex
     (let [get-transitive-super-class-and-interface#
           (memoize
            (fn [class-or-interface#]
              ;; preserve order
              (let [known# (atom [])
                    class-or-interface# (get-soot-class class-or-interface#)]
                (loop [worklist# #{class-or-interface#}
                       visited# #{}]
                  (when-not (empty? worklist#)
                    (let [new-worklist# (atom #{})]
                      (doseq [item# worklist#
                              :when (not (visited# item#))]
                        (swap! known# conj item#)
                        ;; interfaces
                        (swap! new-worklist# into (->> (.. item# getInterfaces snapshotIterator)
                                                       iterator-seq))
                        ;; superclass?
                        (when (.. item# hasSuperclass)
                          (swap! new-worklist# conj (.. item# getSuperclass))))
                      (recur (set/difference @new-worklist# worklist#)
                             (set/union visited# worklist#)))))
                @known#)))
           get-interesting-transitive-super-class-and-interface#
           (memoize
            (fn [class-or-interface# interesting?#]
              ;; preserve order
              (let [known# (atom [])
                    class-or-interface# (get-soot-class class-or-interface#)]
                (loop [worklist# #{class-or-interface#}
                       visited# #{}]
                  (when-not (empty? worklist#)
                    (let [new-worklist# (atom #{})]
                      (doseq [item# worklist#
                              :when (not (visited# item#))]
                        (if (interesting?# item#)
                          ;; found the most close interesting ancestor: do not follow its ancestors
                          (swap! known# conj item#)
                          ;; otherwise, follow its ancestors
                          (do
                            ;; interfaces
                            (swap! new-worklist# into (->> (.. item# getInterfaces snapshotIterator)
                                                           iterator-seq))
                            ;; superclass?
                            (when (.. item# hasSuperclass)
                              (swap! new-worklist# conj (.. item# getSuperclass))))))
                      (recur (set/difference @new-worklist# worklist#)
                             (set/union visited# worklist#)))))
                @known#)))           
           transitive-ancestor?#
           (memoize
            (fn [name-or-class-a# class-b#]
              (contains? (->> class-b#
                              get-transitive-super-class-and-interface
                              (map #(.. ^SootClass % getName))
                              set)
                         (if (instance? SootClass name-or-class-a#)
                           (.. name-or-class-a# getName)
                           (str name-or-class-a#)))))
           soot-init# (fn []
                        ;; set up memoize functions so that they won't retain objects across
                        (alter-var-root #'get-transitive-super-class-and-interface
                                        (fn [_#] get-transitive-super-class-and-interface#))
                        (alter-var-root #'get-interesting-transitive-super-class-and-interface
                                        (fn [_#] get-interesting-transitive-super-class-and-interface#))
                        (alter-var-root #'transitive-ancestor?
                                        (fn [_#] transitive-ancestor?#)))
           ;; we have to use this instead of clean# due to the use in ~(when reset? ...)           
           ~'_soot-clean_ (fn []
                            (alter-var-root #'get-transitive-super-class-and-interface
                                            (constantly nil))
                            (alter-var-root #'transitive-ancestor?
                                            (constantly nil))
                            (G/setGlobalObjectGetter nil))]
       (try
         (soot-init#)
         (System/setSecurityManager noexit-security-manager)
         (when (instance? G$GlobalObjectGetter ~g-objgetter)
           (G/setGlobalObjectGetter ~g-objgetter))
         (let [~'soot-g (G/v)
               ~'soot-scene (Scene/v)
               ~'soot-pack-manager (PackManager/v)
               ~'soot-options (Options/v)
               ~'soot-phase-options (PhaseOptions/v)]
           ~@body
           ~(when reset?
              `(~'_soot-clean_)))
         (catch Exception e#
           ;; reset Soot state
           (~'_soot-clean_)
           (throw e#))
         (finally
           (System/setSecurityManager system-security-manager))))))

mutter

(def ^:private mutter (java.io.PrintStream. (proxy [java.io.OutputStream] []
                                    (write [_ _1 _2]))))
(def ^:private original-system-out System/out)

execute the body in silence

(defmacro with-silence
  [& body]
  `(try
     (mute)
     ~@body
     (finally
       (unmute))))

no output

(defn mute
  []
  (set! (. (G/v) out) mutter)
  (System/setOut mutter))

allow output again

(defn unmute
  []
  (System/setOut original-system-out)
  (set! (. (G/v) out) original-system-out))
 
(ns woa.apk.parse
  ;; internal libs
  (:require [woa.apk.dex.parse
             :refer [get-the-dex-sha256-digest]]           
            [woa.apk.aapt.parse
             :refer [get-manifest]]
            [woa.apk.util
             :refer [get-apk-cert-sha256-digest get-file-sha256-digest]])  
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]]))

declaration

(declare parse-apk)

implementation

parse apk: the common part

(defn parse-apk
  [apk-name]
  {:manifest (get-manifest apk-name)
   :dex-sha256 (get-the-dex-sha256-digest apk-name)
   :cert-sha256 (get-apk-cert-sha256-digest apk-name)
   :sha256 (get-file-sha256-digest apk-name)})
 
(ns woa.apk.util
  ;; internal libs
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])  
  ;; special libs
  (:require [pandect.algo.sha256 :refer [sha256-bytes sha256]]
            [clojure.java.shell :refer [sh]])
  ;; imports
  ;; http://stackoverflow.com/a/1802126
  (:import (java.io ByteArrayInputStream))
  ;; http://stackoverflow.com/a/5419767
  (:import (java.util.zip ZipFile
                          ZipInputStream
                          ZipEntry))
  ;; http://stackoverflow.com/a/19194580
  (:import (java.nio.file Files
                          Paths
                          StandardCopyOption))   
  ;; http://stackoverflow.com/a/1264756
  (:import (org.apache.commons.io IOUtils)))
(declare get-apk-file-bytes get-apk-file-input-stream
         extract-apk-file
         get-apk-file-sha256-digest get-apk-cert-sha256-digest)

get bytes of file-name in apk

(defn ^bytes get-apk-file-bytes
  [apk file-name]
  (with-open [apk (ZipFile. ^String apk)]
    (IOUtils/toByteArray ^java.io.InputStream (.getInputStream apk (.getEntry apk file-name)))))

get an input stream of file-name in apk

(defn ^java.io.InputStream get-apk-file-input-stream
  [apk file-name]
  (ByteArrayInputStream. (get-apk-file-bytes apk file-name)))

extract file-name in apk to output-file-name

(defn extract-apk-file
  [apk file-name output-file-name]
  (Files/copy ^java.io.InputStream (get-apk-file-input-stream apk file-name)
              (Paths/get output-file-name (into-array String [""]))
              (into-array StandardCopyOption [StandardCopyOption/REPLACE_EXISTING])))

get sha256 digest of file-name in apk

(defn get-apk-file-sha256-digest
  [apk file-name]
  (sha256 (get-apk-file-bytes apk file-name)))

get sha256 digest of apk's cert

(defn get-apk-cert-sha256-digest
  [apk]
  (let [raw (:out (sh "keytool" "-printcert" "-jarfile"
                      apk))
        [_ digest] (re-find #"SHA256:\s+(\S+)" raw)]
    digest))

get sha256 digest of the-file

(defn get-file-sha256-digest
  [the-file]
  (sha256 (io/file the-file)))
 
(ns woa.core
  ;; internal libs
  (:require [woa.util
             :refer [print-stack-trace-if-verbose]]
            [woa.core.signature
             :refer [compute-cgdfd-signature
                     compute-cgdfd]]
            [woa.apk.parse
             :as apk-parse]
            [woa.apk.aapt.parse
             :as aapt-parse]
            [woa.neo4j.core
             :as neo4j]
            [woa.apk.dex.soot.parse
             :as soot-parse]
            [woa.virustotal.core
             :as vt])
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])
  ;; special libs
  (:require [clojure.tools.cli :refer [parse-opts]])
  (:require [clojure.java.shell :refer [sh]])
  (:require [me.raynes.fs :as fs])
  (:require [clojure.tools.nrepl.server :refer [start-server stop-server]])
  (:require [cider.nrepl :refer [cider-nrepl-handler]])
  (:require [taoensso.nippy :as nippy])
  ;; imports
  (:import (java.util.concurrent Executors
                                 TimeUnit))
  ;; config
  (:gen-class))
(def cli-options
  [
   ;; general options
   ["-h" "--help" "you are reading it now"]
   ["-v" "--verbose" "be verbose (more \"v\" for more verbosity)"
    :default 0
    :assoc-fn (fn [m k _] (update-in m [k] inc))]
   ["-L" "--no-line-reading" "do not read from stdin; exit if other tasks complete"]
   ["-i" "--interactive" "do not exit (i.e., shutdown-agents) at the end"]
   [nil "--delay-start SEC" "delay start for a random seconds from 1 to (max) SEC"
    :parse-fn #(Integer/parseInt %)
    :default 0]
   ;; nREPL config
   [nil "--nrepl-port PORT" "REPL port"
    :parse-fn #(Integer/parseInt %)
    :validate [#(< 0 % 0x10000)
               (format "Must be a number between 0 and %d (exclusively)"
                       0x10000)]]
   ;; prepations
   [nil "--prep-tags TAGS" "TAGS is a Clojure vector of pairs of label types to properties, e.g., [[[\"Dataset\"] {\"id\" \"dataset-my\" \"name\" \"my dataset\"}]]"]
   [nil "--prep-virustotal" "obtain VirusTotal tags"]
   [nil "--virustotal-apikey APIKEY" "VirusTotal API key"]
   [nil "--virustotal-rate-limit LIMIT-PER-MIN" "number of maximal API calls per minute"
    :parse-fn #(Integer/parseInt %)
    :default 4]
   [nil "--virustotal-backoff SEC" "number of seconds to backoff when exceeding rate limit"
    :parse-fn #(Integer/parseInt %)
    :default 5]   
   [nil "--virustotal-submit" "whether submit sample to VirusTotal if not found"]
   ;; Soot config
   ["-s" "--soot-task-build-model" "build APK model with Soot"]
   [nil "--soot-android-jar-path PATH" "path of android.jar for Soot's Dexpler"]
   [nil "--soot-basic-block-simulation-budget BUDGET" "basic block simulation budget"
    :parse-fn #(Long/parseLong %)
    :default 50]
   [nil "--soot-method-simulation-depth-budget BUDGET" "method invocation simulation budget"
    :parse-fn #(Long/parseLong %)
    :default 10]
   [nil "--soot-simulation-collection-size-budget BUDGET" "array size simulation budget"
    :parse-fn #(Long/parseLong %)
    :default 10000]   
   [nil "--soot-simulation-conservative-branching" "branching based on conditions: more precision at the cost of less coverage before budget depletion."]
   [nil "--soot-simulation-linear-scan" "do not branch or loop: more coverage at the cost of precision"]   
   ["-j" "--soot-parallel-jobs JOBS"
    "number of concurrent threads for analyzing methods"
    :parse-fn #(Integer/parseInt %)
    :default 1
    :validate [#(> % 0)
               (format "You need at least 1 job to proceed")]]
   [nil "--soot-show-result" "show APK analysis result"]
   [nil "--soot-no-implicit-cf" "do not detect implicit control flows (for comparison)"]
   [nil "--soot-dump-all-invokes" "dump all invokes"]
   [nil "--soot-result-exclude-app-methods" "exclude app internal methods from the result"]
   [nil "--soot-debug-show-each-statement" "debug facility: show each processed statement"]
   [nil "--soot-debug-show-locals-per-statement" "debug facility: show locals per each statement"]
   [nil "--soot-debug-show-all-per-statement" "debug facility: show all per each statement"]
   [nil "--soot-debug-show-implicit-cf" "debug facility: show all implicit control flows"]
   [nil "--soot-debug-show-safe-invokes" "debug facility: show all safe invokes"]
   [nil "--soot-debug-show-exceptions" "debug facility: show all exceptions"]
   ["-d" "--dump-model FILE" "dump binary APK model; append dump file paths to FILE"]
   ["-O" "--overwrite-model" "overwrite model while dumping"]   
   ["-l" "--load-model FILE" "load binary APK model; load from dump file paths in FILE"]
   ["-c" "--convert-model" "convert model between binary and readable formats"]
   [nil "--readable-model" "dump/load readable APK model; --dump/load-model FILE will dump/load readable model directly to/from FILE"]
   [nil "--println-model" "println loaded APK model"]
   [nil "--pprint-model" "pprint loaded APK model"]
   [nil "--debug-cgdfd" "debug facility: show cgdfd and signature"]
   ;; Neo4j config
   [nil "--neo4j-port PORT" "Neo4j server port"
    :parse-fn #(Integer/parseInt %)
    :default 7474
    :validate [#(< 0 % 0x10000)
               (format "Must be a number between 0 and %d (exclusively)"
                       0x10000)]]
   [nil "--neo4j-protocol PROTOCOL" "Neo4j server protocol (http/https)"
    :default "http"]
   [nil "--neo4j-conn-backoff SEC" "Neo4j connection retry max random backoff in seconds"
    :parse-fn #(Integer/parseInt %)
    :default 60]
   ;; Neo4j tasks
   ["-n" "--neo4j-task-populate" "populate Neo4j with APK model"]
   ["-t" "--neo4j-task-tag" "tag Neo4j Apk nodes with labels"]
   ["-T" "--neo4j-task-untag" "untag Neo4j Apk nodes with labels"]
   ["-g" "--neo4j-task-add-callback-signature" "add Neo4j CallbackSignature nodes"]
   ["-G" "--neo4j-task-remove-callback-signature" "remove Neo4j CallbackSignature nodes"]   
   [nil "--neo4j-include-methodinstance" "include MethodInstance in the WoA model"]
   [nil "--neo4j-no-callgraph" "not include call graph (CG) in the WoA model"]   
   ["-D" "--neo4j-dump-model-batch-csv PREFIX" "dump Neo4j batch import CSV files to PREFIX; ref: https://github.com/jexp/batch-import/tree/2.1"]
   ;; misc tasks
   [nil "--dump-manifest" "dump AndroidManifest.xml"]
   ])

for consumption by nREPL session

(def main-options
  (atom nil))

establish critical section

(def mutex
  (Object.))

completed task counter

(def completed-task-counter
  (atom 0))

synchronize verbose ouput

(defmacro with-mutex-locked
  [& body]
  `(locking mutex
     ~@body))
(defn- debug-print-cgdfd
  [apk]
  (let [result (atom {})]
    (let [the-dex (:dex apk)]
      (dorun
       (for [comp-package-name (->> the-dex keys)]
         (dorun
          (for [comp-class-name (->> (get-in the-dex [comp-package-name])
                                     keys)]
            (dorun
             (for [callback-name (->> (get-in the-dex [comp-package-name
                                                       comp-class-name
                                                       :callbacks])
                                      keys)]
               (let [invoke-paths (get-in the-dex
                                          [comp-package-name
                                           comp-class-name
                                           :callbacks
                                           callback-name
                                           :invoke-paths])
                     cgdfd (compute-cgdfd invoke-paths)
                     signature (compute-cgdfd-signature cgdfd)]
                 (swap! result assoc
                        [comp-package-name
                         comp-class-name
                         callback-name]
                        {:cgdfd cgdfd
                         :signature signature})))))))))
    (doseq [k (keys @result)]
      (swap! result update-in [k]
             (fn [old]
               (update-in old [:cgdfd]
                          #(->> %
                                (sort-by first)
                                vec)))))
    (swap! result assoc
           :apk (:sha256 apk))
    (pprint @result)))

do the real work on apk

(defn work
  [{:keys [file-path tags]
    :as task}
   {:keys [verbose
           soot-task-build-model
           dump-model overwrite-model readable-model
           debug-cgdfd
           neo4j-port neo4j-protocol
           neo4j-task-populate neo4j-task-tag neo4j-task-untag
           neo4j-task-add-callback-signature neo4j-task-remove-callback-signature
           dump-manifest]
    :as options}]
  (when (and file-path (fs/readable? file-path))
    (when (and verbose (> verbose 1))
      (println "processing" file-path))
    (let [start-time (System/currentTimeMillis)]
      (try
        (when dump-manifest
          (print (aapt-parse/get-manifest-xml file-path))
          (flush))
        (when soot-task-build-model
          (let [apk (apk-parse/parse-apk file-path)
                dump-fname (str (:sha256 apk) ".model-dump")]
            (when (or overwrite-model
                      (not (and dump-model (fs/exists? dump-fname)
                                (do
                                  (when verbose
                                    (println dump-fname
                                             "exists: skipped;"
                                             "overwrite with \"--overwrite-model\""))
                                  true))))
              (let [apk (soot-parse/parse-apk file-path
                                              (merge options
                                                     ;; piggyback layout-callbacks on options
                                                     {:layout-callbacks
                                                      (aapt-parse/get-layout-callbacks file-path)}))]
                (when dump-model
                  (try
                    (with-open [model-io (io/writer dump-model :append true)]
                      (binding [*out* model-io]
                        (if readable-model
                          (prn apk)
                          (with-open [model-io (io/output-stream dump-fname)]
                            (nippy/freeze-to-out! (java.io.DataOutputStream. model-io)
                                                  apk)
                            ;; write the dump file name out
                            (println dump-fname)))))
                    (catch Exception e
                      (print-stack-trace-if-verbose e verbose))))
                (when debug-cgdfd
                  (debug-print-cgdfd apk))
                (when neo4j-task-populate
                  (neo4j/populate-from-parsed-apk apk
                                                  options))
                (cond
                  neo4j-task-add-callback-signature
                  (try
                    (neo4j/add-callback-signature apk
                                                  options)
                    (catch Exception e
                      (print-stack-trace-if-verbose e verbose)))
                  neo4j-task-remove-callback-signature
                  (try
                    (neo4j/remove-callback-signature apk
                                                     options)
                    (catch Exception e
                      (print-stack-trace-if-verbose e verbose))))))))
        (let [apk (apk-parse/parse-apk file-path)]
          (cond
            neo4j-task-tag (neo4j/tag-apk apk tags options)
            neo4j-task-untag (neo4j/untag-apk apk tags options)))        
        (when (and verbose (> verbose 0))
          (with-mutex-locked
            (swap! completed-task-counter inc)
            (println (format "%1$d: %2$s processed in %3$.3f seconds"
                             @completed-task-counter
                             file-path
                             (/ (- (System/currentTimeMillis) start-time)
                                1000.0)))))
        (catch Exception e
          (print-stack-trace-if-verbose e verbose))))))

main entry

(defn -main
  [& args]
  (let [raw (parse-opts args cli-options)
        {:keys [options summary errors]} raw
        {:keys [verbose interactive delay-start help no-line-reading
                prep-tags
                prep-virustotal
                virustotal-rate-limit virustotal-backoff virustotal-submit
                nrepl-port
                load-model dump-model convert-model println-model pprint-model
                readable-model
                debug-cgdfd
                neo4j-task-populate
                neo4j-task-add-callback-signature neo4j-task-remove-callback-signature
                neo4j-dump-model-batch-csv]} options]
    (try
      ;; print out error messages if any
      (when errors
        (binding [*out* *err*]
          (doseq [error errors]
            (println error))))
      ;; whether help is requested
      (cond
        help
        (do
          (println "<BUILDINFO>")
          (println summary))
        (or prep-tags prep-virustotal)
        (do
          ;; for API rate limit
          (let [vt-api-call-counter (atom virustotal-rate-limit)
                vt-start-time (atom (System/currentTimeMillis))]
            (loop [line (read-line)]
              (when line
                (try
                  (prn (-> {:file-path line :tags []}
                           (update-in [:tags] into
                                      (when (and prep-tags
                                                 (not (str/blank? prep-tags)))
                                        (read-string prep-tags)))
                           (update-in [:tags] into
                                      (when prep-virustotal
                                        (let [apk (apk-parse/parse-apk line)
                                              try-backoff
                                              (fn []
                                                (when (<= @vt-api-call-counter 0)
                                                  (let [now (System/currentTimeMillis)
                                                        sleep-time
                                                        (max (* virustotal-backoff 1000)
                                                             (- (+ @vt-start-time
                                                                   (* 60 1000))
                                                                now))]
                                                    (reset! vt-api-call-counter
                                                            virustotal-rate-limit)
                                                    (reset! vt-start-time
                                                            now)
                                                    (Thread/sleep sleep-time))))]
                                          (when-let [sha256 (:sha256 apk)]
                                            (try-backoff)
                                            (when-let [result (vt/get-report {:sha256 sha256}
                                                                             options)]
                                              (swap! vt-api-call-counter dec)
                                              (when (and verbose (> verbose 2))
                                                (binding [*out* *err*]
                                                  (println "virustotal report" result)))
                                              (let [ret (atom nil)]
                                                (cond
                                                  ;; if result is a map, the result is returned
                                                  (map? result)
                                                  (reset! ret
                                                          (vt/make-report-result-into-tags result))
                                                  (= result :status-exceed-api-limit)
                                                  (try-backoff)
                                                  (= result :response-not-found)
                                                  (when virustotal-submit
                                                    (try-backoff)
                                                    (let [result
                                                          (vt/submit-file {:file-content (io/file line)}
                                                                          options)]
                                                      (when (and verbose (> verbose 2))
                                                        (binding [*out* *err*]
                                                          (println "virustotal submit" result))))
                                                    (swap! vt-api-call-counter dec)))
                                                @ret))))))))
                  (catch Exception e
                    (print-stack-trace-if-verbose e verbose)))
                (recur (read-line))))))
        :otherwise
        (do
          (when (and delay-start
                     (> delay-start 0))
            (let [delay-start (rand-int delay-start)]
              (when (> verbose 1)
                (println "delay start" delay-start "seconds"))
              (Thread/sleep (* delay-start 1000))))
          (when nrepl-port
            ;; use separate thread to start nREPL, so do not delay other task
            (.. (Thread.
                 (fn []
                   (try
                     (start-server :port nrepl-port
                                   :handler cider-nrepl-handler)
                     (catch Exception e
                       (when (> verbose 1)
                         (binding [*out* *err*]
                           (println "error: nREPL server cannot start at port"
                                    nrepl-port)))))))
                start))
          (when neo4j-task-populate
            ;; "create index" only need to executed once if populate-neo4j is requested
            (when (> verbose 1)
              (with-mutex-locked
                (println "Neo4j:" "creating index")))
            (neo4j/create-index options)
            (when (> verbose 1)
              (with-mutex-locked
                (println "Neo4j:" "index created"))))
          ;; load Soot model and populate Neo4j graph
          ;; single-threaded to avoid Neo4j contention
          (when load-model
            (try
              (let [counter (atom 0)]
                (with-open [model-io (io/reader load-model)]
                  (binding [*in* model-io]
                    (loop [line (read-line)]
                      (when line
                        (let [apk (try
                                    (if readable-model
                                      (read-string line)
                                      (with-open [model-io (io/input-stream line)]
                                        (nippy/thaw-from-in! (java.io.DataInputStream. model-io))))
                                    (catch Exception e
                                      (print-stack-trace-if-verbose e verbose)
                                      nil))]
                          (when apk
                            (when neo4j-dump-model-batch-csv
                              (neo4j/add-to-batch-csv apk options))
                            (when (and apk convert-model dump-model)
                              (try
                                (with-open [model-io (io/writer dump-model :append true)]
                                  (binding [*out* model-io]
                                    (if readable-model ; if --load-model is in readable format
                                      ;; convert to binary model
                                      (let [dump-fname (str (:sha256 apk) ".model-dump")]
                                        (with-open [model-io (io/output-stream dump-fname)]
                                          (nippy/freeze-to-out! (java.io.DataOutputStream. model-io)
                                                                apk)
                                          ;; write the dump file name out
                                          (println dump-fname)))
                                      ;; convert to readable model
                                      (prn apk))))
                                (catch Exception e
                                  (print-stack-trace-if-verbose e verbose))))
                            ((cond pprint-model pprint
                                   println-model println
                                   ;; nop
                                   :otherwise (constantly nil)) apk)
                            (when debug-cgdfd
                              (debug-print-cgdfd apk))
                            (swap! counter inc)
                            (when (and verbose
                                       (> verbose 0))
                              (println (format "%1$d:" @counter)
                                       (get-in apk [:manifest :package])
                                       (get-in apk [:manifest :android:versionCode])
                                       (get-in apk [:sha256])))
                            (when neo4j-task-populate
                              (try
                                (neo4j/populate-from-parsed-apk apk
                                                                options)
                                (catch Exception e
                                  (print-stack-trace-if-verbose e verbose))))
                            (cond
                              neo4j-task-add-callback-signature
                              (try
                                (neo4j/add-callback-signature apk
                                                              options)
                                (catch Exception e
                                  (print-stack-trace-if-verbose e verbose)))
                              neo4j-task-remove-callback-signature
                              (try
                                (neo4j/remove-callback-signature apk
                                                                 options)
                                (catch Exception e
                                  (print-stack-trace-if-verbose e verbose)))))
                          (recur (read-line))))))))
              (when neo4j-dump-model-batch-csv
                (neo4j/dump-batch-csv neo4j-dump-model-batch-csv options))
              (catch Exception e
                (print-stack-trace-if-verbose e verbose))))
          ;; do the work for each line
          (when-not no-line-reading
            (loop [line (read-line)]
              (when line
                ;; ex.: {:file-path "a/b.apk" :tags [{["Dataset"] {"id" "dst-my" "name" "my dataset"}}]}
                ;; tags must have "id" node property
                (let [{:keys [file-path tags] :as task}
                      (try
                        (read-string line)
                        (catch Exception e
                          (print-stack-trace-if-verbose e verbose)
                          nil))]
                  (try
                    (when (and file-path (fs/readable? file-path))
                      (work task options))
                    (catch Exception e
                      (print-stack-trace-if-verbose e verbose)))
                  (recur (read-line))))))
          (when neo4j-task-populate
            (when (> verbose 1)
              (with-mutex-locked
                (println "Neo4j:" "marking Android API")))
            ;; mark Android API
            (neo4j/mark-android-api options)
            (when (> verbose 1)
              (with-mutex-locked
                (println "Neo4j:" "Android API marked")))))) 
      (when interactive
        ;; block when interactive is requested
        @(promise))
      (catch Exception e
        (print-stack-trace-if-verbose e verbose))
      (finally
        ;; clean-up
        (shutdown-agents)    
        (when (> verbose 1)
          (with-mutex-locked
            (println "shutting down")))
        (System/exit 0)))))
 
(ns woa.core.invoke-path
  ;; internal libs
  (:use woa.util)
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])
  ;; special libs)

declaration

(declare invoke-path-get-invocatee-map
         invoke-path-get-node
         invoke-path-get-descendants
         invoke-path-get-node-name)

implementation

invocatee map is a map from nodes to their invocatees

(defn invoke-path-get-invocatee-map
  [invoke-paths]
  (let [invocatees (atom {})]
    (process-worklist
     #{invoke-paths}
     (fn [worklist]
       (let [new-worklist (atom #{})]
         (dorun
          (for [work worklist]
            (let [node (invoke-path-get-node work)
                  descendants (invoke-path-get-descendants work)
                  children (map invoke-path-get-node descendants)]
              (swap! invocatees update-in [node]
                     #(->> (into %1 %2) set)
                     children)
              (swap! new-worklist into descendants))))
         @new-worklist)))
    @invocatees))
(defn invoke-path-get-node [invoke-paths]
  (cond
    (map? invoke-paths) (->> invoke-paths keys first)
    :otherwise invoke-paths))
(defn invoke-path-get-descendants [invoke-paths]
  (cond
    (map? invoke-paths) (->> invoke-paths vals first)
    :otherwise nil))
(defn invoke-path-get-node-name [node]
  (cond
    ;; Soot signature format
    (re-matches #"^<.+>$" node)
    (let [[_ class method]
          (re-find #"^<([^:]+):\s+\S+\s+([^(]+)\("
                   node)]
      (str class "." method))
    (re-matches #"^[^<].+\[.+\]" node)
    (let [[_ classmethod]
          (re-find #"^(.+)\[" node)]
      classmethod)
    :otherwise
    node))
 
(ns woa.core.signature
  ;; internal libs
  (:use woa.util)
  (:use woa.core.invoke-path)
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])
  ;; special libs
  (:require [incanter.stats :as stats]))

declaration

(declare compute-cgdfd-signature compute-cgdfd)

implementation

compute the CGDFD-based signature from the input CGDFD (replace NaN with 0)

(defn compute-cgdfd-signature
  [cgdfd]
  (try
    (let [total (reduce + (vals cgdfd))
          cgdfd (mapcat #(let [[out-degree multiplicity] %]
                           (repeat multiplicity out-degree))
                        cgdfd)]
      ;; filter on NaN
      (->> [total
            (stats/mean cgdfd)
            (stats/sd cgdfd)
            (stats/skewness cgdfd)
            (stats/kurtosis cgdfd)]
           (map #(let [n %]
                   (cond
                     (try
                       (.isNaN n)
                       (catch Exception e false))
                     0
                     :otherwise n)))
           vec))
    (catch Exception e
      (print-stack-trace e)
      nil)))

compute CGDFD (Call Graph Degree Frequency Distribution) from the input invoke-paths that represent the CG (Call Graph)

(defn compute-cgdfd
  [invoke-paths]
  (let [invocatees (invoke-path-get-invocatee-map invoke-paths)
        cgdfd (atom {})]
    (doseq [[_ invocatees] invocatees]
      (swap! cgdfd update-in [(count invocatees)]
             #(let [now %]
                (cond
                  (nil? now) 1
                  :otherwise (inc now)))))
    @cgdfd))
 
(ns woa.neo4j.core
  ;; internal libs
  (:use woa.util)
  (:use (woa.core invoke-path
                  signature))
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])
  ;; special libs
  (:require [clojurewerkz.neocons.rest :as nr]
            [clojurewerkz.neocons.rest.transaction :as ntx]))
(declare populate-from-parsed-apk
         add-to-batch-csv dump-batch-csv
         tag-apk untag-apk
         add-callback-signature remove-callback-signature
         create-index mark-android-api
         connect android-api?)
(def defaults (atom {:neo4j-port 7474
                     :neo4j-protocol "http"}))

add to batch csv that is to be dumped laterhttps://github.com/jexp/batch-import/tree/2.1

(let [node-props (atom #{})
      rel-props (atom #{})
      node-counter (atom -1)
      nodes (atom {}) ; node => node-counter
      rels (atom {})
      merge-node (fn [[labels props :as node]]
                   (swap! node-props into (keys props))
                   (when-not (get @nodes node)
                     (swap! nodes assoc node (swap! node-counter inc))))
      node-updates (atom {})
      update-node (fn [old-node [labels props :as new-node]]
                    (when-let [id (get @nodes old-node)]
                      (swap! node-props into (keys props))
                      ;; cache the updates
                      (swap! node-updates assoc old-node new-node)))
      merge-rel (fn [node1 [labels props :as rel] node2]
                  (let [n1 (get @nodes node1)
                        n2 (get @nodes node2)]
                    (when (and n1 n2)
                      (swap! rel-props into (keys props))
                      (swap! rels update-in [[n1 n2]]
                             #(->> (conj %1 %2) set)
                             rel))))]
  (defn add-to-batch-csv
  [apk
   {:keys [neo4j-include-methodinstance
           neo4j-no-callgraph]
    :as options}]
  (let [manifest (:manifest apk)
        dex-sha256 (:dex-sha256 apk)
        cert-sha256 (:cert-sha256 apk)
        apk-sha256 (:sha256 apk)
        apk-package (:package manifest)
        apk-version-name (:android:versionName manifest)
        apk-version-code (:android:versionCode manifest)
        the-dex (:dex apk)]
    (let [signing-key [["SigningKey"] {"sha256" cert-sha256}]
          apk [["Apk"] {"sha256" apk-sha256
                        "package" apk-package
                        "versionCode" apk-version-code
                        "versionName" apk-version-name}]
          dex [["Dex"] {"sha256" dex-sha256}]]
      (merge-node signing-key)
      (merge-node apk)
      (merge-node dex)
      (merge-rel signing-key [["SIGN"] nil] apk)
      (merge-rel apk [["CONTAIN"] nil] dex)
      ;; permissions
      (doseq [perm (->> manifest :uses-permission (map name))]
        (let [n [["Permission"] {"name" perm}]]
          (merge-node n)
          (merge-rel apk [["USE"] nil] n)))
      (doseq [perm (->> manifest :permission (map name))]
        (let [n [["Permission"] {"name" perm}]]
          (merge-node n)
          (merge-rel apk [["DEFINE"] nil] n)))
      ;; component package and class
      (dorun
       (for [comp-package-name (->> the-dex keys)]
         (let [comp-package [["Package"] {"name" comp-package-name}]]
           (merge-node comp-package)
           (dorun
            (for [comp-class-name (->> (get-in the-dex [comp-package-name]) keys)]
              (let [comp-class [["Class"] {"name" comp-class-name}]]
                (merge-node comp-class)
                (merge-rel comp-package [["CONTAIN"] nil] comp-class)
                (merge-rel dex [["CONTAIN"] nil] comp-class)
                (let [{:keys [android-api-ancestors callbacks]}
                      (->> (get-in the-dex [comp-package-name comp-class-name]))]
                  (dorun
                   (for [ancestor android-api-ancestors]
                     (let [ancestor-package-name (:package ancestor)
                           ancestor-package [["Package"] {"name" ancestor-package-name}]
                           ancestor-class-name (:class ancestor)
                           ancestor-class [["Class"] {"name" ancestor-class-name}]]
                       (merge-node ancestor-package)
                       (merge-node ancestor-class)
                       (merge-rel ancestor-package [["CONTAIN"] nil] ancestor-class)
                       (merge-rel comp-class [["DESCEND"] nil] ancestor-class))))
                  (dorun
                   (for [callback-name (->> callbacks keys)]
                     (let [callback [["Method" "Callback"]
                                     {"name" (str comp-class-name "." callback-name)}]]
                       (merge-node callback)
                       (merge-rel comp-class [["CONTAIN"] nil] callback)
                       (let [path [comp-package-name comp-class-name :callbacks callback-name]]
                         ;; explicit control flow
                         (let [path (conj path :explicit)]
                           ;; deduplication
                           (let [callback-invokes (->> (get-in the-dex path)
                                                       (map #(select-keys % [:package :class :method]))
                                                       (into #{}))]
                             (dorun
                              (for [callback-invoke callback-invokes]
                                (let [invoke-package-name (:package callback-invoke)
                                      invoke-package [["Package"] {"name" invoke-package-name}]
                                      invoke-class-name (:class callback-invoke)
                                      invoke-class [["Class"] {"name" invoke-class-name}]
                                      invoke-name (:method callback-invoke)
                                      invoke [["Method"]
                                              {"name" (str invoke-class-name "." invoke-name)}]]
                                  (merge-node invoke-package)
                                  (merge-node invoke-class)
                                  (merge-node invoke)
                                  (merge-rel invoke-package [["CONTAIN"] nil] invoke-class)
                                  (merge-rel invoke-class [["CONTAIN"] nil] invoke)
                                  (merge-rel callback [["EXPLICIT_INVOKE"] nil] invoke)
                                  (merge-rel invoke [["INVOKED_BY"] nil] apk)))))
                           (when neo4j-include-methodinstance
                             (dorun
                              (for [callback-invoke (get-in the-dex path)]
                                (let [invoke-class-name (:class callback-invoke)
                                      invoke-class [["Class"] {"name" invoke-class-name}]
                                      invoke-name (:method callback-invoke)
                                      args (:args callback-invoke)
                                      invoke [["Method"] {"name" invoke-name}]
                                      invoke-instance [["MethodInstance"]
                                                       {"name" (str invoke-class-name "." invoke-name)
                                                        "args" args}]]
                                  (merge-node invoke-instance)
                                  (merge-rel invoke-instance [["INSTANCE_OF"] nil] invoke)
                                  (merge-rel callback [["EXPLICIT_INVOKE"] nil] invoke-instance))))))
                         ;; implicit control flow
                         (let [path (conj path :implicit)]
                           ;; deduplication
                           (let [callback-invokes (->> (get-in the-dex path)
                                                       (map #(select-keys % [:package :class :method]))
                                                       (into #{}))]
                             (dorun
                              (for [callback-invoke callback-invokes]
                                (let [invoke-package-name (:package callback-invoke)
                                      invoke-package [["Package"] {"name" invoke-package-name}]
                                      invoke-class-name (:class callback-invoke)
                                      invoke-class [["Class"] {"name" invoke-class-name}]
                                      invoke-name (:method callback-invoke)
                                      invoke [["Method"]
                                              {"name" (str invoke-class-name "." invoke-name)}]]
                                  (merge-node invoke-package)
                                  (merge-node invoke-class)
                                  (merge-node invoke)
                                  (merge-rel invoke-package [["CONTAIN"] nil] invoke-class)
                                  (merge-rel invoke-class [["CONTAIN"] nil] invoke)
                                  (merge-rel callback [["IMPLICIT_INVOKE"] nil] invoke)
                                  (merge-rel invoke [["INVOKED_BY"] nil] apk)))))
                           (when neo4j-include-methodinstance
                             (dorun
                              (for [callback-invoke (get-in the-dex path)]
                                (let [invoke-class-name (:class callback-invoke)
                                      invoke-class [["Class"] {"name" invoke-class-name}]
                                      invoke-name (:method callback-invoke)
                                      args (:args callback-invoke)
                                      invoke [["Method"] {"name" invoke-name}]
                                      invoke-instance [["MethodInstance"]
                                                       {"name" (str invoke-class-name "." invoke-name)
                                                        "args" args}]]
                                  (merge-node invoke-instance)
                                  (merge-rel invoke-instance [["INSTANCE_OF"] nil] invoke)
                                  (merge-rel callback [["IMPLICIT_INVOKE"] nil] invoke-instance))))))
                         ;; invokes that descend from Android API
                         (let [path (conj path :descend)]
                           (dorun
                            (for [[api-invoke callback-invokes] (get-in the-dex path)]
                              (let [api-package-name (:package api-invoke)
                                    api-package [["Package"] {"name" api-package-name}]
                                    api-class-name (:class api-invoke)
                                    api-class [["Class"] {"name" api-class-name}]
                                    api-name (:method api-invoke)
                                    api [["Method"]
                                         {"name" (str api-class-name "." api-name)}]]
                                (merge-node api-package)
                                (merge-node api-class)
                                (merge-node api)
                                (merge-rel api-package [["CONTAIN"] nil] api-class)
                                (merge-rel api-class [["CONTAIN"] nil] api)
                                (dorun
                                 (for [callback-invoke callback-invokes]
                                   (let [invoke-package-name (:package callback-invoke)
                                         invoke-package [["Package"] {"name" invoke-package-name}]
                                         invoke-class-name (:class callback-invoke)
                                         invoke-class [["Class"] {"name" invoke-class-name}]
                                         invoke-name (:method callback-invoke)
                                         invoke [["Method"]
                                                 {"name" (str invoke-class-name "." invoke-name)}]]
                                     (merge-rel invoke [["DESCEND"] nil] api))))))))
                         (when-not neo4j-no-callgraph
                           (let [path (conj path :invoke-paths)
                                 invoke-paths (get-in the-dex path)]
                             (when invoke-paths
                               ;; link the root node to the Callback and the Apk
                               (let [root-node (invoke-path-get-node invoke-paths)
                                     node [["CallGraphNode"]
                                           {"name" (invoke-path-get-node-name root-node)
                                            "signature" root-node
                                            "apk" apk-sha256}]]
                                 (merge-node node)
                                 (merge-rel apk [["CALLGRAPH"] nil] node)
                                 (merge-rel callback [["CALLGRAPH"] nil] node))
                               ;; iteratively link descendants
                               (process-worklist
                                #{invoke-paths}
                                (fn [worklist]
                                  (let [new-worklist (atom #{})]
                                    (dorun
                                     (for [work worklist]
                                       (let [node (invoke-path-get-node work)
                                             descendants (invoke-path-get-descendants work)
                                             children (map invoke-path-get-node descendants)
                                             parent-node [["CallGraphNode"]
                                                          {"name" (invoke-path-get-node-name node)
                                                           "signature" node
                                                           "apk" apk-sha256}]]
                                         (merge-node parent-node)
                                         (dorun
                                          (for [child children]
                                            (let [child-node [["CallGraphNode"]
                                                              {"name" (invoke-path-get-node-name child)
                                                               "signature" child
                                                               "apk" apk-sha256}]]
                                              (merge-node child-node)
                                              (merge-rel parent-node [["INVOKE"] nil] child-node))))
                                         (swap! new-worklist into descendants))))
                                    @new-worklist))))))
                         ;; callback signature
                         (let [path (conj path :invoke-paths)
                               invoke-paths (get-in the-dex path)]
                           (when invoke-paths
                             (let [cgdfd (compute-cgdfd invoke-paths)
                                   signature (compute-cgdfd-signature cgdfd)]
                               (when signature
                                 (let [signature-node
                                       [["CallbackSignature"]
                                        {"name"  (str comp-class-name "." callback-name)
                                         "apk" apk-sha256
                                         "signature:double_array"
                                         (->> signature
                                              (map str)
                                              (str/join ","))}]]
                                   (merge-node signature-node)
                                   (merge-rel apk [["CALLBACK_SIGNATURE"] nil] signature-node)
                                   (merge-rel callback [["CALLBACK_SIGNATURE"] nil] signature-node)))))))))))))))))
      ;; app components
      (doseq [comp-type [:activity :service :receiver]]
        (doseq [[comp-name {:keys [intent-filter-action
                                   intent-filter-category]}]
                (->> manifest
                     comp-type)]
          (let [comp-name (name comp-name)
                comp [["Class"] {"name" comp-name}]
                new-comp [["Class" "Component" (->> comp-type name str/capitalize)]
                          {"name" comp-name}]
                intent-filter-actions (map name intent-filter-action)
                intent-filter-categories (map name intent-filter-category)]
            (update-node comp new-comp)
            (doseq [intent-filter-action-name intent-filter-actions]
              (let [intent-filter-action [["IntentFilterAction"]
                                          {"name" intent-filter-action-name}]]
                (merge-node intent-filter-action)
                (merge-rel intent-filter-action [["TRIGGER"] nil] comp)))
            (doseq [intent-filter-category-name intent-filter-categories]
              (let [intent-filter-category [["IntentFilterCategory"]
                                            {"name" intent-filter-category-name}]]
                (merge-node intent-filter-category)
                (merge-rel intent-filter-category [["TRIGGER"] nil] comp)))))))))
  (defn dump-batch-csv
  [batch-csv-prefix {:keys [] :as options}]
  ;; do the updates
  (doseq [[old-node new-node] @node-updates]
    (when-let [id (get @nodes old-node)]
      (swap! nodes dissoc old-node)
      (swap! nodes assoc new-node id)))
  (with-open [out (io/writer (str batch-csv-prefix ".nodes"))]
    (binding [*out* out]
      (let [id-to-node (set/map-invert @nodes)
            node-props (seq @node-props)]
        (println (format "%1$s%2$s%3$s"
                         (str/join "\t" node-props)
                         (if node-props "\t" "")
                         "l:label"))
        (dotimes [id (inc @node-counter)]
          (let [[labels props :as node] (get id-to-node id)]
            (println (format "%1$s%2$s%3$s"
                             (str/join "\t"
                                       (map #(get props %)
                                            node-props))
                             (if node-props "\t" "")                             
                             (str/join "," labels))))))))
  (with-open [out (io/writer (str batch-csv-prefix ".rels"))]
    (binding [*out* out]
      (let [rel-props (seq @rel-props)]
        (println (format "start\tend\t%1$s%2$s%3$s"
                         (str/join "\t" rel-props)
                         (if rel-props "\t" "")
                         "l:label"))
        (doseq [rel (->> (keys @rels) sort)]
          (let [[start end] rel]
            (doseq [[labels props] (get @rels rel)]
              (println (format "%1$s\t%2$s\t%3$s%4$s%5$s"
                               start end
                               (str/join "\t"
                                         (map #(get props %)
                                              rel-props))
                               (if rel-props "\t" "")
                               (str/join "," labels)))))))))))

populate the database with the parsed apk structure

(defn populate-from-parsed-apk
  [apk {:keys [neo4j-include-methodinstance
               neo4j-no-callgraph]
        :as options}]
  (let [manifest (:manifest apk)
        dex-sha256 (:dex-sha256 apk)
        cert-sha256 (:cert-sha256 apk)
        apk-sha256 (:sha256 apk)
        apk-package (:package manifest)
        apk-version-name (:android:versionName manifest)
        apk-version-code (:android:versionCode manifest)
        dex (:dex apk)
        conn (connect options)
        transaction (ntx/begin-tx conn)]
    (ntx/with-transaction
      conn
      transaction
      true
      (ntx/execute
       conn transaction
       [(ntx/statement
         (str/join " "
                   ["MERGE (signkey:SigningKey {sha256:{certsha256}})"
                    "MERGE (apk:Apk {sha256:{apksha256},package:{apkpackage},versionCode:{apkversioncode},versionName:{apkversionname}})"
                    "MERGE (dex:Dex {sha256:{dexsha256}})"
                    "MERGE (signkey)-[:SIGN]->(apk)-[:CONTAIN]->(dex)"
                    "FOREACH ("
                    "perm in {usespermission} |"
                    "  MERGE (n:Permission {name:perm})"
                    "  MERGE (n)<-[:USE]-(apk)"
                    ")"
                    "FOREACH ("
                    "perm in {permission} |"
                    "  MERGE (n:Permission {name:perm})"
                    "  MERGE (n)<-[:DEFINE]-(apk)"
                    ")"])
         {:certsha256 cert-sha256
          :apksha256 apk-sha256
          :dexsha256 dex-sha256
          :usespermission (->> manifest
                               :uses-permission
                               (map name)
                               ;; only consider Android internal API ones
                               ;;(filter android-api?))
          :permission (->> manifest
                           :permission
                           (map name)
                           ;; only consider API ones
                           ;;(filter android-api?))
          :apkpackage apk-package
          :apkversionname apk-version-name
          :apkversioncode apk-version-code})])
      (ntx/execute
       conn transaction
       (let [result (atom [])]
         (doseq [package-name (->> dex keys)]
           (let [class-names (->> (get-in dex [package-name]) keys)]
             (swap! result conj
                    (ntx/statement
                     (str/join " "
                               ["MERGE (dex:Dex {sha256:{dexsha256}})"
                                "MERGE (package:Package {name:{packagename}})"
                                "FOREACH ("
                                "classname in {classnames} |"
                                "  MERGE (class:Class {name:classname})"
                                "  MERGE (package)-[:CONTAIN]->(class)"
                                "  MERGE (dex)-[:CONTAIN]->(class)"
                                ")"])
                     {:dexsha256 dex-sha256
                      :packagename package-name
                      :classnames class-names}))))
         @result))
      (ntx/execute
       conn transaction
       (let [result (atom [])]
         (doseq [package-name (->> dex keys)]
           (let [class-names (->> (get-in dex [package-name]) keys)]
             (doseq [class-name class-names]
               (let [{:keys [android-api-ancestors callbacks]} (->> (get-in dex [package-name class-name]))]
                 (doseq [base android-api-ancestors]
                   (let [ancestor-package (:package base)
                         ancestor-class (:class base)]
                     (swap! result conj
                            (ntx/statement
                             (str/join " "
                                       ["MERGE (class:Class {name:{classname}})"
                                        "MERGE (ancestorpackage:Package {name:{ancestorpackage}})"
                                        "MERGE (ancestorclass:Class {name:{ancestorclass}})"
                                        "MERGE (ancestorpackage)-[:CONTAIN]->(ancestorclass)"
                                        "MERGE (class)-[:DESCEND]->(ancestorclass)"])
                             {:classname class-name
                              :ancestorpackage ancestor-package
                              :ancestorclass ancestor-class}))))))))
         @result))      
      (ntx/execute
       conn transaction
       (let [result (atom [])]
         ;; http://stackoverflow.com/a/26366775
         (dorun
          (for [package-name (->> dex keys)]
            (let [class-names (->> (get-in dex [package-name]) keys)]
              (dorun
               (for [class-name class-names]
                 (let [{:keys [android-api-ancestors callbacks]} (->> (get-in dex [package-name class-name]))]
                   (dorun
                    (for [callback-name (->> callbacks keys)]
                      (let [path [package-name class-name :callbacks callback-name]]
                        (swap! result conj
                               (ntx/statement
                                (str/join " "
                                          ["MERGE (class:Class {name:{classname}})"
                                           "MERGE (callback:Method:Callback {name:{callbackname}})"
                                           "MERGE (class)-[:CONTAIN]->(callback)"])
                                {:classname class-name
                                 :callbackname (str class-name "." callback-name)}))
                        ;; explicit control flow
                        (let [path (conj path :explicit)]
                          ;; deduplication
                          (let [callback-invokes (->> (get-in dex path)
                                                      (map #(select-keys % [:package :class :method]))
                                                      (into #{}))]
                            (dorun
                             (for [callback-invoke callback-invokes]
                               (let [invoke-package-name (:package callback-invoke)
                                     invoke-class-name (:class callback-invoke)
                                     invoke-name (:method callback-invoke)]
                                 (swap! result conj
                                        (ntx/statement
                                         (str/join " "
                                                   ["MERGE (apk:Apk {sha256:{apksha256}})"
                                                    "MERGE (callback:Callback {name:{callbackname}})"
                                                    "MERGE (invokepackage:Package {name:{invokepackagename}})"
                                                    "MERGE (invokeclass:Class {name:{invokeclassname}})"
                                                    "MERGE (invoke:Method {name:{invokename}})"
                                                    "MERGE (invokepackage)-[:CONTAIN]->(invokeclass)-[:CONTAIN]->(invoke)"
                                                    "MERGE (callback)-[:EXPLICIT_INVOKE]->(invoke)"
                                                    ;; to quickly find Apk from Method
                                                    "MERGE (apk)<-[:INVOKED_BY]-(invoke)"])
                                         {:apksha256 apk-sha256
                                          :callbackname (str class-name "." callback-name)
                                          :invokepackagename invoke-package-name
                                          :invokeclassname invoke-class-name
                                          :invokename (str invoke-class-name "." invoke-name)}))))))
                          (when neo4j-include-methodinstance
                            (dorun
                             (for [callback-invoke (get-in dex path)]
                               (let [invoke-class-name (:class callback-invoke)
                                     invoke-name (:method callback-invoke)
                                     args (:args callback-invoke)]
                                 (swap! result conj
                                        (ntx/statement
                                         (str/join " "
                                                   ["MERGE (callback:Callback {name:{callbackname}})"
                                                    "MERGE (invoke:Method {name:{invokename}})"
                                                    "MERGE (invokeinst:MethodInstance {name:{invokename},args:{args}})"
                                                    "MERGE (invoke)<-[:INSTANCE_OF]-(invokeinst)"
                                                    "MERGE (callback)-[:EXPLICIT_INVOKE]->(invokeinst)"])
                                         {:callbackname (str class-name "." callback-name)
                                          :invokename (str invoke-class-name "." invoke-name)
                                          :args args})))))))
                        ;; implicit control flow
                        (let [path (conj path :implicit)]
                          ;; deduplication
                          (let [callback-invokes (->> (get-in dex path)
                                                      (map #(select-keys % [:package :class :method]))
                                                      (into #{}))]
                            (dorun
                             (for [callback-invoke callback-invokes]
                               (let [invoke-package-name (:package callback-invoke)
                                     invoke-class-name (:class callback-invoke)
                                     invoke-name (:method callback-invoke)]
                                 (swap! result conj
                                        (ntx/statement
                                         (str/join " "
                                                   ["MERGE (apk:Apk {sha256:{apksha256}})"
                                                    "MERGE (callback:Callback {name:{callbackname}})"
                                                    "MERGE (invokepackage:Package {name:{invokepackagename}})"
                                                    "MERGE (invokeclass:Class {name:{invokeclassname}})"
                                                    "MERGE (invoke:Method {name:{invokename}})"
                                                    "MERGE (invokepackage)-[:CONTAIN]->(invokeclass)-[:CONTAIN]->(invoke)"
                                                    "MERGE (callback)-[:IMPLICIT_INVOKE]->(invoke)"
                                                    ;; to quickly find Apk from Method
                                                    "MERGE (apk)<-[:INVOKED_BY]-(invoke)"])
                                         {:apksha256 apk-sha256
                                          :callbackname (str class-name "." callback-name)
                                          :invokepackagename invoke-package-name
                                          :invokeclassname invoke-class-name
                                          :invokename (str invoke-class-name "." invoke-name)}))))))
                          (when neo4j-include-methodinstance
                            (dorun
                             (for [callback-invoke (get-in dex path)]
                               (let [invoke-class-name (:class callback-invoke)
                                     invoke-name (:method callback-invoke)
                                     args (:args callback-invoke)]
                                 (swap! result conj
                                        (ntx/statement
                                         (str/join " "
                                                   ["MERGE (callback:Callback {name:{callbackname}})"
                                                    "MERGE (invoke:Method {name:{invokename}})"
                                                    "MERGE (invokeinst:MethodInstance {name:{invokename},args:{args}})"
                                                    "MERGE (invoke)<-[:INSTANCE_OF]-(invokeinst)"
                                                    "MERGE (callback)-[:IMPLICIT_INVOKE]->(invokeinst)"])
                                         {:callbackname (str class-name "." callback-name)
                                          :invokename (str invoke-class-name "." invoke-name)
                                          :args args})))))))
                        ;; invokes that descend from Android API
                        (let [path (conj path :descend)]
                          (dorun
                           (for [[api-invoke callback-invokes] (get-in dex path)]
                             (let [api-package-name (:package api-invoke)
                                   api-class-name (:class api-invoke)
                                   api-name (:method api-invoke)]
                               (swap! result conj
                                      (ntx/statement
                                       (str/join " "
                                                 ["MERGE (apipackage:Package {name:{apipackagename}})"
                                                  "MERGE (apiclass:Class {name:{apiclassname}})"
                                                  "MERGE (apiname:Method {name:{apiname}})"
                                                  "MERGE (apipackage)-[:CONTAIN]->(apiclass)-[:CONTAIN]->(apiname)"])
                                       {:apipackagename api-package-name
                                        :apiclassname api-class-name
                                        :apiname (str api-class-name "." api-name)}))
                               (dorun
                                (for [callback-invoke callback-invokes]
                                  (let [invoke-package-name (:package callback-invoke)
                                        invoke-class-name (:class callback-invoke)
                                        invoke-name (:method callback-invoke)]
                                    (swap! result conj
                                           (ntx/statement
                                            (str/join " "
                                                      ["MERGE (apiname:Method {name:{apiname}})"
                                                       "MERGE (invokename:Method {name:{invokename}})"
                                                       "MERGE (apiname)<-[:DESCEND]-(invokename)"])
                                            (merge {:apiname (str api-class-name "." api-name)
                                                    :invokename (str invoke-class-name "." invoke-name)}))))))))))
                        (when-not neo4j-no-callgraph
                          (let [path (conj path :invoke-paths)
                                invoke-paths (get-in dex path)]
                            (when-let [root-node (invoke-path-get-node invoke-paths)]
                              ;; link the root node to the Callback and the Apk
                              (swap! result conj
                                     (ntx/statement
                                      (str/join " "
                                                ["MERGE (apk:Apk {sha256:{apksha256}})"
                                                 "MERGE (callback:Callback {name:{callbackname}})"
                                                 "MERGE (cgnode:CallGraphNode {name:{name},apk:{apksha256},signature:{signature}})"
                                                 "MERGE (apk)-[:CALLGRAPH]->(cgnode)<-[:CALLGRAPH]-(callback)"])
                                      {:apksha256 apk-sha256
                                       :name (invoke-path-get-node-name root-node)
                                       :signature root-node
                                       :callbackname (str class-name "." callback-name)})))
                            (process-worklist
                             #{invoke-paths}
                             (fn [worklist]
                               (let [new-worklist (atom #{})]
                                 (dorun
                                  (for [work worklist]
                                    (let [node (invoke-path-get-node work)
                                          descendants (invoke-path-get-descendants work)
                                          children (map invoke-path-get-node descendants)]
                                      (swap! result conj
                                             (ntx/statement
                                              (str/join " "
                                                        ["MERGE (node:CallGraphNode {name:{name},apk:{apksha256},signature:{signature}})"
                                                         "FOREACH ("
                                                         "child in {children} |"
                                                         "  MERGE (childnode:CallGraphNode {name:child.name,signature:child.signature,apk:{apksha256}})"
                                                         "  MERGE (node)-[:INVOKE]->(childnode)"
                                                         ")"])
                                              {:apksha256 apk-sha256
                                               :name (invoke-path-get-node-name node)
                                               :signature node
                                               :children (map #(let [node %]
                                                                 {:name (invoke-path-get-node-name node)
                                                                  :signature node})
                                                              children)}))
                                      (swap! new-worklist into descendants))))
                                 @new-worklist))))))))))))))         
         @result))
      ;; app components
      (ntx/execute
       conn transaction
       (let [result (atom [])]
         (doseq [comp-type [:activity :service :receiver]]
           (doseq [[comp-name {:keys [intent-filter-action
                                      intent-filter-category]}]
                   (->> manifest
                        comp-type)]
             (let [comp-name (name comp-name)
                   intent-filter-action (map name intent-filter-action)
                   intent-filter-category (map name intent-filter-category)]
               (swap! result conj
                      (ntx/statement
                       (str/join " "
                                 ["MERGE (dex:Dex {sha256:{dexsha256}})"
                                  "MERGE (ic:Class {name:{compname}})"
                                  (format "SET ic:%1$s:Component"
                                          (->> comp-type name str/capitalize))
                                  "MERGE (dex)-[:CONTAIN]->(ic)"
                                  "FOREACH ("
                                  "action IN {intentfilteraction} |"
                                  "  MERGE (n:IntentFilterAction {name:action})"
                                  "  MERGE (n)-[:TRIGGER]->(ic)"
                                  ")"
                                  "FOREACH ("
                                  "category IN {intentfiltercategory} |"
                                  "  MERGE (n:IntentFilterCategory {name:category})"
                                  "  MERGE (n)-[:TRIGGER]->(ic)"
                                  ")"
                                  ])
                       {:dexsha256 dex-sha256
                        :compname comp-name
                        :intentfilteraction intent-filter-action
                        :intentfiltercategory intent-filter-category})))))         
         @result))
      ;; any more query within the transaction?)))

tag an existing Apk node with the tagsuntag an existing Apk node with the tags

(let [common (fn [apk tags
                  {:keys [verbose] :as options}
                  op]
               (when-not (empty? tags)
                 (let [statements (atom [])
                       apk-sha256 (:sha256 apk)]
                   (doseq [[types prop] tags]
                     (swap! statements conj
                            (ntx/statement
                             (str/join " "
                                       ["MATCH (a:Apk {sha256:{apksha256}})"
                                        (format "MERGE (l:%1$s:Tag {id:{prop}.id})"
                                                (->> types
                                                     ;; to satisfy Neo4j identifier requirement
                                                     (map #(-> (str %)
                                                               (str/replace #"\s+" "")
                                                               (str/replace #"-" "_")))
                                                     (str/join ":")))
                                        "SET l={prop}"
                                        "MERGE (l)-[r:TAG]->(a)"
                                        (case op
                                          :untag "DELETE r"
                                          :tag ""
                                          "")])
                             {:apksha256 apk-sha256
                              :prop prop})))
                   (let [conn (connect options)
                         transaction (ntx/begin-tx conn)]
                     (try
                       (ntx/commit conn transaction @statements)
                       (catch Exception e
                         (print-stack-trace-if-verbose e verbose)))))))]
  (defn tag-apk
  [apk tags
   {:keys [] :as options}]
  (common apk tags options :tag))
  (defn untag-apk
  [apk tags
   {:keys [] :as options}]
  (common apk tags options :untag)))

add component callback signature

(defn add-callback-signature
  [apk
   {:keys [verbose] :as options}]
  (let [apk-sha256 (:sha256 apk)
        the-dex (:dex apk)]
    (dorun
     (for [comp-package-name (->> the-dex keys)]
       (do
         (dorun
          (for [comp-class-name (->> (get-in the-dex
                                             [comp-package-name])
                                     keys)]
            (do
              (let [{:keys [android-api-ancestors callbacks]}
                    (->> (get-in the-dex
                                 [comp-package-name
                                  comp-class-name]))]
                (dorun
                 (for [callback-name (->> callbacks keys)]
                   (do
                     (let [path [comp-package-name
                                 comp-class-name
                                 :callbacks
                                 callback-name]]
                       (let [path (conj path :invoke-paths)
                             invoke-paths (get-in the-dex path)]
                         (when invoke-paths
                           (let [cgdfd (compute-cgdfd invoke-paths)
                                 signature (compute-cgdfd-signature cgdfd)]
                             (when signature
                               (let [statements (atom [])]
                                 (swap! statements conj
                                        (ntx/statement
                                         (str/join " "
                                                   ["MATCH (a:Apk {sha256:{apksha256}})"
                                                    "MATCH (cb:Callback {name:{callbackname}})"
                                                    "MERGE (sig:CallbackSignature {name:{callbackname},apk:{apksha256}})"
                                                    "MERGE (a)-[:CALLBACK_SIGNATURE]->(sig)<-[:CALLBACK_SIGNATURE]-(cb)"
                                                    "SET sig.signature={signature}"])
                                         {:apksha256 apk-sha256
                                          :callbackname (str comp-class-name "." callback-name)
                                          :signature signature}))
                                 (let [conn (connect options)
                                       transaction (ntx/begin-tx conn)]
                                   (try
                                     (ntx/commit conn transaction @statements)
                                     (catch Exception e
                                       (print-stack-trace-if-verbose e verbose))))))))))))))))))))))

remove component callback signature

(defn remove-callback-signature
  [apk
   {:keys [verbose] :as options}]
  (let [apk-sha256 (:sha256 apk)]
    (let [statements (atom [])]
      (swap! statements conj
             (ntx/statement
              (str/join " "
                        ["MATCH (a:Apk {sha256:{apksha256}})-[:CALLBACK_SIGNATURE]->(sig:CallbackSignature)"
                         "WITH sig"
                         "MATCH (sig)<-[e:CALLBACK_SIGNATURE]-()"
                         "DELETE sig, e"])
              {:apksha256 apk-sha256}))
      (let [conn (connect options)
            transaction (ntx/begin-tx conn)]
        (try
          (ntx/commit conn transaction @statements)
          (catch Exception e
            (print-stack-trace-if-verbose e verbose)))))))

create index

(defn create-index
  [{:keys []
    :as options}]
  (let [statements (map ntx/statement
                        (map (fn [[label prop]]
                               (str "CREATE INDEX ON :"
                                    label "(" prop ")"))
                             [["SigningKey" "sha256"]
                              ["Apk" "sha256"]
                              ["Dex" "sha256"]
                              ["Permission" "name"]
                              ["Package" "name"]
                              ["Class" "name"]
                              ["Method" "name"]
                              ["MethodInstance" "name"]
                              ["Callback" "name"]
                              ["Activity" "name"]
                              ["Service" "name"]
                              ["Receiver" "name"]
                              ["IntentFilterAction" "name"]
                              ["IntentFilterCategory" "name"]
                              ["AndroidAPI" "name"]
                              ["Tag" "id"]
                              ["CallGraphNode" "apk"]
                              ["CallGraphNode" "name"]
                              ["CallbackSignature" "apk"]
                              ["CallbackSignature" "name"]]))]
    (let [conn (connect options)
          transaction (ntx/begin-tx conn)]
      (ntx/commit conn transaction statements))))

label =~'^(?:com.)?android' nodes as AndroidAPI; should be infrequently used

(defn mark-android-api
  [{:keys []
    :as options}]
  (let [conn (connect options)
        transaction (ntx/begin-tx conn)]
    (ntx/with-transaction
      conn
      transaction
      true
      (ntx/execute conn transaction
                   [(ntx/statement
                     (str/join " "
                               ["MATCH (n)"
                                "WHERE n.name=~{regex}"
                                "SET n:AndroidAPI"])
                     {:regex "L?(?:android\\.|com\\.android\\.|dalvik\\.).*"})]))))

connect to local neo4j server at PORT

(defn connect
  [{:keys [neo4j-port neo4j-protocol
           neo4j-conn-backoff
           verbose]
    :as options
    :or {neo4j-port (:neo4j-port @defaults)
         neo4j-protocol (:neo4j-protocol @defaults)}}]
  (let [port (if neo4j-port neo4j-port (:neo4j-port @defaults))
        protocol (if neo4j-protocol neo4j-protocol (:neo4j-protocol @defaults))
        retry (atom nil)
        conn (atom nil)]
    (loop []
      (try
        (reset! conn (nr/connect (format "%1$s://localhost:%2$d/db/data/" protocol port)))
        (reset! retry false)
        ;; java.io.IOException catches java.net.SocketException and other situations
        (catch java.io.IOException e
          (let [backoff (rand-int neo4j-conn-backoff)]
            (when (and verbose (> verbose 1))
              (binding [*out* *err*]
                (println "Neo4j connection exception, retry in"
                         backoff
                         "seconds")))
            (Thread/sleep (* backoff 1000)))
          (reset! retry true)))
      (when @retry
        (recur)))
    @conn))

test whether NAME is part of Android API

(defn android-api?
  [name]
  (let [name (str name)]
    (re-find #"^L?(?:android\.|com\.android\.|dalvik\.)" name)))
 

recipes for queries

(ns woa.neo4j.recipe
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]]))

declaration

(declare get-app-skeleton
         get-app-by-class-complexity)

implementation

get the skeleton of an app

(defn get-app-skeleton
  [{:keys [sha256 package versionCode versionName]
    :as apk}
   &
   {:keys [return?]
    :as options}]
  (str/join " "
            [(format "MATCH (apk:Apk%1$s)"
                     (if apk
                       (format "{%1$s}"
                               (str/join ","
                                         (->> apk
                                              (map (fn [[k v]]
                                                     (str (name k) ":"
                                                          (format "\"%1$s\""
                                                                  v)))))))
                       ""))
             "OPTIONAL MATCH (signingKey:SigningKey)"
             "-[:SIGN]-> (apk)"
             "-[:CONTAIN]-> (dex:Dex)"
             "-[:CONTAIN]-> (class:Class)"
             "-[:CONTAIN]-> (callback:Callback)"
             "OPTIONAL MATCH (callback) -[:INVOKE]-> (invoke)"
             "OPTIONAL MATCH (explicitInvoke) <-[:EXPLICIT_INVOKE]- (callback) -[:IMPLICIT_INVOKE]-> (implicitInvoke)"
             (if return?
               "RETURN signingKey, apk, dex, class, callback, invoke, explicitInvoke, implicitInvoke"
               "")]))

sort apps by how many component classes they have

(defn get-app-by-class-complexity
  [&
   {:keys [skip limit desc where return?]
    :as options
    :or {limit 5}}]
  (str/join " "
            ["MATCH (apk:Apk)"
             "-[:CONTAIN]-> (:Dex)"
             "-[:CONTAIN]-> (class:Class)"
             "WITH apk, count(class) as cc"
             (if where (format "WHERE %1$s" where) "")
             (if return? "RETURN apk, cc" "")
             "ORDER BY cc"
             (if desc "DESC" "")
             (if limit (format "LIMIT %1$d" limit) "")
             (if skip (format "SKIP %1$d" skip) "")
             ]))
 
(ns woa.util
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])
  ;; imports)

declaration

(declare process-worklist
         print-stack-trace-if-verbose)

implementation

process worklist until it is empty

process takes a worklist as input, and outputs the new worklist

(defn process-worklist
  [initial-worklist process]
  (loop [worklist initial-worklist]
    (when-not (empty? worklist)
      (recur (process worklist)))))

print-stack-trace Exception e to err if verbose is non-nil

(defn print-stack-trace-if-verbose
  [^Exception e verbose & [level]]
  (when (and verbose
             (or (not level) (> verbose level)))
    (binding [*out* *err*]
      (print-stack-trace e)
      ;; flush is critical for timely output
      (flush))))
 

https://www.virustotal.com/en/documentation/public-api/

(ns woa.virustotal.core
  ;; internal libs
  ;; common libs
  (:require [clojure.string :as str]
            [clojure.set :as set]
            [clojure.walk :as walk]
            [clojure.zip :as zip]
            [clojure.java.io :as io]
            [clojure.pprint :refer [pprint print-table]]
            [clojure.stacktrace :refer [print-stack-trace]])
  ;; special libs
  (:require [clj-http.client :as http-client]
            [clojure.data.json :as json]))

declaration

(declare submit-file
         request-rescan
         make-report-result-into-tags get-report
         ;; plumbing
         jsonfy-map clojurefy-map
         interpret-response-code interpret-status)

implementation

porcelain

submit file for checking

(defn submit-file
  [{:keys [file-content]
    :as resource}
   {:keys [virustotal-apikey]
    :as options}]
  (let [url "https://www.virustotal.com/vtapi/v2/file/scan"]
    ;; basic safety check
    (when (and file-content)
      (let [{:keys [status body]
             :as http-response}
            (http-client/post url
                              {:multipart [{:name "apikey"
                                            :content virustotal-apikey}
                                           {:name "file"
                                            :content file-content}]
                               :throw-exceptions false})
            i-status (interpret-status status)]
        (if (= i-status :status-ok)
          (let [result (->> body json/read-str clojurefy-map)
                {:keys [response-code]} result
                i-response-code (interpret-response-code response-code)]
            (if (= i-response-code :response-ok)
              (assoc-in result [:response-code]
                        i-response-code)
              i-response-code))
          i-status)))))

obtain report result

(defn request-rescan
  [{:keys [md5 sha1 sha256 scan-id]
    :as resource}
   {:keys [virustotal-apikey]
    :as options}]
  (let [url "https://www.virustotal.com/vtapi/v2/file/rescan"]
    (when-let [resource (or md5 sha1 sha256 scan-id)]
      (let [{:keys [status body]
             :as http-response}
            (http-client/post url
                              {:form-params {:apikey virustotal-apikey
                                             :resource resource}
                               :throw-exceptions false})
            i-status (interpret-status status)]
        (if (= i-status :status-ok)
          (let [result (->> body json/read-str clojurefy-map)
                {:keys [response-code]} result
                i-response-code (interpret-response-code response-code)]
            (if (= i-response-code :response-ok)
              (assoc-in result [:response-code]
                        i-response-code)
              i-response-code))
          i-status)))))

make report-result into the form suitable to supply as app :tags on command-line

(defn make-report-result-into-tags
  [report-result]
  (when-let [scans (:scans report-result)]
    (->> scans
         (filter (fn [[_ v]]
                   (get v "detected")))
         (map (fn [[k v]]
                [["Malware" "VirusTotal"]
                 (let [result (get v "result")]
                   (merge (dissoc v "detected")
                          {"id" (str/join "-"
                                          ["malware" k result])
                           "source" k}))]))
         vec)))

obtain report result

(defn get-report
  [{:keys [md5 sha1 sha256 scan-id]
    :as resource}
   {:keys [virustotal-apikey]
    :as options}]
  (let [url "https://www.virustotal.com/vtapi/v2/file/report"]
    (when-let [resource (or md5 sha1 sha256 scan-id)]
      (let [{:keys [status body]
             :as http-response}
            (http-client/post url
                              {:form-params {:apikey virustotal-apikey
                                             :resource resource}
                               :throw-exceptions false})
            i-status (interpret-status status)]
        (if (= i-status :status-ok)
          (let [result (->> body json/read-str clojurefy-map)
                {:keys [response-code]} result
                i-response-code (interpret-response-code response-code)]
            (if (= i-response-code :response-ok)
              (assoc-in result [:response-code]
                        i-response-code)
              i-response-code))
          i-status)))))

plumbing

interpret HTTP response code from VirusTotal

(defn interpret-response-code
  [response-code]
  (case response-code
    0 :response-not-found
    1 :response-ok
    -2 :response-still-queued
    (list :response-code response-code)))

interpret HTTP status from VirusTotal

(defn interpret-status
  [status]
  (case status
    200 :status-ok
    204 :status-exceed-api-limit
    403 :status-exceed-priviledge
    (list :status status)))

use underscored string as key

(defn jsonfy-map
  [the-map]
  (->> the-map
       (map (fn [[k v]]
              [(-> (cond
                     (keyword? k) (name k)
                     :otherwise (str k))
                   (str/replace "-" "_"))
               v]))
       (into {})))

use dashed keyword as key

(defn clojurefy-map
  [the-map]
  (->> the-map
       (map (fn [[k v]]
              [(-> k
                   (str/replace "_" "-")
                   keyword)
               v]))
       (into {})))