Merge pull request #50 from awest1339/celery

Celery
mitre · Oct 16, 2017 · 8f8efcc · 8f8efcc
2 parents 6ef81de + fbfc057
commit 8f8efcc
Show file tree

Hide file tree

Showing 30 changed files with 1,351 additions and 521 deletions.
diff --git a/.gitignore b/.gitignore
@@ -11,6 +11,7 @@ report.json
 __pycache__/
 *.py[cod]
 *.swp
+*.swo
 # C extensions
 *.so
 *.dll
@@ -61,5 +62,6 @@ target/
 # Sqlite DB
 sqlite.db
 task_db
+testing.db
 # Tmp Upload Dir
 utils/tmp/
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,13 @@
+sudo: required
+language: python
+python:
+  - "2.7"
+  - "3.4"
+  - "3.5"
+  - "3.6"
+install:
+  - yes "" | sudo -HE ./install.sh
+  - pip install -r requirements.txt
+  - python multiscanner.py init
+script:
+  - pytest
diff --git a/README.md b/README.md
@@ -1,5 +1,6 @@
 MultiScanner
 ============
+[![Build Status](https://travis-ci.org/mitre/multiscanner.svg)](https://travis-ci.org/mitre/multiscanner)
 
 Introduction
 ------------
@@ -15,8 +16,8 @@ options can be found in [docs/modules.md](docs/modules.md)
 
 Requirements
 ------------
-Python 2.7 is recommended. Compatibility with 2.7+ and
-3.3+ is supported but not thoroughly maintained and tested. Please submit an issue
+Python 3.6 is recommended. Compatibility with 2.7+ and
+3.4+ is supported but not as thoroughly maintained and tested. Please submit an issue
 or a pull request fixing any issues found with other versions of Python.
 
 

diff --git a/analytics/ssdeep_compare.py b/analytics/ssdeep_compare.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+
+'''
+Simple implementation of ssdeep comparisons using a few optimizations
+described at the links below
+
+https://www.virusbulletin.com/virusbulletin/2015/11/optimizing-ssdeep-use-scale
+http://www.intezer.com/intezer-community-tip-ssdeep-comparisons-with-elasticsearch/
+
+Designed to be run on a regular basis (e.g., nightly).
+
+For each sample that has not run ssdeep analytic, search for samples where
+ssdeep.compare > 0 based on chunksize, chunk 7grams, and double-chunk 7grams.
+Update sample with any matches and mark ssdeep analytic as having run
+'''
+
+import sys
+import os
+import argparse
+import requests
+import json
+import ssdeep
+import configparser
+
+MS_WD = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if os.path.join(MS_WD, 'storage') not in sys.path:
+    sys.path.insert(0, os.path.join(MS_WD, 'storage'))
+if MS_WD not in sys.path:
+    sys.path.insert(0, os.path.join(MS_WD))
+
+import multiscanner
+import common
+import elasticsearch_storage
+
+
+def main():
+    '''
+    Run the ssdeep comparison analytic against the Elasticsearch datastore.
+
+    Every sample whose ssdeep.analyzed field is 'false' is compared (via
+    ssdeep.compare) against the candidate samples Elasticsearch returns for
+    its chunksize, chunk, and double_chunk values. Each score is written to
+    the ssdeep.matches field of both samples, and the sample is then marked
+    with ssdeep.analyzed = 'true'.
+
+    Exits with a non-zero status when no Elasticsearch storage is loaded.
+    '''
+    parser = argparse.ArgumentParser(description='Script to interact with Multiscanner\'s '
+        'Elasticsearch datastore to run ssdeep.compare using a few '
+        'optimizations based on ssdeep hash structure.')
+    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
+                        help='Increase output to stdout')
+    args = parser.parse_args()
+
+    storage_conf = multiscanner.common.get_config_path(multiscanner.CONFIG, 'storage')
+    config_object = configparser.ConfigParser()
+    config_object.optionxform = str
+    config_object.read(storage_conf)
+    conf = common.parse_config(config_object)
+    storage_handler = multiscanner.storage.StorageHandler(configfile=storage_conf)
+    es_handler = None
+    for handler in storage_handler.loaded_storage:
+        if isinstance(handler, elasticsearch_storage.ElasticSearchStorage):
+            es_handler = handler
+            break
+
+    if not es_handler:
+        print('[!] ERROR: This analytic only works with ES stroage module.')
+        sys.exit(1)
+
+    # probably not ideal...
+    ES = es_handler.es
+    INDEX = conf['ElasticSearchStorage']['index']
+    DOC_TYPE = 'sample'
+
+    # get all of the samples where ssdeep_compare has not been run
+    # e.g., ssdeep.analyzed == false
+    query = {
+        '_source': ['ssdeep', 'SHA256'],
+        'query': {
+            'bool': {
+                'must': [
+                    { 'match': { 'ssdeep.analyzed': 'false' }}
+                ]
+            }
+        }
+    }
+
+    page = ES.search(
+        INDEX,
+        scroll='2m',
+        size=1000,
+        body=query)
+
+    records_list = []
+    while len(page['hits']['hits']) > 0:
+        records_list.extend(page['hits']['hits'])
+        sid = page['_scroll_id']
+        page = ES.scroll(scroll_id=sid, scroll='2m')
+
+    for new_ssdeep_hit in records_list:
+        new_ssdeep_hit_src = new_ssdeep_hit.get('_source')
+        new_ssdeep = new_ssdeep_hit_src.get('ssdeep')
+        chunksize = new_ssdeep.get('chunksize')
+        chunk = new_ssdeep.get('chunk')
+        double_chunk = new_ssdeep.get('double_chunk')
+        new_sha256 = new_ssdeep_hit_src.get('SHA256')
+
+        # build new query for docs that match our optimizations
+        # https://github.com/intezer/ssdeep-elastic/blob/master/ssdeep_elastic/ssdeep_querying.py#L35
+        opti_query = {
+            '_source': ['ssdeep', 'SHA256'],
+            'query': {
+                'bool': {
+                    'must': [
+                        {
+                            'terms': {
+                                # floor division: the halved chunksize stays an int on Python 3 too
+                                'ssdeep.chunksize': [chunksize, chunksize // 2, chunksize * 2]
+                            }
+                        },
+                        {
+                            'bool': {
+                                'should': [
+                                    {
+                                        'match': {
+                                            'ssdeep.chunk': {
+                                                'query': chunk
+                                            }
+                                        }
+                                    },
+                                    {
+                                        'match': {
+                                            'ssdeep.double_chunk': {
+                                                'query': double_chunk
+                                            }
+                                        }
+                                    }
+                                ],
+                                'minimum_should_match': 1
+                            }
+                        },
+                        {
+                            'bool': {
+                                'must_not': {
+                                    'match': {
+                                        'SHA256': new_sha256
+                                    }
+                                }
+                            }
+                        }
+                    ]
+                }
+            }
+        }
+
+        # TODO: a must_not/exists filter on 'ssdeep.matches.' + new_sha256 (to skip
+        #   pairs already recorded from the other side) did not work as expected
+
+        opti_page = ES.search(
+            INDEX,
+            scroll='2m',
+            size=1000,
+            body=opti_query)
+
+        while len(opti_page['hits']['hits']) > 0:
+            # score every candidate on this page and record it on both documents
+            for opti_hit in opti_page['hits']['hits']:
+                opti_hit_src = opti_hit.get('_source')
+                opti_sha256 = opti_hit_src.get('SHA256')
+                result = ssdeep.compare(new_ssdeep.get('ssdeep_hash'),
+                                        opti_hit_src.get('ssdeep').get('ssdeep_hash'))
+
+                if args.verbose:
+                    print(new_sha256, opti_sha256, result)
+
+                msg = { 'doc': { 'ssdeep': { 'matches': { opti_sha256: result } } } }
+                ES.update(
+                    index=INDEX,
+                    doc_type=DOC_TYPE,
+                    id=new_ssdeep_hit.get('_id'),
+                    body=json.dumps(msg))
+
+                msg = { 'doc': { 'ssdeep': { 'matches': { new_sha256: result } } } }
+                ES.update(
+                    index=INDEX,
+                    doc_type=DOC_TYPE,
+                    id=opti_hit.get('_id'),
+                    body=json.dumps(msg))
+
+            # advance the scroll once per page, only after every hit on the
+            # current page is handled, so that no page of results is skipped
+            opti_sid = opti_page['_scroll_id']
+            opti_page = ES.scroll(scroll_id=opti_sid, scroll='2m')
+
+        # analytic has run against sample, set ssdeep.analyzed = true
+        msg = { 'doc': { 'ssdeep': { 'analyzed': 'true'} } }
+        ES.update(
+            index=INDEX,
+            doc_type=DOC_TYPE,
+            id=new_ssdeep_hit.get('_id'),
+            body=json.dumps(msg))
+
+if __name__ == '__main__':
+    main()
diff --git a/docs/modules.md b/docs/modules.md
@@ -52,9 +52,6 @@ This module uses a FireEye AX to scan the files. It uses the Malware Repository
 - **good path** - The folder name where good files are put
 - **cheatsheet** - Not implemented yet
 
-### [KasperskyScan] ###
-This module scans a file with Kaspersky anti-virus 15.
-
 ### [MD5] ###
 This module generates the MD5 hash of the files.
 

diff --git a/install.sh b/install.sh
@@ -54,22 +54,42 @@ read -p "Download TrID? <y/N> " prompt
 if [[ $prompt == "y" ]]; then
   mkdir -p /opt/trid
   cd /opt/trid
-  curl http://mark0.net/download/trid_linux_64.zip > trid.zip
+  curl -f --retry 3 http://mark0.net/download/trid_linux_64.zip > trid.zip
+  if [[ $? -ne 0 ]]; then
+    echo -e "\nFAILED\nTrying alternative mirror ..."
+    curl -f --retry 3 https://web.archive.org/web/20170711171339/http://mark0.net/download/trid_linux_64.zip > trid.zip
+  fi
   unzip trid.zip
   rm -f trid.zip
-  curl http://mark0.net/download/triddefs.zip > triddefs.zip
+  curl -f --retry 3 http://mark0.net/download/triddefs.zip > triddefs.zip
+  if [[ $? -ne 0 ]]; then
+    echo -e "\nFAILED\nTrying alternative mirror ..."
+    curl -f --retry 3 https://web.archive.org/web/20170827141200/http://mark0.net/download/triddefs.zip > triddefs.zip
+  fi
   unzip triddefs.zip
   rm -f triddefs.zip
   chmod 755 trid
   cd $CWD
 fi
 
+read -p "Download FLOSS? <y/N> " prompt
+if [[ $prompt == "y" ]]; then
+  curl -f --retry 3 https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/linux/dist/floss > /opt/floss
+  chmod 755 /opt/floss
+fi
+
 read -p "Download yararules.com signatures? <y/N> " prompt
 if [[ $prompt == "y" ]]; then
   git clone --depth 1 https://github.com/Yara-Rules/rules.git $DIR/etc/yarasigs/Yara-Rules
   echo You can update these signatures by running cd $DIR/etc/yarasigs/Yara-Rules \&\& git pull
 fi
 
+read -p "Download SupportIntelligence's Icewater yara signatures? <y/N> " prompt
+if [[ $prompt == "y" ]]; then
+  git clone --depth 1 https://github.com/SupportIntelligence/Icewater.git $DIR/etc/yarasigs/Icewater
+  echo You can update these signatures by running cd $DIR/etc/yarasigs/Icewater \&\& git pull
+fi
+
 read -p "Would you like to install MultiScanner as a system library? <y/N> " prompt
 if [[ $prompt == "y" ]]; then
   pip install -e $DIR