Skip to content

Commit

Permalink
Merge pull request #50 from awest1339/celery
Browse files Browse the repository at this point in the history
Celery
  • Loading branch information
Drewsif authored Oct 16, 2017
2 parents 6ef81de + fbfc057 commit 8f8efcc
Show file tree
Hide file tree
Showing 30 changed files with 1,351 additions and 521 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ report.json
__pycache__/
*.py[cod]
*.swp
*.swo
# C extensions
*.so
*.dll
Expand Down Expand Up @@ -61,5 +62,6 @@ target/
# Sqlite DB
sqlite.db
task_db
testing.db
# Tmp Upload Dir
utils/tmp/
13 changes: 13 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Travis CI build definition: install the project, then run the test suite
# with pytest on each listed Python version.
sudo: required
language: python
# Interpreter versions the build is run against.
python:
- "2.7"
- "3.4"
- "3.5"
- "3.6"
install:
# `yes ""` feeds an empty line to every interactive prompt of install.sh,
# so the installer runs unattended.
- yes "" | sudo -HE ./install.sh
- pip install -r requirements.txt
- python multiscanner.py init
script:
- pytest
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
MultiScanner
============
[![Build Status](https://travis-ci.org/mitre/multiscanner.svg)](https://travis-ci.org/mitre/multiscanner)

Introduction
------------
Expand All @@ -15,8 +16,8 @@ options can be found in [docs/modules.md](docs/modules.md)

Requirements
------------
Python 2.7 is recommended. Compatibility with 2.7+ and
3.3+ is supported but not thoroughly maintained and tested. Please submit an issue
Python 3.6 is recommended. Compatibility with 2.7+ and
3.4+ is supported but not as thoroughly maintained and tested. Please submit an issue
or a pull request fixing any issues found with other versions of Python.


Expand Down
205 changes: 205 additions & 0 deletions analytics/ssdeep_compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#!/usr/bin/env python

'''
Simple implementation of ssdeep comparisons using a few optimizations
described at the links below
https://www.virusbulletin.com/virusbulletin/2015/11/optimizing-ssdeep-use-scale
http://www.intezer.com/intezer-community-tip-ssdeep-comparisons-with-elasticsearch/
Designed to be run on a regular basis (e.g., nightly).
For each sample that has not run ssdeep analytic, search for samples where
ssdeep.compare > 0 based on chunksize, chunk 7grams, and double-chunk 7grams.
Update sample with any matches and mark ssdeep analytic as having run
'''

import sys
import os
import argparse
import requests
import json
import ssdeep
import configparser

# Repository root: the parent of the directory that holds this script.
MS_WD = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Make the project importable when the script is run directly. The storage
# directory is handled first and the root second, so when both are missing
# the root ends up at the very front of sys.path.
for _import_dir in (os.path.join(MS_WD, 'storage'), MS_WD):
    if _import_dir not in sys.path:
        sys.path.insert(0, _import_dir)
del _import_dir

import multiscanner
import common
import elasticsearch_storage


# Elasticsearch document type used for every partial update below.
_DOC_TYPE = 'sample'
# Keep-alive window and page size used for every scroll request.
_SCROLL_TTL = '2m'
_PAGE_SIZE = 1000


def _find_es_handler(storage_handler):
    """Return the first loaded ElasticSearchStorage handler, or None."""
    for handler in storage_handler.loaded_storage:
        if isinstance(handler, elasticsearch_storage.ElasticSearchStorage):
            return handler
    return None


def _scroll_hits(es, index, query):
    """Yield every hit matching ``query``, following the scroll cursor.

    The next page is only requested once all hits of the current page have
    been consumed by the caller.
    """
    page = es.search(
        index=index,
        scroll=_SCROLL_TTL,
        size=_PAGE_SIZE,
        body=query)
    while page['hits']['hits']:
        for hit in page['hits']['hits']:
            yield hit
        page = es.scroll(scroll_id=page['_scroll_id'], scroll=_SCROLL_TTL)


def _build_candidate_query(chunksize, chunk, double_chunk, sha256):
    """Build the query selecting documents worth running ssdeep.compare on.

    A candidate must have a chunksize equal to, half of, or double the given
    one, must match on ``ssdeep.chunk`` or ``ssdeep.double_chunk``, and must
    not be the sample identified by ``sha256`` itself.
    https://github.com/intezer/ssdeep-elastic/blob/master/ssdeep_elastic/ssdeep_querying.py#L35
    """
    # Floor division: on Python 3 ``chunksize / 2`` is true division and
    # would put a float into the terms filter.
    chunksizes = [chunksize, chunksize // 2, chunksize * 2]

    # NOTE: a further clause was tried to avoid re-running the inverse of a
    # comparison that is already recorded, but it did not behave as expected:
    #   {'bool': {'must_not': {'exists': {'field': 'ssdeep.matches.' + sha256}}}}
    return {
        '_source': ['ssdeep', 'SHA256'],
        'query': {
            'bool': {
                'must': [
                    {'terms': {'ssdeep.chunksize': chunksizes}},
                    {
                        'bool': {
                            'should': [
                                {'match': {'ssdeep.chunk': {'query': chunk}}},
                                {'match': {'ssdeep.double_chunk': {'query': double_chunk}}},
                            ],
                            'minimum_should_match': 1
                        }
                    },
                    {'bool': {'must_not': {'match': {'SHA256': sha256}}}},
                ]
            }
        }
    }


def _update_ssdeep(es, index, doc_id, fields):
    """Partially update the ``ssdeep`` object of document ``doc_id``."""
    es.update(
        index=index,
        doc_type=_DOC_TYPE,
        id=doc_id,
        body=json.dumps({'doc': {'ssdeep': fields}}))


def main():
    """Run the ssdeep comparison analytic against the Elasticsearch datastore.

    For every sample whose ``ssdeep.analyzed`` is ``false``, candidate
    samples are looked up with the chunksize/chunk/double_chunk query, the
    ssdeep.compare score is written into ``ssdeep.matches`` of both
    documents, and the sample is finally marked ``ssdeep.analyzed = true``.

    Exits with status 1 if no Elasticsearch storage handler is loaded.
    """
    parser = argparse.ArgumentParser(description='Script to interact with Multiscanner\'s '
        'Elasticsearch datastore to run ssdeep.compare using a few '
        'optimizations based on ssdeep hash structure.')
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
        help='Increase output to stdout')
    args = parser.parse_args()

    storage_conf = multiscanner.common.get_config_path(multiscanner.CONFIG, 'storage')
    # SafeConfigParser no longer exists on Python 3.12+; fall back to
    # ConfigParser there and keep the original class wherever it exists.
    parser_cls = getattr(configparser, 'SafeConfigParser', configparser.ConfigParser)
    config_object = parser_cls()
    config_object.optionxform = str  # keep option names case-sensitive
    config_object.read(storage_conf)
    conf = common.parse_config(config_object)
    storage_handler = multiscanner.storage.StorageHandler(configfile=storage_conf)

    es_handler = _find_es_handler(storage_handler)
    if not es_handler:
        print('[!] ERROR: This analytic only works with ES storage module.')
        # Non-zero status so schedulers/wrappers can detect the failure.
        sys.exit(1)

    # probably not ideal...
    es = es_handler.es
    index = conf['ElasticSearchStorage']['index']

    # All samples on which the analytic has not run yet.
    pending_query = {
        '_source': ['ssdeep', 'SHA256'],
        'query': {
            'bool': {
                'must': [
                    {'match': {'ssdeep.analyzed': 'false'}}
                ]
            }
        }
    }

    # Collect the full work list before processing it: handling one sample
    # issues many further requests, so this scroll is drained first instead
    # of being held open across them.
    pending = list(_scroll_hits(es, index, pending_query))

    for new_hit in pending:
        new_src = new_hit.get('_source')
        new_ssdeep = new_src.get('ssdeep')
        new_sha256 = new_src.get('SHA256')

        candidate_query = _build_candidate_query(
            new_ssdeep.get('chunksize'),
            new_ssdeep.get('chunk'),
            new_ssdeep.get('double_chunk'),
            new_sha256)

        for candidate in _scroll_hits(es, index, candidate_query):
            candidate_src = candidate.get('_source')
            candidate_sha256 = candidate_src.get('SHA256')
            result = ssdeep.compare(
                new_ssdeep.get('ssdeep_hash'),
                candidate_src.get('ssdeep').get('ssdeep_hash'))

            if args.verbose:
                print(new_sha256, candidate_sha256, result)

            # Record the score on both documents so the match can be found
            # from either sample. The score is stored whatever its value.
            _update_ssdeep(es, index, new_hit.get('_id'),
                           {'matches': {candidate_sha256: result}})
            _update_ssdeep(es, index, candidate.get('_id'),
                           {'matches': {new_sha256: result}})

        # analytic has run against sample, set ssdeep.analyzed = true
        _update_ssdeep(es, index, new_hit.get('_id'), {'analyzed': 'true'})


if __name__ == '__main__':
    main()
3 changes: 0 additions & 3 deletions docs/modules.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@ This module uses a FireEye AX to scan the files. It uses the Malware Repository
- **good path** - The folder name where good files are put
- **cheatsheet** - Not implemented yet

### [KasperskyScan] ###
This module scans a file with Kaspersky anti-virus 15.

### [MD5] ###
This module generates the MD5 hash of the files.

Expand Down
24 changes: 22 additions & 2 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,42 @@ read -p "Download TrID? <y/N> " prompt
# Optional TrID install: runs only when the preceding prompt was answered "y".
# Downloads the TrID binary and its definitions into /opt/trid, falling back
# to a web.archive.org snapshot when the primary download fails.
if [[ $prompt == "y" ]]; then
mkdir -p /opt/trid
cd /opt/trid
# NOTE(review): this line and the next fetch the same archive; this one looks
# like the pre-change line of the diff view. Confirm only the -f/--retry form
# is meant to remain.
curl http://mark0.net/download/trid_linux_64.zip > trid.zip
# -f makes curl exit non-zero on HTTP errors so the $? check below can react.
curl -f --retry 3 http://mark0.net/download/trid_linux_64.zip > trid.zip
if [[ $? -ne 0 ]]; then
echo -e "\nFAILED\nTrying alternative mirror ..."
# The exit status of this fallback download is not checked.
curl -f --retry 3 https://web.archive.org/web/20170711171339/http://mark0.net/download/trid_linux_64.zip > trid.zip
fi
unzip trid.zip
rm -f trid.zip
# NOTE(review): same duplication as above for the definitions archive.
curl http://mark0.net/download/triddefs.zip > triddefs.zip
curl -f --retry 3 http://mark0.net/download/triddefs.zip > triddefs.zip
if [[ $? -ne 0 ]]; then
echo -e "\nFAILED\nTrying alternative mirror ..."
curl -f --retry 3 https://web.archive.org/web/20170827141200/http://mark0.net/download/triddefs.zip > triddefs.zip
fi
unzip triddefs.zip
rm -f triddefs.zip
chmod 755 trid
# CWD is set outside this hunk; presumably the directory the script started in.
cd $CWD
fi

# Optional FLOSS install: download the binary to /opt/floss and make it
# executable.
read -p "Download FLOSS? <y/N> " prompt
if [[ $prompt == "y" ]]; then
  # Download to a temporary file first. Redirecting curl straight into
  # /opt/floss would create (or truncate) the target even when the download
  # fails, leaving an empty file that then gets marked executable.
  floss_tmp=$(mktemp)
  if curl -f --retry 3 -o "$floss_tmp" https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/linux/dist/floss; then
    mv "$floss_tmp" /opt/floss
    chmod 755 /opt/floss
  else
    echo -e "\nFAILED to download FLOSS" >&2
    rm -f "$floss_tmp"
  fi
fi

# Optional download of the community Yara-Rules signatures into etc/yarasigs.
# DIR is set outside this hunk; presumably the MultiScanner checkout directory.
read -r -p "Download yararules.com signatures? <y/N> " prompt
if [[ $prompt == "y" ]]; then
  # Quote the path so a checkout directory containing spaces is passed to
  # git as a single argument.
  git clone --depth 1 https://github.com/Yara-Rules/rules.git "$DIR/etc/yarasigs/Yara-Rules"
  echo "You can update these signatures by running cd $DIR/etc/yarasigs/Yara-Rules && git pull"
fi

# Optional download of SupportIntelligence's Icewater signatures into
# etc/yarasigs.
# DIR is set outside this hunk; presumably the MultiScanner checkout directory.
read -r -p "Download SupportIntelligence's Icewater yara signatures? <y/N> " prompt
if [[ $prompt == "y" ]]; then
  # Quote the path so a checkout directory containing spaces is passed to
  # git as a single argument.
  git clone --depth 1 https://github.com/SupportIntelligence/Icewater.git "$DIR/etc/yarasigs/Icewater"
  echo "You can update these signatures by running cd $DIR/etc/yarasigs/Icewater && git pull"
fi

read -p "Would you like to install MultiScanner as a system library? <y/N> " prompt
if [[ $prompt == "y" ]]; then
pip install -e $DIR
Expand Down
Loading

0 comments on commit 8f8efcc

Please sign in to comment.