Merge pull request #44 from awest1339/master
Latest stable version of feature-celery
Drewsif authored Sep 29, 2017
2 parents 14f0cc9 + 8f2d7cb commit 6ef81de
Showing 19 changed files with 838 additions and 228 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -2,6 +2,7 @@
config.ini
storage.ini
api_config.ini
web_config.ini

#Default report location
report.json
64 changes: 39 additions & 25 deletions libs/common.py
@@ -16,6 +16,7 @@
except:
SSH = False


def load_module(name, path):
"""
Loads a module by filename and path. Returns module object
@@ -31,14 +32,16 @@ def load_module(name, path):
print(e)
return loaded_mod


def list2cmdline(list):
"""
This is used to overwrite the default subprocess list2cmdline function.
The default subprocess list2cmdline function on Windows mangles quoted arguments; this one does not.
"""
return ' '.join(list)


def convert_encoding(data, encoding='UTF-8', errors='replace'):
"""
Converts dicts, lists, and strs to the encoding. It uses data.decode to do this.
@@ -65,6 +68,7 @@ def convert_encoding(data, encoding='UTF-8', errors='replace'):
else:
return data


def parse_config(config_object):
"""Take a config object and returns it as a dictionary"""
return_var = {}
@@ -78,24 +82,26 @@ def parse_config(config_object):
return_var[section] = section_dict
return return_var

def get_storage_config_path(config_file):
"""Gets the location of the storage config file from the multiscanner config file"""

def get_config_path(config_file, component):
"""Gets the location of the config file for the given multiscanner component
from the multiscanner config file
Components:
storage
api
web"""
conf = configparser.SafeConfigParser()
conf.read(config_file)
conf = parse_config(conf)
try:
return conf['main']['storage-config']
return conf['main']['%s-config' % component]
except KeyError:
print("ERROR: Couldn't find 'storage-config' value in 'main' section "\
"of config file. Have you run 'python multiscanner.py init'?")
print("ERROR: Couldn't find '%s-config' value in 'main' section "
"of config file. Have you run 'python multiscanner.py init'?"
% component)
sys.exit()

def get_api_config_path(config_file):
"""Gets the location of the API config file from the multiscanner config file"""
conf = configparser.SafeConfigParser()
conf.read(config_file)
conf = parse_config(conf)
return conf['main']['api-config']
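
A minimal usage sketch of the generalized helper above (assuming libs.common is importable and config.ini was produced by 'python multiscanner.py init'; the removed per-component functions map onto it directly):

    from libs.common import get_config_path

    storage_conf = get_config_path('config.ini', 'storage')  # replaces get_storage_config_path()
    api_conf = get_config_path('config.ini', 'api')          # replaces get_api_config_path()
    web_conf = get_config_path('config.ini', 'web')          # new 'web' component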

def dirname(path):
"""OS independent version of os.path.dirname"""
@@ -106,6 +112,7 @@ def dirname(path):
split = path.split('\\')
return '\\'.join(split[:-1])


def basename(path):
"""OS independent version of os.path.basename"""
if path.endswith('/') or path.endswith('\\'):
@@ -116,11 +123,12 @@ def basename(path):
else:
split = path.split('\\')
return split[-1]



def parseDir(directory, recursive=False):
"""
Returns a list of files in a directory.
dir - The directory to search
recursive - If true it will recursively find files.
"""
@@ -138,11 +146,12 @@ def parseDir(directory, recursive=False):
else:
filelist.append(item)
return filelist



def parseFileList(FileList, recursive=False):
"""
Takes a list of files and directories and returns a list of files.
FileList - A list of files and directories. Files in each directory will be returned
recursive - If true it will recursively find files in directories.
"""
@@ -159,20 +168,21 @@ def parseFileList(FileList, recursive=False):
pass
return filelist
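
A quick sketch of the helper above, which flattens a mix of files and directories into a single file list (paths are hypothetical):

    targets = parseFileList(['/tmp/samples', '/tmp/standalone.exe'], recursive=True)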


def chunk_file_list(filelist, cmdlength=7191):
"""
Splits the file list into chunks so the Windows command-line length limit is not exceeded. Returns a list of lists of strings.
filelist - The list to be chunked
cmdlength - Max length of all filenames appended to each other
"""
#This fixes if the cmd line would be far too long
#8191 is the windows limit
# This fixes if the cmd line would be far too long
# 8191 is the windows limit
filechunks = []
if len(list2cmdline(filelist)) >= cmdlength:
filechunks.append(filelist[:len(filelist)/2])
filechunks.append(filelist[len(filelist)/2:])
#Keeps splitting chunks until all are correct size
# Keeps splitting chunks until all are correct size
splitter = True
while splitter:
splitter = False
@@ -186,17 +196,19 @@ def chunk_file_list(filelist, cmdlength=7191):
filechunks = [filelist]
return filechunks
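
A hedged usage sketch of the chunking helper (the scanner executable and sample directory are made up; the default cmdlength of 7191 leaves headroom under the 8191-character Windows limit noted above):

    import subprocess

    files = parseDir('C:\\samples', recursive=True)      # hypothetical sample directory
    for chunk in chunk_file_list(files):
        subprocess.call(['scanner.exe'] + chunk)         # each chunk keeps the command line short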


def queue2list(queue):
"""Takes a queue a returns a list of the elements in the queue."""
list = []
while not queue.empty():
list.append(queue.get())
return list



def hashfile(fname, hasher, blocksize=65536):
"""
Hashes a file in chunks and returns the hash algorithm's digest.
fname - The file to be hashed
hasher - The hasher from hashlib. E.g. hashlib.md5()
blocksize - The size of each block to read in from the file
@@ -209,22 +221,24 @@ def hashfile(fname, hasher, blocksize=65536):
afile.close()
return hasher.hexdigest()
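
A short example using the hashing helper above, following the hasher hint in its docstring (the file path is a placeholder):

    import hashlib

    md5_digest = hashfile('/tmp/sample.bin', hashlib.md5())
    sha256_digest = hashfile('/tmp/sample.bin', hashlib.sha256())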


def sshconnect(hostname, port=22, username=None, password=None, pkey=None, key_filename=None, timeout=None, allow_agent=True, look_for_keys=True, compress=False, sock=None):
"""A wrapper for paramiko, returns a SSHClient after it connects."""
client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect(hostname, port=port, username=username, password=password, pkey=pkey, key_filename=key_filename, timeout=timeout, allow_agent=allow_agent, look_for_keys=look_for_keys, compress=compress, sock=sock)
return client



def sessionexec(client, cmd):
"""Creates a session object and executes a command. Returns the session object"""
session = client.get_transport().open_session()
session.exec_command(cmd)
return session



def sshexec(hostname, cmd, port=22, username=None, password=None, key_filename=None):
"""Connects and runs a command. Returns the contents of stdin."""
client = sshconnect(hostname, port=port, username=username, password=password, key_filename=key_filename)
stdin, stdout, stderr = client.exec_command(cmd)
return stdout.read()
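
A hedged example of the SSH helpers above (requires paramiko; the host and credentials are placeholders). sshexec returns the remote command's stdout as bytes:

    output = sshexec('192.0.2.10', 'uname -a', username='scanner', password='example')
    print(output.decode('utf-8'))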

2 changes: 1 addition & 1 deletion modules/Antivirus/Metadefender.py
@@ -118,7 +118,7 @@ def _parse_scan_result(response):
overall_results = response_json.get("scan_results", {})
scan_details = overall_results.get("scan_details", {})
engine_results = []
for engine_name, engine_output in scan_details.iteritems():
for engine_name, engine_output in scan_details.items():
scan_code = engine_output.get("scan_result_i", MD_UNKNOWN_SCAN_RES)
scan_result_string = MD_SCAN_RES_CODES[scan_code]
engine_result = {'engine_name': engine_name,
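
Replacing iteritems() with items() keeps this loop working on Python 3; a minimal illustration with made-up scan_details:

    scan_details = {'Engine A': {'scan_result_i': 0}, 'Engine B': {'scan_result_i': 1}}  # hypothetical output
    for engine_name, engine_output in scan_details.items():
        print(engine_name, engine_output.get('scan_result_i'))
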
12 changes: 8 additions & 4 deletions modules/Signature/YaraScan.py
@@ -76,7 +76,7 @@ def scan(filelist, conf=DEFAULTCONF):
finally:
f.close()
if hit:
hlist = []
hdict = {}
for h in hit:
if not set(h.tags).intersection(set(conf["ignore-tags"])):
hit_dict = {
@@ -85,9 +85,13 @@ def scan(filelist, conf=DEFAULTCONF):
'rule' : h.rule,
'tags' : h.tags,
}
hlist.append(hit_dict)
hlist = sorted(hlist, key=itemgetter('rule'))
matches.append((m, hlist))
try:
h_key = '{}:{}'.format(hit_dict['namespace'].split('/')[-1], hit_dict['rule'])
except IndexError:
h_key = '{}'.format(hit_dict['rule'])
hdict[h_key] = hit_dict
matches.append((m, hdict))


metadata = {}
rulelist = list(ruleset)
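
For clarity, a small sketch of the key scheme introduced above: hits are now stored in a dict keyed by '<last path component of the rule namespace>:<rule name>' instead of being appended to a sorted list (the namespace and rule names below are made up):

    hit_dict = {'namespace': 'rules/malware/ransomware.yar', 'rule': 'Locky'}   # hypothetical values
    h_key = '{}:{}'.format(hit_dict['namespace'].split('/')[-1], hit_dict['rule'])
    # h_key == 'ransomware.yar:Locky'
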
1 change: 1 addition & 0 deletions multiscanner.py
@@ -51,6 +51,7 @@
"group-types": ["Antivirus"],
"storage-config": os.path.join(MS_WD, 'storage.ini'),
"api-config": os.path.join(MS_WD, 'api_config.ini'),
"web-config": os.path.join(MS_WD, 'web_config.ini'),
}

VERBOSE = False
17 changes: 14 additions & 3 deletions storage/elasticsearch_storage.py
@@ -97,8 +97,21 @@ def setup(self):
name=CUCKOO_TEMPLATE_NAME,
body=json.dumps(template)
)
if not es_indices.exists(self.index):

# Try to create the index, pass if it exists
try:
es_indices.create(self.index)
except TransportError:
pass

# Set the total fields limit
try:
es_indices.put_settings(
index=self.index,
body={'index.mapping.total_fields.limit': ES_MAX},
)
except TransportError:
pass

# Create parent-child mappings if don't exist yet
mappings = es_indices.get_mapping(index=self.index)[self.index]['mappings'].keys()
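
A standalone sketch of the same idempotent-setup pattern used above (assumes the elasticsearch-py client; the index name and the field limit are placeholders, with 10000 standing in for whatever ES_MAX the module configures):

    from elasticsearch import Elasticsearch, TransportError

    es = Elasticsearch(['http://localhost:9200'])
    try:
        es.indices.create(index='multiscanner-reports')     # hypothetical index name
    except TransportError:
        pass                                                # index already exists
    es.indices.put_settings(
        index='multiscanner-reports',
        body={'index.mapping.total_fields.limit': 10000},   # stand-in for ES_MAX
    )
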
@@ -327,13 +340,11 @@ def search(self, query_string, search_type='default'):
'''Run a Query String query and return a list of sample_ids associated
with the matches. Run the query against all document types.
'''
print(search_type)
if search_type == 'advanced':
query = self.build_query(query_string)
else:
es_reserved_chars_re = '([\+\-=\>\<\!\(\)\{\}\[\]\^\"\~\*\?\:\\/ ])'
query_string = re.sub(es_reserved_chars_re, r'\\\g<1>', query_string)
print(query_string)
if search_type == 'default':
query = self.build_query("*" + query_string + "*")
elif search_type == 'exact':
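
For illustration, the reserved-character escaping above backslash-escapes every character that Elasticsearch's query-string syntax treats specially before the text is handed to build_query (the sample query below is made up):

    import re

    es_reserved_chars_re = '([\+\-=\>\<\!\(\)\{\}\[\]\^\"\~\*\?\:\\/ ])'
    print(re.sub(es_reserved_chars_re, r'\\\g<1>', 'apt-28 backdoor'))   # prints: apt\-28\ backdoor
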
3 changes: 2 additions & 1 deletion storage/sql_driver.py
@@ -153,12 +153,13 @@ def db_session_scope(self):
finally:
ses.close()

def add_task(self, task_id=None, task_status='Pending', sample_id=None):
def add_task(self, task_id=None, task_status='Pending', sample_id=None, timestamp=None):
with self.db_session_scope() as ses:
task = Task(
task_id=task_id,
task_status=task_status,
sample_id=sample_id,
timestamp=timestamp,
)
try:
ses.add(task)
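
A hedged usage sketch of the extended signature (db is a hypothetical instance of the SQL driver's database class, the values are made up, and it is assumed the Task.timestamp column accepts a datetime):

    import datetime

    db.add_task(task_id=42, task_status='Pending', sample_id='a1b2c3',
                timestamp=datetime.datetime.utcnow())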