-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMalwareBazaar_AMP_Transfer.py
300 lines (234 loc) · 10.6 KB
/
MalwareBazaar_AMP_Transfer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
from file_locations import all_simple_custom_detections_url, amp_main_file, logger, \
main_mb_df_file, main_simple_custom_detections_url, mb_url, sha_output_files
import gc
import os
import pandas as pd
import requests
from requests.auth import HTTPBasicAuth
import time
# Reference timestamp; the functions below subtract it from time.time() to log elapsed minutes.
start_time = time.time()

# Print dataframes in full (no row/column truncation, no line wrapping) when inspecting them.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('expand_frame_repr', False)

# MalwareBazaar API key (sent as the 'API-KEY' request header) -- fill in before running.
api_key = ''

# AMP API credentials (used for HTTP basic auth) -- fill in before running.
amp_api_username = ''
amp_api_pw = ''

# MalwareBazaar tags to query. Each tag also becomes the name of its '<tag>.txt' hash file,
# which is later matched against the AMP list of the same name.
# NOTE(review): 'IceID' may be a misspelling of the MalwareBazaar tag 'IcedID' -- confirm
# against both MalwareBazaar and the AMP list name before changing it.
tags = [
    'AgentTesla', 'AZORult',
    'Babuk', 'BatLoader', 'BlackBasta', 'BlackCat', 'BruteRatel', 'BumbleBee',
    'CobaltStrike',
    'Emotet',
    'FormBook',
    'Hive',
    'IceID',
    'Lockbit', 'Lokibot', 'Luna',
    'Magniber', 'Metasploit', 'Meterpreter', 'Mimikatz',
    'Nighthawk', 'NJRat',
    'Qbot',
    'Raccoon', 'RedLine', 'Remcos',
    'Sliver', 'SOCGholish',
    'Trickbot',
    'Ursnif',
    'Venus',
    'WannaCry',
]
def pull_from_malware_bazaar_api():
    """
    Query MalwareBazaar for every tag in ``tags`` and save the SHA-256 hashes.

    For each tag a ``get_taginfo`` query (limit 1000) is posted to ``mb_url``.
    The ``sha256_hash`` column of the result is written, one hash per line and
    without a header, to
    ``%USERPROFILE%/Desktop/Python_Manipulated_Files/MalwareBazaar/<tag>.txt``.

    A failure for one tag (request error, missing ``data`` element, write
    error) is logged and the loop moves on to the next tag.
    """
    headers = {'API-KEY': api_key}
    for tag in tags:
        payload = {'query': 'get_taginfo',
                   'tag': tag,
                   'limit': '1000'}
        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.post(mb_url, headers=headers, data=payload, timeout=60)
            response.raise_for_status()
            samples = response.json()['data']
        except Exception as e:
            logger.error(f'Failed to pull data from Malware Bazaar API for {tag}. Most likely no results found.')
            logger.error(f'Exception is {e} for {tag}')
            # Skip the tag so the request payload is never mistaken for result data.
            continue
        try:
            # Create dataframe after normalizing
            df = pd.json_normalize(samples)
            # Test for genuine emptiness so a tag with exactly one hash is still kept.
            if df.empty:
                logger.warning(f'No matches found for {tag}!')
                continue
            # Keep only SHA-256 hashes
            hashes = df['sha256_hash']
            # Build the output path on the fly and make sure its folder exists
            output_dir = os.path.join(os.environ['USERPROFILE'], 'Desktop',
                                      'Python_Manipulated_Files', 'MalwareBazaar')
            os.makedirs(output_dir, exist_ok=True)
            sha_output_file = os.path.join(output_dir, tag + '.txt')
            hashes.to_csv(sha_output_file, index=False, header=False)
            logger.info(f'Successfully sent text file to folder. Total count is {hashes.shape[0]} for {tag}.')
        except Exception as e:
            logger.error(f'Failed to send text file to folder for {tag}')
            logger.error(f'Exception is {e} for {tag}')
    # Calculate how long script has taken until this point.
    logger.info(f'Code took {((time.time() - start_time) / 60):.3f} minutes to execute so far.')
    logger.info(f'------------------------------------------------------------------------------------------')
def create_master_df_from_malware_bazaar():
    """
    Aggregate every per-tag hash file into one master file.

    Removes ``main_mb_df_file`` if it is present, reads each regular file in
    ``sha_output_files`` (except ``AA_Quarantine_&_Prevent_Install.txt``) as a
    header-less CSV, concatenates them and writes the result to
    ``main_mb_df_file`` without index or header. With no input files an empty
    dataframe is written.
    """
    # Delete main_df file if it exists
    try:
        os.remove(main_mb_df_file)
        logger.info(f'Successfully removed main_df_file!')
    except OSError as e:
        logger.warning(f'main_df file not found! Exception is {e}.')
    # Collect the frames and concatenate once: DataFrame.append was removed in
    # pandas 2.0, and growing a frame inside the loop copies it on every pass.
    frames = []
    total_rows = 0
    for filename in os.listdir(sha_output_files):
        f = os.path.join(sha_output_files, filename)
        if not os.path.isfile(f):
            continue
        # The master file should not exist yet; ignore it if it does.
        if filename == 'AA_Quarantine_&_Prevent_Install.txt':
            continue
        df = pd.read_csv(f, header=None)
        logger.info(f'{filename} is adding another {df.shape[0]} rows to main df.')
        frames.append(df)
        total_rows += df.shape[0]
        # Running row count of the aggregated data
        logger.info(f'{total_rows} after appending {filename}.')
    # pd.concat raises on an empty list, so fall back to an empty frame.
    main_df = pd.concat(frames) if frames else pd.DataFrame()
    # Send aggregated dataframe to directory
    main_df.to_csv(main_mb_df_file, index=False, header=False)
    logger.info(f'Successfully created AA_Quarantine_&_Prevent_Install in directory!')
    logger.info(f'Code took {((time.time() - start_time) / 60):.3f} minutes to execute so far.')
    logger.info(f'------------------------------------------------------------------')
def pull_main_hash_list():
    """
    Download every SHA-256 hash held in the main AMP Simple Custom Detection list.

    Starts at ``main_simple_custom_detections_url`` and follows the
    ``metadata.links.next`` URL of each response until no ``next`` link is
    present. The collected hashes are written to ``amp_main_file`` (one per
    line, no header) and returned.

    Returns:
        pandas.Series named ``sha256`` holding all hashes found (empty when
        the list holds none).

    Raises:
        requests.HTTPError: if AMP answers with an error status (for example
            401 when the credentials are missing or wrong).
    """
    auth = HTTPBasicAuth(username=amp_api_username, password=amp_api_pw)
    hashes = []
    next_url = main_simple_custom_detections_url
    # One loop handles the first page and every follow-up page alike.
    while next_url:
        response = requests.get(next_url, auth=auth, timeout=60)
        response.raise_for_status()
        # Parse the body once per page.
        page = response.json()
        hashes.extend(item['sha256'] for item in page['data']['items'])
        logger.info(f'The shape of the SHA-256 AMP df is {len(hashes)}')
        next_url = page['metadata']['links'].get('next')
    # Build the Series in one step (Series.append was removed in pandas 2.0);
    # the explicit dtype keeps an empty result well-formed.
    all_hashes_in_amp_df = pd.Series(hashes, name='sha256', dtype='object')
    # Send amp df to txt file
    all_hashes_in_amp_df.to_csv(amp_main_file, index=False, header=False)
    logger.info(f'Code took {((time.time() - start_time) / 60):.3f} minutes to execute so far.')
    logger.info(f'------------------------------------------------------------------')
    return all_hashes_in_amp_df
def compare_differences(all_hashes_in_amp_df):
    """
    Reduce each hash file in ``sha_output_files`` to the hashes AMP does not hold yet.

    Every regular file in the directory is read as a header-less, single-column
    CSV of SHA-256 hashes. Hashes already present in ``all_hashes_in_amp_df``
    are dropped; the file is overwritten with the remainder, or deleted when
    nothing remains.

    Args:
        all_hashes_in_amp_df: pandas Series of the SHA-256 hashes already in AMP.
    """
    # Hashes AMP already knows about, as a set for fast difference checks
    known_to_amp = set(all_hashes_in_amp_df.unique())
    for entry in os.listdir(sha_output_files):
        path = os.path.join(sha_output_files, entry)
        # Only regular files are hash lists
        if not os.path.isfile(path):
            continue
        frame = pd.read_csv(path, header=None)
        frame.columns = ['sha256']
        # Hashes listed by MalwareBazaar that AMP is missing
        new_hashes = set(frame['sha256']).difference(known_to_amp)
        if not new_hashes:
            # Nothing left to upload for this file, so drop it
            os.remove(path)
            continue
        pd.DataFrame(list(new_hashes)).to_csv(path, index=False, header=False)
        logger.info(f'The following hashes from {entry} will be added: {new_hashes}')
        logger.info(f'\n')
    elapsed_minutes = (time.time() - start_time) / 60
    logger.info(f'Code took {elapsed_minutes:.3f} minutes to execute so far.')
    logger.info(f'------------------------------------------------------------------')
def insert_amp_sha256():
    """
    Imports SHA_256 hashes into their respective buckets in AMP's Simple Custom Detection Lists.

    The lists are fetched from ``all_simple_custom_detections_url``. Each
    regular file in ``sha_output_files`` is matched, by its file name without
    ``.txt``, to the AMP list of the same name, and every hash in the file is
    POSTed to that list. Files with no matching list are skipped with a
    warning. A hash counts as added only when AMP answers with a success
    status.

    This will hit the production environment!

    Raises:
        requests.HTTPError: if fetching the lists returns an error status.
    """
    auth = HTTPBasicAuth(username=amp_api_username, password=amp_api_pw)
    response = requests.get(all_simple_custom_detections_url, auth=auth, timeout=60)
    response.raise_for_status()
    # Map list name -> GUID; setdefault keeps the first GUID when a name repeats.
    guid_by_name = {}
    for entry in response.json()['data']:
        guid_by_name.setdefault(entry['name'], entry['guid'])
    counter = 0
    # Iterate over MalwareBazaar directory
    for filename in os.listdir(sha_output_files):
        f = os.path.join(sha_output_files, filename)
        if not os.path.isfile(f):
            continue
        # Remove '.txt' so file names match in AMP and directory
        name_of_file = filename.replace('.txt', '')
        logger.info(name_of_file)
        guid = guid_by_name.get(name_of_file)
        if guid is None:
            # Skip rather than reuse the previous file's GUID, which would send
            # these hashes to the wrong list.
            logger.warning(f'No AMP list named {name_of_file} was found. Skipping {filename}.')
            continue
        logger.info(f'Guid is {guid}')
        df = pd.read_csv(f, header=None)
        df.columns = ['sha256']
        for sha256 in df['sha256']:
            # Add hash to Simple Custom Detections
            temporary_url = f'https://api.amp.cisco.com/v1/file_lists/{guid}/files/{sha256}'
            try:
                result = requests.post(temporary_url, auth=auth, timeout=60)
            except requests.RequestException as e:
                logger.error(f'Failed to add hash {sha256} to {guid}. Exception is {e}.')
                continue
            if result.ok:
                logger.info(f'Successfully updated {guid} with hash {sha256}')
                counter += 1
            else:
                logger.error(f'Failed to add hash {sha256} to {guid}. Status code is {result.status_code}.')
    logger.info(f'Total hashes added = {counter}')
    logger.info(f'Code took {((time.time() - start_time) / 60):.3f} minutes to execute in its entirety.')
    logger.info(f'------------------------------------------------------------------')
if __name__ == '__main__':
    # Stage 1: fetch the per-tag hash files from MalwareBazaar and aggregate them.
    pull_from_malware_bazaar_api()
    create_master_df_from_malware_bazaar()
    # Stage 2: fetch the hashes AMP already holds and prune the files against them.
    compare_differences(pull_main_hash_list())
    # Stage 3: upload the remaining hashes to their AMP lists.
    insert_amp_sha256()