Source code for workbench.clients.upload_dir
"""This client pushes a big directory of different files into Workbench."""
import zerorpc
import os
import client_helper
import hashlib
import pprint
def all_files_in_directory(path):
    """Recursively list all files under a directory.

    Args:
        path: Root directory to walk with ``os.walk``.

    Returns:
        A list with one entry per file found at any depth below ``path``.
        Each entry is the containing directory joined with the filename, so
        entries are relative or absolute exactly as ``path`` is. Directories
        themselves are not listed; the list is empty when no files are found.
    """
    file_list = []
    # The middle item of each os.walk() tuple (sub-directory names) is not
    # needed: os.walk() descends into those directories on its own.
    for dirname, _dirnames, filenames in os.walk(path):
        file_list.extend(os.path.join(dirname, filename) for filename in filenames)
    return file_list
def run():
    """Push every file under the client's ``../data`` directory into Workbench.

    Steps performed, in order:
      1. Connect to the server/port given by
         ``client_helper.grab_server_args()``.
      2. For each file under ``../data`` (relative to this module), skipping
         any path containing ``.DS_Store``, compute its md5 and store the
         file with type_tag 'unknown' unless Workbench reports it already
         has that md5.
      3. Submit 'unzip', 'pcap_bro' and 'mem_procdump' work requests over the
         'zip', 'pcap' and 'mem' sample sets respectively.
      4. Submit a 'meta' work request over all samples, print every result
         whose type_tag is 'unknown' or 'own', then pretty-print the set of
         type tags seen.
    """
    # Grab server args
    args = client_helper.grab_server_args()

    # Start up workbench connection
    workbench = zerorpc.Client(timeout=300, heartbeat=60)
    workbench.connect('tcp://'+args['server']+':'+args['port'])

    # Grab all the filenames from the data directory
    data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data')
    file_list = all_files_in_directory(data_dir)

    # Upload the files into workbench
    for path in file_list:

        # Skip OS generated files
        if '.DS_Store' in path:
            continue

        # Read the whole file up front so the handle is closed before any
        # (potentially slow) network round-trips happen.
        with open(path,'rb') as f:
            raw_bytes = f.read()
        filename = os.path.basename(path)

        # Here we're going to save network traffic by asking
        # Workbench if it already has this md5
        md5 = hashlib.md5(raw_bytes).hexdigest()
        if workbench.has_sample(md5):
            print('Workbench already has this sample %s' % md5)
        else:
            # Store the sample into workbench
            md5 = workbench.store_sample(raw_bytes, filename, 'unknown')
            print('Filename %s uploaded: type_tag %s, md5 %s' % (filename, 'unknown', md5))

    # Okay now explode any container types: (sample-set type_tag, worker name)
    container_workers = (
        ('zip', 'unzip'),
        ('pcap', 'pcap_bro'),
        ('mem', 'mem_procdump'),
    )
    for type_tag, worker in container_workers:
        sample_set = workbench.generate_sample_set(type_tag)
        # list() iterates the returned results to completion; presumably the
        # work is only carried out as results are consumed -- See Issue #306
        list(workbench.set_work_request(worker, sample_set))

    # Make sure all files are properly identified
    print('Info: Ensuring File Identifications...')
    type_tag_set = set()
    all_files = workbench.generate_sample_set()
    meta_all = workbench.set_work_request('meta', all_files)
    for meta in meta_all:
        type_tag_set.add(meta['type_tag'])
        # Surface anything that did not get a concrete type identification
        if meta['type_tag'] in ['unknown', 'own']:
            print(meta)
    pprint.pprint(type_tag_set)
def test():
    """Execute the upload_dir client test by calling run()."""
    run()
# Script entry point: run the client only when executed directly, not on import.
if __name__ == '__main__':
    run()