Source code for workbench.clients.pe_indexer

"""This client pushes PE Files -> ELS Indexer."""
import zerorpc
import os
import pprint
import client_helper

[docs]def run(): """This client pushes PE Files -> ELS Indexer.""" # Grab server args args = client_helper.grab_server_args() # Start up workbench connection workbench = zerorpc.Client(timeout=300, heartbeat=60) workbench.connect('tcp://'+args['server']+':'+args['port']) # Test out PEFile -> strings -> indexer -> search data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../data/pe/bad') file_list = [os.path.join(data_path, child) for child in os.listdir(data_path)][:20] for filename in file_list: # Skip OS generated files if '.DS_Store' in filename: continue with open(filename, 'rb') as f: base_name = os.path.basename(filename) md5 = workbench.store_sample(f.read(), base_name, 'exe') # Index the strings and features output (notice we can ask for any worker output) # Also (super important) it all happens on the server side. workbench.index_worker_output('strings', md5, 'strings', None) print '\n<<< Strings for PE: %s Indexed>>>' % (base_name) workbench.index_worker_output('pe_features', md5, 'pe_features', None) print '<<< Features for PE: %s Indexed>>>' % (base_name) # Well we should execute some queries against ElasticSearch at this point but as of # version 1.2+ the dynamic scripting disabled by default, see # 'http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-scripting.html#_enabling_dynamic_scripting # Now actually do something interesing with our ELS index # ES Facets are kewl (http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-facets.html) facet_query = '{"facets" : {"tag" : {"terms" : {"field" : "string_list"}}}}' results = workbench.search_index('strings', facet_query) try: print '\nQuery: %s' % facet_query print 'Number of hits: %d' % results['hits']['total'] print 'Max Score: %f' % results['hits']['max_score'] pprint.pprint(results['facets']) except TypeError: print 'Probably using a Stub Indexer, if you want an ELS Indexer see the readme' # Fuzzy is kewl (http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html) fuzzy_query = '{"fields":["md5","sparse_features.imported_symbols"],' \ '"query": {"fuzzy" : {"sparse_features.imported_symbols" : "loadlibrary"}}}' results = workbench.search_index('pe_features', fuzzy_query) try: print '\nQuery: %s' % fuzzy_query print 'Number of hits: %d' % results['hits']['total'] print 'Max Score: %f' % results['hits']['max_score'] pprint.pprint([(hit['fields']['md5'], hit['fields']['sparse_features.imported_symbols']) for hit in results['hits']['hits']]) except TypeError: print 'Probably using a Stub Indexer, if you want an ELS Indexer see the readme'
[docs]def test(): """Executes pe_strings_indexer test.""" run()
if __name__ == '__main__': run()