Tuesday, June 27, 2017

Python - MD5 Every File on your System, Filtering by File Extension

I wanted to get an MD5 for all the files on my (clean) Windows 7 machine and compare them to a database of known malicious files to see if there were any matches. This script takes the directory to scan as its only argument and writes a file, base_md5.txt, containing one line per matching file in the format <hash>: <filename>.



import os, sys, hashlib, re

def md5(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is opened in binary mode and consumed in 4096-byte pieces so
    that large files are never held in memory all at once.  Any error from
    opening or reading the file propagates to the caller.
    """
    digest = hashlib.md5()
    with open(fname, "rb") as handle:
        while True:
            piece = handle.read(4096)
            if piece == b"":
                # An empty read means end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
    
    
    
# File types worth hashing; change the alternation to look for other types.
# Compiled once instead of per file, and matched case-insensitively so that
# upper-case extensions (e.g. "SETUP.EXE") are not skipped.
# NOTE(review): `doc.`, `xls.` and `ppt.` require a fourth character
# (docx, xlsm, ...), so legacy .doc/.xls/.ppt files are not matched --
# confirm whether that is intended.
_MD5_EXTENSIONS = re.compile(
    r'\.(exe|dll|vbs|bat|doc.|xls.|ppt.|pdf|com|sys|js|vbe)$', re.IGNORECASE)


def md5Path(path):
    """Hash every file under *path* whose extension matches _MD5_EXTENSIONS.

    For each matching file one line of the form ``<hash>: <filename>`` is
    written to the module-level file object ``f``, which the caller must
    have opened for writing before calling this function.

    Errors are reported on stdout rather than raised: a failure while
    hashing or writing a single file is reported as "Inner Exception" and
    the walk continues; a failure of the walk itself is reported as
    "Outer Exception" and ends the walk.

    path -- directory to walk recursively.
    """
    # Last path being handled; starts as the root so the outer handler
    # always has something to report, even before the first file is seen.
    current = path
    try:
        for root, dirs, files in os.walk(path):
            for eachFile in files:
                # os.path.join avoids a doubled separator when root already
                # ends with one (e.g. "C:\\") and works on any platform.
                current = os.path.join(root, eachFile)
                if not _MD5_EXTENSIONS.search(current):
                    continue
                try:
                    f.write(md5(current) + ": " + current + "\n")
                except Exception as e:
                    # Typically locked or unreadable files; keep going.
                    print("Inner Exception.  " + str(e) + " on " + current)
    except Exception as e:
        print("Outer Exception. " + str(e) + " on " + current)
            
# Validate the command line before touching the output file, so that a bad
# invocation does not truncate an existing base_md5.txt.
if len(sys.argv) < 2:
    sys.exit("Usage: " + sys.argv[0] + " <directory>")

# md5Path() writes through the module-level name `f`, so the handle must be
# bound to exactly that name.  The `with` block closes it even if the walk
# is interrupted.
with open('base_md5.txt', 'w') as f:
    md5Path(sys.argv[1])

No comments:

Post a Comment