# -*- coding: utf-8 -*-
"""Build a lookup table of MD5 digests for consecutive integers."""
import hashlib

try:
    _lazy_range = xrange  # Python 2: avoid materialising a huge list
except NameError:
    _lazy_range = range  # Python 3: range is already lazy


def generate_md5_table(path="gen_md5.txt", total=1000000000,
                       batch_size=10000000):
    """Append one row per integer in ``[0, total)`` to the file at *path*.

    Each row is the ``repr`` of the tuple ``(md5_hex_digest, n)`` followed by
    a space and a newline, e.g. ``('cfcd208495d565ef66e7dff9f98764da', 0) ``.
    The digest is taken over the decimal string form of ``n``.

    Args:
        path: Output file; opened in append mode, so existing rows are kept.
        total: Number of integers to hash, starting from 0.
        batch_size: How many formatted rows to collect before writing them
            out in one call.
    """
    pending = []
    with open(path, "a") as out:
        for number in _lazy_range(total):
            # hashlib needs bytes on Python 3; digits are plain ASCII.
            digest = hashlib.md5(str(number).encode("ascii")).hexdigest()
            pending.append("{0} {1}".format((digest, number), "\n"))
            if len(pending) >= batch_size:
                out.writelines(pending)
                pending = []
        # Write the tail so totals that are not a multiple of batch_size
        # do not lose their last rows.
        if pending:
            out.writelines(pending)


if __name__ == "__main__":
    generate_md5_table()
使用
1
# Print every row of gen_md5.txt whose text contains the digest prefix 54d7ed.
grep \(\'54d7ed gen_md5.txt
只需匹配哈希值的前几位即可；grep 还支持正则表达式，加上 -E 选项（扩展正则）会很方便。
或者使用 Python 进行匹配：
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# -*- coding: utf-8 -*-
class Load_Corpus_with_Iteration(object):
    """Iterate lazily over a text file, yielding each line split on whitespace."""

    def __init__(self, path):
        """Remember *path*; the file is only opened when iteration starts."""
        self.path = path

    def __iter__(self):
        # Read through an iterator so the whole file is never held in memory,
        # and use a context manager so the handle is closed afterwards.
        with open(self.path) as handle:
            for line in handle:
                yield line.split()


def find_rows_with_prefix(path, prefix):
    """Yield each split row of *path* whose first field starts with *prefix*.

    Blank lines split to an empty list and are skipped rather than indexed.
    """
    for item in Load_Corpus_with_Iteration(path):
        if item and item[0].startswith(prefix):
            yield item


if __name__ == "__main__":
    for item in find_rows_with_prefix('gen_md5.txt', "('3322cf"):
        print(item)