清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
# Bulk-load the files found in TEMP_PATH into a local LMDB database:
# one record per file, keyed by file name, with the file contents as value.
import argparse
import os

import gevent
import gevent.queue
import lmdb

# Directory whose files are imported (alternative used before: "/dev/shm/").
TEMP_PATH = "/dev/shm/test"

# Database environment stored under ./lm_db/ with a 1 GiB map size
# (1048576 * 1024 bytes).
env = lmdb.Environment(
    './lm_db/',
    readonly=False,
    map_size=1048576 * 1024,
    metasync=False,
    sync=True,
    map_async=True,
)

files = os.listdir(TEMP_PATH)
queue = gevent.queue.Queue()
# Explicit loop instead of map(): map() is lazy on Python 3, so using it
# purely for its side effect would leave the queue empty.
for _name in files:
    queue.put(_name)


def _as_bytes(name):
    """Return *name* as a byte string, encoding text names as UTF-8.

    Byte strings (including Python 2 ``str``) are returned unchanged.
    """
    if isinstance(name, bytes):
        return name
    return name.encode("utf-8")


def lmdbstore(db, queue=queue, limit=3000):
    """Store up to *limit* queued files in *db* inside one write transaction.

    Each item taken from *queue* is treated as a file name relative to
    TEMP_PATH; the record key is that name and the value is the raw file
    contents.

    Args:
        db: LMDB environment to write into.
        queue: queue of file names; defaults to the module-level queue
            that is filled from TEMP_PATH at import time.
        limit: maximum number of items to take from the queue.

    The queue is read without blocking: when it runs dry before *limit*
    items were taken, the loop ends and everything stored so far is
    committed, rather than waiting forever for more items.
    """
    with db.begin(write=True) as lmdb_txn:
        try:
            for i in range(limit):
                try:
                    item = queue.get_nowait()
                except gevent.queue.Empty:
                    # Queue drained early; keep what was written so far.
                    break
                # Binary mode keeps the stored bytes identical to the file;
                # the context manager closes the handle promptly.
                with open(os.path.join(TEMP_PATH, item), "rb") as handle:
                    value = handle.read()
                lmdb_txn.put(_as_bytes(item), value)
                print(i, item)
        except StopIteration as exc:
            print(str(exc))


def delete(db):
    """Delete every key yielded by a full cursor scan of *db*."""
    with db.begin(write=True) as txn:
        cursor = txn.cursor()
        # Snapshot the scan results first so that removing records cannot
        # disturb the cursor position while it is still being iterated.
        keys = list(cursor.iternext_nodup())
        for key in keys:
            txn.delete(key)


def query(db):
    """Print an index and the item for each entry of a cursor scan of *db*."""
    # Nothing is modified here, so a read-only transaction is sufficient.
    with db.begin(write=False) as txn:
        cursor = txn.cursor()
        for idx, data in enumerate(cursor.iternext_nodup()):
            print(idx, data)


if __name__ == '__main__':
    # query(env)
    lmdbstore(env, queue)
    env.sync(True)