Merge pull request #2358 from BjarniRunar/develop

Created dirty-db-cleaner.py - prunes old history from dirty.db files
This commit is contained in:
John McLear 2014-12-23 15:50:59 +00:00
commit 1ab7dfdb7b
1 changed files with 45 additions and 0 deletions

45
bin/dirty-db-cleaner.py Executable file
View File

@ -0,0 +1,45 @@
#!/usr/bin/python -u
#
# Created by Bjarni R. Einarsson, placed in the public domain. Go wild!
#
import json
import os
import sys
def usage(program):
    """Return the help text printed when the script cannot proceed.

    program -- name shown in the "Usage:" line (normally sys.argv[0]).
    """
    return '\n'.join([
        '',
        'Usage: %s /path/to/dirty.db' % program,
        '',
        'Note: Will create a file named dirty.db.new in the same folder,',
        ' please make sure permissions are OK and a file by that',
        ' name does not exist already. This script works by omitting',
        ' duplicate lines from the dirty.db file, keeping only the',
        ' last (latest) instance. No revision data should be lost,',
        ' but be careful, make backups. If it breaks you get to keep',
        ' both pieces!',
        '',
    ])


def compact_lines(lines, progress_every=10000):
    """Keep only the last line seen for each record key.

    lines          -- iterable of byte strings, one JSON object per line,
                      each object carrying a 'key' member.
    progress_every -- write a '.' to stderr every this many lines
                      (0 or None disables the progress dots).

    Returns a (records, total) tuple: records maps each key to the raw
    line that last defined it (always newline-terminated), total is the
    number of lines read, blank ones included.

    Raises ValueError, naming the offending line number, when a
    non-blank line is not valid JSON or has no 'key' member.
    """
    latest = {}
    total = 0
    for raw in lines:
        total += 1
        if progress_every and total % progress_every == 0:
            sys.stderr.write('.')
        if not raw.strip():
            # Blank lines carry no record; json.loads would choke on them.
            continue
        try:
            key = json.loads(raw.decode('utf-8'))['key']
        except (ValueError, KeyError, TypeError) as err:
            raise ValueError('line %d is not a valid dirty.db record: %s'
                             % (total, err))
        # A final line without a newline must not be glued onto whatever
        # record happens to be written after it.
        if not raw.endswith(b'\n'):
            raw += b'\n'
        latest[key] = raw
    return latest, total


def main(argv):
    """Deduplicate the dirty.db file named in argv[1] into '<name>.new'.

    argv -- full argument vector, program name first (as in sys.argv).

    Returns the process exit status: 0 on success, 1 (after printing the
    usage text) when the path is missing, the input file does not exist,
    or the output file already exists.
    """
    program = argv[0] if argv else 'dirty-db-cleaner.py'
    if len(argv) < 2:
        print(usage(program))
        return 1

    dirtydb_input = argv[1]
    dirtydb_output = '%s.new' % dirtydb_input
    # Explicit checks instead of assert: asserts vanish under "python -O",
    # which would let an existing .new file be overwritten.
    if not os.path.exists(dirtydb_input) or os.path.exists(dirtydb_output):
        print(usage(program))
        return 1

    print('Reading %s' % dirtydb_input)
    # Binary mode keeps the records byte-for-byte identical regardless of
    # locale encoding or platform newline translation.
    with open(dirtydb_input, 'rb') as fd:
        dirtydb, lines = compact_lines(fd)
    print('')  # terminate the row of progress dots
    print('OK, found %d unique keys in %d lines' % (len(dirtydb), lines))

    with open(dirtydb_output, 'wb') as fd:
        fd.writelines(dirtydb.values())
    print('Wrote data to %s. All done!' % dirtydb_output)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))