Source code for labblouin.pdbCompare
#!/bin/python
# pdbCompare.py
# -------------------------
# May 16, 2012; Alex Safatli
# -------------------------
# Functions to help in comparing
# two sets of PDB files.
#
# Makes use of a special sort of md5
# signature in order to uniquely identify
# a PDB file by acquiring ATOM lines with
# alpha-carbons (CA).
#
# Can be run from command line with two
# arguments.
import pfam
import glob
import os
import md5
import sys
[docs]def getChecksum(pdb_file):
'''
Given a PDB file, retrieve a
unique checksum.
'''
if not os.path.isfile(pdb_file):
return None
f_in = open(pdb_file, 'r')
checksum = md5.new()
for line in f_in:
if 'ATOM' in line and ' CA ' in line:
checksum.update(line.strip())
return checksum.digest()
[docs]def pdbCrosscheck(folder1, folder2):
'''
Given two folder paths, determine what PDBs
are unique amongst the two
'''
if (not os.path.isdir(folder1)) or \
(not os.path.isdir(folder2)):
return None
notunique = []
notunique_in_1 = []
notunique_in_2 = []
pdbList1 = glob.glob(os.path.join(folder1, '*.pdb'))
pdbList2 = glob.glob(os.path.join(folder2, '*.pdb'))
for pdb1 in pdbList1:
for pdb2 in pdbList2:
if getChecksum(pdb1) == getChecksum(pdb2):
notunique.append(pdb1)
notunique.append(pdb2)
for pdb in pdbList1:
if pdb in notunique:
notunique_in_1.append(pdb)
for pdb in pdbList2:
if pdb in notunique:
notunique_in_2.append(pdb)
return (notunique_in_1,notunique_in_2, folder1, folder2)
[docs]def outputResults(pdbcrosscheck_results, fpath=None):
compare = pdbcrosscheck_results
str_out = 'All non-unique PDBs in %s:\n%s\n' % (compare[2], \
pfam.list2txttable(compare[0], compare[2]))
str_out += 'All non-unique PDBs in %s:\n%s\n' % (compare[3], \
pfam.list2txttable(compare[1], compare[3]))
if fpath is not None:
fout = open(fpath, 'w')
fout.write(str_out)
fout.close()
return str_out
# ----------------- If run from command-line -------------------- #
if __name__ == "__main__":
if len(sys.argv) != 3:
print 'Usage: pdbCompare.py folder1 folder2\n'
exit()
folder1 = sys.argv[1]
folder2 = sys.argv[2]
compare = pdbCrosscheck(folder1, folder2)
if compare is None:
raise SystemError('Error encountered with crosscheck.')
exit()
for c in xrange(len(compare[0])):
compare[0][c] = os.path.splitext(os.path.basename(compare[0][c]))[0]
for c in xrange(len(compare[1])):
compare[1][c] = os.path.splitext(os.path.basename(compare[1][c]))[0]
print outputResults(compare)