# Source code for labblouin.expresso
#!/bin/python
''' An (incomplete) parsing library for output from the T-Coffee Expresso executable.
expresso Python Library / Summer 2013 / Alex Safatli
E-mail: safatli@cs.dal.ca
Dependencies: - '''
class expressoParser:
    ''' Parses Expresso output.

    Each non-blank line of the input file is split on whitespace. The first
    token (with any '>' characters stripped from its ends) is the sequence
    name and the third token, when present, is the PDB identifier assigned
    to that sequence, e.g. ``>seq1 _P_ 1abcA``.

    Attributes:
        filein: path of the file that was parsed.
        lines:  dictionary mapping sequence name -> PDB identifier string
                (an empty string if the line carried no identifier).
        data:   the raw lines of the file, as returned by readlines(). '''

    def __init__(self,fin):
        ''' Parse the Expresso output file found at path fin. '''
        self.filein = fin
        self.lines = {}
        self.data = None
        self.__read__()

    def __read__(self):
        ''' Read an Expresso output file; store the data inside this object. '''
        # A context manager guarantees the handle is closed even if
        # reading raises.
        with open(self.filein) as fh:
            self.data = fh.readlines()
        for li in self.data:
            tokens = li.split()
            if not tokens:
                continue # Blank line.
            name = tokens[0]
            # The identifier is the third token (after the separator token).
            # Lines with only a name, or a name and a separator, have no
            # identifier; tokens past the third are ignored. Unpacking into
            # exactly three names would raise ValueError on such lines.
            dat = tokens[2] if len(tokens) > 2 else ''
            self.lines[name.strip('>')] = dat

    def getPDBcodes(self):
        ''' Return PDB code and chain as a tuple for each line.

        The last character of every non-empty identifier is taken as the
        chain and everything before it as the PDB code. Sequences without
        an identifier are skipped. '''
        return [(x[:-1],x[-1]) for x in self.lines.values() if x]

    def writeNonEmpty(self,fout):
        ''' Writes non-empty PDB IDs to file.

        One line of the form ``>name _P_ id`` is written to the path fout
        for every sequence name returned by getNonEmptySequenceNames(). '''
        with open(fout,'w') as fh:
            for name in self.getNonEmptySequenceNames():
                fh.write('>%s _P_ %s\n' % (name,self.lines[name]))

    def getNonEmptySequenceNames(self):
        ''' Gets the names of sequences that have a non-empty PDB ID. '''
        # Filter on the stored identifier rather than on the name itself;
        # testing the name would let sequences without an ID through.
        return [x for x in self.lines if self.lines[x]]