# Source code for labblouin.expresso

#!/bin/python

''' An (incomplete) parsing library for output from the T-Coffee Expresso executable.
expresso Python Library / Summer 2013 / Alex Safatli

E-mail: safatli@cs.dal.ca
Dependencies: - '''

class expressoParser:

    ''' Parses Expresso output.

    Each non-blank line of the input is expected to look like
    ">name _P_ code" (the same layout writeNonEmpty() emits), or just
    ">name" when the line carries no code.

    Attributes:
        filein: Path of the file that was parsed.
        lines:  Dictionary mapping each name (surrounding '>' removed) to
                its code string, or to '' when the line carried no code.
        data:   Raw lines of the file, as returned by readlines(). '''

    def __init__(self,fin):

        ''' Store the input path and parse the file right away.

        fin is the path of the file to read; any error raised by open()
        propagates to the caller. '''

        self.filein = fin
        self.lines = {}
        self.data = None
        self.__read__()

    def __read__(self):

        ''' Read an Expresso output file; store the data inside this object.

        Fills self.data with the raw lines and self.lines with one
        name -> code entry per non-blank line. A later line with the same
        name overwrites the earlier entry. '''

        # The context manager closes the handle even if reading fails.
        with open(self.filein) as fh:
            self.data = fh.readlines()
        for li in self.data:
            splitted = li.split()
            if not splitted:
                continue # Blank line; nothing to record.
            name = splitted[0]
            # Index the third token instead of unpacking into exactly three
            # names: unpacking raised ValueError for lines with two tokens
            # or with more than three. Tokens past the third are ignored.
            dat = splitted[2] if len(splitted) > 2 else ''
            self.lines[name.strip('>')] = dat.strip()

    def getPDBcodes(self):

        ''' Return PDB code and chain as a tuple for each line.

        Entries with an empty code are skipped. The chain is the last
        character of the stored string; the PDB code is everything
        before it. '''

        return [(x[:-1],x[-1]) for x in self.lines.values() if x]

    def writeNonEmpty(self,fout):

        ''' Writes non-empty PDB IDs to file.

        fout is the path to write (overwritten if it exists). One line of
        the form ">name _P_ code" is written for every name returned by
        getNonEmptySequenceNames(). '''

        # The context manager closes the handle even if a write fails.
        with open(fout,'w') as fh:
            for name in self.getNonEmptySequenceNames():
                fh.write('>%s _P_ %s\n' % (name,self.lines[name]))

    def getNonEmptySequenceNames(self):

        ''' Gets the names whose PDB code is non-empty.

        The filter has to look at the stored code, not only at the name:
        filtering on the name alone let every entry through, including
        those without a code. Empty names are still excluded. '''

        return [name for name, dat in self.lines.items() if name and dat]
# Navigation
#
# Source code for labblouin.expresso
#
# Quick search
#
# Navigation