Source code for labblouin.homstrad
#!/bin/python
'''
A library to manage interfacing with the Homstrad database raw files.
homstrad Python Library / May 22, 2013 / Alex Safatli
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
E-mail: safatli@cs.dal.ca
Dependencies: IO (LabBlouinTools)
'''
import glob, os, IO
import re as R
[docs]def hasNoFolders(path):
for fi in glob.glob(os.path.join(path,'*')):
if os.path.isdir(fi): return False
return True
[docs]class homstradFolder:
''' Model a single Homstrad folder or set. '''
def __init__(self,foldrname):
self.path = foldrname
self.name = IO.getFileName(foldrname)
self.files = glob.glob(os.path.join(foldrname,'*'))
self._pdb = ''
self.molecules = []
self.sequences = {}
self.__manifest__()
def __manifest__(self):
# Find PIR file.
pirfile = None
pdbfile = None
for fi in self.files:
if not pirfile and fi.endswith('.pir'): pirfile = fi
if not pdbfile and fi.endswith('.pdb'): pdbfile = fi
if not pirfile:
raise IOError('%s did not possess PIR file.' % (self.path))
if not pdbfile:
raise IOError('%s did not possess PDB file.' % (self.path))
self._pdb = pdbfile
# Parse PIR file for sequences.
pirhandle = open(pirfile)
struct, seq = None, ''
for line in pirhandle:
if line.startswith('>'):
if struct:
self.molecules.append(struct)
self.sequences[struct] = seq
struct = None
seq = ''
struct = line.strip('>').split(';')[-1].strip('\n')
elif line.startswith('structure'): pass
elif line == '\n': pass
else: seq += line.strip().strip('*')
if struct:
self.sequences[struct] = seq
self.molecules.append(struct)
pirhandle.close()
# Parse PDB file for name sequence. Ensure consistency.
if len(self.sequences) > 2:
tmp = self.molecules
self.molecules = []
pdbhandle = open(pdbfile)
s = ''
line = pdbhandle.readline()
while line.startswith('REMARK'):
s += line.strip('\n')
line = pdbhandle.readline()
mols = R.findall('\S*\s*chain',s)
for mol in mols: self.molecules.append(mol.split()[0])
pdbhandle.close()
if len(self.molecules) != len(self.sequences):
if len(self.molecules) == 0: self.molecules = tmp
else:
raise IOError('PDB file did not match PIR structure series for %s.' % (
self.path))
[docs] def getNames(self): return self.molecules
[docs] def getNumSequences(self): return len(self.getSequences())
[docs] def getSequences(self): return self.sequences
[docs] def getFiles(self): return self.files
[docs] def getPath(self): return self.path
[docs] def getFASTA(self): return self.getFASTAfor(self.molecules)
[docs] def writeFASTA(self,fi,names=None):
if not names: names = self.sequences
fh = open(fi,'w')
fh.write(self.getFASTAfor(names))
fh.close()
return fi
[docs] def getFASTAfor(self,names):
sequ = ''
out = ''
for name in names:
sequ = ''
seq = self.sequences[name]
for i in xrange(0,len(seq),70): sequ += seq[i:i+70] + '\n'
out += '>%s\n%s' % (name,sequ)
return out
[docs] def getAlignedPDB(self): return self._pdb
[docs] def getPDBs(self):
for mol in self.molecules:
yield os.path.join(self.path,'%s.atm' % (mol))
[docs] def getPDBfor(self,name):
if name in self.molecules:
return os.path.join(self.path,'%s.atm' % (name))
else: return None
[docs] def getSequenceLength(self): return self.getAlignmentLength()
[docs] def getAlignmentLength(self):
return len(self.sequences[self.sequences.keys()[0]])
[docs]class homstradDatabase:
def __init__(self,dbpath,traverse=True):
self.path = dbpath
self.folders = {}
self.failed = []
self.succeeded = []
if (traverse): self.traverse()
def __iter__(self):
for it in self.folders:
yield self.folders[it]
[docs] def traverse(self):
if len(self.folders) == 0:
folders = glob.glob(os.path.join(self.path,'*'))
for folder in folders:
if os.path.isdir(folder) and hasNoFolders(folder):
try:
f = homstradFolder(folder)
self.folders[folder] = f
self.succeeded.append(folder)
except IOError: self.failed.append(folder)
[docs] def getPath(self): return self.path
[docs] def getFolders(self): return self.folders
[docs] def getFailedCount(self): return len(self.failed)
[docs] def getSucceededCount(self): return len(self.succeeded)