# Source code for labblouin.mdsa
#!/bin/python
'''
A library to manage interfacing with the MDSA database raw files (http://dna.cs.byu.edu/mdsas/index.shtml).
mdsa Python Library / Oct 8, 2014 / Alex Safatli
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
E-mail: safatli@cs.dal.ca
Dependencies: IO, FASTAnet (LabBlouinTools)
'''
import os, FASTAnet, IO
from glob import glob as files
# Names of MDSA alignment collections. Presumably these are the benchmark sets
# offered by the MDSA database described in the module docstring -- TODO confirm.
mdsaTypes = ['smart','balibase','oxbench','prefab']
class mdsaAlignment:
    ''' Model a single MDSA alignment.

    Wraps the file at a given path in a FASTAnet.FASTAstructure object
    (kept in self.fasta) and exposes convenience accessors over it.
    '''

    def __init__(self,finame):
        ''' Load the alignment found at the path finame.

        The path is stored in self.path, the result of IO.getFileName(finame)
        in self.name, and the parsed FASTAnet.FASTAstructure (constructed
        with uniqueOnly=False) in self.fasta.
        '''
        self.path = finame
        self.name = IO.getFileName(finame)
        self.fasta = FASTAnet.FASTAstructure(self.path,uniqueOnly=False)

    def getObject(self):
        ''' Return the underlying FASTAnet.FASTAstructure object. '''
        return self.fasta

    def getNames(self):
        ''' Return the sequence names as reported by the FASTA object. '''
        return self.fasta.getSequenceNames()

    def getNumSequences(self):
        ''' Return the number of sequences in the alignment. '''
        return len(self.getSequences())

    def getSequences(self):
        ''' Return the sequences as reported by the FASTA object. '''
        return self.fasta.getSequences()

    def getPath(self):
        ''' Return the path this alignment was loaded from. '''
        return self.path

    def writeFASTA(self,fi,names=None):
        ''' Write the alignment to the path fi and return fi.

        If names is None or empty, the whole alignment is written through
        the FASTA object's own writeFile method. Otherwise only the named
        sequences are written, formatted by getFASTAfor.
        '''
        if not names:
            self.fasta.writeFile(fi)
            return fi
        # Build the text first so a bad name fails before the file is
        # created/truncated; 'with' closes the handle even if write raises.
        content = self.getFASTAfor(names)
        with open(fi,'w') as fh:
            fh.write(content)
        return fi

    def getFASTAfor(self,names):
        ''' Return FASTA-formatted text for the sequences named in names.

        Each record is a '>name' header line followed by the sequence
        wrapped at 70 characters per line, every line newline-terminated.
        An empty sequence yields only its header line. Records appear in
        the order given by names.
        '''
        width = 70
        records = []
        for name in names:
            seq = str(self.fasta.getSequenceByName(name))
            wrapped = ''.join(
                seq[i:i+width] + '\n' for i in range(0,len(seq),width))
            records.append('>%s\n%s' % (name,wrapped))
        return ''.join(records)

    def getSequenceLength(self):
        ''' Alias for getAlignmentLength. '''
        return self.getAlignmentLength()

    def getAlignmentLength(self):
        ''' Return the length of the first stored sequence.

        Returns 0 when the alignment holds no sequences. The length of the
        first entry is taken to stand for the whole alignment; this assumes
        all sequences are aligned to equal length -- not checked here.
        '''
        sequences = self.fasta.sequences
        # list(...) rather than keys()[0]: dict views are not indexable
        # on Python 3.
        keys = list(sequences.keys())
        if not keys:
            return 0
        return len(sequences[keys[0]])
class mdsaDatabase:
    ''' Model a directory of MDSA alignment files.

    Each entry found directly under the database path is loaded as an
    mdsaAlignment and stored in self.files, keyed by its path. Iterating
    over the database yields the mdsaAlignment objects.
    '''

    def __init__(self,dbpath,traverse=True):
        ''' Set up the database rooted at dbpath.

        If traverse is true (the default), the directory is scanned
        immediately; otherwise call traverse() later to load it.
        '''
        self.path = dbpath
        self.files = {}
        if traverse:
            self.traverse()

    def __iter__(self):
        ''' Yield every loaded alignment object. '''
        for alignment in self.files.values():
            yield alignment

    def traverse(self):
        ''' Load every entry directly under the database path.

        Does nothing if alignments have already been loaded. Entries are
        visited in sorted path order because glob returns matches in
        arbitrary order. Every match of '<path>/*' is handed to
        mdsaAlignment.
        NOTE(review): subdirectories are not filtered out -- confirm the
        FASTA loader copes with them or that the database is flat.
        '''
        if self.files:
            return
        for fi in sorted(files(os.path.join(self.path,'*'))):
            self.files[fi] = mdsaAlignment(fi)

    def getPath(self):
        ''' Return the database directory path. '''
        return self.path

    def getFiles(self):
        ''' Return the dictionary mapping file path to alignment object. '''
        return self.files