# Source code for labblouin.mdsa

#!/bin/python

''' 
A library to manage interfacing with the MDSA database raw files (http://dna.cs.byu.edu/mdsas/index.shtml).

mdsa Python Library / Oct 8, 2014 / Alex Safatli

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.

E-mail: safatli@cs.dal.ca
Dependencies: IO, FASTAnet (LabBlouinTools)

'''

import os, FASTAnet, IO
from glob import glob as files

# Dataset type names. Not referenced by the code visible in this module;
# presumably the benchmark collections that make up MDSA -- TODO confirm.
mdsaTypes = [
    'smart',
    'balibase',
    'oxbench',
    'prefab',
]

class mdsaAlignment:

    ''' Model a single MDSA alignment. '''

    def __init__(self, finame):
        ''' Load the alignment stored in the FASTA file at path finame. '''
        self.path = finame
        self.name = IO.getFileName(finame)
        # uniqueOnly=False is handed straight to FASTAstructure; presumably
        # it keeps duplicate sequences -- TODO confirm against FASTAnet.
        self.fasta = FASTAnet.FASTAstructure(self.path, uniqueOnly=False)

    def getObject(self):
        ''' Return the underlying FASTAnet.FASTAstructure object. '''
        return self.fasta

    def getNames(self):
        ''' Return the sequence names as reported by the FASTA object. '''
        return self.fasta.getSequenceNames()

    def getNumSequences(self):
        ''' Return the number of sequences in the alignment. '''
        return len(self.getSequences())

    def getSequences(self):
        ''' Return the sequences as reported by the FASTA object. '''
        return self.fasta.getSequences()

    def getPath(self):
        ''' Return the path this alignment was loaded from. '''
        return self.path

    def writeFASTA(self, fi, names=None):
        ''' Write the alignment to the file at path fi and return fi.

        If names is None or empty, the whole alignment is written using
        the FASTA object's own writeFile(). Otherwise only the named
        sequences are written, in the order given. '''
        if not names:
            self.fasta.writeFile(fi)
            return fi
        # Build the content before opening the target so that a failing
        # name lookup does not leave behind a truncated file.
        content = self.getFASTAfor(names)
        with open(fi, 'w') as fh:
            fh.write(content)
        return fi

    def getFASTAfor(self, names):
        ''' Return FASTA-formatted text for the sequences called names.

        Every record is a '>name' header line followed by the sequence
        wrapped at 70 characters per line; every line ends in a newline.
        An empty names iterable gives an empty string. '''
        width = 70
        records = []
        for name in names:
            seq = str(self.fasta.getSequenceByName(name))
            # range() rather than xrange() so this runs on Python 2 and 3.
            wrapped = ''.join(
                seq[i:i + width] + '\n' for i in range(0, len(seq), width))
            records.append('>%s\n%s' % (name, wrapped))
        return ''.join(records)

    def getSequenceLength(self):
        ''' Alias for getAlignmentLength(). '''
        return self.getAlignmentLength()

    def getAlignmentLength(self):
        ''' Return the length of the first sequence held by the FASTA object.

        Raises IndexError if the FASTA object holds no sequences. '''
        sequences = self.fasta.sequences
        # list() is needed on Python 3, where keys() is a non-indexable view.
        first = list(sequences.keys())[0]
        return len(sequences[first])
class mdsaDatabase:

    ''' Model a directory of MDSA alignment files. '''

    def __init__(self, dbpath, traverse=True):
        ''' Remember dbpath and, unless traverse is false, load its files. '''
        self.path = dbpath
        self.files = {}
        if traverse:
            self.traverse()

    def __iter__(self):
        ''' Yield every loaded mdsaAlignment, in dictionary order. '''
        for key in self.files:
            yield self.files[key]

    def traverse(self):
        ''' Wrap each entry directly under the database path in an
        mdsaAlignment, keyed by its path. Does nothing if any files
        have already been loaded. '''
        if self.files:
            return
        pattern = os.path.join(self.path, '*')
        for found in files(pattern):
            self.files[found] = mdsaAlignment(found)

    def getPath(self):
        ''' Return the database directory path. '''
        return self.path

    def getFiles(self):
        ''' Return the dictionary mapping file path to mdsaAlignment. '''
        return self.files