Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/raw-rev/757f2a1f8732
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 07:19:48 2012
Кодировка:

# HG changeset patch
# User Boris Burkov
# Date 1274459440 -14400
# Node ID 757f2a1f87324f41925783f061eac36b679f9a48

Initial commit with the lib modules

diff -r 000000000000 -r 757f2a1f8732 lib/block.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/block.py Fri May 21 20:30:40 2010 +0400
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+
+import sys
+
+import configure
+import project
+import sequence
+import monomer
+import rooted_tree
+from Lgamma import Lgamma
+
+class Block(object):
+    """A selection of sequences and alignment positions of a project.
+
+    Mandatory data:
+    * self.project -- project object, which the block belongs to
+    * self.sequences -- list of sequence objects, that contain monomers
+      and/or gaps, that constitute the block
+    * self.positions -- positions of the project.alignment that are
+      included in the block
+
+    How to create a new block:
+    >>> import project
+    >>> import block
+    >>> proj = project.Project(open("test.fasta"))
+    >>> block1 = block.Block(proj, proj.sequences, range(len(proj.alignment[proj.sequences[0]])))
+    """
+
+    def __init__(self, project, sequences, positions):
+        self.project = project
+        self.sequences = sequences
+        self.positions = positions
+
+    def to_fasta(self, file):
+        """Write the block as an alignment in FASTA format into `file`.
+
+        Names, descriptions and order of the sequences are kept as is.
+        A sequence whose description is None gets a name-only header
+        rather than the literal text "None".  Gaps (None entries of the
+        alignment) are written as "-"; lines hold at most 60 symbols.
+        """
+        for sequence in self.sequences:
+            if sequence.description is None:
+                header = ">%s\n" % (sequence.name,)
+            else:
+                header = ">%s %s\n" % (sequence.name, sequence.description)
+            file.write(header)
+            symbols = []
+            for column, position in enumerate(self.positions):
+                if column and column % 60 == 0:
+                    symbols.append("\n")  # start a new line of 60
+                cell = self.project.alignment[sequence][position]
+                symbols.append("-" if cell is None else cell.code)
+            file.write("".join(symbols) + "\n")
+
+
+
diff -r 000000000000 -r 757f2a1f8732 lib/monomer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/monomer.py Fri May 21 20:30:40 2010 +0400
@@ -0,0 +1,5 @@
+#!/usr/bin/python
+
+class Monomer(object):  # one element of a sequence, identified by its code
+    def __init__(self, code):
+        self.code = code  # kept exactly as given by the caller
diff -r 000000000000 -r 757f2a1f8732 lib/project.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/project.py Fri May 21 20:30:40 2010 +0400
@@ -0,0 +1,90 @@
+#!/usr/bin/python
+
+"""
+ "I will not use abbrev."
+ "I will always finish what I st"
+ - Bart Simpson
+
+"""
+
+import configure
+import sequence
+import monomer
+
+
+class Project(object):
+    """Sequences together with their alignment.
+
+    Mandatory data:
+    * sequences -- list of Sequence objects. Sequences don't contain gaps
+      - see sequence.py module
+    * alignment -- dict {<Sequence>: [<Monomer>, None, <Monomer>]}
+      keys are the Sequence objects, values are the lists, which
+      contain monomers of those sequences or None for gaps in the
+      corresponding sequence of alignment
+    """
+
+    def __init__(self, *args):
+        """Overloaded constructor.
+
+        Project() -> new empty Project
+        Project(sequences, alignment) -> new Project with sequences and
+            alignment initialized from arguments
+        Project(fasta_file) -> new Project, read alignment and sequences
+            from fasta file
+        """
+        if not args:
+            self.sequences = []
+            self.alignment = {}
+        elif len(args) == 1:
+            self.sequences, self.alignment = Project.get_from_fasta(args[0])
+        else:  # arguments after the second one are ignored
+            self.sequences = args[0]
+            self.alignment = args[1]
+
+    @staticmethod
+    def get_from_fasta(file):
+        """Read sequences and their alignment from an open FASTA file.
+
+        Returns the tuple (sequences, alignment).  Everything before the
+        first ">" is ignored.  A header is split at its first space into
+        name and description; the description is None when the header
+        holds a name only.  Whitespace inside the sequence is dropped.
+        The symbols "-", "." and "~" are gaps: they become None in the
+        alignment list and are left out of the sequence's monomers.
+
+        >>> import project
+        >>> sequences,alignment=project.Project.get_from_fasta(open("test.fasta"))
+        """
+        import re
+
+        gap_symbols = ("-", ".", "~")
+        sequences = []
+        alignment = {}
+
+        # [1:] drops everything before the first ">"
+        for raw in file.read().split(">")[1:]:
+            # strip() cuts "\r" and whitespace around every line
+            lines = [line.strip() for line in raw.split("\n")]
+            # name/description are rebound for every record, so a header
+            # without a description never inherits the previous one
+            name, separator, description = lines[0].partition(" ")
+            if not separator:
+                description = None
+            string = re.sub(r"\s", "", "".join(lines[1:]))
+            monomers = []  # Monomer objects, gaps excluded
+            alignment_list = []  # the respective list in alignment dict
+            for symbol in string:
+                if symbol in gap_symbols:
+                    alignment_list.append(None)
+                else:
+                    # the same Monomer object goes into both lists
+                    current_monomer = monomer.Monomer(symbol)
+                    monomers.append(current_monomer)
+                    alignment_list.append(current_monomer)
+            current_sequence = sequence.Sequence(name, description, monomers)
+            sequences.append(current_sequence)
+            alignment[current_sequence] = alignment_list
+        return sequences, alignment
+
+
diff -r 000000000000 -r 757f2a1f8732 lib/sequence.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/sequence.py Fri May 21 20:30:40 2010 +0400
@@ -0,0 +1,9 @@
+#!/usr/bin/python
+import monomer
+
+class Sequence(object):
+    # A named list of monomers with an optional free-text description.
+    def __init__(self, name, description, monomers):
+        # all three values are stored exactly as passed in
+        self.name, self.description, self.monomers = name, description, monomers
+