Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/raw-rev/e1346615e468
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 08:14:02 2012
Кодировка:

# HG changeset patch
# User Andrei
# Date 1310406941 -14400
# Node ID e1346615e468b67e736c5caefc6b2b0773400e97
# Parent 9032fd191b1c2b5be7774d6a8746b3ec1055041b
Methods to construct homology (without writing) added

diff -r 9032fd191b1c -r e1346615e468 allpy/homology.py
--- a/allpy/homology.py Mon Jul 11 18:27:37 2011 +0400
+++ b/allpy/homology.py Mon Jul 11 21:55:41 2011 +0400
@@ -2,42 +2,42 @@
from allpy import markups

class MonomerHomology(object):
- """ Essentially, alignment is a set of monomer homology classes
- Object of type MonomerHomology desribes an alignment in terms of monomer homology
- Currently, identifies of monomers and monomer homology classes are used
-
+ """ Essentially, alignment is a set of monomer homology classes.
+ Object of type MonomerHomology describes an alignment in terms of monomer homology.
Each monomer of any sequence from an alignment must be contained in a homology class

- Homology of nonequal monomers of the same sequence is allowed (formally),
+ Homology of nonequal monomers of the same sequence is allowed (formally) in attributes,
but there is no implemented methods to deal with

DATA:
monomer_ids = {monomer_id:class_id}
- classes = {class_id:[monomer_id1, monomer_id2]}
- monomer_id = (sequence_name, monomer_number)
+ monomer_id = (sequence_name, monomer_number)
+ monomer_number is a number of the monomer in the sequence
class_id = string
- columns = {class_id:column_number} optional
-
+ classes = {class_id:[monomer_id1, monomer_id2,...], ...}
+ columns = {class_id:column_number}
blocks_data = [block_data, ...]
block_data = (sequence_names,class_ids)
sequence_names = set( [sequence_1_name,...] )
class_ids = = set( [class_1_id, ...] )

- BLOCK is assigned to an alignment (currently)
- Here block is not a block of an alignment, but a set of data to create such block
- On the other hand, block consists of class_ids (not columns) to have possibility to get monomer_id
- Thus, block reaarengement must be accomplished with classes reassignments!
- Not very good, but currently is made as above described
+ block_data contains identifiers of objects only; an alignment object is not needed.
+ block_data may be converted to blocks of an alignment by the appropriate method

METHODS:
- + .read(file_name)
- + .write_monomer(file,monomer_id,monomer_homology_class_id)
- + .write_class(file,list_of_monomer_ids,monomer_homology_class_id)
- + .write_block(file, block)
+ + .read(file_name, columns = True)
+ Reads file of monomer homology format into MonomerHomology object.
+ + .write_monomer(file,monomer_id,monomer_homology_class_id, column_number = column_number) STATIC
+ + .write_class(file,list_of_monomer_ids,monomer_homology_class_id,column_number = column_number) STATIC
+ + .write_block(file, alignment, block)
+ + .add_monomer(monomer_id,class_id,column_number)
+ + .add_class(monomer_ids,class_id, column_number)
+ + .add_block(alignment, block)
+ .compare_with(class)
+ + .two_vs_one(classes_one, classes_two) STATIC
+ .highest_blocks()
+ .alignment_blocks(alignment)
- .case_homology
+ + .case_homology(in_file_name,out_file_name, case) STATIC
"""
def __init__(self, next_class_id = 1):
self.classes = {}
@@ -46,6 +46,7 @@
self.blocks_data = []
self.next_class_id = next_class_id

+################################################################

def read(self, file_name, columns = False):
"""
@@ -106,6 +107,8 @@
f.close()
return monomers_count

+################################################################
+
@staticmethod
def two_vs_one(classes_one, classes_two):
""" Computes weighted average number of mistakes of classes_two with respect to classes_one
@@ -170,8 +173,44 @@

return (mistakes_two_vs_one, mistakes_one_vs_two)

+################################################################
+
@staticmethod
- def write_monomer(file,monomer_id,class_id, column_number = False):
+ def write_class(file,monomer_ids,class_id, column_number = False):
+ """ Writes list of monomer_ids forming one homology_class
+ """
+ for monomer_id in monomer_ids:
+ MonomerHomology.write_monomer(file,monomer_id,class_id, column_number = column_number)
+
+
+ def add_class(self,monomer_ids,class_id, column_number):
+ """ Adds list of monomer_ids forming one homology_class
+ """
+ for monomer_id in monomer_ids:
+ self.add_monomer(monomer_id,class_id, column_number)
+
+
+
+
+#################################################################
+
+ def add_monomer(self,monomer_id,class_id,column_number):
+ """ Add one monomer to Homology
+ """
+ if len(monomer_id) != 2:
+ raise Exception("wrong parameters given for Monomer_homology.write_monomer: len(monomer_id) is not 2!")
+ exit()
+ self.monomer_ids[monomer_id] = class_id
+
+ if class_id in self.classes:
+ self.classes[class_id].append(monomer_id)
+ else:
+ self.classes[class_id] = [monomer_id]
+
+ self.columns[class_id] = column_number
+
+ @staticmethod
+ def write_monomer(file, monomer_id, class_id, column_number = False):
""" Write a line "class_id sequence_id monome number \n" into file
"""
if len(monomer_id) != 2:
@@ -186,17 +225,41 @@
except:
raise Exception("Failed to write monomer into file!")
exit()
- return None

- @staticmethod
- def write_class(file,monomer_ids,class_id, column_number = False):
- """ Writes list of monomer_ids forming one homology_class
+
+#################################################################
+
+ def add_block(self,alignment, block):
+ """ Add classes of plus-block to Homology
"""
- for monomer_id in monomer_ids:
- MonomerHomology.write_monomer(file,monomer_id,class_id, column_number = column_number)
-
- #######################3#
- def write_block(self,file,block,markedup = False):
+ for column in block.columns:
+ if len(column) == 0:
+ continue
+
+ try:
+ column_number = alignment.markups['number'][column]
+ except:
+ raise Exception("Alignment column has no number! Alignment must be marked up by numbers before .add_block")
+ exit()
+
+ for sequence in block.sequences:
+ if sequence in column:
+
+ try:
+ monomer_id = (sequence.name, column[sequence].number)
+ except:
+ raise Exception("Monomer has no number! Sequences must be marked up by numbers before .writ_block")
+ exit()
+
+ self.add_monomer(monomer_id,self.next_class_id, column_number)
+
+ self.next_class_id +=1
+
+
+
+
+
+ def write_block(self,file,alignment, block, markedup = True):
""" Writes each block column into as one homology class
WARRNINGS:
(1) method works in object of class MonomerHomology
@@ -212,7 +275,7 @@
continue

if markedup:
- column_number = block.markups['number'][column]
+ column_number = alignment.markups['number'][column]
else:
column_number = False

@@ -229,8 +292,8 @@

self.next_class_id +=1

-
- #######################3#
+#################################################################
+
def _sequences_of_class(self,monomer_ids):
""" RETURN set of sequences from a list of monomer_ids
"""
@@ -297,6 +360,8 @@

return self.blocks_data

+################################################################
+
def alignment_blocks(self, alignment):
""" From homology.blocks creates a list of alignment blocks (accessible for visualization)

@@ -346,24 +411,27 @@

block = protein.Block.from_alignment(alignment, sequences = block_sequences, columns = block_columns)
blocks.append(block)
-
return blocks

+#################################################################
@staticmethod
def case_homology(in_file_name,out_file_name, case):
- """ Makes homology file from case sensitive input alignment
+ """ Makes homology file from input alignment
+ case = True => uppercase letters in a column are in one class, each lowercase letter is in a separate class
+ case = False => all letters in a column are in one class
+
RETURN number of classes
"""
try:
f = open(in_file_name)
except:
- raise Exception("Failed to open file %s" % fasta_file_name)
+ raise Exception("Failed to open file %s" % in_file_name)
exit()
try:
alignment = protein.Alignment().append_file(f)
f.close()
except:
- raise Exception("Failed to cteate alignment from file %s!" % fasta_file_name)
+ raise Exception("Failed to create alignment from file %s!" % in_file_name)
exit()
try:
g = open(out_file_name, 'w')
@@ -406,4 +474,8 @@
g.close()
return next_class_id - 1

+################################################################

+
+
+