allpy
changeset 834:0dc6d90af0cf
Automated merge with ssh://kodomo/allpy
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Mon, 18 Jul 2011 19:26:21 +0400 |
parents | afc8c5adb699 2060c8ef164b |
children | bbe1b872e424 |
files | |
diffstat | 2 files changed, 79 insertions(+), 1 deletions(-) [+] |
line diff
1.1 --- a/allpy/base.py Sat Jul 16 19:59:16 2011 +0400 1.2 +++ b/allpy/base.py Mon Jul 18 19:26:21 2011 +0400 1.3 @@ -131,7 +131,7 @@ 1.4 markup_class = markups.by_name[kind, name] 1.5 if use_existing and name in self.markups: 1.6 assert self.markups[name].__class__ is markup_class 1.7 - return 1.8 + return self.markups[name] 1.9 assert name not in self.markups 1.10 markup = markup_class(self, name, caller='container', **kws) 1.11 self.markups[name] = markup
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/utils/convert_homology.py Mon Jul 18 19:26:21 2011 +0400 2.3 @@ -0,0 +1,78 @@ 2.4 +#!/usr/bin/python 2.5 +import sys 2.6 +from allpy import protein, homology, processors 2.7 + 2.8 +aln1 = protein.Alignment().append_file(open(sys.argv[1]), format="markup") 2.9 +aln2 = protein.Alignment().append_file(open(sys.argv[2]), format="markup") 2.10 +hmg = open(sys.argv[3]) 2.11 +outfile_name = sys.argv[4] 2.12 + 2.13 +mon_map = {} 2.14 +for seq1, seq2 in zip(aln1.sequences, aln2.sequences): 2.15 + assert seq1.name == seq2.name 2.16 + assert seq1.description == seq2.description 2.17 + seq2.name += "_2" 2.18 + aln = protein.Alignment().append_sequence(seq1).append_sequence(seq2) 2.19 + aln.realign(processors.Muscle()) 2.20 + seq1.add_markup('number') 2.21 + seq2.add_markup('number') 2.22 + for column in aln.columns: 2.23 + if len(column) != 2: 2.24 + continue 2.25 + seq1_number = str(column[seq1].number) 2.26 + seq2_number = str(column[seq2].number) 2.27 + mon_map[seq1.name, seq1_number] = seq2_number 2.28 + 2.29 +col_map = {} 2.30 +aln1_num = aln1.add_markup('number') 2.31 +aln2_num = aln2.add_markup('number') 2.32 +for seq1 in aln1.sequences: 2.33 + for col1 in aln1.columns: 2.34 + num1 = str(aln1_num[col1]) 2.35 + if num1 in col_map: 2.36 + continue 2.37 + if seq1 not in col1: 2.38 + continue 2.39 + key = seq1.name, str(col1[seq1].number) 2.40 + if key not in mon_map: 2.41 + continue 2.42 + num2 = None 2.43 + try: 2.44 + for seq2 in aln2.sequences: 2.45 + if seq2.name.startswith(seq1.name): 2.46 + mon2 = seq2[col1[seq1].number] 2.47 + for col2 in aln2.columns: 2.48 + if col2[seq2] is mon2: 2.49 + num2 = str(aln2_num[col2]) 2.50 + raise Exception 2.51 + except Exception: 2.52 + pass 2.53 + if num2 is not None: 2.54 + col_map[num1] = num2 2.55 + 2.56 +print sorted(col_map.items(), key=lambda (a,b): int(a)) 2.57 + 2.58 +out = open(outfile_name, "w") 2.59 +skipped = set() 2.60 +for line in hmg: 2.61 + line = line.strip() 2.62 + if line == "" or line.startswith("#"): 2.63 + continue 2.64 + class_id, seq_id, monomer_id, column_id = line.split() 2.65 + key = seq_id, monomer_id 2.66 + if key not in mon_map: 2.67 + if key not in skipped: 2.68 + print "Skipping monomer", key 2.69 + skipped.add(key) 2.70 + continue 2.71 + if column_id not in col_map: 2.72 + if column_id not in skipped: 2.73 + print "Skipping column", repr(column_id) 2.74 + skipped.add(column_id) 2.75 + continue 2.76 + monomer_id = mon_map[key] 2.77 + column_id = col_map[column_id] 2.78 + out.write("\t".join([class_id, seq_id, monomer_id, column_id]) + "\n") 2.79 +out.close() 2.80 + 2.81 +# vim: set et ts=4 sts=4 sw=4: