allpy
changeset 833:2060c8ef164b
Added script utils/convert_homology.py to convert homology files for similar alignments.
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Mon, 18 Jul 2011 19:26:03 +0400 |
parents | 241c4dfb1383 |
children | 0dc6d90af0cf |
files | utils/convert_homology.py |
diffstat | 1 files changed, 78 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/utils/convert_homology.py Mon Jul 18 19:26:03 2011 +0400 1.3 @@ -0,0 +1,78 @@ 1.4 +#!/usr/bin/python 1.5 +import sys 1.6 +from allpy import protein, homology, processors 1.7 + 1.8 +aln1 = protein.Alignment().append_file(open(sys.argv[1]), format="markup") 1.9 +aln2 = protein.Alignment().append_file(open(sys.argv[2]), format="markup") 1.10 +hmg = open(sys.argv[3]) 1.11 +outfile_name = sys.argv[4] 1.12 + 1.13 +mon_map = {} 1.14 +for seq1, seq2 in zip(aln1.sequences, aln2.sequences): 1.15 + assert seq1.name == seq2.name 1.16 + assert seq1.description == seq2.description 1.17 + seq2.name += "_2" 1.18 + aln = protein.Alignment().append_sequence(seq1).append_sequence(seq2) 1.19 + aln.realign(processors.Muscle()) 1.20 + seq1.add_markup('number') 1.21 + seq2.add_markup('number') 1.22 + for column in aln.columns: 1.23 + if len(column) != 2: 1.24 + continue 1.25 + seq1_number = str(column[seq1].number) 1.26 + seq2_number = str(column[seq2].number) 1.27 + mon_map[seq1.name, seq1_number] = seq2_number 1.28 + 1.29 +col_map = {} 1.30 +aln1_num = aln1.add_markup('number') 1.31 +aln2_num = aln2.add_markup('number') 1.32 +for seq1 in aln1.sequences: 1.33 + for col1 in aln1.columns: 1.34 + num1 = str(aln1_num[col1]) 1.35 + if num1 in col_map: 1.36 + continue 1.37 + if seq1 not in col1: 1.38 + continue 1.39 + key = seq1.name, str(col1[seq1].number) 1.40 + if key not in mon_map: 1.41 + continue 1.42 + num2 = None 1.43 + try: 1.44 + for seq2 in aln2.sequences: 1.45 + if seq2.name.startswith(seq1.name): 1.46 + mon2 = seq2[col1[seq1].number] 1.47 + for col2 in aln2.columns: 1.48 + if col2[seq2] is mon2: 1.49 + num2 = str(aln2_num[col2]) 1.50 + raise Exception 1.51 + except Exception: 1.52 + pass 1.53 + if num2 is not None: 1.54 + col_map[num1] = num2 1.55 + 1.56 +print sorted(col_map.items(), key=lambda (a,b): int(a)) 1.57 + 1.58 +out = open(outfile_name, "w") 1.59 +skipped = set() 1.60 +for line in hmg: 1.61 + line = line.strip() 1.62 + if line == "" or line.startswith("#"): 1.63 + continue 1.64 + class_id, seq_id, monomer_id, column_id = line.split() 1.65 + key = seq_id, monomer_id 1.66 + if key not in mon_map: 1.67 + if key not in skipped: 1.68 + print "Skipping monomer", key 1.69 + skipped.add(key) 1.70 + continue 1.71 + if column_id not in col_map: 1.72 + if column_id not in skipped: 1.73 + print "Skipping column", repr(column_id) 1.74 + skipped.add(column_id) 1.75 + continue 1.76 + monomer_id = mon_map[key] 1.77 + column_id = col_map[column_id] 1.78 + out.write("\t".join([class_id, seq_id, monomer_id, column_id]) + "\n") 1.79 +out.close() 1.80 + 1.81 +# vim: set et ts=4 sts=4 sw=4: