Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/0dc6d90af0cf
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 01:19:45 2012
Кодировка:
allpy: 0dc6d90af0cf

allpy

changeset 834:0dc6d90af0cf

Automated merge with ssh://kodomo/allpy
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Mon, 18 Jul 2011 19:26:21 +0400
parents afc8c5adb699 2060c8ef164b
children bbe1b872e424
files
diffstat 2 files changed, 79 insertions(+), 1 deletions(-) [+]
line diff
     1.1 --- a/allpy/base.py	Sat Jul 16 19:59:16 2011 +0400
     1.2 +++ b/allpy/base.py	Mon Jul 18 19:26:21 2011 +0400
     1.3 @@ -131,7 +131,7 @@
     1.4              markup_class = markups.by_name[kind, name]
     1.5          if use_existing and name in self.markups:
     1.6              assert self.markups[name].__class__ is markup_class
     1.7 -            return
     1.8 +            return self.markups[name]
     1.9          assert name not in self.markups
    1.10          markup = markup_class(self, name, caller='container', **kws)
    1.11          self.markups[name] = markup
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/utils/convert_homology.py	Mon Jul 18 19:26:21 2011 +0400
     2.3 @@ -0,0 +1,78 @@
     2.4 +#!/usr/bin/python
     2.5 +import sys
     2.6 +from allpy import protein, homology, processors
     2.7 +
     2.8 +aln1 = protein.Alignment().append_file(open(sys.argv[1]), format="markup")
     2.9 +aln2 = protein.Alignment().append_file(open(sys.argv[2]), format="markup")
    2.10 +hmg = open(sys.argv[3])
    2.11 +outfile_name = sys.argv[4]
    2.12 +
    2.13 +mon_map = {}
    2.14 +for seq1, seq2 in zip(aln1.sequences, aln2.sequences):
    2.15 +    assert seq1.name == seq2.name
    2.16 +    assert seq1.description == seq2.description
    2.17 +    seq2.name += "_2"
    2.18 +    aln = protein.Alignment().append_sequence(seq1).append_sequence(seq2)
    2.19 +    aln.realign(processors.Muscle())
    2.20 +    seq1.add_markup('number')
    2.21 +    seq2.add_markup('number')
    2.22 +    for column in aln.columns:
    2.23 +        if len(column) != 2:
    2.24 +            continue
    2.25 +        seq1_number = str(column[seq1].number)
    2.26 +        seq2_number = str(column[seq2].number)
    2.27 +        mon_map[seq1.name, seq1_number] = seq2_number
    2.28 +
    2.29 +col_map = {}
    2.30 +aln1_num = aln1.add_markup('number')
    2.31 +aln2_num = aln2.add_markup('number')
    2.32 +for seq1 in aln1.sequences:
    2.33 +    for col1 in aln1.columns:
    2.34 +        num1 = str(aln1_num[col1])
    2.35 +        if num1 in col_map:
    2.36 +            continue
    2.37 +        if seq1 not in col1:
    2.38 +            continue
    2.39 +        key = seq1.name, str(col1[seq1].number)
    2.40 +        if key not in mon_map:
    2.41 +            continue
    2.42 +        num2 = None
    2.43 +        try:
    2.44 +            for seq2 in aln2.sequences:
    2.45 +                if seq2.name.startswith(seq1.name):
    2.46 +                    mon2 = seq2[col1[seq1].number]
    2.47 +                    for col2 in aln2.columns:
    2.48 +                        if col2[seq2] is mon2:
    2.49 +                            num2 = str(aln2_num[col2])
    2.50 +                            raise Exception
    2.51 +        except Exception:
    2.52 +            pass
    2.53 +        if num2 is not None:
    2.54 +            col_map[num1] = num2
    2.55 +
    2.56 +print sorted(col_map.items(), key=lambda (a,b): int(a))
    2.57 +
    2.58 +out = open(outfile_name, "w")
    2.59 +skipped = set()
    2.60 +for line in hmg:
    2.61 +    line = line.strip()
    2.62 +    if line == "" or line.startswith("#"):
    2.63 +        continue
    2.64 +    class_id, seq_id, monomer_id, column_id = line.split()
    2.65 +    key = seq_id, monomer_id
    2.66 +    if key not in mon_map:
    2.67 +        if key not in skipped:
    2.68 +            print "Skipping monomer", key
    2.69 +            skipped.add(key)
    2.70 +        continue
    2.71 +    if column_id not in col_map:
    2.72 +        if column_id not in skipped:
    2.73 +            print "Skipping column", repr(column_id)
    2.74 +            skipped.add(column_id)
    2.75 +        continue
    2.76 +    monomer_id = mon_map[key]
    2.77 +    column_id = col_map[column_id]
    2.78 +    out.write("\t".join([class_id, seq_id, monomer_id, column_id]) + "\n")
    2.79 +out.close()
    2.80 +
    2.81 +# vim: set et ts=4 sts=4 sw=4: