allpy

changeset 502:516a8059950f
Automated merge with ssh://kodomo.fbb.msu.ru/allpy
author: boris (netbook) <bnagaev@gmail.com>
date: Tue, 22 Feb 2011 19:25:27 +0300
parents: ffb6aec11314 527b713affaa
children: d3a85d02f252
files: blocks3d/AAdict.py blocks3d/AlignmentSeq.py blocks3d/AlignmentSeq_config.py blocks3d/Blocks3D.py blocks3d/Blocks3D_class.py blocks3d/Blocks3D_config.py blocks3d/GeometricalCore_class.py blocks3d/GeometricalCore_config.py blocks3d/Kliki.py blocks3d/clon.py geometrical_core/argparse_validators.py
diffstat: 18 files changed, 232 insertions(+), 4743 deletions(-) [+]
[-]

allpy/argparse_validators.py 63

allpy/structure.py 60

blocks3d/AAdict.py 1186

blocks3d/AlignmentSeq.py 490

blocks3d/AlignmentSeq_config.py 28

blocks3d/Blocks3D.py 512

blocks3d/Blocks3D_class.py 751

blocks3d/Blocks3D_config.py 21

blocks3d/GeometricalCore_class.py 1008

blocks3d/GeometricalCore_config.py 52

blocks3d/Kliki.py 567

blocks3d/blocks3d.py 107

blocks3d/clon.py 38

blocks3d/protein_pdb.py 18

blocks3d/www/input/blocks.js 5

geometrical_core/argparse_validators.py 63

geometrical_core/geometrical-core 3

geometrical_core/protein_pdb.py 3 allpy/argparse_validators.py 63 allpy/structure.py 60 blocks3d/AAdict.py 1186 blocks3d/AlignmentSeq.py 490 blocks3d/AlignmentSeq_config.py 28 blocks3d/Blocks3D.py 512 blocks3d/Blocks3D_class.py 751 blocks3d/Blocks3D_config.py 21 blocks3d/GeometricalCore_class.py 1008 blocks3d/GeometricalCore_config.py 52 blocks3d/Kliki.py 567 blocks3d/blocks3d.py 107 blocks3d/clon.py 38 blocks3d/protein_pdb.py 18 blocks3d/www/input/blocks.js 5 geometrical_core/argparse_validators.py 63 geometrical_core/geometrical-core 3 geometrical_core/protein_pdb.py 3
allpy/argparse_validators.py 63
allpy/structure.py 60
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/allpy/argparse_validators.py	Tue Feb 22 19:25:27 2011 +0300
     1.3 @@ -0,0 +1,63 @@
     1.4 +
     1.5 +import argparse
     1.6 +
     1.7 +def f_nng(string):
     1.8 +    """ Validates nonnegative (>=0) float """
     1.9 +    try:
    1.10 +        value = float(string)
    1.11 +    except:
    1.12 +        msg = "%r is wrong float" % string
    1.13 +        raise argparse.ArgumentTypeError(msg)
    1.14 +    if value < 0:
    1.15 +        msg = "%r is negative" % string
    1.16 +        raise argparse.ArgumentTypeError(msg)
    1.17 +    return value
    1.18 +
    1.19 +def part(string):
    1.20 +    """ Validates 0.0 <= float <= 1.0 """
    1.21 +    try:
    1.22 +        value = float(string)
    1.23 +    except:
    1.24 +        msg = "%r is wrong float" % string
    1.25 +        raise argparse.ArgumentTypeError(msg)
    1.26 +    if not (0.0 <= value <= 1.0):
    1.27 +        msg = "%r is not float, representing part, ie in [0, 1]" % string
    1.28 +        raise argparse.ArgumentTypeError(msg)
    1.29 +    return value
    1.30 +
    1.31 +def timeout(string):
    1.32 +    """ Validates int >= -1 """
    1.33 +    try:
    1.34 +        value = int(string)
    1.35 +    except:
    1.36 +        msg = "%r is wrong integer" % string
    1.37 +        raise argparse.ArgumentTypeError(msg)
    1.38 +    if value < -1:
    1.39 +        msg = "integer %r is less than -1" % string
    1.40 +        raise argparse.ArgumentTypeError(msg)
    1.41 +    return value
    1.42 +
    1.43 +def pos(string):
    1.44 +    """ Validates positive integer """
    1.45 +    try:
    1.46 +        value = int(string)
    1.47 +    except:
    1.48 +        msg = "%r is wrong integer" % string
    1.49 +        raise argparse.ArgumentTypeError(msg)
    1.50 +    if value < 1:
    1.51 +        msg = "%r is not positive integer" % string
    1.52 +        raise argparse.ArgumentTypeError(msg)
    1.53 +    return value
    1.54 +
    1.55 +def i_nng(string):
    1.56 +    """ Validates int >= 0 """
    1.57 +    try:
    1.58 +        value = int(string)
    1.59 +    except:
    1.60 +        msg = "%r is wrong integer" % string
    1.61 +        raise argparse.ArgumentTypeError(msg)
    1.62 +    if value < 0:
    1.63 +        msg = "integer %r is less than 0" % string
    1.64 +        raise argparse.ArgumentTypeError(msg)
    1.65 +    return value
    1.66 +

     2.1 --- a/allpy/structure.py	Tue Feb 22 17:16:56 2011 +0300
     2.2 +++ b/allpy/structure.py	Tue Feb 22 19:25:27 2011 +0300
     2.3 @@ -248,12 +248,19 @@
     2.4          * timeout_2 -- Bron-Kerbosh timeout (blocks)
     2.5          * min_width -- min width of each core
     2.6          """
     2.7 +        result = []
     2.8          Block = self.__class__
     2.9          # for sorting
    2.10 -        key_column = lambda c: return self.columns.index(c)
    2.11 -        key_sequence = lambda s: return self.sequences.index(c)
    2.12 +        key_column = lambda c: self.columns.index(c)
    2.13 +        key_sequence = lambda s: self.sequences.index(s)
    2.14          vertices = sum([seq for seq in self.sequences], [])
    2.15          edges = {}
    2.16 +        monomer2column = {}
    2.17 +        monomer2sequence = {}
    2.18 +        for column in self.columns:
    2.19 +            for sequence, monomer in column.items():
    2.20 +                monomer2column[monomer] = column
    2.21 +                monomer2sequence[monomer] = sequence
    2.22          for i, seq1 in enumerate(self.sequences):
    2.23              for j, seq2 in enumerate(self.sequences):
    2.24                  if i < j:
    2.25 @@ -264,24 +271,23 @@
    2.26                      for core in cores:
    2.27                          core_block = copy(block)
    2.28                          core_block.columns = sorted(core, key=key_column)
    2.29 -                        for part_block in core_block.continuous_blocks():
    2.30 -                            if len(part.columns) >= min_width:
    2.31 -                                monomers = part.monomers()
    2.32 -                                for m1 in monomers:
    2.33 -                                    for m2 in monomers:
    2.34 -                                        edges
    2.35 -                                        edge = Graph.edge(m1, m2)
    2.36 -                                        edges[edge] = 1.0
    2.37 +                        for part in core_block.continuous_blocks(min_width):
    2.38 +                            monomers = part.monomers()
    2.39 +                            for m1 in monomers:
    2.40 +                                for m2 in monomers:
    2.41 +                                    edges
    2.42 +                                    edge = Graph.edge(m1, m2)
    2.43 +                                    edges[edge] = 1.0
    2.44          graph = Graph(vertices, edges)
    2.45          cliques = graph.cliques(minsize=min_width*2, timeout=timeout_2)
    2.46          used_monomers = set()
    2.47          for clique in cliques:
    2.48              clique -= used_monomers
    2.49              while clique:
    2.50 -                sequences = set(m.sequence for m in clique)
    2.51 -                sequences.sort(key=key_sequence)
    2.52 -                columns = set(m.column for m in clique)
    2.53 -                columns.sort(key=key_column)
    2.54 +                sequences = set(monomer2sequence[m] for m in clique)
    2.55 +                sequences = sorted(sequences, key=key_sequence)
    2.56 +                columns = set(monomer2column[m] for m in clique)
    2.57 +                columns = sorted(columns, key=key_column)
    2.58                  height = len(sequences)
    2.59                  if height <= 1:
    2.60                      break
    2.61 @@ -290,31 +296,39 @@
    2.62                  block = copy(self)
    2.63                  block.columns = filled_columns
    2.64                  block.sequences = sequences
    2.65 -                for part in block.continuous_blocks():
    2.66 -                    if len(part.columns) >= min_width:
    2.67 -                        clique -= part.monomers()
    2.68 -                        used_monomers += part.monomers()
    2.69 -                        yield part
    2.70 +                for part in block.continuous_blocks(min_width):
    2.71 +                    clique -= set(part.monomers())
    2.72 +                    used_monomers |= set(part.monomers())
    2.73 +                    result.append(part)
    2.74 +        return result
    2.75  
    2.76 -    def continuous_blocks(self):
    2.77 +    def continuous_blocks(self, min_width=1):
    2.78          """ Return list of continued blocks """
    2.79 +        result = []
    2.80          self_columns = set(self.columns)
    2.81          columns_batch = []
    2.82          for column in self.alignment.columns:
    2.83              if column in self_columns:
    2.84                  columns_batch.append(column)
    2.85 -            elif columns_batch:
    2.86 +            elif len(columns_batch) >= min_width:
    2.87                  block = copy(self)
    2.88                  block.columns = columns_batch
    2.89                  columns_batch = []
    2.90 -                yield block
    2.91 +                result.append(block)
    2.92 +        if len(columns_batch) >= min_width:
    2.93 +            block = copy(self)
    2.94 +            block.columns = columns_batch
    2.95 +            result.append(block)
    2.96 +        return result
    2.97  
    2.98      def monomers(self):
    2.99          """ Return list of all monomers in this block """
   2.100 +        result = []
   2.101          for column in self.columns:
   2.102              for sequence in self.sequences:
   2.103                  if sequence in column:
   2.104 -                    yield column[sequence]
   2.105 +                    result.append(column[sequence])
   2.106 +        return result
   2.107  
   2.108      def superimpose(self, gc):
   2.109          """ Superimpose monomers from this block at gc positions """

     3.1 --- a/blocks3d/AAdict.py	Tue Feb 22 17:16:56 2011 +0300
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,1186 +0,0 @@
     3.4 -AAdict= {
     3.5 -"CPI": ('x', 'p', None, None),
     3.6 -"AGM": ('r', 'p', 'M', None),
     3.7 -"G25": ('g', 'r', 'M', None),
     3.8 -"CPN": ('c', 'd', 'M', None),
     3.9 -"MA6": ('a', 'r', 'M', None),
    3.10 -"NYS": ('c', 'p', 'M', None),
    3.11 -"CPC": ('x', 'p', None, None),
    3.12 -"8FG": ('g', 'd', 'M', None),
    3.13 -"SIC": ('x', 'p', None, None),
    3.14 -"PQ1": ('n', 'r', None, None),
    3.15 -"PAS": ('d', 'p', 'M', None),
    3.16 -"EOV": ('x', 'p', None, None),
    3.17 -"B2I": ('i', 'p', 'M', None),
    3.18 -"NYM": ('n', 'd', None, None),
    3.19 -"URD": ('u', 'r', 'M', None),
    3.20 -"NYC": ('x', 'p', None, None),
    3.21 -"AGT": ('x', 'p', None, None),
    3.22 -"NYG": ('x', 'p', None, None),
    3.23 -"CPV": ('x', 'p', None, None),
    3.24 -"TAV": ('d', 'p', 'M', None),
    3.25 -"S4G": ('g', 'd', 'M', None),
    3.26 -"MA7": ('a', 'd', 'M', None),
    3.27 -"S4C": ('c', 'r', 'M', None),
    3.28 -"UAR": ('u', 'r', 'M', None),
    3.29 -"SPT": ('t', 'd', 'M', None),
    3.30 -"S4U": ('u', 'r', 'M', None),
    3.31 -"TAF": ('t', 'd', 'M', None),
    3.32 -"5NC": ('c', 'd', 'M', None),
    3.33 -"CSF": ('c', 'r', 'M', None),
    3.34 -"0CS": ('a', 'p', 'M', None),
    3.35 -"B2V": ('v', 'p', 'M', None),
    3.36 -"TAL": ('n', 'r', None, None),
    3.37 -"SCY": ('c', 'p', 'M', None),
    3.38 -"KYQ": ('k', 'p', 'M', None),
    3.39 -"CS8": ('n', 'r', None, None),
    3.40 -"TA4": ('x', 'p', None, None),
    3.41 -"ALN": ('a', 'p', 'M', None),
    3.42 -"GS": ('g', 'd', 'M', None),
    3.43 -"2FE": ('n', 'd', None, None),
    3.44 -"VAL": ('V', 'p', None, None),
    3.45 -"7DA": ('a', 'd', 'M', None),
    3.46 -"XPC": ('x', 'p', None, None),
    3.47 -"VAF": ('v', 'p', 'M', None),
    3.48 -"VAD": ('v', 'p', 'M', None),
    3.49 -"BGM": ('g', 'd', 'M', None),
    3.50 -"P": ('g', 'd', 'M', None),
    3.51 -"LHU": ('u', 'r', 'M', None),
    3.52 -"T48": ('t', 'd', 'M', None),
    3.53 -"2GT": ('t', 'd', 'M', None),
    3.54 -"HMF": ('a', 'p', 'M', None),
    3.55 -"BNN": ('a', 'p', 'M', None),
    3.56 -"AHB": ('n', 'p', 'M', None),
    3.57 -"NPH": ('c', 'p', 'M', None),
    3.58 -"SIB": ('c', 'p', 'M', None),
    3.59 -"SNN": ('n', 'p', 'M', None),
    3.60 -"AHH": ('x', 'p', None, None),
    3.61 -"JW5": ('n', 'r', None, None),
    3.62 -"PBB": ('c', 'p', 'M', None),
    3.63 -"PTR": ('y', 'p', 'M', None),
    3.64 -"MAA": ('a', 'p', 'M', None),
    3.65 -"AHS": ('x', 'p', None, None),
    3.66 -"AHP": ('a', 'p', 'M', None),
    3.67 -"MAD": ('a', 'r', 'M', None),
    3.68 -"AHT": ('x', 'p', None, None),
    3.69 -"XYG": ('x', 'p', None, None),
    3.70 -"MAI": ('r', 'p', 'M', None),
    3.71 -"CP1": ('c', 'd', 'M', None),
    3.72 -"G2L": ('g', 'r', 'M', None),
    3.73 -"FFD": ('n', 'd', None, None),
    3.74 -"CML": ('c', 'p', 'M', None),
    3.75 -"CMH": ('c', 'p', 'M', None),
    3.76 -"ZDU": ('n', 'd', None, None),
    3.77 -"PCS": ('f', 'p', 'M', None),
    3.78 -"CME": ('c', 'p', 'M', None),
    3.79 -"CSO": ('c', 'p', 'M', None),
    3.80 -"193": ('x', 'p', None, None),
    3.81 -"1TY": ('y', 'p', 'M', None),
    3.82 -"CB2": ('c', 'd', 'M', None),
    3.83 -"CS3": ('c', 'p', 'M', None),
    3.84 -"64T": ('t', 'd', 'M', None),
    3.85 -"4AC": ('n', 'r', None, None),
    3.86 -"B3X": ('n', 'p', 'M', None),
    3.87 -"TST": ('x', 'p', None, None),
    3.88 -"CMT": ('c', 'p', 'M', None),
    3.89 -"CMR": ('c', 'd', 'M', None),
    3.90 -"CSH": ('x', 'p', None, None),
    3.91 -"VLM": ('x', 'p', None, None),
    3.92 -"G": ('G', 'r', None, None),
    3.93 -"C4S": ('c', 'd', 'M', None),
    3.94 -"U2L": ('u', 'r', 'M', None),
    3.95 -"LSO": ('x', 'p', None, None),
    3.96 -"U2N": ('n', 'd', None, None),
    3.97 -"PLE": ('l', 'p', 'M', None),
    3.98 -"SIN": ('x', 'p', None, None),
    3.99 -"M2L": ('k', 'p', 'M', None),
   3.100 -"I58": ('k', 'p', 'M', None),
   3.101 -"U2P": ('u', 'r', 'M', None),
   3.102 -"B2A": ('a', 'p', 'M', None),
   3.103 -"TTM": ('t', 'd', 'M', None),
   3.104 -"CSD": ('c', 'p', 'M', None),
   3.105 -"M2G": ('g', 'r', 'M', None),
   3.106 -"ASX": ('x', 'p', None, None),
   3.107 -"AFF": ('n', 'd', None, None),
   3.108 -"TTD": ('t', 'd', 'M', None),
   3.109 -"AET": ('a', 'r', 'M', None),
   3.110 -"C45": ('c', 'd', 'M', None),
   3.111 -"C46": ('c', 'd', 'M', None),
   3.112 -"FTR": ('w', 'p', 'M', None),
   3.113 -"BLY": ('k', 'p', 'M', None),
   3.114 -"C42": ('c', 'd', 'M', None),
   3.115 -"C2S": ('c', 'd', 'M', None),
   3.116 -"FTY": ('y', 'p', 'M', None),
   3.117 -"CSA": ('c', 'p', 'M', None),
   3.118 -"I5C": ('c', 'd', 'M', None),
   3.119 -"ALY": ('k', 'p', 'M', None),
   3.120 -"P1L": ('c', 'p', 'M', None),
   3.121 -"C49": ('c', 'd', 'M', None),
   3.122 -"DSP": ('d', 'p', 'M', None),
   3.123 -"AFA": ('n', 'p', 'M', None),
   3.124 -"P1P": ('n', 'r', None, None),
   3.125 -"U25": ('u', 'r', 'M', None),
   3.126 -"CHS": ('x', 'p', None, None),
   3.127 -"BLE": ('l', 'p', 'M', None),
   3.128 -"DSG": ('n', 'p', 'M', None),
   3.129 -"DSE": ('s', 'p', 'M', None),
   3.130 -"S4A": ('a', 'd', 'M', None),
   3.131 -"DLS": ('k', 'p', 'M', None),
   3.132 -"4SU": ('u', 'r', 'M', None),
   3.133 -"MA": ('a', 'p', 'M', None),
   3.134 -"TRX": ('w', 'p', 'M', None),
   3.135 -"DC": ('C', 'd', None, None),
   3.136 -"HLU": ('l', 'p', 'M', None),
   3.137 -"HSE": ('s', 'p', 'M', None),
   3.138 -"A5M": ('c', 'r', 'M', None),
   3.139 -"ALS": ('a', 'p', 'M', None),
   3.140 -"CBR": ('c', 'd', 'M', None),
   3.141 -"SHC": ('c', 'p', 'M', None),
   3.142 -"B2F": ('f', 'p', 'M', None),
   3.143 -"QUO": ('g', 'r', 'M', None),
   3.144 -"X4A": ('n', 'd', None, None),
   3.145 -"BE2": ('x', 'p', None, None),
   3.146 -"CM0": ('n', 'r', None, None),
   3.147 -"GLH": ('q', 'p', 'M', None),
   3.148 -"DOH": ('d', 'p', 'M', None),
   3.149 -"DON": ('l', 'p', 'M', None),
   3.150 -"GLM": ('x', 'p', None, None),
   3.151 -"GLN": ('Q', 'p', None, None),
   3.152 -"T4S": ('t', 'd', 'M', None),
   3.153 -"DOC": ('c', 'd', 'M', None),
   3.154 -"T3P": ('t', 'd', 'M', None),
   3.155 -"DOA": ('x', 'p', None, None),
   3.156 -"TOX": ('w', 'p', 'M', None),
   3.157 -"SMP": ('a', 'd', 'M', None),
   3.158 -"GLX": ('x', 'p', None, None),
   3.159 -"GLY": ('G', 'p', None, None),
   3.160 -"GLZ": ('g', 'p', 'M', None),
   3.161 -"CTG": ('t', 'd', 'M', None),
   3.162 -"T38": ('n', 'r', None, None),
   3.163 -"AKL": ('d', 'p', 'M', None),
   3.164 -"T36": ('t', 'd', 'M', None),
   3.165 -"T37": ('t', 'd', 'M', None),
   3.166 -"4TA": ('n', 'r', None, None),
   3.167 -"CTH": ('t', 'p', 'M', None),
   3.168 -"GLU": ('E', 'p', None, None),
   3.169 -"RCY": ('c', 'p', 'M', None),
   3.170 -"MDH": ('x', 'p', None, None),
   3.171 -"MDO": ('x', 'p', None, None),
   3.172 -"8OG": ('g', 'd', 'M', None),
   3.173 -"HCS": ('x', 'p', None, None),
   3.174 -"UD5": ('u', 'r', 'M', None),
   3.175 -"O2C": ('n', 'd', None, None),
   3.176 -"5FU": ('u', 'r', 'M', None),
   3.177 -"SDG": ('g', 'd', 'M', None),
   3.178 -"MDR": ('n', 'd', None, None),
   3.179 -"BSE": ('s', 'p', 'M', None),
   3.180 -"4IN": ('x', 'p', None, None),
   3.181 -"FCL": ('f', 'p', 'M', None),
   3.182 -"1SC": ('c', 'r', 'M', None),
   3.183 -"R7A": ('c', 'p', 'M', None),
   3.184 -"6IA": ('a', 'r', 'M', None),
   3.185 -"XTS": ('g', 'r', 'M', None),
   3.186 -"DFT": ('n', 'd', None, None),
   3.187 -"FMU": ('n', 'r', None, None),
   3.188 -"XTY": ('n', 'd', None, None),
   3.189 -"MVA": ('v', 'p', 'M', None),
   3.190 -"LTA": ('x', 'p', None, None),
   3.191 -"01W": ('x', 'p', None, None),
   3.192 -"DFI": ('x', 'p', None, None),
   3.193 -"YG": ('g', 'r', 'M', None),
   3.194 -"FMG": ('g', 'd', 'M', None),
   3.195 -"DFG": ('g', 'd', 'M', None),
   3.196 -"XTL": ('t', 'd', 'M', None),
   3.197 -"DFC": ('c', 'd', 'M', None),
   3.198 -"XTH": ('t', 'd', 'M', None),
   3.199 -"NTY": ('y', 'p', 'M', None),
   3.200 -"EDC": ('g', 'd', 'M', None),
   3.201 -"T49": ('t', 'd', 'M', None),
   3.202 -"EDA": ('a', 'd', 'M', None),
   3.203 -"O12": ('x', 'p', None, None),
   3.204 -"MMT": ('t', 'd', 'M', None),
   3.205 -"T41": ('n', 'r', None, None),
   3.206 -"LMS": ('n', 'r', None, None),
   3.207 -"0AP": ('c', 'd', 'M', None),
   3.208 -"ORQ": ('r', 'p', 'M', None),
   3.209 -"ORN": ('a', 'p', 'M', None),
   3.210 -"ALM": ('a', 'p', 'M', None),
   3.211 -"VB1": ('k', 'p', 'M', None),
   3.212 -"PUY": ('n', 'r', None, None),
   3.213 -"SBD": ('s', 'p', 'M', None),
   3.214 -"LPS": ('s', 'p', 'M', None),
   3.215 -"SC": ('c', 'd', 'M', None),
   3.216 -"ALT": ('a', 'p', 'M', None),
   3.217 -"GL3": ('g', 'p', 'M', None),
   3.218 -"LME": ('e', 'p', 'M', None),
   3.219 -"SBL": ('s', 'p', 'M', None),
   3.220 -"ALQ": ('x', 'p', None, None),
   3.221 -"CAB": ('a', 'p', 'M', None),
   3.222 -"GPL": ('k', 'p', 'M', None),
   3.223 -"CAY": ('c', 'p', 'M', None),
   3.224 -"DPR": ('p', 'p', 'M', None),
   3.225 -"DPQ": ('y', 'p', 'M', None),
   3.226 -"DPP": ('a', 'p', 'M', None),
   3.227 -"CAS": ('c', 'p', 'M', None),
   3.228 -"NC1": ('s', 'p', 'M', None),
   3.229 -"IYR": ('y', 'p', 'M', None),
   3.230 -"CAV": ('x', 'p', None, None),
   3.231 -"DPY": ('n', 'd', None, None),
   3.232 -"5AA": ('a', 'd', 'M', None),
   3.233 -"DPB": ('t', 'd', 'M', None),
   3.234 -"CAL": ('x', 'p', None, None),
   3.235 -"DPN": ('f', 'p', 'M', None),
   3.236 -"BAL": ('a', 'p', 'M', None),
   3.237 -"DPL": ('p', 'p', 'M', None),
   3.238 -"CAF": ('c', 'p', 'M', None),
   3.239 -"DPH": ('f', 'p', 'M', None),
   3.240 -"HIP": ('h', 'p', 'M', None),
   3.241 -"LG": ('g', 'r', 'M', None),
   3.242 -"LA2": ('k', 'p', 'M', None),
   3.243 -"KOR": ('m', 'p', 'M', None),
   3.244 -"LC": ('c', 'r', 'M', None),
   3.245 -"D4M": ('t', 'd', 'M', None),
   3.246 -"5MD": ('n', 'd', None, None),
   3.247 -"PGN": ('g', 'd', 'M', None),
   3.248 -"NHL": ('e', 'p', 'M', None),
   3.249 -"PGL": ('x', 'p', None, None),
   3.250 -"TA3": ('t', 'd', 'M', None),
   3.251 -"XCP": ('x', 'p', None, None),
   3.252 -"DYS": ('c', 'p', 'M', None),
   3.253 -"HIA": ('h', 'p', 'M', None),
   3.254 -"HIC": ('h', 'p', 'M', None),
   3.255 -"PGP": ('g', 'r', 'M', None),
   3.256 -"AB7": ('x', 'p', None, None),
   3.257 -"IIL": ('i', 'p', 'M', None),
   3.258 -"DYG": ('x', 'p', None, None),
   3.259 -"PGY": ('g', 'p', 'M', None),
   3.260 -"A": ('A', 'r', None, None),
   3.261 -"23F": ('f', 'p', 'M', None),
   3.262 -"143": ('c', 'p', 'M', None),
   3.263 -"HIQ": ('h', 'p', 'M', None),
   3.264 -"THR": ('T', 'p', None, None),
   3.265 -"PG7": ('g', 'd', 'M', None),
   3.266 -"ABR": ('a', 'd', 'M', None),
   3.267 -"HIS": ('H', 'p', None, None),
   3.268 -"ABT": ('n', 'd', None, None),
   3.269 -"PG1": ('s', 'p', 'M', None),
   3.270 -"2LU": ('l', 'p', 'M', None),
   3.271 -"NZH": ('h', 'p', 'M', None),
   3.272 -"P5P": ('a', 'r', 'M', None),
   3.273 -"STY": ('y', 'p', 'M', None),
   3.274 -"PG9": ('g', 'p', 'M', None),
   3.275 -"NCX": ('n', 'd', None, None),
   3.276 -"NCY": ('x', 'p', None, None),
   3.277 -"CFL": ('c', 'd', 'M', None),
   3.278 -"3MD": ('d', 'p', 'M', None),
   3.279 -"1AP": ('a', 'd', 'M', None),
   3.280 -"TFE": ('t', 'd', 'M', None),
   3.281 -"D3T": ('t', 'd', 'M', None),
   3.282 -"TFA": ('x', 'p', None, None),
   3.283 -"BHD": ('d', 'p', 'M', None),
   3.284 -"ONL": ('x', 'p', None, None),
   3.285 -"CFY": ('x', 'p', None, None),
   3.286 -"QLG": ('x', 'p', None, None),
   3.287 -"NCB": ('a', 'p', 'M', None),
   3.288 -"TFQ": ('f', 'p', 'M', None),
   3.289 -"XXY": ('x', 'p', None, None),
   3.290 -"23S": ('x', 'p', None, None),
   3.291 -"0AU": ('n', 'd', None, None),
   3.292 -"GHP": ('g', 'p', 'M', None),
   3.293 -"MIA": ('a', 'r', 'M', None),
   3.294 -"AP7": ('a', 'd', 'M', None),
   3.295 -"OAD": ('n', 'r', None, None),
   3.296 -"UDP": ('n', 'r', None, None),
   3.297 -"GHG": ('q', 'p', 'M', None),
   3.298 -"ZFB": ('x', 'p', None, None),
   3.299 -"C38": ('c', 'd', 'M', None),
   3.300 -"HAQ": ('x', 'p', None, None),
   3.301 -"C34": ('c', 'd', 'M', None),
   3.302 -"C37": ('c', 'd', 'M', None),
   3.303 -"ONE": ('u', 'r', 'M', None),
   3.304 -"C31": ('c', 'r', 'M', None),
   3.305 -"HAR": ('r', 'p', 'M', None),
   3.306 -"TY2": ('y', 'p', 'M', None),
   3.307 -"TY3": ('y', 'p', 'M', None),
   3.308 -"RT": ('n', 'r', None, None),
   3.309 -"4PE": ('c', 'd', 'M', None),
   3.310 -"4PD": ('c', 'd', 'M', None),
   3.311 -"4PC": ('c', 'd', 'M', None),
   3.312 -"SAC": ('s', 'p', 'M', None),
   3.313 -"PRO": ('P', 'p', None, None),
   3.314 -"PRN": ('a', 'd', 'M', None),
   3.315 -"6CL": ('k', 'p', 'M', None),
   3.316 -"4PH": ('f', 'p', 'M', None),
   3.317 -"FGL": ('g', 'p', 'M', None),
   3.318 -"6CT": ('t', 'd', 'M', None),
   3.319 -"IAS": ('d', 'p', 'M', None),
   3.320 -"PRS": ('p', 'p', 'M', None),
   3.321 -"UR3": ('u', 'r', 'M', None),
   3.322 -"SAR": ('g', 'p', 'M', None),
   3.323 -"TCY": ('a', 'd', 'M', None),
   3.324 -"4F3": ('x', 'p', None, None),
   3.325 -"TPG": ('g', 'r', 'M', None),
   3.326 -"LAL": ('a', 'p', 'M', None),
   3.327 -"TPC": ('c', 'd', 'M', None),
   3.328 -"PPU": ('a', 'r', 'M', None),
   3.329 -"CHF": ('x', 'p', None, None),
   3.330 -"BFD": ('d', 'p', 'M', None),
   3.331 -"TPO": ('t', 'p', 'M', None),
   3.332 -"H5M": ('p', 'p', 'M', None),
   3.333 -"AYG": ('x', 'p', None, None),
   3.334 -"TPL": ('w', 'p', 'M', None),
   3.335 -"AYA": ('a', 'p', 'M', None),
   3.336 -"6MC": ('a', 'r', 'M', None),
   3.337 -"6MA": ('a', 'r', 'M', None),
   3.338 -"GSR": ('g', 'd', 'M', None),
   3.339 -"EYS": ('x', 'p', None, None),
   3.340 -"LPL": ('x', 'p', None, None),
   3.341 -"6MZ": ('n', 'r', None, None),
   3.342 -"PR3": ('c', 'p', 'M', None),
   3.343 -"2ST": ('t', 'd', 'M', None),
   3.344 -"0NC": ('a', 'p', 'M', None),
   3.345 -"MRG": ('g', 'd', 'M', None),
   3.346 -"KYN": ('a', 'p', 'M', None),
   3.347 -"G2S": ('g', 'd', 'M', None),
   3.348 -"PYX": ('c', 'p', 'M', None),
   3.349 -"PYY": ('n', 'r', None, None),
   3.350 -"TYN": ('y', 'p', 'M', None),
   3.351 -"TYO": ('y', 'p', 'M', None),
   3.352 -"KAG": ('g', 'r', 'M', None),
   3.353 -"IPG": ('g', 'p', 'M', None),
   3.354 -"GH3": ('g', 'r', 'M', None),
   3.355 -"APP": ('x', 'p', None, None),
   3.356 -"IPN": ('n', 'd', None, None),
   3.357 -"TYB": ('y', 'p', 'M', None),
   3.358 -"FAG": ('n', 'd', None, None),
   3.359 -"DBY": ('y', 'p', 'M', None),
   3.360 -"APK": ('k', 'p', 'M', None),
   3.361 -"APH": ('a', 'p', 'M', None),
   3.362 -"API": ('k', 'p', 'M', None),
   3.363 -"TYX": ('x', 'p', None, None),
   3.364 -"APO": ('x', 'p', None, None),
   3.365 -"APM": ('x', 'p', None, None),
   3.366 -"TYT": ('y', 'p', 'M', None),
   3.367 -"TYU": ('n', 'r', None, None),
   3.368 -"DBS": ('s', 'p', 'M', None),
   3.369 -"DBU": ('a', 'p', 'M', None),
   3.370 -"TYQ": ('y', 'p', 'M', None),
   3.371 -"TYR": ('Y', 'p', None, None),
   3.372 -"APE": ('x', 'p', None, None),
   3.373 -"BT5": ('n', 'r', None, None),
   3.374 -"KST": ('k', 'p', 'M', None),
   3.375 -"2AD": ('x', 'p', None, None),
   3.376 -"ARG": ('R', 'p', None, None),
   3.377 -"HDP": ('n', 'd', None, None),
   3.378 -"CZ2": ('c', 'p', 'M', None),
   3.379 -"2AO": ('x', 'p', None, None),
   3.380 -"G33": ('g', 'd', 'M', None),
   3.381 -"AHO": ('a', 'p', 'M', None),
   3.382 -"P2U": ('n', 'd', None, None),
   3.383 -"P2T": ('t', 'd', 'M', None),
   3.384 -"MG1": ('g', 'd', 'M', None),
   3.385 -"G32": ('g', 'd', 'M', None),
   3.386 -"2AR": ('a', 'd', 'M', None),
   3.387 -"2AS": ('x', 'p', None, None),
   3.388 -"G46": ('g', 'r', 'M', None),
   3.389 -"G47": ('g', 'd', 'M', None),
   3.390 -"G48": ('g', 'r', 'M', None),
   3.391 -"G49": ('g', 'd', 'M', None),
   3.392 -"OCS": ('c', 'p', 'M', None),
   3.393 -"P2Y": ('p', 'p', 'M', None),
   3.394 -"PHA": ('f', 'p', 'M', None),
   3.395 -"NLQ": ('q', 'p', 'M', None),
   3.396 -"NLP": ('l', 'p', 'M', None),
   3.397 -"MLL": ('l', 'p', 'M', None),
   3.398 -"CEA": ('c', 'p', 'M', None),
   3.399 -"HV5": ('a', 'p', 'M', None),
   3.400 -"HMR": ('r', 'p', 'M', None),
   3.401 -"HRG": ('r', 'p', 'M', None),
   3.402 -"CSP": ('c', 'p', 'M', None),
   3.403 -"FA2": ('a', 'd', 'M', None),
   3.404 -"BMP": ('n', 'r', None, None),
   3.405 -"NLE": ('l', 'p', 'M', None),
   3.406 -"RTP": ('n', 'r', None, None),
   3.407 -"BMT": ('t', 'p', 'M', None),
   3.408 -"G38": ('g', 'd', 'M', None),
   3.409 -"CS4": ('c', 'p', 'M', None),
   3.410 -"2FI": ('n', 'd', None, None),
   3.411 -"TTS": ('y', 'p', 'M', None),
   3.412 -"CS1": ('c', 'p', 'M', None),
   3.413 -"CS0": ('c', 'p', 'M', None),
   3.414 -"NLO": ('l', 'p', 'M', None),
   3.415 -"NLN": ('l', 'p', 'M', None),
   3.416 -"SHP": ('g', 'p', 'M', None),
   3.417 -"CSL": ('c', 'd', 'M', None),
   3.418 -"SHR": ('k', 'p', 'M', None),
   3.419 -"OXX": ('d', 'p', 'M', None),
   3.420 -"B3Y": ('y', 'p', 'M', None),
   3.421 -"DLE": ('l', 'p', 'M', None),
   3.422 -"PYA": ('a', 'p', 'M', None),
   3.423 -"CSE": ('c', 'p', 'M', None),
   3.424 -"5FA": ('a', 'r', 'M', None),
   3.425 -"GCK": ('c', 'd', 'M', None),
   3.426 -"5FC": ('c', 'd', 'M', None),
   3.427 -"GGL": ('e', 'p', 'M', None),
   3.428 -"B3Q": ('x', 'p', None, None),
   3.429 -"B3S": ('s', 'p', 'M', None),
   3.430 -"B3L": ('x', 'p', None, None),
   3.431 -"A47": ('a', 'd', 'M', None),
   3.432 -"A44": ('a', 'r', 'M', None),
   3.433 -"5MU": ('u', 'r', 'M', None),
   3.434 -"CSY": ('x', 'p', None, None),
   3.435 -"A43": ('a', 'd', 'M', None),
   3.436 -"A40": ('a', 'd', 'M', None),
   3.437 -"B3K": ('k', 'p', 'M', None),
   3.438 -"B3D": ('d', 'p', 'M', None),
   3.439 -"B3E": ('e', 'p', 'M', None),
   3.440 -"DLY": ('k', 'p', 'M', None),
   3.441 -"MTU": ('a', 'r', 'M', None),
   3.442 -"B3A": ('a', 'p', 'M', None),
   3.443 -"CSS": ('c', 'p', 'M', None),
   3.444 -"CSR": ('c', 'p', 'M', None),
   3.445 -"CZZ": ('c', 'p', 'M', None),
   3.446 -"N10": ('s', 'p', 'M', None),
   3.447 -"MGG": ('r', 'p', 'M', None),
   3.448 -"A35": ('a', 'd', 'M', None),
   3.449 -"AFG": ('g', 'd', 'M', None),
   3.450 -"BTR": ('w', 'p', 'M', None),
   3.451 -"SSU": ('u', 'r', 'M', None),
   3.452 -"70U": ('u', 'r', 'M', None),
   3.453 -"A34": ('a', 'd', 'M', None),
   3.454 -"MGN": ('q', 'p', 'M', None),
   3.455 -"XCL": ('c', 'd', 'M', None),
   3.456 -"G4P": ('n', 'r', None, None),
   3.457 -"XCS": ('n', 'd', None, None),
   3.458 -"MGQ": ('a', 'r', 'M', None),
   3.459 -"ABS": ('a', 'd', 'M', None),
   3.460 -"BTA": ('l', 'p', 'M', None),
   3.461 -"MGV": ('g', 'r', 'M', None),
   3.462 -"XCT": ('c', 'd', 'M', None),
   3.463 -"TTQ": ('w', 'p', 'M', None),
   3.464 -"MGY": ('g', 'p', 'M', None),
   3.465 -"XCY": ('c', 'd', 'M', None),
   3.466 -"NNH": ('r', 'p', 'M', None),
   3.467 -"TBG": ('g', 'p', 'M', None),
   3.468 -"ATD": ('t', 'd', 'M', None),
   3.469 -"U8U": ('u', 'r', 'M', None),
   3.470 -"SNC": ('c', 'p', 'M', None),
   3.471 -"ALG": ('r', 'p', 'M', None),
   3.472 -"M1G": ('g', 'd', 'M', None),
   3.473 -"ATL": ('t', 'd', 'M', None),
   3.474 -"ATM": ('t', 'd', 'M', None),
   3.475 -"HPC": ('f', 'p', 'M', None),
   3.476 -"TP1": ('t', 'd', 'M', None),
   3.477 -"XUG": ('g', 'd', 'M', None),
   3.478 -"ISO": ('x', 'p', None, None),
   3.479 -"3MM": ('x', 'p', None, None),
   3.480 -"175": ('x', 'p', None, None),
   3.481 -"2TY": ('y', 'p', 'M', None),
   3.482 -"CSB": ('c', 'p', 'M', None),
   3.483 -"IT1": ('k', 'p', 'M', None),
   3.484 -"UN1": ('x', 'p', None, None),
   3.485 -"UN2": ('x', 'p', None, None),
   3.486 -"ASP": ('D', 'p', None, None),
   3.487 -"DTH": ('t', 'p', 'M', None),
   3.488 -"SER": ('S', 'p', None, None),
   3.489 -"ASU": ('n', 'd', None, None),
   3.490 -"SEP": ('s', 'p', 'M', None),
   3.491 -"LNT": ('x', 'p', None, None),
   3.492 -"A39": ('a', 'r', 'M', None),
   3.493 -"DGI": ('g', 'd', 'M', None),
   3.494 -"DGN": ('q', 'p', 'M', None),
   3.495 -"DGL": ('e', 'p', 'M', None),
   3.496 -"SEG": ('a', 'p', 'M', None),
   3.497 -"ASB": ('d', 'p', 'M', None),
   3.498 -"ASA": ('d', 'p', 'M', None),
   3.499 -"SEC": ('a', 'p', 'M', None),
   3.500 -"SEB": ('s', 'p', 'M', None),
   3.501 -"0A9": ('f', 'p', 'M', None),
   3.502 -"ASK": ('d', 'p', 'M', None),
   3.503 -"PVH": ('h', 'p', 'M', None),
   3.504 -"ASI": ('d', 'p', 'M', None),
   3.505 -"SEL": ('s', 'p', 'M', None),
   3.506 -"ASN": ('N', 'p', None, None),
   3.507 -"ASM": ('x', 'p', None, None),
   3.508 -"ASL": ('d', 'p', 'M', None),
   3.509 -"AS2": ('d', 'p', 'M', None),
   3.510 -"IMC": ('c', 'd', 'M', None),
   3.511 -"A3P": ('a', 'r', 'M', None),
   3.512 -"CLH": ('k', 'p', 'M', None),
   3.513 -"3DR": ('n', 'd', None, None),
   3.514 -"FRD": ('x', 'p', None, None),
   3.515 -"CLD": ('a', 'p', 'M', None),
   3.516 -"CLE": ('l', 'p', 'M', None),
   3.517 -"PDU": ('n', 'd', None, None),
   3.518 -"CLG": ('k', 'p', 'M', None),
   3.519 -"BBC": ('c', 'p', 'M', None),
   3.520 -"TFO": ('a', 'd', 'M', None),
   3.521 -"CLB": ('a', 'p', 'M', None),
   3.522 -"TLC": ('t', 'd', 'M', None),
   3.523 -"2DM": ('n', 'd', None, None),
   3.524 -"A3A": ('a', 'd', 'M', None),
   3.525 -"PDL": ('x', 'p', None, None),
   3.526 -"3DA": ('a', 'd', 'M', None),
   3.527 -"GT9": ('c', 'p', 'M', None),
   3.528 -"CLV": ('x', 'p', None, None),
   3.529 -"PDD": ('x', 'p', None, None),
   3.530 -"1TQ": ('w', 'p', 'M', None),
   3.531 -"SEM": ('x', 'p', None, None),
   3.532 -"TGP": ('g', 'd', 'M', None),
   3.533 -"OMC": ('c', 'r', 'M', None),
   3.534 -"AEA": ('x', 'p', None, None),
   3.535 -"OMG": ('g', 'r', 'M', None),
   3.536 -"H2U": ('u', 'r', 'M', None),
   3.537 -"A38": ('a', 'd', 'M', None),
   3.538 -"DTY": ('y', 'p', 'M', None),
   3.539 -"PVL": ('x', 'p', None, None),
   3.540 -"ABA": ('a', 'p', 'M', None),
   3.541 -"OMU": ('u', 'r', 'M', None),
   3.542 -"OMT": ('m', 'p', 'M', None),
   3.543 -"CRF": ('x', 'p', None, None),
   3.544 -"FPA": ('f', 'p', 'M', None),
   3.545 -"VMS": ('x', 'p', None, None),
   3.546 -"T11": ('f', 'p', 'M', None),
   3.547 -"2MR": ('r', 'p', 'M', None),
   3.548 -"DNE": ('l', 'p', 'M', None),
   3.549 -"5IC": ('c', 'r', 'M', None),
   3.550 -"RC7": ('x', 'p', None, None),
   3.551 -"BPE": ('c', 'p', 'M', None),
   3.552 -"2MT": ('p', 'p', 'M', None),
   3.553 -"2MU": ('u', 'r', 'M', None),
   3.554 -"DG": ('G', 'd', None, None),
   3.555 -"UCL": ('n', 'd', None, None),
   3.556 -"DA": ('A', 'd', None, None),
   3.557 -"AEI": ('d', 'p', 'M', None),
   3.558 -"N5M": ('c', 'r', 'M', None),
   3.559 -"5IU": ('n', 'd', None, None),
   3.560 -"2MG": ('g', 'r', 'M', None),
   3.561 -"AAR": ('r', 'p', 'M', None),
   3.562 -"DT": ('T', 'd', None, None),
   3.563 -"DU": ('n', 'd', None, None),
   3.564 -"HV8": ('x', 'p', None, None),
   3.565 -"MC1": ('s', 'p', 'M', None),
   3.566 -"2FM": ('m', 'p', 'M', None),
   3.567 -"CYS": ('C', 'p', None, None),
   3.568 -"CYR": ('c', 'p', 'M', None),
   3.569 -"BIL": ('x', 'p', None, None),
   3.570 -"PCA": ('e', 'p', 'M', None),
   3.571 -"GYC": ('x', 'p', None, None),
   3.572 -"SMC": ('c', 'p', 'M', None),
   3.573 -"0AC": ('g', 'p', 'M', None),
   3.574 -"BIF": ('f', 'p', 'M', None),
   3.575 -"2BU": ('a', 'd', 'M', None),
   3.576 -"2BT": ('t', 'd', 'M', None),
   3.577 -"5PC": ('c', 'd', 'M', None),
   3.578 -"S2M": ('t', 'd', 'M', None),
   3.579 -"5MC": ('c', 'r', 'M', None),
   3.580 -"S2P": ('a', 'p', 'M', None),
   3.581 -"C1X": ('k', 'p', 'M', None),
   3.582 -"CYG": ('c', 'p', 'M', None),
   3.583 -"CYF": ('c', 'p', 'M', None),
   3.584 -"5PY": ('t', 'd', 'M', None),
   3.585 -"CYJ": ('x', 'p', None, None),
   3.586 -"BIU": ('i', 'p', 'M', None),
   3.587 -"ENP": ('n', 'r', None, None),
   3.588 -"AS9": ('x', 'p', None, None),
   3.589 -"CYM": ('c', 'p', 'M', None),
   3.590 -"CY3": ('c', 'p', 'M', None),
   3.591 -"DHN": ('v', 'p', 'M', None),
   3.592 -"CY1": ('c', 'p', 'M', None),
   3.593 -"CY0": ('c', 'p', 'M', None),
   3.594 -"SLZ": ('k', 'p', 'M', None),
   3.595 -"CY4": ('c', 'p', 'M', None),
   3.596 -"T5S": ('t', 'd', 'M', None),
   3.597 -"SLR": ('p', 'p', 'M', None),
   3.598 -"IML": ('i', 'p', 'M', None),
   3.599 -"32S": ('x', 'p', None, None),
   3.600 -"CSX": ('c', 'p', 'M', None),
   3.601 -"MFC": ('x', 'p', None, None),
   3.602 -"32T": ('x', 'p', None, None),
   3.603 -"TFT": ('t', 'd', 'M', None),
   3.604 -"MLZ": ('k', 'p', 'M', None),
   3.605 -"DHU": ('u', 'r', 'M', None),
   3.606 -"ASQ": ('d', 'p', 'M', None),
   3.607 -"SLA": ('p', 'p', 'M', None),
   3.608 -"DHP": ('x', 'p', None, None),
   3.609 -"E1X": ('a', 'd', 'M', None),
   3.610 -"XGL": ('g', 'd', 'M', None),
   3.611 -"MCL": ('k', 'p', 'M', None),
   3.612 -"TLB": ('n', 'r', None, None),
   3.613 -"MCG": ('x', 'p', None, None),
   3.614 -"OTB": ('x', 'p', None, None),
   3.615 -"FT6": ('w', 'p', 'M', None),
   3.616 -"XGA": ('n', 'd', None, None),
   3.617 -"AA4": ('a', 'p', 'M', None),
   3.618 -"R": ('a', 'd', 'M', None),
   3.619 -"GFL": ('g', 'd', 'M', None),
   3.620 -"10C": ('c', 'r', 'M', None),
   3.621 -"NRQ": ('x', 'p', None, None),
   3.622 -"XGU": ('g', 'd', 'M', None),
   3.623 -"MCS": ('c', 'p', 'M', None),
   3.624 -"D3": ('n', 'd', None, None),
   3.625 -"PAQ": ('y', 'p', 'M', None),
   3.626 -"2PR": ('g', 'd', 'M', None),
   3.627 -"PAT": ('w', 'p', 'M', None),
   3.628 -"PAU": ('a', 'p', 'M', None),
   3.629 -"CH": ('c', 'r', 'M', None),
   3.630 -"GCM": ('x', 'p', None, None),
   3.631 -"CSZ": ('c', 'p', 'M', None),
   3.632 -"GMS": ('g', 'd', 'M', None),
   3.633 -"HBN": ('h', 'p', 'M', None),
   3.634 -"TQQ": ('w', 'p', 'M', None),
   3.635 -"2PI": ('x', 'p', None, None),
   3.636 -"GSU": ('e', 'p', 'M', None),
   3.637 -"CSI": ('g', 'p', 'M', None),
   3.638 -"200": ('f', 'p', 'M', None),
   3.639 -"EXY": ('l', 'p', 'M', None),
   3.640 -"12A": ('a', 'r', 'M', None),
   3.641 -"DCL": ('x', 'p', None, None),
   3.642 -"UNK": ('x', 'p', None, None),
   3.643 -"DCI": ('x', 'p', None, None),
   3.644 -"DCG": ('g', 'd', 'M', None),
   3.645 -"4HT": ('w', 'p', 'M', None),
   3.646 -"CSU": ('c', 'p', 'M', None),
   3.647 -"HSO": ('h', 'p', 'M', None),
   3.648 -"HSL": ('s', 'p', 'M', None),
   3.649 -"TLN": ('n', 'd', None, None),
   3.650 -"TZB": ('x', 'p', None, None),
   3.651 -"BTC": ('c', 'p', 'M', None),
   3.652 -"DCY": ('c', 'p', 'M', None),
   3.653 -"DCT": ('c', 'd', 'M', None),
   3.654 -"4BF": ('y', 'p', 'M', None),
   3.655 -"C2L": ('c', 'r', 'M', None),
   3.656 -"RIA": ('a', 'r', 'M', None),
   3.657 -"IAM": ('a', 'p', 'M', None),
   3.658 -"B1F": ('f', 'p', 'M', None),
   3.659 -"GPN": ('g', 'd', 'M', None),
   3.660 -"C25": ('c', 'r', 'M', None),
   3.661 -"N2C": ('x', 'p', None, None),
   3.662 -"HAC": ('a', 'p', 'M', None),
   3.663 -"MEG": ('e', 'p', 'M', None),
   3.664 -"THO": ('x', 'p', None, None),
   3.665 -"BNO": ('x', 'p', None, None),
   3.666 -"125": ('u', 'r', 'M', None),
   3.667 -"126": ('u', 'r', 'M', None),
   3.668 -"127": ('u', 'r', 'M', None),
   3.669 -"128": ('n', 'r', None, None),
   3.670 -"X": ('g', 'd', 'M', None),
   3.671 -"HS8": ('h', 'p', 'M', None),
   3.672 -"NAM": ('a', 'p', 'M', None),
   3.673 -"THC": ('t', 'p', 'M', None),
   3.674 -"HRP": ('w', 'p', 'M', None),
   3.675 -"MNV": ('v', 'p', 'M', None),
   3.676 -"PHI": ('f', 'p', 'M', None),
   3.677 -"PHM": ('f', 'p', 'M', None),
   3.678 -"PHL": ('f', 'p', 'M', None),
   3.679 -"CSW": ('c', 'p', 'M', None),
   3.680 -"ZAD": ('a', 'r', 'M', None),
   3.681 -"OIC": ('x', 'p', None, None),
   3.682 -"PHE": ('F', 'p', None, None),
   3.683 -"PHD": ('d', 'p', 'M', None),
   3.684 -"SYS": ('c', 'p', 'M', None),
   3.685 -"BG1": ('s', 'p', 'M', None),
   3.686 -"NIY": ('y', 'p', 'M', None),
   3.687 -"MTR": ('t', 'd', 'M', None),
   3.688 -"OAS": ('s', 'p', 'M', None),
   3.689 -"BRU": ('n', 'd', None, None),
   3.690 -"NIT": ('x', 'p', None, None),
   3.691 -"OIP": ('n', 'd', None, None),
   3.692 -"0A0": ('d', 'p', 'M', None),
   3.693 -"0A1": ('y', 'p', 'M', None),
   3.694 -"0A2": ('k', 'p', 'M', None),
   3.695 -"CRQ": ('x', 'p', None, None),
   3.696 -"CRW": ('x', 'p', None, None),
   3.697 -"CRU": ('e', 'p', 'M', None),
   3.698 -"DMH": ('n', 'p', 'M', None),
   3.699 -"PRR": ('a', 'p', 'M', None),
   3.700 -"CRX": ('x', 'p', None, None),
   3.701 -"DMK": ('d', 'p', 'M', None),
   3.702 -"MHL": ('l', 'p', 'M', None),
   3.703 -"LAA": ('d', 'p', 'M', None),
   3.704 -"0ZM": ('x', 'p', None, None),
   3.705 -"DMT": ('x', 'p', None, None),
   3.706 -"CRG": ('x', 'p', None, None),
   3.707 -"0ZJ": ('x', 'p', None, None),
   3.708 -"0ZE": ('x', 'p', None, None),
   3.709 -"CRK": ('x', 'p', None, None),
   3.710 -"TPH": ('x', 'p', None, None),
   3.711 -"1PI": ('x', 'p', None, None),
   3.712 -"CRO": ('x', 'p', None, None),
   3.713 -"0ZC": ('x', 'p', None, None),
   3.714 -"MIS": ('s', 'p', 'M', None),
   3.715 -"C36": ('c', 'd', 'M', None),
   3.716 -"NFA": ('f', 'p', 'M', None),
   3.717 -"PU": ('a', 'r', 'M', None),
   3.718 -"G7M": ('g', 'r', 'M', None),
   3.719 -"S6G": ('g', 'd', 'M', None),
   3.720 -"LDH": ('k', 'p', 'M', None),
   3.721 -"TCP": ('t', 'd', 'M', None),
   3.722 -"TCQ": ('x', 'p', None, None),
   3.723 -"8AN": ('a', 'r', 'M', None),
   3.724 -"BUC": ('c', 'p', 'M', None),
   3.725 -"C32": ('c', 'd', 'M', None),
   3.726 -"7MG": ('g', 'r', 'M', None),
   3.727 -"BUG": ('l', 'p', 'M', None),
   3.728 -"LEU": ('L', 'p', None, None),
   3.729 -"MHO": ('m', 'p', 'M', None),
   3.730 -"C": ('C', 'r', None, None),
   3.731 -"DDX": ('n', 'd', None, None),
   3.732 -"OPR": ('r', 'p', 'M', None),
   3.733 -"FOX": ('g', 'd', 'M', None),
   3.734 -"MTY": ('y', 'p', 'M', None),
   3.735 -"TC1": ('c', 'd', 'M', None),
   3.736 -"FOE": ('c', 'p', 'M', None),
   3.737 -"MF3": ('x', 'p', None, None),
   3.738 -"DDN": ('n', 'd', None, None),
   3.739 -"FME": ('m', 'p', 'M', None),
   3.740 -"EFC": ('c', 'p', 'M', None),
   3.741 -"DDG": ('g', 'd', 'M', None),
   3.742 -"MHS": ('h', 'p', 'M', None),
   3.743 -"DDE": ('h', 'p', 'M', None),
   3.744 -"CR2": ('x', 'p', None, None),
   3.745 -"DSN": ('s', 'p', 'M', None),
   3.746 -"PSH": ('h', 'p', 'M', None),
   3.747 -"6MI": ('n', 'd', None, None),
   3.748 -"CR7": ('x', 'p', None, None),
   3.749 -"0AV": ('a', 'd', 'M', None),
   3.750 -"CR5": ('g', 'p', 'M', None),
   3.751 -"MOR": ('x', 'p', None, None),
   3.752 -"CR8": ('x', 'p', None, None),
   3.753 -"PSA": ('f', 'p', 'M', None),
   3.754 -"ANI": ('x', 'p', None, None),
   3.755 -"NP3": ('n', 'd', None, None),
   3.756 -"DM0": ('k', 'p', 'M', None),
   3.757 -"0AA": ('v', 'p', 'M', None),
   3.758 -"0AB": ('v', 'p', 'M', None),
   3.759 -"FOG": ('f', 'p', 'M', None),
   3.760 -"0AD": ('g', 'd', 'M', None),
   3.761 -"LCC": ('n', 'd', None, None),
   3.762 -"NVA": ('v', 'p', 'M', None),
   3.763 -"0AG": ('l', 'p', 'M', None),
   3.764 -"0AH": ('s', 'p', 'M', None),
   3.765 -"1MA": ('a', 'r', 'M', None),
   3.766 -"0AK": ('d', 'p', 'M', None),
   3.767 -"0AM": ('a', 'd', 'M', None),
   3.768 -"PST": ('t', 'd', 'M', None),
   3.769 -"PSU": ('u', 'r', 'M', None),
   3.770 -"1ZX": ('x', 'p', None, None),
   3.771 -"ILG": ('e', 'p', 'M', None),
   3.772 -"ILE": ('I', 'p', None, None),
   3.773 -"5CM": ('c', 'd', 'M', None),
   3.774 -"D11": ('t', 'p', 'M', None),
   3.775 -"UMS": ('n', 'd', None, None),
   3.776 -"X9Q": ('x', 'p', None, None),
   3.777 -"FGP": ('s', 'p', 'M', None),
   3.778 -"DVA": ('v', 'p', 'M', None),
   3.779 -"5ZA": ('x', 'p', None, None),
   3.780 -"BCC": ('x', 'p', None, None),
   3.781 -"TMB": ('t', 'p', 'M', None),
   3.782 -"3AH": ('h', 'p', 'M', None),
   3.783 -"BCX": ('c', 'p', 'M', None),
   3.784 -"KPI": ('k', 'p', 'M', None),
   3.785 -"A2L": ('a', 'r', 'M', None),
   3.786 -"A2M": ('a', 'r', 'M', None),
   3.787 -"ILX": ('i', 'p', 'M', None),
   3.788 -"PEC": ('c', 'p', 'M', None),
   3.789 -"IGL": ('g', 'p', 'M', None),
   3.790 -"OLE": ('x', 'p', None, None),
   3.791 -"LPG": ('g', 'p', 'M', None),
   3.792 -"DI": ('n', 'd', None, None),
   3.793 -"ALO": ('t', 'p', 'M', None),
   3.794 -"OLT": ('t', 'p', 'M', None),
   3.795 -"TBM": ('t', 'p', 'M', None),
   3.796 -"2AU": ('u', 'r', 'M', None),
   3.797 -"LED": ('l', 'p', 'M', None),
   3.798 -"OLZ": ('s', 'p', 'M', None),
   3.799 -"C6C": ('c', 'p', 'M', None),
   3.800 -"IEY": ('x', 'p', None, None),
   3.801 -"HEU": ('n', 'd', None, None),
   3.802 -"SRA": ('a', 'r', 'M', None),
   3.803 -"ZTH": ('n', 'r', None, None),
   3.804 -"NDN": ('n', 'd', None, None),
   3.805 -"C66": ('x', 'p', None, None),
   3.806 -"6CW": ('w', 'p', 'M', None),
   3.807 -"CR0": ('x', 'p', None, None),
   3.808 -"DBM": ('n', 'r', None, None),
   3.809 -"N": ('n', 'r', None, None),
   3.810 -"N6G": ('g', 'r', 'M', None),
   3.811 -"ALC": ('a', 'p', 'M', None),
   3.812 -"S1H": ('s', 'p', 'M', None),
   3.813 -"FZN": ('k', 'p', 'M', None),
   3.814 -"0SP": ('a', 'd', 'M', None),
   3.815 -"OBS": ('x', 'p', None, None),
   3.816 -"2NT": ('t', 'd', 'M', None),
   3.817 -"K1R": ('c', 'p', 'M', None),
   3.818 -"NMT": ('t', 'd', 'M', None),
   3.819 -"A23": ('a', 'r', 'M', None),
   3.820 -"D1P": ('n', 'd', None, None),
   3.821 -"NMS": ('t', 'd', 'M', None),
   3.822 -"CIR": ('r', 'p', 'M', None),
   3.823 -"CH6": ('x', 'p', None, None),
   3.824 -"BJH": ('x', 'p', None, None),
   3.825 -"YYG": ('g', 'r', 'M', None),
   3.826 -"2MA": ('a', 'r', 'M', None),
   3.827 -"NMC": ('g', 'p', 'M', None),
   3.828 -"0AY": ('x', 'p', None, None),
   3.829 -"UFP": ('n', 'd', None, None),
   3.830 -"SET": ('s', 'p', 'M', None),
   3.831 -"UFR": ('n', 'd', None, None),
   3.832 -"NMM": ('r', 'p', 'M', None),
   3.833 -"UFT": ('n', 'd', None, None),
   3.834 -"0AZ": ('p', 'p', 'M', None),
   3.835 -"5IT": ('t', 'd', 'M', None),
   3.836 -"DIL": ('i', 'p', 'M', None),
   3.837 -"FHL": ('x', 'p', None, None),
   3.838 -"0Z6": ('x', 'p', None, None),
   3.839 -"AR4": ('e', 'p', 'M', None),
   3.840 -"EIT": ('t', 'd', 'M', None),
   3.841 -"SD2": ('x', 'p', None, None),
   3.842 -"CH7": ('x', 'p', None, None),
   3.843 -"N5I": ('n', 'd', None, None),
   3.844 -"5AT": ('t', 'd', 'M', None),
   3.845 -"LOL": ('x', 'p', None, None),
   3.846 -"HYP": ('p', 'p', 'M', None),
   3.847 -"IYT": ('t', 'p', 'M', None),
   3.848 -"LOV": ('x', 'p', None, None),
   3.849 -"LCX": ('k', 'p', 'M', None),
   3.850 -"GND": ('x', 'p', None, None),
   3.851 -"GNE": ('n', 'd', None, None),
   3.852 -"FHU": ('u', 'r', 'M', None),
   3.853 -"C12": ('x', 'p', None, None),
   3.854 -"DIV": ('v', 'p', 'M', None),
   3.855 -"T6A": ('a', 'r', 'M', None),
   3.856 -"DIR": ('r', 'p', 'M', None),
   3.857 -"AIB": ('a', 'p', 'M', None),
   3.858 -"SOC": ('c', 'p', 'M', None),
   3.859 -"NSK": ('x', 'p', None, None),
   3.860 -"LCG": ('g', 'd', 'M', None),
   3.861 -"MT2": ('m', 'p', 'M', None),
   3.862 -"GAU": ('e', 'p', 'M', None),
   3.863 -"PPH": ('l', 'p', 'M', None),
   3.864 -"PPN": ('f', 'p', 'M', None),
   3.865 -"1MG": ('g', 'r', 'M', None),
   3.866 -"GAO": ('g', 'r', 'M', None),
   3.867 -"FAI": ('n', 'r', None, None),
   3.868 -"PPW": ('g', 'd', 'M', None),
   3.869 -"2ML": ('l', 'p', 'M', None),
   3.870 -"MBZ": ('n', 'd', None, None),
   3.871 -"TPN": ('t', 'd', 'M', None),
   3.872 -"SOY": ('s', 'p', 'M', None),
   3.873 -"CBV": ('c', 'r', 'M', None),
   3.874 -"MBQ": ('y', 'p', 'M', None),
   3.875 -"G31": ('g', 'd', 'M', None),
   3.876 -"HTI": ('c', 'p', 'M', None),
   3.877 -"XX1": ('k', 'p', 'M', None),
   3.878 -"6OG": ('g', 'd', 'M', None),
   3.879 -"0AF": ('w', 'p', 'M', None),
   3.880 -"G36": ('g', 'd', 'M', None),
   3.881 -"I": ('n', 'r', None, None),
   3.882 -"YOF": ('y', 'p', 'M', None),
   3.883 -"HPQ": ('f', 'p', 'M', None),
   3.884 -"LCA": ('a', 'r', 'M', None),
   3.885 -"IU": ('u', 'r', 'M', None),
   3.886 -"CYQ": ('c', 'p', 'M', None),
   3.887 -"M0H": ('c', 'p', 'M', None),
   3.888 -"FA5": ('n', 'r', None, None),
   3.889 -"LVG": ('g', 'p', 'M', None),
   3.890 -"HPH": ('x', 'p', None, None),
   3.891 -"IRN": ('n', 'r', None, None),
   3.892 -"Y": ('a', 'd', 'M', None),
   3.893 -"IC": ('c', 'r', 'M', None),
   3.894 -"4DP": ('w', 'p', 'M', None),
   3.895 -"HPE": ('f', 'p', 'M', None),
   3.896 -"PR5": ('a', 'r', 'M', None),
   3.897 -"CDE": ('x', 'p', None, None),
   3.898 -"SDP": ('s', 'p', 'M', None),
   3.899 -"GN7": ('g', 'd', 'M', None),
   3.900 -"ARV": ('x', 'p', None, None),
   3.901 -"2AT": ('t', 'd', 'M', None),
   3.902 -"T66": ('x', 'p', None, None),
   3.903 -"LCK": ('k', 'p', 'M', None),
   3.904 -"EHP": ('f', 'p', 'M', None),
   3.905 -"CHP": ('g', 'p', 'M', None),
   3.906 -"HY3": ('p', 'p', 'M', None),
   3.907 -"THX": ('n', 'd', None, None),
   3.908 -"LCH": ('n', 'd', None, None),
   3.909 -"4MF": ('n', 'd', None, None),
   3.910 -"DTR": ('w', 'p', 'M', None),
   3.911 -"MOD": ('x', 'p', None, None),
   3.912 -"4MM": ('x', 'p', None, None),
   3.913 -"ARO": ('r', 'p', 'M', None),
   3.914 -"GSS": ('g', 'd', 'M', None),
   3.915 -"DRZ": ('n', 'd', None, None),
   3.916 -"CCY": ('x', 'p', None, None),
   3.917 -"TPQ": ('y', 'p', 'M', None),
   3.918 -"5CS": ('c', 'p', 'M', None),
   3.919 -"4OC": ('c', 'r', 'M', None),
   3.920 -"DRP": ('n', 'd', None, None),
   3.921 -"BOR": ('r', 'p', 'M', None),
   3.922 -"DRT": ('t', 'd', 'M', None),
   3.923 -"CCS": ('c', 'p', 'M', None),
   3.924 -"TS": ('n', 'd', None, None),
   3.925 -"MME": ('m', 'p', 'M', None),
   3.926 -"CCL": ('k', 'p', 'M', None),
   3.927 -"GSC": ('g', 'p', 'M', None),
   3.928 -"DRM": ('n', 'd', None, None),
   3.929 -"4FB": ('p', 'p', 'M', None),
   3.930 -"GYS": ('x', 'p', None, None),
   3.931 -"NAL": ('a', 'p', 'M', None),
   3.932 -"6MT": ('a', 'r', 'M', None),
   3.933 -"CCC": ('c', 'r', 'M', None),
   3.934 -"UMP": ('u', 'r', 'M', None),
   3.935 -"ML3": ('k', 'p', 'M', None),
   3.936 -"LPD": ('p', 'p', 'M', None),
   3.937 -"OHI": ('h', 'p', 'M', None),
   3.938 -"LYR": ('k', 'p', 'M', None),
   3.939 -"LYS": ('K', 'p', None, None),
   3.940 -"LYX": ('k', 'p', 'M', None),
   3.941 -"CYA": ('c', 'p', 'M', None),
   3.942 -"LYZ": ('k', 'p', 'M', None),
   3.943 -"ODP": ('n', 'r', None, None),
   3.944 -"C3Y": ('c', 'p', 'M', None),
   3.945 -"POM": ('p', 'p', 'M', None),
   3.946 -"LYM": ('k', 'p', 'M', None),
   3.947 -"LYN": ('k', 'p', 'M', None),
   3.948 -"GLQ": ('e', 'p', 'M', None),
   3.949 -"PIV": ('x', 'p', None, None),
   3.950 -"OHS": ('d', 'p', 'M', None),
   3.951 -"X9A": ('x', 'p', None, None),
   3.952 -"SMT": ('n', 'r', None, None),
   3.953 -"XAR": ('n', 'd', None, None),
   3.954 -"CYD": ('c', 'p', 'M', None),
   3.955 -"SVA": ('s', 'p', 'M', None),
   3.956 -"5HT": ('t', 'd', 'M', None),
   3.957 -"5HU": ('n', 'd', None, None),
   3.958 -"XAL": ('a', 'd', 'M', None),
   3.959 -"T": ('n', 'd', None, None),
   3.960 -"5HP": ('e', 'p', 'M', None),
   3.961 -"4SC": ('c', 'd', 'M', None),
   3.962 -"CQR": ('x', 'p', None, None),
   3.963 -"XAD": ('a', 'd', 'M', None),
   3.964 -"XAE": ('n', 'd', None, None),
   3.965 -"TYI": ('y', 'p', 'M', None),
   3.966 -"VLL": ('x', 'p', None, None),
   3.967 -"2BD": ('n', 'd', None, None),
   3.968 -"IIC": ('x', 'p', None, None),
   3.969 -"A66": ('x', 'p', None, None),
   3.970 -"2EG": ('g', 'd', 'M', None),
   3.971 -"2SA": ('n', 'r', None, None),
   3.972 -"2CO": ('c', 'p', 'M', None),
   3.973 -"TMD": ('t', 'p', 'M', None),
   3.974 -"CXM": ('m', 'p', 'M', None),
   3.975 -"ALA": ('A', 'p', None, None),
   3.976 -"GOM": ('g', 'r', 'M', None),
   3.977 -"2VA": ('v', 'p', 'M', None),
   3.978 -"5BU": ('u', 'r', 'M', None),
   3.979 -"IGU": ('g', 'd', 'M', None),
   3.980 -"MLE": ('l', 'p', 'M', None),
   3.981 -"T39": ('n', 'r', None, None),
   3.982 -"FLT": ('y', 'p', 'M', None),
   3.983 -"U37": ('u', 'r', 'M', None),
   3.984 -"U36": ('u', 'r', 'M', None),
   3.985 -"DHL": ('x', 'p', None, None),
   3.986 -"U33": ('n', 'd', None, None),
   3.987 -"TIH": ('a', 'p', 'M', None),
   3.988 -"SMF": ('f', 'p', 'M', None),
   3.989 -"FLA": ('a', 'p', 'M', None),
   3.990 -"HOB": ('n', 'd', None, None),
   3.991 -"FLE": ('l', 'p', 'M', None),
   3.992 -"VDL": ('x', 'p', None, None),
   3.993 -"A1P": ('n', 'd', None, None),
   3.994 -"0G6": ('x', 'p', None, None),
   3.995 -"LLY": ('k', 'p', 'M', None),
   3.996 -"DHI": ('h', 'p', 'M', None),
   3.997 -"RON": ('x', 'p', None, None),
   3.998 -"PTH": ('y', 'p', 'M', None),
   3.999 -"T32": ('t', 'd', 'M', None),
  3.1000 -"LLP": ('k', 'p', 'M', None),
  3.1001 -"MNU": ('u', 'r', 'M', None),
  3.1002 -"PTA": ('x', 'p', None, None),
  3.1003 -"TZO": ('x', 'p', None, None),
  3.1004 -"DBZ": ('a', 'p', 'M', None),
  3.1005 -"MNL": ('l', 'p', 'M', None),
  3.1006 -"APN": ('a', 'd', 'M', None),
  3.1007 -"OCY": ('c', 'p', 'M', None),
  3.1008 -"BCS": ('c', 'p', 'M', None),
  3.1009 -"OSE": ('s', 'p', 'M', None),
  3.1010 -"3TY": ('x', 'p', None, None),
  3.1011 -"SCS": ('c', 'p', 'M', None),
  3.1012 -"TYY": ('y', 'p', 'M', None),
  3.1013 -"0A5": ('n', 'p', 'M', None),
  3.1014 -"PBT": ('n', 'd', None, None),
  3.1015 -"CWR": ('s', 'p', 'M', None),
  3.1016 -"TRO": ('w', 'p', 'M', None),
  3.1017 -"TRN": ('w', 'p', 'M', None),
  3.1018 -"PYO": ('u', 'r', 'M', None),
  3.1019 -"ZGU": ('g', 'r', 'M', None),
  3.1020 -"TRG": ('k', 'p', 'M', None),
  3.1021 -"TRF": ('w', 'p', 'M', None),
  3.1022 -"R1F": ('c', 'p', 'M', None),
  3.1023 -"M5M": ('c', 'r', 'M', None),
  3.1024 -"PBF": ('f', 'p', 'M', None),
  3.1025 -"R1B": ('c', 'p', 'M', None),
  3.1026 -"DHA": ('a', 'p', 'M', None),
  3.1027 -"YCM": ('c', 'p', 'M', None),
  3.1028 -"R1A": ('c', 'p', 'M', None),
  3.1029 -"TRQ": ('w', 'p', 'M', None),
  3.1030 -"TRP": ('W', 'p', None, None),
  3.1031 -"T23": ('n', 'r', None, None),
  3.1032 -"TRW": ('w', 'p', 'M', None),
  3.1033 -"4FW": ('w', 'p', 'M', None),
  3.1034 -"IVA": ('x', 'p', None, None),
  3.1035 -"VOL": ('x', 'p', None, None),
  3.1036 -"6HT": ('t', 'd', 'M', None),
  3.1037 -"6HG": ('g', 'd', 'M', None),
  3.1038 -"MPJ": ('x', 'p', None, None),
  3.1039 -"C43": ('c', 'r', 'M', None),
  3.1040 -"MPH": ('x', 'p', None, None),
  3.1041 -"AVN": ('x', 'p', None, None),
  3.1042 -"M3L": ('k', 'p', 'M', None),
  3.1043 -"C5C": ('c', 'p', 'M', None),
  3.1044 -"HTR": ('w', 'p', 'M', None),
  3.1045 -"MPQ": ('g', 'p', 'M', None),
  3.1046 -"ESC": ('m', 'p', 'M', None),
  3.1047 -"3ME": ('n', 'd', None, None),
  3.1048 -"TYS": ('y', 'p', 'M', None),
  3.1049 -"6HA": ('a', 'd', 'M', None),
  3.1050 -"KCX": ('k', 'p', 'M', None),
  3.1051 -"6HC": ('c', 'd', 'M', None),
  3.1052 -"AVC": ('a', 'r', 'M', None),
  3.1053 -"GTP": ('g', 'r', 'M', None),
  3.1054 -"ACB": ('d', 'p', 'M', None),
  3.1055 -"2SI": ('x', 'p', None, None),
  3.1056 -"NEM": ('h', 'p', 'M', None),
  3.1057 -"N7P": ('p', 'p', 'M', None),
  3.1058 -"SUB": ('x', 'p', None, None),
  3.1059 -"2AG": ('g', 'p', 'M', None),
  3.1060 -"SUN": ('s', 'p', 'M', None),
  3.1061 -"SUI": ('x', 'p', None, None),
  3.1062 -"ACL": ('r', 'p', 'M', None),
  3.1063 -"SUR": ('u', 'r', 'M', None),
  3.1064 -"BVP": ('n', 'd', None, None),
  3.1065 -"HFA": ('x', 'p', None, None),
  3.1066 -"LET": ('x', 'p', None, None),
  3.1067 -"0A8": ('c', 'p', 'M', None),
  3.1068 -"D4P": ('x', 'p', None, None),
  3.1069 -"2OT": ('t', 'd', 'M', None),
  3.1070 -"NEP": ('h', 'p', 'M', None),
  3.1071 -"PMT": ('c', 'r', 'M', None),
  3.1072 -"2DT": ('t', 'd', 'M', None),
  3.1073 -"MLY": ('k', 'p', 'M', None),
  3.1074 -"AD2": ('a', 'd', 'M', None),
  3.1075 -"CGA": ('e', 'p', 'M', None),
  3.1076 -"NBQ": ('y', 'p', 'M', None),
  3.1077 -"ODS": ('x', 'p', None, None),
  3.1078 -"7GU": ('g', 'd', 'M', None),
  3.1079 -"SAH": ('c', 'p', 'M', None),
  3.1080 -"2DF": ('n', 'd', None, None),
  3.1081 -"2DA": ('a', 'd', 'M', None),
  3.1082 -"PCC": ('p', 'p', 'M', None),
  3.1083 -"ODA": ('x', 'p', None, None),
  3.1084 -"2DO": ('x', 'p', None, None),
  3.1085 -"CGU": ('e', 'p', 'M', None),
  3.1086 -"L2A": ('x', 'p', None, None),
  3.1087 -"DNG": ('l', 'p', 'M', None),
  3.1088 -"UMA": ('a', 'p', 'M', None),
  3.1089 -"PM3": ('f', 'p', 'M', None),
  3.1090 -"DNM": ('l', 'p', 'M', None),
  3.1091 -"DNL": ('k', 'p', 'M', None),
  3.1092 -"GMA": ('e', 'p', 'M', None),
  3.1093 -"1LU": ('l', 'p', 'M', None),
  3.1094 -"CDV": ('x', 'p', None, None),
  3.1095 -"ADX": ('n', 'r', None, None),
  3.1096 -"LKC": ('n', 'd', None, None),
  3.1097 -"ADD": ('x', 'p', None, None),
  3.1098 -"CUC": ('x', 'p', None, None),
  3.1099 -"DNP": ('a', 'p', 'M', None),
  3.1100 -"DNS": ('k', 'p', 'M', None),
  3.1101 -"DNR": ('c', 'd', 'M', None),
  3.1102 -"CG1": ('g', 'r', 'M', None),
  3.1103 -"Z": ('c', 'd', 'M', None),
  3.1104 -"GMU": ('n', 'd', None, None),
  3.1105 -"C99": ('x', 'p', None, None),
  3.1106 -"GDP": ('g', 'r', 'M', None),
  3.1107 -"MED": ('m', 'p', 'M', None),
  3.1108 -"GDR": ('g', 'd', 'M', None),
  3.1109 -"MEA": ('f', 'p', 'M', None),
  3.1110 -"LEF": ('l', 'p', 'M', None),
  3.1111 -"LEH": ('l', 'p', 'M', None),
  3.1112 -"CHG": ('x', 'p', None, None),
  3.1113 -"MEN": ('n', 'p', 'M', None),
  3.1114 -"G42": ('g', 'd', 'M', None),
  3.1115 -"5CG": ('g', 'd', 'M', None),
  3.1116 -"T31": ('u', 'r', 'M', None),
  3.1117 -"MEU": ('g', 'p', 'M', None),
  3.1118 -"MET": ('M', 'p', None, None),
  3.1119 -"U34": ('u', 'r', 'M', None),
  3.1120 -"MEQ": ('q', 'p', 'M', None),
  3.1121 -"MEP": ('u', 'r', 'M', None),
  3.1122 -"B1P": ('n', 'd', None, None),
  3.1123 -"IG": ('g', 'r', 'M', None),
  3.1124 -"T2S": ('n', 'r', None, None),
  3.1125 -"ZHP": ('n', 'r', None, None),
  3.1126 -"AZK": ('k', 'p', 'M', None),
  3.1127 -"U31": ('u', 'r', 'M', None),
  3.1128 -"LTR": ('w', 'p', 'M', None),
  3.1129 -"MSU": ('x', 'p', None, None),
  3.1130 -"1PA": ('f', 'p', 'M', None),
  3.1131 -"MSP": ('a', 'p', 'M', None),
  3.1132 -"MSO": ('m', 'p', 'M', None),
  3.1133 -"AZY": ('y', 'p', 'M', None),
  3.1134 -"ENA": ('n', 'r', None, None),
  3.1135 -"MSL": ('m', 'p', 'M', None),
  3.1136 -"PF5": ('f', 'p', 'M', None),
  3.1137 -"1PR": ('n', 'r', None, None),
  3.1138 -"3AR": ('x', 'p', None, None),
  3.1139 -"MSE": ('m', 'p', 'M', None),
  3.1140 -"AZS": ('s', 'p', 'M', None),
  3.1141 -"MSA": ('g', 'p', 'M', None),
  3.1142 -"R2P": ('x', 'p', None, None),
  3.1143 -"CAR": ('c', 'd', 'M', None),
  3.1144 -"DAB": ('a', 'p', 'M', None),
  3.1145 -"OTY": ('y', 'p', 'M', None),
  3.1146 -"DAL": ('a', 'p', 'M', None),
  3.1147 -"DAM": ('x', 'p', None, None),
  3.1148 -"TT": ('n', 'd', None, None),
  3.1149 -"IOY": ('f', 'p', 'M', None),
  3.1150 -"DAH": ('f', 'p', 'M', None),
  3.1151 -"8MG": ('g', 'd', 'M', None),
  3.1152 -"BZG": ('n', 'd', None, None),
  3.1153 -"ARM": ('r', 'p', 'M', None),
  3.1154 -"DAR": ('r', 'p', 'M', None),
  3.1155 -"DAS": ('d', 'p', 'M', None),
  3.1156 -"MCY": ('c', 'd', 'M', None),
  3.1157 -"SGB": ('s', 'p', 'M', None),
  3.1158 -"YRR": ('n', 'd', None, None),
  3.1159 -"NF2": ('n', 'r', None, None),
  3.1160 -"CNU": ('u', 'r', 'M', None),
  3.1161 -"E": ('a', 'd', 'M', None),
  3.1162 -"AA3": ('a', 'p', 'M', None),
  3.1163 -"GVL": ('x', 'p', None, None),
  3.1164 -"TNR": ('s', 'p', 'M', None),
  3.1165 -"HOL": ('n', 'd', None, None),
  3.1166 -"PTM": ('y', 'p', 'M', None),
  3.1167 -"AS": ('a', 'd', 'M', None),
  3.1168 -"U": ('U', 'r', None, None),
  3.1169 -"ESB": ('y', 'p', 'M', None),
  3.1170 -"DA2": ('x', 'p', None, None),
  3.1171 -"TNB": ('c', 'p', 'M', None),
  3.1172 -"RMP": ('a', 'd', 'M', None),
  3.1173 -"SME": ('m', 'p', 'M', None),
  3.1174 -"NDF": ('f', 'p', 'M', None),
  3.1175 -"BOE": ('t', 'd', 'M', None),
  3.1176 -"LGP": ('g', 'd', 'M', None),
  3.1177 -"SCH": ('c', 'p', 'M', None),
  3.1178 -"T2T": ('n', 'd', None, None),
  3.1179 -"PFF": ('f', 'p', 'M', None),
  3.1180 -"PCE": ('x', 'p', None, None),
  3.1181 -"DFO": ('x', 'p', None, None),
  3.1182 -"DXN": ('n', 'd', None, None),
  3.1183 -"KGC": ('k', 'p', 'M', None),
  3.1184 -"US1": ('n', 'd', None, None),
  3.1185 -"HHK": ('x', 'p', None, None),
  3.1186 -"DXD": ('n', 'd', None, None),
  3.1187 -"ZCY": ('c', 'r', 'M', None),
  3.1188 -"IEL": ('k', 'p', 'M', None),
  3.1189 -}

     4.1 --- a/blocks3d/AlignmentSeq.py	Tue Feb 22 17:16:56 2011 +0300
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,490 +0,0 @@
     4.4 -# -*- coding: cp1251 -*-
     4.5 -
     4.6 -#####################################################################
     4.7 -# Класс функций, необходимых для выравнивания последовательностей
     4.8 -#####################################################################
     4.9 -
    4.10 -
    4.11 -
    4.12 -
    4.13 -"""
    4.14 -Usage:
    4.15 -
    4.16 -seq_in=[]
    4.17 -seq_in.append("SSNAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM")
    4.18 -seq_in.append("NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNAA")
    4.19 -l=AlignmentSeq(seq_in)
    4.20 -for t in l.new_sequences:
    4.21 -    print t
    4.22 -
    4.23 -"""
    4.24 -
    4.25 -
    4.26 -
    4.27 -
    4.28 -import AlignmentSeq_config
    4.29 -
    4.30 -vars().update(vars(AlignmentSeq_config))
    4.31 -
    4.32 -class AlignmentSeq:
    4.33 -
    4.34 -
    4.35 -
    4.36 -
    4.37 -
    4.38 -
    4.39 -
    4.40 -    def __init__(self,sequences):
    4.41 -
    4.42 -        """
    4.43 -        главная функция класса
    4.44 -        принимает любое число последовательностей
    4.45 -        выравнивает их, причем выравненные последовательности помещаются в список new_sequences
    4.46 -        словари { [новый номер атома] => старый номер атома } помещаются в список connections
    4.47 -
    4.48 -        self.common - список [позиция] => [список букв, список номеров цепей]
    4.49 -        """
    4.50 -
    4.51 -
    4.52 -        self.old_sequences = []
    4.53 -        self.new_sequences = []
    4.54 -        self.connections = []
    4.55 -
    4.56 -        for seq in sequences:
    4.57 -            self.old_sequences.append(seq.upper().replace('-','').replace(' ',''))
    4.58 -
    4.59 -
    4.60 -        self.common=[]
    4.61 -
    4.62 -
    4.63 -        for i in xrange(0,len(self.old_sequences)):
    4.64 -             self.unite(i)
    4.65 -
    4.66 -
    4.67 -        for i in xrange(0,len(self.old_sequences)):
    4.68 -             self.lining(i)
    4.69 -##
    4.70 -##        return
    4.71 -##
    4.72 -##        self.correct()
    4.73 -##
    4.74 -##
    4.75 -##
    4.76 -##
    4.77 -##        for str1 in self.new_sequences:
    4.78 -##            self.connections.append(self.recount(str1))
    4.79 -
    4.80 -
    4.81 -
    4.82 -
    4.83 -
    4.84 -
    4.85 -
    4.86 -    def cost(self,a1,a2):
    4.87 -
    4.88 -        """
    4.89 -        функция принимает две аминокислоты и возвращает цену
    4.90 -        их выравнивания
    4.91 -        """
    4.92 -
    4.93 -
    4.94 -        global matrix
    4.95 -        global gaps
    4.96 -
    4.97 -        a1=a1.upper()
    4.98 -        a2=a2.upper()
    4.99 -
   4.100 -        if (matrix.has_key(a1)):
   4.101 -            if (matrix[a1].has_key(a2)):
   4.102 -                return matrix[a1][a2]
   4.103 -
   4.104 -        return gaps[0]
   4.105 -
   4.106 -
   4.107 -
   4.108 -
   4.109 -
   4.110 -
   4.111 -
   4.112 -
   4.113 -
   4.114 -
   4.115 -    def gap_cost(self,int1):
   4.116 -        """
   4.117 -        функция принимает число существующих ГЭПОВ и
   4.118 -        возвращает штраф, который возьмется при добавлении справа ГЭПА
   4.119 -        """
   4.120 -
   4.121 -
   4.122 -        global gaps
   4.123 -
   4.124 -
   4.125 -        if (int1 >= len(gaps)):
   4.126 -            return gaps[(len(gaps)-1)]
   4.127 -        else:
   4.128 -            return gaps[int1]
   4.129 -
   4.130 -
   4.131 -
   4.132 -
   4.133 -
   4.134 -
   4.135 -
   4.136 -
   4.137 -
   4.138 -
   4.139 -
   4.140 -
   4.141 -
   4.142 -
   4.143 -
   4.144 -
   4.145 -    def unite(self,chainN):
   4.146 -
   4.147 -        """
   4.148 -        Создание списка выравнивания
   4.149 -
   4.150 -        chainN - номер цепи
   4.151 -        """
   4.152 -
   4.153 -
   4.154 -        str1=self.old_sequences[chainN]
   4.155 -        len1=len(str1)
   4.156 -
   4.157 -
   4.158 -
   4.159 -
   4.160 -        if (not self.common):
   4.161 -            i=0
   4.162 -            while (i<len1):
   4.163 -                aminoacids=[str1[i]]
   4.164 -                chains=[chainN]
   4.165 -
   4.166 -                self.common.append([aminoacids,chains])
   4.167 -                i+=1
   4.168 -
   4.169 -            return
   4.170 -
   4.171 -
   4.172 -
   4.173 -
   4.174 -
   4.175 -
   4.176 -        len2=len(self.common)
   4.177 -
   4.178 -
   4.179 -        d=[]
   4.180 -        tip_from=[]
   4.181 -        already_gaps=[]
   4.182 -
   4.183 -
   4.184 -        for i in xrange(0,len1+1):
   4.185 -            d.append([])
   4.186 -            already_gaps.append([])
   4.187 -            tip_from.append([])
   4.188 -            for j in xrange(0,len2+1):
   4.189 -                d[i].append(0)
   4.190 -                already_gaps[i].append([0,0])
   4.191 -                tip_from[i].append(0)
   4.192 -
   4.193 -
   4.194 -##
   4.195 -##        for i in xrange(1,len1+1):
   4.196 -##            d[i][0] =  0
   4.197 -##            already_gaps[i][0] = [0,1+i]
   4.198 -##
   4.199 -##
   4.200 -##        for j in xrange(1,len2+1):
   4.201 -##            d[0][j] =  0
   4.202 -##            already_gaps[0][j] = [1+j,0]
   4.203 -##
   4.204 -
   4.205 -
   4.206 -
   4.207 -
   4.208 -
   4.209 -
   4.210 -        for i in xrange(1,len1+1):
   4.211 -            for j in xrange(1,len2+1):
   4.212 -
   4.213 -                costs=[]
   4.214 -                for A in self.common[j-1][0]:
   4.215 -                    costs.append(self.cost(str1[i-1],A))
   4.216 -                cost = max(costs)
   4.217 -
   4.218 -
   4.219 -                insertion = d[i-1][j]
   4.220 -                if (j != len2): # неконцевой гэп
   4.221 -                    insertion += self.gap_cost(already_gaps[i-1][j][1])
   4.222 -
   4.223 -
   4.224 -                deletion  = d[i][j-1]
   4.225 -                if (i != len1): # неконцевой гэп
   4.226 -                    deletion += self.gap_cost(already_gaps[i][j-1][0])
   4.227 -
   4.228 -                substitution=d[i-1][j-1] + cost
   4.229 -
   4.230 -                max_way= max(insertion,deletion,substitution)
   4.231 -
   4.232 -                d[i][j] = max_way
   4.233 -
   4.234 -
   4.235 -
   4.236 -
   4.237 -
   4.238 -                if (max_way==substitution):
   4.239 -                    tip=3
   4.240 -                if (max_way==deletion):
   4.241 -                    tip=2
   4.242 -                if (max_way==insertion):
   4.243 -                    tip=1
   4.244 -
   4.245 -
   4.246 -
   4.247 -                if (tip==1):                             # insertion
   4.248 -                    already_gaps[i][j]=[0, (already_gaps[i-1][j][1]+1) ]
   4.249 -
   4.250 -
   4.251 -                if (tip==2):                             # deletion
   4.252 -                    already_gaps[i][j]=[ (already_gaps[i][j-1][0]+1), 0 ]
   4.253 -
   4.254 -                if (tip==3):                             # substitution
   4.255 -                    already_gaps[i][j]=[ 0, 0 ]
   4.256 -
   4.257 -                tip_from[i][j]=tip
   4.258 -
   4.259 -
   4.260 -##
   4.261 -##        for d1 in d:
   4.262 -##            for d11 in d1:
   4.263 -##                print d11,
   4.264 -##            print
   4.265 -##
   4.266 -##
   4.267 -##
   4.268 -##
   4.269 -##        for d1 in tip_from:
   4.270 -##            for d11 in d1:
   4.271 -##                print d11,
   4.272 -##            print
   4.273 -##
   4.274 -##
   4.275 -##
   4.276 -##        print insertion
   4.277 -##        print deletion
   4.278 -##        print substitution
   4.279 -##
   4.280 -##
   4.281 -##
   4.282 -##        print already_gaps
   4.283 -##
   4.284 -
   4.285 -
   4.286 -
   4.287 -        i=len1
   4.288 -        j=len2
   4.289 -
   4.290 -        common=[]
   4.291 -
   4.292 -        while (i>0 or j>0):
   4.293 -            tip=tip_from[i][j]
   4.294 -
   4.295 -            if tip==1 or (j==0 and i>0):
   4.296 -
   4.297 -                aminoacids=[(str1[i-1])]
   4.298 -                chains=[chainN]
   4.299 -
   4.300 -                common.append([aminoacids,chains])
   4.301 -
   4.302 -                i-=1
   4.303 -
   4.304 -
   4.305 -
   4.306 -            if tip==2 or (i==0 and j>0):
   4.307 -
   4.308 -                common.append(self.common[j-1])
   4.309 -                j-=1
   4.310 -
   4.311 -
   4.312 -            if (tip==3):
   4.313 -
   4.314 -                chains=self.common[j-1][1]
   4.315 -                chains.append(chainN)
   4.316 -
   4.317 -                aminoacids=self.common[j-1][0]
   4.318 -
   4.319 -                if (not aminoacids.count(str1[i-1])):
   4.320 -                    aminoacids.append(str1[i-1])
   4.321 -
   4.322 -                common.append([aminoacids,chains])
   4.323 -
   4.324 -                i-=1
   4.325 -                j-=1
   4.326 -
   4.327 -
   4.328 -
   4.329 -        common.reverse()
   4.330 -
   4.331 -        self.common=common
   4.332 -
   4.333 -
   4.334 -
   4.335 -
   4.336 -
   4.337 -
   4.338 -
   4.339 -
   4.340 -
   4.341 -
   4.342 -
   4.343 -
   4.344 -
   4.345 -
   4.346 -
   4.347 -
   4.348 -
   4.349 -
   4.350 -
   4.351 -
   4.352 -
   4.353 -
   4.354 -
   4.355 -    def lining(self,chainN):
   4.356 -
   4.357 -
   4.358 -        """
   4.359 -        метод создает новую выровненную последовательность
   4.360 -        в self.new_sequences
   4.361 -
   4.362 -        chainN - номер цепи
   4.363 -        """
   4.364 -
   4.365 -        str1=self.old_sequences[chainN]
   4.366 -        len1=len(str1)
   4.367 -
   4.368 -        len2=len(self.common)
   4.369 -
   4.370 -
   4.371 -        new_seq=''
   4.372 -        position_in_old=0
   4.373 -
   4.374 -        for common_1 in self.common:
   4.375 -            if (common_1[1].count(chainN)):
   4.376 -                new_seq = new_seq + str1[position_in_old]
   4.377 -                position_in_old += 1
   4.378 -            else:
   4.379 -                new_seq = new_seq + '-'
   4.380 -
   4.381 -        self.new_sequences.append(new_seq)
   4.382 -
   4.383 -
   4.384 -
   4.385 -
   4.386 -
   4.387 -
   4.388 -
   4.389 -##
   4.390 -##    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   4.391 -##    метод находит позиции в гтовом выраснивании, которые можно сдвинуть
   4.392 -##    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   4.393 -##    def correct(self):
   4.394 -##
   4.395 -##        new_sequences=[]
   4.396 -##
   4.397 -##        all_l = len(self.new_sequences[0]) # наименьшая из длин последовательностей
   4.398 -##        for str1 in self.new_sequences:
   4.399 -##            all_l = min (all_l,len(str1))
   4.400 -##
   4.401 -##
   4.402 -##
   4.403 -##        i=0
   4.404 -##        while (i < all_l):
   4.405 -##
   4.406 -##            if (i==0):
   4.407 -##                for str1 in self.new_sequences:
   4.408 -##                    new_sequences.append(str1[0])
   4.409 -##                i+=1
   4.410 -##                continue
   4.411 -##
   4.412 -##
   4.413 -##            all_gaps=1
   4.414 -##            for str1 in self.new_sequences:
   4.415 -##                if (str1[i]!='-'):
   4.416 -##                    all_gaps=0
   4.417 -##
   4.418 -##            if (all_gaps==1):
   4.419 -##                i+=1
   4.420 -##                continue
   4.421 -##
   4.422 -##
   4.423 -##            sovmest=1
   4.424 -##            j=-1
   4.425 -##            for str1 in self.new_sequences:
   4.426 -##                j+=1
   4.427 -##                if (str1[i]!='-' and new_sequences[j][-1]!='-'):
   4.428 -##                    sovmest=0
   4.429 -##                    break
   4.430 -##
   4.431 -##
   4.432 -##
   4.433 -##
   4.434 -##            j=-1
   4.435 -##            for str1 in self.new_sequences:
   4.436 -##                j+=1
   4.437 -##
   4.438 -##                if (sovmest==1):
   4.439 -##                    if (str1[i]!='-'):
   4.440 -##                        new_sequences[j] = new_sequences[j][:-1] + str1[i]
   4.441 -##                else:
   4.442 -##                    new_sequences[j] = new_sequences[j] + str1[i]
   4.443 -##
   4.444 -##
   4.445 -##
   4.446 -##            i+=1
   4.447 -##            continue
   4.448 -##
   4.449 -##        self.new_sequences = new_sequences
   4.450 -
   4.451 -
   4.452 -
   4.453 -
   4.454 -
   4.455 -
   4.456 -
   4.457 -
   4.458 -
   4.459 -
   4.460 -
   4.461 -
   4.462 -
   4.463 -
   4.464 -
   4.465 -##
   4.466 -##    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   4.467 -##    функция возвращает словарь: [новый номер атома] => старый номер атома
   4.468 -##    str1 - выравненная последовательность
   4.469 -##    нумерацию начинаем с 0!
   4.470 -##    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
   4.471 -##    def recount(self,str1):
   4.472 -##        spisok={}
   4.473 -##
   4.474 -##        len1=len(str1)
   4.475 -##        old_nomer = 0
   4.476 -##
   4.477 -##        for i in xrange(0,len1):
   4.478 -##            s=str1[i]
   4.479 -##            if (s != '-'):
   4.480 -##                spisok[i]=old_nomer
   4.481 -##                old_nomer+=1
   4.482 -##
   4.483 -##        return spisok
   4.484 -
   4.485 -
   4.486 -
   4.487 -
   4.488 -
   4.489 -
   4.490 -
   4.491 -
   4.492 -
   4.493 -

     5.1 --- a/blocks3d/AlignmentSeq_config.py	Tue Feb 22 17:16:56 2011 +0300
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,28 +0,0 @@
     5.4 -matrix={
     5.5 -"A": {"A":  4, "R": -1, "N": -2, "D": -2, "C":  0, "Q": -1, "E": -1, "G":  0, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S":  1, "T":  0, "W": -3, "Y": -2, "V":  0, "B": -2, "Z": -1, "X":  0, "*": -4},
     5.6 -"R": {"A": -1, "R":  5, "N":  0, "D": -2, "C": -3, "Q":  1, "E":  0, "G": -2, "H":  0, "I": -3, "L": -2, "K":  2, "M": -1, "F": -3, "P": -2, "S": -1, "T": -1, "W": -3, "Y": -2, "V": -3, "B": -1, "Z":  0, "X": -1, "*": -4},
     5.7 -"N": {"A": -2, "R":  0, "N":  6, "D":  1, "C": -3, "Q":  0, "E":  0, "G":  0, "H":  1, "I": -3, "L": -3, "K":  0, "M": -2, "F": -3, "P": -2, "S":  1, "T":  0, "W": -4, "Y": -2, "V": -3, "B":  3, "Z":  0, "X": -1, "*": -4},
     5.8 -"D": {"A": -2, "R": -2, "N":  1, "D":  6, "C": -3, "Q":  0, "E":  2, "G": -1, "H": -1, "I": -3, "L": -4, "K": -1, "M": -3, "F": -3, "P": -1, "S":  0, "T": -1, "W": -4, "Y": -3, "V": -3, "B":  4, "Z":  1, "X": -1, "*": -4},
     5.9 -"C": {"A":  0, "R": -3, "N": -3, "D": -3, "C":  9, "Q": -3, "E": -4, "G": -3, "H": -3, "I": -1, "L": -1, "K": -3, "M": -1, "F": -2, "P": -3, "S": -1, "T": -1, "W": -2, "Y": -2, "V": -1, "B": -3, "Z": -3, "X": -2, "*": -4},
    5.10 -"Q": {"A": -1, "R":  1, "N":  0, "D":  0, "C": -3, "Q":  5, "E":  2, "G": -2, "H":  0, "I": -3, "L": -2, "K":  1, "M":  0, "F": -3, "P": -1, "S":  0, "T": -1, "W": -2, "Y": -1, "V": -2, "B":  0, "Z":  3, "X": -1, "*": -4},
    5.11 -"E": {"A": -1, "R":  0, "N":  0, "D":  2, "C": -4, "Q":  2, "E":  5, "G": -2, "H":  0, "I": -3, "L": -3, "K":  1, "M": -2, "F": -3, "P": -1, "S":  0, "T": -1, "W": -3, "Y": -2, "V": -2, "B":  1, "Z":  4, "X": -1, "*": -4},
    5.12 -"G": {"A":  0, "R": -2, "N":  0, "D": -1, "C": -3, "Q": -2, "E": -2, "G":  6, "H": -2, "I": -4, "L": -4, "K": -2, "M": -3, "F": -3, "P": -2, "S":  0, "T": -2, "W": -2, "Y": -3, "V": -3, "B": -1, "Z": -2, "X": -1, "*": -4},
    5.13 -"H": {"A": -2, "R":  0, "N":  1, "D": -1, "C": -3, "Q":  0, "E":  0, "G": -2, "H":  8, "I": -3, "L": -3, "K": -1, "M": -2, "F": -1, "P": -2, "S": -1, "T": -2, "W": -2, "Y":  2, "V": -3, "B":  0, "Z":  0, "X": -1, "*": -4},
    5.14 -"I": {"A": -1, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -3, "E": -3, "G": -4, "H": -3, "I":  4, "L":  2, "K": -3, "M":  1, "F":  0, "P": -3, "S": -2, "T": -1, "W": -3, "Y": -1, "V":  3, "B": -3, "Z": -3, "X": -1, "*": -4},
    5.15 -"L": {"A": -1, "R": -2, "N": -3, "D": -4, "C": -1, "Q": -2, "E": -3, "G": -4, "H": -3, "I":  2, "L":  4, "K": -2, "M":  2, "F":  0, "P": -3, "S": -2, "T": -1, "W": -2, "Y": -1, "V":  1, "B": -4, "Z": -3, "X": -1, "*": -4},
    5.16 -"K": {"A": -1, "R":  2, "N":  0, "D": -1, "C": -3, "Q":  1, "E":  1, "G": -2, "H": -1, "I": -3, "L": -2, "K":  5, "M": -1, "F": -3, "P": -1, "S":  0, "T": -1, "W": -3, "Y": -2, "V": -2, "B":  0, "Z":  1, "X": -1, "*": -4},
    5.17 -"M": {"A": -1, "R": -1, "N": -2, "D": -3, "C": -1, "Q":  0, "E": -2, "G": -3, "H": -2, "I":  1, "L":  2, "K": -1, "M":  5, "F":  0, "P": -2, "S": -1, "T": -1, "W": -1, "Y": -1, "V":  1, "B": -3, "Z": -1, "X": -1, "*": -4},
    5.18 -"F": {"A": -2, "R": -3, "N": -3, "D": -3, "C": -2, "Q": -3, "E": -3, "G": -3, "H": -1, "I":  0, "L":  0, "K": -3, "M":  0, "F":  6, "P": -4, "S": -2, "T": -2, "W":  1, "Y":  3, "V": -1, "B": -3, "Z": -3, "X": -1, "*": -4},
    5.19 -"P": {"A": -1, "R": -2, "N": -2, "D": -1, "C": -3, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -3, "L": -3, "K": -1, "M": -2, "F": -4, "P":  7, "S": -1, "T": -1, "W": -4, "Y": -3, "V": -2, "B": -2, "Z": -1, "X": -2, "*": -4},
    5.20 -"S": {"A":  1, "R": -1, "N":  1, "D":  0, "C": -1, "Q":  0, "E":  0, "G":  0, "H": -1, "I": -2, "L": -2, "K":  0, "M": -1, "F": -2, "P": -1, "S":  4, "T":  1, "W": -3, "Y": -2, "V": -2, "B":  0, "Z":  0, "X":  0, "*": -4},
    5.21 -"T": {"A":  0, "R": -1, "N":  0, "D": -1, "C": -1, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S":  1, "T":  5, "W": -2, "Y": -2, "V":  0, "B": -1, "Z": -1, "X":  0, "*": -4},
    5.22 -"W": {"A": -3, "R": -3, "N": -4, "D": -4, "C": -2, "Q": -2, "E": -3, "G": -2, "H": -2, "I": -3, "L": -2, "K": -3, "M": -1, "F":  1, "P": -4, "S": -3, "T": -2, "W": 11, "Y":  2, "V": -3, "B": -4, "Z": -3, "X": -2, "*": -4},
    5.23 -"Y": {"A": -2, "R": -2, "N": -2, "D": -3, "C": -2, "Q": -1, "E": -2, "G": -3, "H":  2, "I": -1, "L": -1, "K": -2, "M": -1, "F":  3, "P": -3, "S": -2, "T": -2, "W":  2, "Y":  7, "V": -1, "B": -3, "Z": -2, "X": -1, "*": -4},
    5.24 -"V": {"A":  0, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -2, "E": -2, "G": -3, "H": -3, "I":  3, "L":  1, "K": -2, "M":  1, "F": -1, "P": -2, "S": -2, "T":  0, "W": -3, "Y": -1, "V":  4, "B": -3, "Z": -2, "X": -1, "*": -4},
    5.25 -"B": {"A": -2, "R": -1, "N":  3, "D":  4, "C": -3, "Q":  0, "E":  1, "G": -1, "H":  0, "I": -3, "L": -4, "K":  0, "M": -3, "F": -3, "P": -2, "S":  0, "T": -1, "W": -4, "Y": -3, "V": -3, "B":  4, "Z":  0, "X": -1, "*": -4},
    5.26 -"Z": {"A": -1, "R":  0, "N":  0, "D":  1, "C": -3, "Q":  3, "E":  4, "G": -2, "H":  0, "I": -3, "L": -3, "K":  1, "M": -1, "F": -3, "P": -1, "S":  0, "T": -1, "W": -3, "Y": -2, "V": -2, "B":  0, "Z":  4, "X": -1, "*": -4},
    5.27 -"X": {"A":  0, "R": -1, "N": -1, "D": -1, "C": -2, "Q": -1, "E": -1, "G": -1, "H": -1, "I": -1, "L": -1, "K": -1, "M": -1, "F": -1, "P": -2, "S":  0, "T":  0, "W": -2, "Y": -1, "V": -1, "B": -1, "Z": -1, "X": -1, "*": -4},
    5.28 -"*": {"A": -4, "R": -4, "N": -4, "D": -4, "C": -4, "Q": -4, "E": -4, "G": -4, "H": -4, "I": -4, "L": -4, "K": -4, "M": -4, "F": -4, "P": -4, "S": -4, "T": -4, "W": -4, "Y": -4, "V": -4, "B": -4, "Z": -4, "X": -4, "*":  1}
    5.29 -}
    5.30 -
    5.31 -gaps = (-8, -4, -2, -1)
    5.32 \ No newline at end of file

     6.1 --- a/blocks3d/Blocks3D.py	Tue Feb 22 17:16:56 2011 +0300
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,512 +0,0 @@
     6.4 -#!/usr/bin/python
     6.5 -# -*- coding: cp1251 -*-
     6.6 -
     6.7 -
     6.8 -##########################
     6.9 -# Reliable blocks finding
    6.10 -##########################
    6.11 -
    6.12 -
    6.13 -import json
    6.14 -
    6.15 -
    6.16 -import Blocks3D_config
    6.17 -import GeometricalCore_config
    6.18 -from Blocks3D_class import Blocks3D
    6.19 -B3D = Blocks3D()
    6.20 -
    6.21 -
    6.22 -
    6.23 -import sys
    6.24 -
    6.25 -
    6.26 -
    6.27 -# determine html.htm path
    6.28 -if hasattr(Blocks3D_config, 'html_file'):
    6.29 -    html_file = Blocks3D_config.html_file
    6.30 -
    6.31 -else:
    6.32 -
    6.33 -    if sys.argv[0].replace('/', '') != sys.argv[0]:
    6.34 -        delimiter = '/'
    6.35 -    else:
    6.36 -        delimiter = "\\"
    6.37 -
    6.38 -    path_parts = sys.argv[0].split(delimiter)
    6.39 -    path_parts[-1] = 'html.htm'
    6.40 -    html_file = delimiter.join(path_parts)
    6.41 -
    6.42 -
    6.43 -
    6.44 -
    6.45 -
    6.46 -
    6.47 -
    6.48 -
    6.49 -
    6.50 -import optparse
    6.51 -import os
    6.52 -
    6.53 -
    6.54 -from clon import clon # copy list
    6.55 -
    6.56 -
    6.57 -
    6.58 -
    6.59 -# get options
    6.60 -
    6.61 -parser = optparse.OptionParser()
    6.62 -
    6.63 -
    6.64 -parser.set_defaults(**vars(GeometricalCore_config))
    6.65 -parser.set_defaults(**vars(Blocks3D_config))
    6.66 -
    6.67 -
    6.68 -parser.add_option('-i', help='Input alignment file', dest='alignment_file')
    6.69 -parser.add_option('-c', help='PDB names conformity file', dest='conformity_file')
    6.70 -
    6.71 -
    6.72 -##parser.add_option('--out', help='Output file name (*)', dest='output_file')
    6.73 -##parser.add_option('-f', help='Output file format (msf, fasta, list)', dest='output_format')
    6.74 -
    6.75 -#parser.add_option('-l', help='Output list file', dest='output_list_file')
    6.76 -#parser.add_option('-f', help='Output fasta file', dest='output_fasta_file')
    6.77 -#parser.add_option('-g', help='Output msf file', dest='output_msf_file')
    6.78 -#parser.add_option('-p', help='Output pdb file (+spt script)', dest='output_pdb_file')
    6.79 -
    6.80 -parser.add_option('-H', help='Output html file', dest='output_html_file')
    6.81 -
    6.82 -
    6.83 -
    6.84 -parser.add_option('-d', help='Distance spreading [' + str(GeometricalCore_config.delta) + ']', dest='delta')
    6.85 -
    6.86 -parser.add_option('-m', help='Min block width [' + str(Blocks3D_config.min_width) + ']', dest='min_width')
    6.87 -
    6.88 -
    6.89 -parser.add_option('-t', help='Bron-Kerbosh (couple cores) timeout [' + str(GeometricalCore_config.timeout) + '] (-1 - unlimited)', dest='timeout')
    6.90 -
    6.91 -
    6.92 -parser.add_option('-T', help='Bron-Kerbosh (blocks) timeout [' + str(Blocks3D_config.timeout_2) + '] (-1 - unlimited)', dest='timeout_2')
    6.93 -
    6.94 -
    6.95 -#parser.add_option('-n', help='Alternative core new atoms (%) [' + str(int(GeometricalCore_config.alternative_core_new_atoms * 100)) + ']', dest='new_atoms')
    6.96 -
    6.97 -#parser.add_option('-a', help='Cores count [' + str(GeometricalCore_config.alternative_cores_count) + '] (0 - all)', dest='cores_count')
    6.98 -
    6.99 -
   6.100 -#parser.add_option('-x', help='Superposition core identifier (0 - main core)', dest='Super_core')
   6.101 -
   6.102 -
   6.103 -
   6.104 -parser.add_option('-u', help='PDB url [' + GeometricalCore_config.pdb_url + ']', dest='pdb_url')
   6.105 -parser.add_option('-s', action='store_true', help='Do not save uploaded PDB')
   6.106 -
   6.107 -
   6.108 -
   6.109 -options, args = parser.parse_args()
   6.110 -vars().update(vars(options))
   6.111 -
   6.112 -
   6.113 -
   6.114 -
   6.115 -
   6.116 -
   6.117 -
   6.118 -
   6.119 -
   6.120 -all_right = 1
   6.121 -
   6.122 -
   6.123 -
   6.124 -# check all options
   6.125 -
   6.126 -
   6.127 -try:
   6.128 -
   6.129 -    delta = float(delta)
   6.130 -
   6.131 -    min_width = int(min_width)
   6.132 -
   6.133 -except:
   6.134 -    all_right = 0
   6.135 -    print "Error: bad option"
   6.136 -
   6.137 -
   6.138 -
   6.139 -if s:
   6.140 -    save = 0
   6.141 -else:
   6.142 -    save = 1
   6.143 -
   6.144 -
   6.145 -
   6.146 -if timeout:
   6.147 -    try:
   6.148 -        timeout = int(timeout)
   6.149 -    except:
   6.150 -        all_right = 0
   6.151 -        print "Error: bad timeout"
   6.152 -
   6.153 -
   6.154 -
   6.155 -if timeout_2:
   6.156 -    try:
   6.157 -        timeout_2 = int(timeout_2)
   6.158 -    except:
   6.159 -        all_right = 0
   6.160 -        print "Error: bad timeout"
   6.161 -
   6.162 -
   6.163 -
   6.164 -#if not Super_core:
   6.165 -#    Super_core = 0
   6.166 -
   6.167 -#Super_core = int(Super_core)
   6.168 -
   6.169 -
   6.170 -
   6.171 -
   6.172 -
   6.173 -
   6.174 -
   6.175 -
   6.176 -try:
   6.177 -    if not os.path.exists(alignment_file):
   6.178 -        all_right = 0
   6.179 -        print "Error: can not find input file"
   6.180 -except:
   6.181 -    all_right = 0
   6.182 -    print "Error: can not find input file"
   6.183 -
   6.184 -
   6.185 -if not (output_html_file):
   6.186 -    all_right = 0
   6.187 -    print "Error: no output file"
   6.188 -
   6.189 -
   6.190 -
   6.191 -
   6.192 -
   6.193 -
   6.194 -
   6.195 -
   6.196 -if conformity_file:
   6.197 -    try:
   6.198 -        if not os.path.exists(conformity_file):
   6.199 -            all_right = 0
   6.200 -            print "Error: can not find conformity file"
   6.201 -    except:
   6.202 -        all_right = all_right
   6.203 -
   6.204 -
   6.205 -
   6.206 -
   6.207 -
   6.208 -
   6.209 -
   6.210 -
   6.211 -
   6.212 -
   6.213 -if not all_right:
   6.214 -
   6.215 -    print ' '
   6.216 -
   6.217 -    print 'Programm for find blocks of true alignment'
   6.218 -
   6.219 -    print ' '
   6.220 -
   6.221 -    parser.print_help()
   6.222 -
   6.223 -#    print ' '
   6.224 -#    print '* - required options'
   6.225 -
   6.226 -#    print ' '
   6.227 -#    print 'Protein identifiers should not contain ":" symbol'
   6.228 -
   6.229 -    exit()
   6.230 -
   6.231 -
   6.232 -
   6.233 -
   6.234 -# determinate temp fasta file
   6.235 -
   6.236 -#n = 1
   6.237 -
   6.238 -#while 1:
   6.239 -#    GC_temp = 'GC_temp' + str(n) + '.tmp'
   6.240 -#    if not os.path.exists(GC_temp):
   6.241 -#        break
   6.242 -#    n += 1
   6.243 -
   6.244 -GC_temp = 'GC_temp'
   6.245 -
   6.246 -
   6.247 -
   6.248 -
   6.249 -
   6.250 -
   6.251 -
   6.252 -
   6.253 -
   6.254 -
   6.255 -##is_fasta = 0
   6.256 -##fasta_check = open(alignment_file).readlines()
   6.257 -##
   6.258 -##for fasta_line in fasta_check:
   6.259 -##    if len(fasta_line) == 0:
   6.260 -##        continue
   6.261 -##    if fasta_line[0] == '>':
   6.262 -##        is_fasta = 1
   6.263 -##
   6.264 -##
   6.265 -##if is_fasta:
   6.266 -##
   6.267 -##    open(GC_temp, 'w').write(''.join(fasta_check))
   6.268 -##
   6.269 -##
   6.270 -##else:
   6.271 -
   6.272 -
   6.273 -
   6.274 -
   6.275 -
   6.276 -# try to determ is it fasta
   6.277 -
   6.278 -text = ''.join(open(alignment_file).readlines())
   6.279 -
   6.280 -#if text.count("\n>") >= 2:
   6.281 -if text.count("\n>") >= 1:
   6.282 -    # fasta
   6.283 -
   6.284 -    # write to temp file input fasta alignment
   6.285 -    open(GC_temp, 'w').write(text)
   6.286 -
   6.287 -else:
   6.288 -
   6.289 -    # not fasta
   6.290 -
   6.291 -    # convert input file to fasta
   6.292 -
   6.293 -    try:
   6.294 -        os.system('seqret ' + alignment_file + ' ' + GC_temp)
   6.295 -    except:
   6.296 -        print 'Error: can not convert input alignment file to fasta using emboss command seqret'
   6.297 -        exit()
   6.298 -
   6.299 -
   6.300 -
   6.301 -
   6.302 -
   6.303 -
   6.304 -if not os.path.exists(GC_temp):
   6.305 -    print 'Error: can not convert input alignment file to fasta'
   6.306 -    exit()
   6.307 -
   6.308 -
   6.309 -
   6.310 -
   6.311 -
   6.312 -
   6.313 -
   6.314 -
   6.315 -
   6.316 -
   6.317 -# read fasta
   6.318 -
   6.319 -fasta = open(GC_temp).readlines()
   6.320 -
   6.321 -
   6.322 -fasta_dict = {} # dict. index - fasta identifier. value - sequence without line breaks
   6.323 -
   6.324 -#fasta_order = [] # list of IDs, in order as in input file
   6.325 -
   6.326 -current_id = '' # current fasta identifier
   6.327 -
   6.328 -for fasta_string in fasta:
   6.329 -
   6.330 -    fasta_string = fasta_string.strip()
   6.331 -
   6.332 -    if not len(fasta_string):
   6.333 -        continue
   6.334 -
   6.335 -    if fasta_string[0] == '>':
   6.336 -        current_id = fasta_string[1:].split()[0]
   6.337 -        #fasta_order.append(current_id)
   6.338 -        fasta_dict[current_id] = ''
   6.339 -    else:
   6.340 -        if not current_id:
   6.341 -            print 'Bad Fasta file: line "' + fasta_string + '"'
   6.342 -            exit()
   6.343 -        fasta_dict[current_id] += fasta_string.replace(' ', '')
   6.344 -
   6.345 -
   6.346 -
   6.347 -
   6.348 -
   6.349 -# read conformity file
   6.350 -
   6.351 -
   6.352 -conformity_dict = {} # Key - fasta_id. Value - list [pdb_code, pdb_chain]
   6.353 -
   6.354 -
   6.355 -if conformity_file:
   6.356 -    conformity = open(conformity_file).readlines()
   6.357 -
   6.358 -    for conformity_string in conformity:
   6.359 -
   6.360 -        if len(conformity_string) < 8:
   6.361 -            continue
   6.362 -
   6.363 -        conformity_list = conformity_string.split()
   6.364 -
   6.365 -        if len(conformity_list) != 2:
   6.366 -            print 'Error: can not read line "' + conformity_string + '" in conformity file'
   6.367 -            exit()
   6.368 -
   6.369 -        fasta_id = conformity_list[0]
   6.370 -
   6.371 -##        if not fasta_dict.has_key(fasta_id):
   6.372 -##            print 'Error: unknown fasta identifier "' + fasta_id + '" in conformity file'
   6.373 -##            exit()
   6.374 -
   6.375 -
   6.376 -        conformity_pdb = conformity_list[1]
   6.377 -
   6.378 -#        if len(conformity_pdb) != 2:
   6.379 -#            print 'Error: can not read PDB code and chain "' + conformity_list[1] + '" in conformity file'
   6.380 -#            exit()
   6.381 -
   6.382 -#        if len(conformity_pdb[0]) != 4 or len(conformity_pdb[1]) != 1:
   6.383 -#            print 'Error: can not read PDB code and chain "' + conformity_list[1] + '" in conformity file'
   6.384 -#            exit()
   6.385 -
   6.386 -
   6.387 -        conformity_dict[fasta_id] = conformity_pdb
   6.388 -
   6.389 -
   6.390 -
   6.391 -
   6.392 -# get PDB names and chains
   6.393 -
   6.394 -block = [] # block to give it to GeometricalCore_class
   6.395 -
   6.396 -# ID_user[ID] = ID, entered by user
   6.397 -ID_user = {}
   6.398 -
   6.399 -
   6.400 -for fasta_id, sequence in fasta_dict.items():
   6.401 -
   6.402 -    if conformity_dict.has_key(fasta_id):
   6.403 -        fasta_id = conformity_dict[fasta_id]
   6.404 -
   6.405 -
   6.406 -    pdb = B3D.from_ID(fasta_id)
   6.407 -
   6.408 -
   6.409 -
   6.410 -    if not pdb:
   6.411 -        print 'Error: sequence identifier "' + fasta_id + '" in not valid'
   6.412 -#        exit()
   6.413 -        continue
   6.414 -
   6.415 -    pdb_code, pdb_chain, model = pdb
   6.416 -
   6.417 -    block.append([pdb_code, pdb_chain, sequence, model])
   6.418 -
   6.419 -    # save ID, entered by user
   6.420 -    ID = B3D.to_ID(pdb_code, pdb_chain, model)
   6.421 -    ID_user[ID] = fasta_id
   6.422 -
   6.423 -
   6.424 -if len(block) < 2:
   6.425 -    print 'Error: there are not enough sequences (less than 2)'
   6.426 -    exit()
   6.427 -
   6.428 -
   6.429 -
   6.430 -
   6.431 -
   6.432 -
   6.433 -
   6.434 -##############################################
   6.435 -# run GeometricalCore_class to make core
   6.436 -##############################################
   6.437 -
   6.438 -
   6.439 -
   6.440 -B3D.min_width = min_width
   6.441 -B3D.timeout_2 = timeout_2
   6.442 -
   6.443 -B3D.delta = delta
   6.444 -B3D.save = save
   6.445 -B3D.min_size = min_width
   6.446 -B3D.alternative_core_new_atoms = alternative_core_new_atoms
   6.447 -B3D.alternative_cores_count = alternative_cores_count
   6.448 -B3D.timeout = timeout
   6.449 -B3D.pdb_url = pdb_url
   6.450 -
   6.451 -
   6.452 -# blocks is object with result
   6.453 -blocks = B3D.find_blocks(block)
   6.454 -
   6.455 -# remake IDs as user enter
   6.456 -blocks_1 = []
   6.457 -
   6.458 -
   6.459 -for block in blocks:
   6.460 -
   6.461 -    IDs_1 = []
   6.462 -
   6.463 -    for ID in block['IDs']:
   6.464 -
   6.465 -        IDs_1.append(ID_user[ID])
   6.466 -
   6.467 -    block['IDs'] = IDs_1
   6.468 -
   6.469 -    blocks_1.append(block)
   6.470 -
   6.471 -
   6.472 -
   6.473 -
   6.474 -
   6.475 -if output_html_file:
   6.476 -    # produce HTML
   6.477 -
   6.478 -    t = ''
   6.479 -
   6.480 -
   6.481 -    t += "blocks = json('" + json.dumps(blocks_1) + "');"
   6.482 -    t += "fasta_dict = json('" + json.dumps(fasta_dict) + "');"
   6.483 -
   6.484 -#    t += "try{blocks_init();}catch(e){}"
   6.485 -
   6.486 -
   6.487 -
   6.488 -    html = ''.join(open(html_file).readlines())
   6.489 -
   6.490 -    # insert out code instead of "self_js_text"
   6.491 -    html = html.replace('self_js_text', t)
   6.492 -
   6.493 -
   6.494 -    open(output_html_file, 'w').write(html)
   6.495 -
   6.496 -
   6.497 -
   6.498 -
   6.499 -
   6.500 -
   6.501 -
   6.502 -
   6.503 -
   6.504 -
   6.505 -
   6.506 -
   6.507 -os.remove(GC_temp)
   6.508 -
   6.509 -print 'Task is complete'
   6.510 -
   6.511 -
   6.512 -
   6.513 -
   6.514 -
   6.515 -

     7.1 --- a/blocks3d/Blocks3D_class.py	Tue Feb 22 17:16:56 2011 +0300
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,751 +0,0 @@
     7.4 -# -*- coding: cp1251 -*-
     7.5 -
     7.6 -################################
     7.7 -# Find blocks or true alignment
     7.8 -################################
     7.9 -
    7.10 -
    7.11 -"""
    7.12 -
    7.13 -"""
    7.14 -
    7.15 -
    7.16 -
    7.17 -#import json #temp
    7.18 -
    7.19 -
    7.20 -
    7.21 -import GeometricalCore_class # for find cores
    7.22 -
    7.23 -import Blocks3D_config # default class options
    7.24 -
    7.25 -GC = GeometricalCore_class.GeometricalCore
    7.26 -
    7.27 -from Kliki import Kliki
    7.28 -
    7.29 -
    7.30 -from clon import clon # copy list
    7.31 -
    7.32 -
    7.33 -class Blocks3D(GC):
    7.34 -
    7.35 -    """
    7.36 -    Self data:
    7.37 -
    7.38 -
    7.39 -
    7.40 -    User parameters:
    7.41 -
    7.42 -
    7.43 -    """
    7.44 -
    7.45 -
    7.46 -
    7.47 -
    7.48 -
    7.49 -    def __init__(self):
    7.50 -
    7.51 -        """
    7.52 -        Create new copy of class
    7.53 -        """
    7.54 -
    7.55 -        GC.__init__(self)
    7.56 -
    7.57 -        vars(self).update(vars(Blocks3D_config))
    7.58 -
    7.59 -
    7.60 -
    7.61 -
    7.62 -
    7.63 -
    7.64 -    def find_blocks(self, alignment):
    7.65 -        """
    7.66 -        Find all blocks
    7.67 -
    7.68 -        Arguments:
    7.69 -            alignment - list.
    7.70 -                It's element - list like ["1jcc", "A", "SSNAKIDQLSSDAQ", None]
    7.71 -                    SSNAKIDQLSSDAQ - part of alignment block sequence
    7.72 -                    Last parameter - model or None
    7.73 -
    7.74 -
    7.75 -        Result:
    7.76 -            returns list of blocks
    7.77 -
    7.78 -            block is dict:
    7.79 -                start: integer
    7.80 -                end: integer
    7.81 -                IDs: list of IDs
    7.82 -        """
    7.83 -
    7.84 -#        # list of IDs
    7.85 -        IDs = []
    7.86 -
    7.87 -        for pdb_code, pdb_chain, seq_part, model in alignment:
    7.88 -
    7.89 -            ID = self.to_ID(pdb_code, pdb_chain, model)
    7.90 -            IDs.append(ID)
    7.91 -
    7.92 -        self.IDs = IDs
    7.93 -
    7.94 -
    7.95 -        # build couple cores:
    7.96 -
    7.97 -        # CORES = <list of cores>
    7.98 -        # core is list of positions
    7.99 -#        CORES = []
   7.100 -
   7.101 -
   7.102 -
   7.103 -        # couple_core_parts[ID_1][ID_2] = <list of parts>
   7.104 -        # part = {'core': core_number, 'start': start_posotion, 'end': end_position}
   7.105 -        couple_core_parts = {}
   7.106 -
   7.107 -
   7.108 -
   7.109 -
   7.110 -        for protein_1 in alignment:
   7.111 -            pdb_code_1, pdb_chain_1, seq_part_1, model_1 = protein_1
   7.112 -            ID_1 = self.to_ID(pdb_code_1, pdb_chain_1, model_1)
   7.113 -
   7.114 -            couple_core_parts[ID_1] = {}
   7.115 -
   7.116 -            for protein_2 in alignment:
   7.117 -                pdb_code_2, pdb_chain_2, seq_part_2, model_2 = protein_2
   7.118 -                ID_2 = self.to_ID(pdb_code_2, pdb_chain_2, model_2)
   7.119 -
   7.120 -                if ID_1 == ID_2:
   7.121 -                    break
   7.122 -
   7.123 -
   7.124 -                cores = self.check_block([protein_1, protein_2])
   7.125 -
   7.126 -#                print ' '
   7.127 -
   7.128 -#                print cores
   7.129 -
   7.130 -
   7.131 -                couple_core_parts[ID_1][ID_2] = []
   7.132 -
   7.133 -                all_cores = [] # list of positions of all cores
   7.134 -
   7.135 -                for core in cores:
   7.136 -
   7.137 -                    parts = self.splice_to_continued(core, (ID_1, ID_2))
   7.138 -
   7.139 -                    for part in parts:
   7.140 -
   7.141 -                        if len(part) >= self.min_width:
   7.142 -
   7.143 -                            all_cores.extend(clon(part))
   7.144 -
   7.145 -                all_cores = list(set(all_cores)) # unical elements
   7.146 -
   7.147 -                all_cores.sort()
   7.148 -
   7.149 -
   7.150 -                parts = self.splice_to_continued(all_cores, (ID_1, ID_2))
   7.151 -
   7.152 -                for part in parts:
   7.153 -
   7.154 -                    if len(part) < self.min_width:
   7.155 -                        continue
   7.156 -
   7.157 -                    couple_core_parts[ID_1][ID_2].append(clon({
   7.158 -                    'start': part[0], 'end': part[-1]}))
   7.159 -
   7.160 -                couple_core_parts[ID_2][ID_1] = couple_core_parts[ID_1][ID_2]
   7.161 -
   7.162 -#                CORES.append(clon(core))
   7.163 -
   7.164 -
   7.165 -
   7.166 -#        exit()
   7.167 -
   7.168 -        # list of elements of blocks graph
   7.169 -        # element is (ID, pos)
   7.170 -        graph_elements = []
   7.171 -
   7.172 -        # reverse dict to graph_elements
   7.173 -        graph_elements_1 = {}
   7.174 -
   7.175 -
   7.176 -        # graph[i][j] == 1, if these elements are connected, or i == j
   7.177 -        # i and j are indexes from list "graph_elements"
   7.178 -        graph = {}
   7.179 -
   7.180 -
   7.181 -        # same as graph, but include lists of cores n
   7.182 -#        graph_cores = {}
   7.183 -
   7.184 -
   7.185 -
   7.186 -        # is_boundary[ID][pos] == 1, if pos is boundary of some part in ID
   7.187 -        is_boundary = {}
   7.188 -
   7.189 -
   7.190 -
   7.191 -        for ID_1 in IDs:
   7.192 -
   7.193 -
   7.194 -            # find boundaries
   7.195 -            is_boundary[ID_1] = {}
   7.196 -            for ID_2 in IDs:
   7.197 -                if ID_2 == ID_1:
   7.198 -                    continue
   7.199 -                for part in couple_core_parts[ID_1][ID_2]:
   7.200 -                    is_boundary[ID_1][part['start']] = 1
   7.201 -                    is_boundary[ID_1][part['end']] = 1
   7.202 -
   7.203 -
   7.204 -
   7.205 -
   7.206 -            for ID_2 in IDs:
   7.207 -
   7.208 -                if ID_1 == ID_2:
   7.209 -                    break
   7.210 -
   7.211 -
   7.212 -
   7.213 -
   7.214 -
   7.215 -                for part in couple_core_parts[ID_1][ID_2]:
   7.216 -
   7.217 -                    start = part['start']
   7.218 -                    end = part['end']
   7.219 -
   7.220 -                    # elements of graph like (ID, pos)
   7.221 -                    elements = []
   7.222 -
   7.223 -                    for pos in xrange(start, end + 1):
   7.224 -
   7.225 -                        if not is_boundary[ID_1].has_key(pos) and not is_boundary[ID_2].has_key(pos):
   7.226 -                            continue
   7.227 -
   7.228 -                        for ID in (ID_1, ID_2):
   7.229 -                            if not graph_elements_1.has_key((ID, pos)):
   7.230 -                                L = len(graph_elements)
   7.231 -                                graph_elements_1[(ID, pos)] = L
   7.232 -                                graph[L] = {}
   7.233 -#                                graph_cores[L] = {}
   7.234 -
   7.235 -                                graph_elements.append((ID, pos))
   7.236 -
   7.237 -
   7.238 -                            elements.append(graph_elements_1[(ID, pos)])
   7.239 -
   7.240 -
   7.241 -                    for element_1 in elements:
   7.242 -                        for element_2 in elements:
   7.243 -                            graph[element_1][element_2] = 1
   7.244 -
   7.245 -#                            # add information about cores
   7.246 -#                            if not graph_cores[element_1].has_key(element_2):
   7.247 -#                                graph_cores[element_1][element_2] = []
   7.248 -#                            graph_cores[element_1][element_2].append(part['core'])
   7.249 -
   7.250 -
   7.251 -
   7.252 -        L = len(graph_elements)
   7.253 -
   7.254 -        for element_1 in xrange(0, L):
   7.255 -            for element_2 in xrange(0, L):
   7.256 -                if not graph[element_1].has_key(element_2):
   7.257 -                    graph[element_1][element_2] = 0
   7.258 -
   7.259 -
   7.260 -
   7.261 -        kliki_1 = Kliki(graph, min_size=0, timeout=self.timeout_2).kliki
   7.262 -
   7.263 -
   7.264 -#
   7.265 -#
   7.266 -#        # graf illustration
   7.267 -#        for i in graph.keys():
   7.268 -#            t = ''
   7.269 -#            for j in graph[i].keys():
   7.270 -#                if graph[i][j]:
   7.271 -#                    t += '1'
   7.272 -#                else:
   7.273 -#                    t += ' '
   7.274 -#            print t
   7.275 -#
   7.276 -#
   7.277 -
   7.278 -
   7.279 -        alignment_len = len(alignment[0][2])
   7.280 -
   7.281 -
   7.282 -
   7.283 -        # occupied alignment elements
   7.284 -        occupied = {}
   7.285 -
   7.286 -        for ID in IDs:
   7.287 -
   7.288 -            occupied[ID] = [0] * alignment_len
   7.289 -
   7.290 -        # 00000000
   7.291 -        occupied_current0 = clon(occupied)
   7.292 -
   7.293 -
   7.294 -
   7.295 -
   7.296 -
   7.297 -
   7.298 -
   7.299 -#        print graph
   7.300 -
   7.301 -#        print kliki_1
   7.302 -
   7.303 -
   7.304 -        rectangles = []
   7.305 -
   7.306 -        for klika in kliki_1:
   7.307 -
   7.308 -            if self.rectangles_type == 'out':
   7.309 -
   7.310 -                # transitivity
   7.311 -
   7.312 -                IDs_dict = {}
   7.313 -                start = alignment_len
   7.314 -                end = 0
   7.315 -
   7.316 -                for element in klika:
   7.317 -
   7.318 -                    ID, pos = graph_elements[element]
   7.319 -
   7.320 -                    IDs_dict[ID] = 1
   7.321 -
   7.322 -                    start = min(start, pos)
   7.323 -                    end = max(end, pos)
   7.324 -
   7.325 -                IDs = IDs_dict.keys()
   7.326 -
   7.327 -
   7.328 -                rectangles.append({'IDs': IDs, 'start': start, 'end': end})
   7.329 -
   7.330 -            else:
   7.331 -
   7.332 -                ID_start = {}
   7.333 -                ID_end = {}
   7.334 -
   7.335 -                for element in klika:
   7.336 -
   7.337 -                    ID, pos = graph_elements[element]
   7.338 -
   7.339 -
   7.340 -                    if not ID_start.has_key(ID):
   7.341 -                        ID_start[ID] = pos
   7.342 -                        ID_end[ID] = pos
   7.343 -                    else:
   7.344 -                        ID_start[ID] = min(ID_start[ID], pos)
   7.345 -                        ID_end[ID] = max(ID_end[ID], pos)
   7.346 -
   7.347 -
   7.348 -                # empty table
   7.349 -                occupied_current = clon(occupied_current0)
   7.350 -
   7.351 -
   7.352 -
   7.353 -                for ID, start in ID_start.items():
   7.354 -                    end = ID_end[ID]
   7.355 -
   7.356 -                    for pos in xrange(start, end + 1):
   7.357 -                        occupied_current[ID][pos] = 1
   7.358 -
   7.359 -
   7.360 -                rectangles.extend(self.splice_to_rect(occupied_current))
   7.361 -
   7.362 -
   7.363 -
   7.364 -        # sort by height and then by width
   7.365 -
   7.366 -        rectangles.sort(self.rectangles_compare)
   7.367 -
   7.368 -
   7.369 -
   7.370 -
   7.371 -
   7.372 -
   7.373 -
   7.374 -
   7.375 -
   7.376 -        rectangles_2 = []
   7.377 -
   7.378 -
   7.379 -        for rectangle in rectangles:
   7.380 -
   7.381 -            # empty table
   7.382 -            occupied_current = clon(occupied_current0)
   7.383 -
   7.384 -            for ID in rectangle['IDs']:
   7.385 -
   7.386 -                for pos in xrange(rectangle['start'], rectangle['end'] + 1):
   7.387 -
   7.388 -                    if not occupied[ID][pos]:
   7.389 -
   7.390 -                        occupied_current[ID][pos] = 1
   7.391 -
   7.392 -            rectangles_current = self.splice_to_rect(occupied_current)
   7.393 -
   7.394 -
   7.395 -
   7.396 -            rectangles_2.extend(rectangles_current)
   7.397 -
   7.398 -            for rectangle in rectangles_current:
   7.399 -
   7.400 -#                if len(rectangle['IDs']) ==2 and rectangle['IDs'][1] == '1bw5_A_4':
   7.401 -#                    print occupied_current, rectangles
   7.402 -
   7.403 -
   7.404 -                for ID in rectangle['IDs']:
   7.405 -                    for pos in xrange(rectangle['start'], rectangle['end'] + 1):
   7.406 -                        occupied[ID][pos] = 1
   7.407 -
   7.408 -
   7.409 -
   7.410 -        rectangles = clon(rectangles_2)
   7.411 -
   7.412 -
   7.413 -
   7.414 -
   7.415 -
   7.416 -        # delete blocks, including 1 sequence
   7.417 -
   7.418 -        rectangles = filter(self.rectangle_filter, rectangles)
   7.419 -
   7.420 -
   7.421 -        return rectangles
   7.422 -
   7.423 -
   7.424 -
   7.425 -#
   7.426 -#    # transitivity
   7.427 -#    def splice_to_outrect(self, occupied_current, occupied):
   7.428 -#        """
   7.429 -#        Splice multitude of alignment elements to rectangles
   7.430 -#
   7.431 -#        Returns:
   7.432 -#            new occupied_current (out rectangle) \ occupied
   7.433 -#
   7.434 -#        """
   7.435 -#
   7.436 -#        width = len(occupied_current.values()[0])
   7.437 -#
   7.438 -#        start = width # the most right position
   7.439 -#        end = 0 # the most left position
   7.440 -#
   7.441 -#        IDs_dict = {}
   7.442 -#
   7.443 -#        for ID, line in occupied_current.items():
   7.444 -#
   7.445 -#            for pos, condition in enumerate(line):
   7.446 -#
   7.447 -#                if condition:
   7.448 -#
   7.449 -#                    IDs_dict[ID] = 1
   7.450 -#                    start = min(start, pos)
   7.451 -#                    end = max(end, pos)
   7.452 -#
   7.453 -#        IDs = IDs_dict.keys()
   7.454 -#
   7.455 -#
   7.456 -#
   7.457 -#        occupied_current_1 = {}
   7.458 -#
   7.459 -#
   7.460 -#
   7.461 -#        for ID in IDs:
   7.462 -#
   7.463 -#            occupied_current_1[ID] = [0] * width
   7.464 -#
   7.465 -#            for pos in xrange(start, end + 1):
   7.466 -#
   7.467 -#                if not occupied[ID][pos]:
   7.468 -#
   7.469 -#                    occupied_current_1[ID][pos] = 1
   7.470 -#
   7.471 -#        return occupied_current_1
   7.472 -#
   7.473 -#
   7.474 -
   7.475 -
   7.476 -    def rectangles_compare(self, x, y):
   7.477 -
   7.478 -        height_x = len(x['IDs'])
   7.479 -        height_y = len(y['IDs'])
   7.480 -
   7.481 -        if height_x > height_y:
   7.482 -            return -1
   7.483 -        if height_x < height_y:
   7.484 -            return 1
   7.485 -
   7.486 -        # same heights
   7.487 -
   7.488 -        width_x = x['end'] - x['start'] + 1
   7.489 -        width_y = y['end'] - y['start'] + 1
   7.490 -
   7.491 -        if width_x > width_y:
   7.492 -            return -1
   7.493 -        if width_x < width_y:
   7.494 -            return 1
   7.495 -
   7.496 -        return 0
   7.497 -
   7.498 -
   7.499 -
   7.500 -
   7.501 -    def rectangle_filter(self, rectangle):
   7.502 -
   7.503 -        height = len(rectangle['IDs'])
   7.504 -
   7.505 -        width = rectangle['end'] - rectangle['start'] + 1
   7.506 -
   7.507 -        if height >= 2 and width >= self.min_width:
   7.508 -            return True
   7.509 -        else:
   7.510 -            return False
   7.511 -
   7.512 -
   7.513 -
   7.514 -
   7.515 -    def splice_to_rect(self, occupied_current):
   7.516 -        """
   7.517 -        Splice multitude of alignment elements to rectangles
   7.518 -
   7.519 -        Returns list of rectangles:
   7.520 -            rectangle is dict:
   7.521 -                'start'
   7.522 -                'end'
   7.523 -                'IDs' - list
   7.524 -
   7.525 -        """
   7.526 -
   7.527 -#        occupied_current_1 = splice_to_outrect(occupied_current, occupied)
   7.528 -
   7.529 -        occupied_current_1 = clon(occupied_current)
   7.530 -
   7.531 -        width = len(occupied_current_1.values()[0])
   7.532 -
   7.533 -
   7.534 -        rectangles = []
   7.535 -
   7.536 -        rectangles_count = 0
   7.537 -
   7.538 -
   7.539 -        while 1:
   7.540 -
   7.541 -            excluded_IDs = {}
   7.542 -
   7.543 -            while 1:
   7.544 -
   7.545 -                IDs_occupied = {} # is ID occupied
   7.546 -
   7.547 -                height = {} # this position height
   7.548 -
   7.549 -                for pos in xrange(0, width):
   7.550 -
   7.551 -                    height[pos] = 0
   7.552 -
   7.553 -                    for ID, e in occupied_current_1.items():
   7.554 -
   7.555 -                        if excluded_IDs.has_key(ID):
   7.556 -                            continue
   7.557 -
   7.558 -                        if  e[pos]:
   7.559 -                            height[pos] += 1
   7.560 -
   7.561 -                            IDs_occupied[ID] = 1
   7.562 -
   7.563 -
   7.564 -                max_height = len(IDs_occupied.keys())
   7.565 -
   7.566 -
   7.567 -                if max_height <= 1:
   7.568 -                    break
   7.569 -
   7.570 -
   7.571 -
   7.572 -                max_height_positions = []
   7.573 -
   7.574 -                for pos in xrange(0, width):
   7.575 -                    if height[pos] == max_height:
   7.576 -                        max_height_positions.append(pos)
   7.577 -
   7.578 -
   7.579 -                parts = self.splice_to_continued(max_height_positions, IDs_occupied.keys())
   7.580 -
   7.581 -
   7.582 -                ok = 0
   7.583 -
   7.584 -                for part in parts:
   7.585 -
   7.586 -                    if len(part) >= self.min_width:
   7.587 -
   7.588 -                        # It is block!
   7.589 -
   7.590 -                        IDs = IDs_occupied.keys()
   7.591 -                        start = part[0]
   7.592 -                        end = part[-1]
   7.593 -
   7.594 -
   7.595 -                        rectangles.append(clon({'IDs': IDs, 'start': start, 'end': end, 'cores':[]}))
   7.596 -
   7.597 -
   7.598 -                        # exclude elements
   7.599 -
   7.600 -                        for ID in IDs:
   7.601 -                            for pos in xrange(start, end + 1):
   7.602 -                                occupied_current_1[ID][pos] = 0
   7.603 -
   7.604 -
   7.605 -
   7.606 -                        ok = 1
   7.607 -
   7.608 -                if ok:
   7.609 -                    continue
   7.610 -
   7.611 -
   7.612 -
   7.613 -
   7.614 -                # find the shortest sequence
   7.615 -
   7.616 -
   7.617 -                max_width_dict = {}
   7.618 -
   7.619 -                for ID, e in occupied_current_1.items():
   7.620 -
   7.621 -                    if excluded_IDs.has_key(ID):
   7.622 -                        continue
   7.623 -
   7.624 -                    positions = []
   7.625 -
   7.626 -                    for pos in xrange(0, width):
   7.627 -
   7.628 -                        if height[pos] <= 1:
   7.629 -                            # "bad" position
   7.630 -                            continue
   7.631 -
   7.632 -                        if e[pos]:
   7.633 -                            positions.append(pos)
   7.634 -
   7.635 -                    if len(positions) == 0:
   7.636 -                        continue
   7.637 -
   7.638 -                    parts = self.splice_to_continued(positions, [ID])
   7.639 -
   7.640 -                    max_part_len = 0
   7.641 -
   7.642 -                    for part in parts:
   7.643 -                        max_part_len = max(len(part), max_part_len)
   7.644 -
   7.645 -                    max_width_dict[ID] = max_part_len
   7.646 -
   7.647 -
   7.648 -                if len(max_width_dict.values()):
   7.649 -                    min_width = min(max_width_dict.values())
   7.650 -                else:
   7.651 -                    break
   7.652 -
   7.653 -                # exclude these sequences
   7.654 -
   7.655 -                for ID, e in occupied_current_1.items():
   7.656 -
   7.657 -                    if max_width_dict.has_key(ID):
   7.658 -                        #if max_width_dict[ID] == min_width:
   7.659 -                        if max_width_dict[ID] <= min_width:
   7.660 -
   7.661 -                            excluded_IDs[ID] = 1
   7.662 -
   7.663 -#                            for pos in xrange(0, width):
   7.664 -#                                occupied_current_1[ID][pos] = 0
   7.665 -
   7.666 -
   7.667 -#            print rectangles_count
   7.668 -
   7.669 -            if len(rectangles) == rectangles_count:
   7.670 -                break
   7.671 -
   7.672 -            rectangles_count = len(rectangles)
   7.673 -
   7.674 -#            print rectangles_count
   7.675 -
   7.676 -
   7.677 -        return rectangles
   7.678 -
   7.679 -
   7.680 -
   7.681 -
   7.682 -
   7.683 -
   7.684 -
   7.685 -
   7.686 -
   7.687 -
   7.688 -
   7.689 -
   7.690 -
   7.691 -
   7.692 -
   7.693 -
   7.694 -
   7.695 -
   7.696 -
   7.697 -
   7.698 -
   7.699 -
   7.700 -
   7.701 -
   7.702 -
   7.703 -    def splice_to_continued(self, LIST, IDs):
   7.704 -        """
   7.705 -        LIST is a list of positions of alignment
   7.706 -
   7.707 -        returns list of continued lists, constituted source LIST
   7.708 -        """
   7.709 -
   7.710 -        LIST.sort()
   7.711 -
   7.712 -
   7.713 -        # list of continuing parts of this core
   7.714 -        parts = [[]]
   7.715 -
   7.716 -        for pos in LIST:
   7.717 -
   7.718 -            if len(parts[-1]):
   7.719 -
   7.720 -                if pos - parts[-1][-1] == 1:
   7.721 -
   7.722 -                    parts[-1].append(pos)
   7.723 -
   7.724 -                else:
   7.725 -
   7.726 -                    # check omited sequence
   7.727 -
   7.728 -                    gappes = 1 # all omited sequence is gaps
   7.729 -
   7.730 -                    for x in xrange(parts[-1][-1] + 1, pos):
   7.731 -
   7.732 -                        for ID in IDs:
   7.733 -                            if self.structure1[ID][x]:
   7.734 -                                gappes = 0
   7.735 -                                break
   7.736 -
   7.737 -                        if gappes == 0:
   7.738 -                            break
   7.739 -
   7.740 -                    if not gappes:
   7.741 -
   7.742 -                        # There are not only gapes
   7.743 -                        parts.append([pos])
   7.744 -
   7.745 -            else:
   7.746 -                # first element
   7.747 -
   7.748 -                parts[-1].append(pos)
   7.749 -
   7.750 -
   7.751 -        return clon(parts)
   7.752 -
   7.753 -
   7.754 -

     8.1 --- a/blocks3d/Blocks3D_config.py	Tue Feb 22 17:16:56 2011 +0300
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,21 +0,0 @@
     8.4 -# -*- coding: cp1251 -*-
     8.5 -
     8.6 -#################
     8.7 -# CLASS VARS
     8.8 -#################
     8.9 -
    8.10 -import sys
    8.11 -from os.path import join, dirname
    8.12 -
    8.13 -
    8.14 -min_width = 3
    8.15 -
    8.16 -timeout_2 = 10
    8.17 -
    8.18 -
    8.19 -# in this file should be HTM text. string "self_js_text" will be replaced to variable code
    8.20 -html_file = join(dirname(__file__), "html.htm")
    8.21 -
    8.22 -
    8.23 -rectangles_type = 'out'
    8.24 -

     9.1 --- a/blocks3d/GeometricalCore_class.py	Tue Feb 22 17:16:56 2011 +0300
     9.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.3 @@ -1,1008 +0,0 @@
     9.4 -# -*- coding: cp1251 -*-
     9.5 -
     9.6 -##############################################
     9.7 -# Geometrical core finding - alignment check
     9.8 -##############################################
     9.9 -
    9.10 -
    9.11 -"""
    9.12 -Example:
    9.13 -
    9.14 -    import GeometricalCore_class
    9.15 -
    9.16 -    l = GeometricalCore_class.GeometricalCore()
    9.17 -
    9.18 -
    9.19 -    print l.check_block([
    9.20 -        ['1jcc', 'A', 'NAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM', None],
    9.21 -        ['1jcd', 'A', 'NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNA', None],
    9.22 -        ])
    9.23 -
    9.24 -    print l.check_block([
    9.25 -        ['1jcc', 'A', 'NAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM', None],
    9.26 -        ['1jcd', 'A', 'NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNA', None],
    9.27 -        ])
    9.28 -
    9.29 -
    9.30 -Run method check_block() for each alignment block
    9.31 -
    9.32 -Structures will be uploaded automatically.
    9.33 -Programm takes them from folder /pdb (/pdb/1jcc.ent), if they are there
    9.34 -
    9.35 -In this example we control biological truth of alignment
    9.36 -    NAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM
    9.37 -    NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNA
    9.38 -
    9.39 -We controled with delta 1 and 2 angstrom
    9.40 -Delta is max allowed alterity between max and min distance between atoms
    9.41 -
    9.42 -We got list of core alignment positions (first is 0)
    9.43 -
    9.44 -"""
    9.45 -
    9.46 -
    9.47 -
    9.48 -
    9.49 -
    9.50 -
    9.51 -import os # to control file existence
    9.52 -
    9.53 -import urllib2 # to upload PDB from Internet
    9.54 -
    9.55 -import re # regulas expression
    9.56 -
    9.57 -
    9.58 -from Kliki import Kliki # algorithm to find core(s) from graph
    9.59 -
    9.60 -import AAdict # dict to convert 3-N code to 1-N
    9.61 -import AlignmentSeq # sequense alignment class
    9.62 -
    9.63 -
    9.64 -import GeometricalCore_config # default class options
    9.65 -
    9.66 -
    9.67 -
    9.68 -
    9.69 -AAdict = AAdict.AAdict
    9.70 -
    9.71 -
    9.72 -class GeometricalCore:
    9.73 -
    9.74 -    """
    9.75 -    Self data:
    9.76 -
    9.77 -    self.structure
    9.78 -        3d Coordinates of CA atoms of each structure
    9.79 -
    9.80 -        Formation:
    9.81 -            self.structure[structure name][atom identifier] = list [x,y,z]
    9.82 -            where:
    9.83 -                structure name - string like "1jcc:A"
    9.84 -                atom identifier - number of residue i PDB file, first is 0
    9.85 -                x,y,x - Coordinates of CA atoms in PDB
    9.86 -
    9.87 -
    9.88 -    self.seq
    9.89 -        sequences of each structures
    9.90 -
    9.91 -        Formation:
    9.92 -            self.seq[structure name] = string like "SSNAKIDQLSSDAQ"
    9.93 -                where: structure name - string like "1jcc:A"
    9.94 -
    9.95 -    self.structure1 - same as self.structure, but atom identifier is number in block
    9.96 -
    9.97 -    self.d - distances
    9.98 -        self.d[ID][i][j] = distance between i and j atoms in structure ID
    9.99 -
   9.100 -
   9.101 -
   9.102 -
   9.103 -
   9.104 -    User parameters:
   9.105 -
   9.106 -        self.max_absent - max part of positions without PDB info in one structure
   9.107 -
   9.108 -        self.save - should programm save PDB to folder pbd/ (0 - do not save, 1 - save)
   9.109 -
   9.110 -        self.min_size - min size of each core
   9.111 -
   9.112 -        self.alternative_core_new_atoms - part of new atoms in alternative core
   9.113 -
   9.114 -        self.alternative_cores_count - max number of cores (including main core)
   9.115 -
   9.116 -    """
   9.117 -
   9.118 -
   9.119 -
   9.120 -
   9.121 -
   9.122 -    def __init__(self):
   9.123 -
   9.124 -        """
   9.125 -        Create new copy of class
   9.126 -
   9.127 -        Creates self vars self.structure and self.seq
   9.128 -        """
   9.129 -
   9.130 -        vars(self).update(vars(GeometricalCore_config))
   9.131 -
   9.132 -
   9.133 -        self.structure = {}
   9.134 -        self.structure1 = {}
   9.135 -        self.d = {}
   9.136 -
   9.137 -
   9.138 -        self.seq = {}
   9.139 -        self.pdb_text = {} # there will be PDB texts in this dict. key - string like "1jcc"
   9.140 -
   9.141 -
   9.142 -
   9.143 -
   9.144 -
   9.145 -
   9.146 -
   9.147 -
   9.148 -
   9.149 -    def change_block_boundary(self):
   9.150 -        """
   9.151 -        If you use at first one boundaries (in positions) of block in some PDB file,
   9.152 -        and then decide to change them, run this method before running check_block method
   9.153 -        """
   9.154 -
   9.155 -        self.structure1 = {}
   9.156 -        self.d = {}
   9.157 -
   9.158 -
   9.159 -
   9.160 -#
   9.161 -#
   9.162 -#    def change_parameters(self, delta=None, save=None, min_size=None,
   9.163 -#                alternative_core_new_atoms=None,
   9.164 -#                alternative_cores_count=None, timeout=None):
   9.165 -#        """
   9.166 -#        Changes self parameters
   9.167 -#
   9.168 -#        description of parameters - see class description
   9.169 -#        """
   9.170 -#
   9.171 -#        if delta != None:
   9.172 -#            self.delta = delta
   9.173 -#
   9.174 -#
   9.175 -#        if save != None:
   9.176 -#            self.save = save
   9.177 -#
   9.178 -#        if min_size != None:
   9.179 -#            self.min_size = min_size
   9.180 -#
   9.181 -#        if alternative_core_new_atoms != None:
   9.182 -#            self.alternative_core_new_atoms = alternative_core_new_atoms
   9.183 -#
   9.184 -#
   9.185 -#        if alternative_cores_count != None:
   9.186 -#            self.alternative_cores_count = alternative_cores_count
   9.187 -#
   9.188 -#        if timeout != None:
   9.189 -#            self.timeout = timeout
   9.190 -#
   9.191 -#
   9.192 -#
   9.193 -
   9.194 -
   9.195 -
   9.196 -
   9.197 -    def to_ID(self, pdb_code, pdb_chain, model):
   9.198 -
   9.199 -        """
   9.200 -        Makes standart ID for structure from pdb_code, chain and model
   9.201 -        """
   9.202 -
   9.203 -        ID = pdb_code + '_'
   9.204 -
   9.205 -        if pdb_chain:
   9.206 -            ID = ID + pdb_chain.upper().strip()
   9.207 -
   9.208 -        if model:
   9.209 -            ID = ID + '_' + model
   9.210 -
   9.211 -        return ID
   9.212 -
   9.213 -
   9.214 -
   9.215 -
   9.216 -
   9.217 -
   9.218 -    def from_ID(self, ID):
   9.219 -
   9.220 -        """
   9.221 -        Parse standart ID to pdb_code, chain and model
   9.222 -        """
   9.223 -
   9.224 -
   9.225 -        if not hasattr(self, 're1'):
   9.226 -
   9.227 -
   9.228 -            # makes compiled regular expressions
   9.229 -
   9.230 -            # for pdb-codes
   9.231 -            self.re1 = re.compile(r"(^|[^a-z0-9])([0-9][0-9a-z]{3})([^a-z0-9]([0-9a-z ]?)([^a-z0-9]([0-9]{1,3}))?)?($|[^a-z0-9])")
   9.232 -
   9.233 -            # for files
   9.234 -            self.re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")
   9.235 -
   9.236 -
   9.237 -            self.from_ID_dict = {}
   9.238 -
   9.239 -
   9.240 -
   9.241 -
   9.242 -        if self.from_ID_dict.has_key(ID):
   9.243 -            return self.from_ID_dict[ID]
   9.244 -
   9.245 -
   9.246 -        if ID.lower().find('.ent') != -1 or ID.lower().find('.pdb') != -1:
   9.247 -            # it is file
   9.248 -            parseO = self.re2.search(ID) # files
   9.249 -        else:
   9.250 -            parseO = self.re1.search(ID.lower()) # pdb codes
   9.251 -
   9.252 -
   9.253 -        if not parseO:
   9.254 -            return None
   9.255 -
   9.256 -        parse = parseO.groups()
   9.257 -
   9.258 -
   9.259 -        if len(parse) < 2:
   9.260 -            return None
   9.261 -
   9.262 -
   9.263 -        code = parse[1]
   9.264 -
   9.265 -#        if not file:
   9.266 -#            code = code.lower()
   9.267 -
   9.268 -        chain = ''
   9.269 -        model = None
   9.270 -
   9.271 -        if len(parse) >= 4:
   9.272 -            chain = parse[3]
   9.273 -
   9.274 -            if chain:
   9.275 -                chain = chain.upper()
   9.276 -
   9.277 -            if len(parse) >= 6:
   9.278 -                if parse[5]:
   9.279 -                    model = parse[5]
   9.280 -
   9.281 -
   9.282 -
   9.283 -        self.from_ID_dict[ID] = (code, chain, model) # save parsing results
   9.284 -
   9.285 -        return code, chain, model
   9.286 -
   9.287 -
   9.288 -
   9.289 -
   9.290 -
   9.291 -
   9.292 -    def check_block(self, block):
   9.293 -        """
   9.294 -        Outside opened method (from main programm)
   9.295 -
   9.296 -
   9.297 -        Arguments:
   9.298 -            block - list.
   9.299 -                It's element - list like ["1jcc", "A", "SSNAKIDQLSSDAQ", None]
   9.300 -                    SSNAKIDQLSSDAQ - part of alignment block sequence
   9.301 -                    Last parameter - model or None
   9.302 -
   9.303 -
   9.304 -        Result:
   9.305 -            returns list of core atom identifiers (first is 0)
   9.306 -
   9.307 -            Example: [0,1,2,3,4,6]
   9.308 -        """
   9.309 -
   9.310 -
   9.311 -
   9.312 -
   9.313 -        # check lendth of input sequences
   9.314 -        L0 = len(block[0][2])
   9.315 -
   9.316 -        for pdb_code, pdb_chain, seq_part, model in block:
   9.317 -            if len(seq_part) != L0:
   9.318 -                #print seq_part, block[0][2]
   9.319 -                print 'Sequences must have equal lengths'
   9.320 -                print 'Task was aborted!'
   9.321 -                return
   9.322 -
   9.323 -
   9.324 -
   9.325 -
   9.326 -
   9.327 -
   9.328 -
   9.329 -        pdb_list = [] # list of uploading PDB names
   9.330 -
   9.331 -        for pdb_code, pdb_chain, seq_part, model in block:
   9.332 -
   9.333 -            ID = self.to_ID(pdb_code, pdb_chain, model)
   9.334 -
   9.335 -            if not self.seq.has_key(ID) or not self.structure.has_key(ID):
   9.336 -
   9.337 -#                pdb_code = pdb_code.lower()
   9.338 -
   9.339 -                if pdb_chain:
   9.340 -                    pdb_chain = pdb_chain.upper()
   9.341 -
   9.342 -                pdb_list.append((pdb_code, pdb_chain, model))
   9.343 -
   9.344 -
   9.345 -
   9.346 -        if len(pdb_list) != 0:
   9.347 -            self.load_pdb(pdb_list) # If it is required, get PDBs
   9.348 -
   9.349 -
   9.350 -
   9.351 -        for pdb_code, pdb_chain, seq_part, model in block:
   9.352 -
   9.353 -            ID = self.to_ID(pdb_code, pdb_chain, model)
   9.354 -
   9.355 -
   9.356 -
   9.357 -
   9.358 -
   9.359 -            if not self.seq.has_key(ID) or not self.structure.has_key(ID): # no structure :(
   9.360 -                print 'Undefined structuce ' + ID
   9.361 -                print 'Task was aborted!'
   9.362 -                return
   9.363 -
   9.364 -            if not self.seq[ID] or not self.structure[ID]: # no structure :(
   9.365 -                print 'Undefined structuce ' + ID
   9.366 -                print 'Task was aborted!'
   9.367 -                return
   9.368 -
   9.369 -
   9.370 -
   9.371 -
   9.372 -
   9.373 -
   9.374 -#        absent = {} # [position identifier] = 1, if some structure has not this atom
   9.375 -
   9.376 -#        structure = {} # [structure name][position in block] = list [x,y,z]
   9.377 -
   9.378 -        IDs = []
   9.379 -
   9.380 -        for pdb_code, pdb_chain, seq_part, model in block:
   9.381 -
   9.382 -            ID = self.to_ID(pdb_code, pdb_chain, model)
   9.383 -
   9.384 -            IDs.append(ID)
   9.385 -
   9.386 -            if self.structure1.has_key(ID):
   9.387 -                continue
   9.388 -
   9.389 -
   9.390 -            # alignment part of sequence with full sequence
   9.391 -            connections = self.find_sequence_piece(self.seq[ID], seq_part)
   9.392 -
   9.393 -
   9.394 -            # add atoms coordinates
   9.395 -
   9.396 -            self.structure1[ID] = []
   9.397 -
   9.398 -
   9.399 -
   9.400 -            for i in xrange(0, len(connections)):
   9.401 -                if connections[i] == '-': # there is not required position in PDB
   9.402 -                    self.structure1[ID].append(None)
   9.403 -#                    absent[i] = 1
   9.404 -                else:
   9.405 -                    self.structure1[ID].append(self.structure[ID][connections[i]])
   9.406 -
   9.407 -#
   9.408 -#
   9.409 -#        # how many positions absent in some structures
   9.410 -#        if len(absent) / float(len(block[0][2])) > self.max_absent:
   9.411 -#            print 'There are too many positions without PDB structure,', len(absent), 'of', len(block[0][2])
   9.412 -#
   9.413 -#
   9.414 -
   9.415 -
   9.416 -        # now we know coordinates of each atom of each structure
   9.417 -
   9.418 -
   9.419 -
   9.420 -        # calculate atom distances
   9.421 -        self.make_d()
   9.422 -
   9.423 -
   9.424 -        # make alignment graph
   9.425 -        graf, cost = self.make_graf(IDs)
   9.426 -
   9.427 -##        self.cost = cost
   9.428 -
   9.429 -
   9.430 -        # find cores
   9.431 -        cores = Kliki(graf, cost, min_size=self.min_size, timeout=self.timeout).kliki
   9.432 -
   9.433 -
   9.434 -        # sort cores and return real alternative cores (with high difference)
   9.435 -        cores = self.find_alternative_cores(cores)
   9.436 -
   9.437 -
   9.438 -#
   9.439 -#        # save CA atom coordinates
   9.440 -#        self.structure1 = structure
   9.441 -#
   9.442 -
   9.443 -        return cores
   9.444 -
   9.445 -
   9.446 -
   9.447 -
   9.448 -
   9.449 -
   9.450 -
   9.451 -
   9.452 -    def load_pdb(self, pdb_list):
   9.453 -
   9.454 -        """
   9.455 -        This method is runned to get PDB files and reading them structures
   9.456 -
   9.457 -        Argument:
   9.458 -            pdb_list - list of lists [pdb_code, chain]
   9.459 -
   9.460 -        Result:
   9.461 -            Adds structure data to self vars self.structure and self.seq
   9.462 -        """
   9.463 -
   9.464 -
   9.465 -
   9.466 -
   9.467 -
   9.468 -        codes = [] # list of PDB codes
   9.469 -
   9.470 -        # get texts of each PDB
   9.471 -
   9.472 -        for pdb_code, pdb_chain, model in pdb_list:
   9.473 -            if not codes.count(pdb_code):
   9.474 -                codes.append(pdb_code)
   9.475 -
   9.476 -
   9.477 -        for pdb_code in codes:
   9.478 -
   9.479 -
   9.480 -            # check PDB file existance in folder ./pdb/
   9.481 -
   9.482 -            fname = 'pdb/' + pdb_code
   9.483 -
   9.484 -            if pdb_code.lower().find('.ent') == -1 and pdb_code.lower().find('.pdb') == -1:
   9.485 -                # pdb code
   9.486 -                fname += '.ent'
   9.487 -
   9.488 -
   9.489 -            if os.path.exists(fname):
   9.490 -                self.pdb_text[pdb_code] = open(fname).readlines()
   9.491 -            else:
   9.492 -                # try to upload from Internet
   9.493 -                try:
   9.494 -
   9.495 -                    url = self.pdb_url.replace('XXXX', pdb_code)
   9.496 -
   9.497 -                    self.pdb_text[pdb_code] = urllib2.urlopen(url).readlines()
   9.498 -
   9.499 -
   9.500 -                except:
   9.501 -                    self.pdb_text[pdb_code] = ''
   9.502 -
   9.503 -
   9.504 -                if self.save:
   9.505 -                    # save information
   9.506 -
   9.507 -                    try:
   9.508 -
   9.509 -
   9.510 -                        # check folder "/pdb" existance
   9.511 -
   9.512 -                        if not os.path.exists('pdb'):
   9.513 -
   9.514 -                            # make folder
   9.515 -                            os.mkdir('pdb');
   9.516 -
   9.517 -
   9.518 -                        pdbfile_to_save = open('pdb/' + pdb_code + '.ent', 'w')
   9.519 -                        pdbfile_to_save.write(''.join(self.pdb_text[pdb_code]))
   9.520 -                        pdbfile_to_save.close()
   9.521 -
   9.522 -                        print 'PDB structure saved to file pdb/' + pdb_code + '.ent'
   9.523 -
   9.524 -                    except:
   9.525 -
   9.526 -                        print 'Can not write file pdb/' + pdb_code + '.ent'
   9.527 -
   9.528 -
   9.529 -            if not self.pdb_text[pdb_code]:
   9.530 -                #print self.pdb_url.replace('XXXX', pdb_code)
   9.531 -                print 'Can not get text of PDB', pdb_code
   9.532 -                return
   9.533 -
   9.534 -
   9.535 -        # We have all PDB texts
   9.536 -
   9.537 -
   9.538 -        # read all texts and get CA atoms coordinates
   9.539 -
   9.540 -        for pdb_code, pdb_chain, model in pdb_list:
   9.541 -
   9.542 -            ID = self.to_ID(pdb_code, pdb_chain, model)
   9.543 -
   9.544 -            S = self.read_pdb(pdb_code, pdb_chain, self.pdb_text[pdb_code], model)
   9.545 -            self.structure[ID], self.seq[ID] = S
   9.546 -
   9.547 -
   9.548 -            if not self.seq[ID]:
   9.549 -                print 'Can not find sequence for "' + ID + '"'
   9.550 -                return
   9.551 -
   9.552 -
   9.553 -
   9.554 -
   9.555 -
   9.556 -
   9.557 -    def read_pdb(self, pdb_code, pdb_chain, pdb_text, model):
   9.558 -        """
   9.559 -        Reads pdb_text
   9.560 -
   9.561 -        Returns (structure, sequence)
   9.562 -            structure (key - string like "1jcc:A")
   9.563 -                This element is dict with [x,y,z] coordinates of each aa
   9.564 -            sequence
   9.565 -        """
   9.566 -
   9.567 -
   9.568 -
   9.569 -        structure = [] # dict with coordinates list
   9.570 -        seq = ''  # sequense (string)
   9.571 -
   9.572 -
   9.573 -        Model_already = 0 # is it our model now
   9.574 -
   9.575 -        if not model:
   9.576 -            Model_already = 1
   9.577 -
   9.578 -
   9.579 -
   9.580 -
   9.581 -
   9.582 -        for currentline in pdb_text:
   9.583 -
   9.584 -
   9.585 -            if not Model_already:
   9.586 -                if currentline[0:5] == 'MODEL':
   9.587 -                    if currentline[10:14].strip() == model:
   9.588 -                        Model_already = 1
   9.589 -                        continue
   9.590 -
   9.591 -                continue
   9.592 -
   9.593 -
   9.594 -
   9.595 -            if currentline[0:6] == 'ENDMDL':
   9.596 -                break
   9.597 -
   9.598 -
   9.599 -            if len(currentline) < 54:
   9.600 -                continue
   9.601 -
   9.602 -
   9.603 -
   9.604 -            if currentline[0:4] != "ATOM":
   9.605 -                continue
   9.606 -
   9.607 -
   9.608 -            atomType = currentline[12:16].strip()
   9.609 -
   9.610 -            if atomType != "CA":
   9.611 -                continue
   9.612 -
   9.613 -
   9.614 -            thisChain = currentline[21].strip()
   9.615 -
   9.616 -            if thisChain != pdb_chain:
   9.617 -                continue
   9.618 -
   9.619 -
   9.620 -            alterCode = currentline[16] # Alter code
   9.621 -
   9.622 -            if alterCode != ' ' and alterCode != 'A':
   9.623 -                continue
   9.624 -
   9.625 -
   9.626 -            thisAminoAcid = currentline[17:20].strip()
   9.627 -
   9.628 -
   9.629 -            thisX = float(currentline[30:38].strip())
   9.630 -            thisY = float(currentline[38:46].strip())
   9.631 -            thisZ = float(currentline[46:54].strip())
   9.632 -
   9.633 -
   9.634 -            # add atom coornates
   9.635 -            structure.append((thisX,thisY,thisZ))
   9.636 -
   9.637 -
   9.638 -            # prolong sequense
   9.639 -            if (AAdict.has_key(thisAminoAcid)):
   9.640 -                seq = seq + AAdict[thisAminoAcid][0]
   9.641 -            else:
   9.642 -                seq = seq + 'x'
   9.643 -
   9.644 -        return (structure, seq)
   9.645 -
   9.646 -
   9.647 -
   9.648 -
   9.649 -
   9.650 -
   9.651 -###
   9.652 -###
   9.653 -###
   9.654 -###         def add_pdb(self, pdb_code, pdb_chain, pdb_text, model):
   9.655 -###
   9.656 -###             """
   9.657 -###             Method read PDB and get coordinates of CA atoms
   9.658 -###
   9.659 -###             Arguments:
   9.660 -###                 pdb_code - PDB name, for example "1jcc"
   9.661 -###                 pdb_chain - chain, for example "A"
   9.662 -###                 pdb_text - PDB file strings
   9.663 -###
   9.664 -###
   9.665 -###             Result:
   9.666 -###                 New element will be added in dict self.structure (key - string like "1jcc:A")
   9.667 -###                 This element is dict with [x,y,z] coordinates of each aa
   9.668 -###
   9.669 -###             """
   9.670 -###
   9.671 -###             ID = self.to_ID(pdb_code, pdb_chain, model)
   9.672 -###
   9.673 -###             self.structure[ID], self.seq[ID] = read_pdb(pdb_code, pdb_chain, pdb_text, model)
   9.674 -###
   9.675 -###
   9.676 -###
   9.677 -
   9.678 -
   9.679 -
   9.680 -
   9.681 -
   9.682 -
   9.683 -
   9.684 -
   9.685 -
   9.686 -
   9.687 -
   9.688 -
   9.689 -
   9.690 -
   9.691 -
   9.692 -
   9.693 -
   9.694 -
   9.695 -
   9.696 -
   9.697 -
   9.698 -
   9.699 -
   9.700 -
   9.701 -
   9.702 -
   9.703 -
   9.704 -
   9.705 -
   9.706 -
   9.707 -
   9.708 -
   9.709 -
   9.710 -    def find_sequence_piece (self, seq_all, seq_part):
   9.711 -        """
   9.712 -        Method for search subsequence in sequnce
   9.713 -
   9.714 -        Arguments:
   9.715 -            seq_all - full sequence (from PDB)
   9.716 -            seq_part - subsequence (from alignment block), can contain gapes
   9.717 -
   9.718 -        Result:
   9.719 -        retuns list with conformity list
   9.720 -                key - subsequence position (in seq_part)
   9.721 -                value - full sequence position (in seq_all)
   9.722 -
   9.723 -                First is 0
   9.724 -
   9.725 -                If no position from PDB meets subsequence position,
   9.726 -                value is '-', programm makes warning
   9.727 -
   9.728 -        """
   9.729 -        seq_alignment = AlignmentSeq.AlignmentSeq([seq_part, seq_all]).new_sequences
   9.730 -
   9.731 -##        print seq_alignment[0]
   9.732 -##        print seq_alignment[1]
   9.733 -
   9.734 -
   9.735 -        # At first, calculate conformity without gapes
   9.736 -
   9.737 -        connections0 = [] # key - seq_part position, value - position in seq_part without gapes
   9.738 -        part_usage = 0 # how many aa from seqpart were used
   9.739 -
   9.740 -        for i in xrange(0, len(seq_part)):
   9.741 -            s = seq_part[i]
   9.742 -            if s == '-':
   9.743 -                connections0.append('-')
   9.744 -            else:
   9.745 -                connections0.append(part_usage)
   9.746 -                part_usage += 1
   9.747 -
   9.748 -
   9.749 -
   9.750 -
   9.751 -
   9.752 -
   9.753 -
   9.754 -
   9.755 -
   9.756 -
   9.757 -        connections1 = []
   9.758 -
   9.759 -        all_usage = 0 # how many aa from full sequence were used
   9.760 -
   9.761 -        for i in xrange(0, len(seq_alignment[0])):
   9.762 -            AA_part = seq_alignment[0][i]
   9.763 -            AA_all  = seq_alignment[1][i]
   9.764 -
   9.765 -            if AA_all != '-':
   9.766 -                all_usage += 1
   9.767 -
   9.768 -            if AA_part == '-':
   9.769 -                continue
   9.770 -
   9.771 -            if AA_all == '-':
   9.772 -                connections1.append('-')
   9.773 -#                print 'Can not find', AA_part, len(connections1), 'of', seq_part, 'in', seq_all
   9.774 -                continue
   9.775 -
   9.776 -
   9.777 -            connections1.append(all_usage - 1)
   9.778 -
   9.779 -
   9.780 -
   9.781 -        connections = []
   9.782 -
   9.783 -
   9.784 -        for i in connections0:
   9.785 -            if i == '-':
   9.786 -                connections.append('-')
   9.787 -            else:
   9.788 -                connections.append(connections1[i])
   9.789 -
   9.790 -        return connections
   9.791 -
   9.792 -
   9.793 -
   9.794 -
   9.795 -
   9.796 -
   9.797 -
   9.798 -
   9.799 -
   9.800 -
   9.801 -
   9.802 -
   9.803 -
   9.804 -
   9.805 -
   9.806 -
   9.807 -    def make_d(self):
   9.808 -
   9.809 -        """
   9.810 -        Calculates distances
   9.811 -
   9.812 -        adds new elements to self.d
   9.813 -        """
   9.814 -
   9.815 -
   9.816 -        # Atom count
   9.817 -        aton_count = len(self.structure1.values()[0])
   9.818 -
   9.819 -        for ID, structure in self.structure1.items():
   9.820 -
   9.821 -            if self.d.has_key(ID):
   9.822 -                continue # already done
   9.823 -
   9.824 -            self.d[ID] = []
   9.825 -
   9.826 -
   9.827 -            # create distance matrix
   9.828 -            for atom1 in xrange(0, aton_count):
   9.829 -                self.d[ID].append([])
   9.830 -                for atom2 in xrange(0, aton_count):
   9.831 -                    self.d[ID][atom1].append(None)
   9.832 -
   9.833 -
   9.834 -            # done distance matrix
   9.835 -
   9.836 -            for atom1 in xrange(0, aton_count):
   9.837 -                for atom2 in xrange(0, aton_count):
   9.838 -
   9.839 -                    if atom2 == atom1:
   9.840 -                        break
   9.841 -
   9.842 -                    if structure[atom1] and structure[atom2]:
   9.843 -
   9.844 -                        dist = 0
   9.845 -
   9.846 -                        for xyz in xrange(0, 3):
   9.847 -
   9.848 -                            dist += (structure[atom1][xyz] - structure[atom2][xyz]) ** 2
   9.849 -
   9.850 -                        dist = dist ** 0.5
   9.851 -
   9.852 -                        self.d[ID][atom1][atom2] = dist
   9.853 -                        self.d[ID][atom2][atom1] = dist
   9.854 -
   9.855 -
   9.856 -
   9.857 -
   9.858 -
   9.859 -
   9.860 -
   9.861 -
   9.862 -
   9.863 -
   9.864 -    def make_graf(self, IDs):
   9.865 -
   9.866 -        """
   9.867 -        Make alignment core graph
   9.868 -
   9.869 -        Arguments:
   9.870 -            IDs - list of IDs to study
   9.871 -
   9.872 -
   9.873 -        Result:
   9.874 -        returns [alignment core graph, cost graf]
   9.875 -
   9.876 -
   9.877 -            alignment core graph example:
   9.878 -            graf[0][1] = 1    0 and 1 positions are connected
   9.879 -            graf[0][1] = 0    0 and 1 positions are not connected
   9.880 -        """
   9.881 -
   9.882 -        # Atom count
   9.883 -        aton_count = len(self.structure1.values()[0])
   9.884 -
   9.885 -
   9.886 -        graf = {}
   9.887 -
   9.888 -        cost = {} # distance spreading
   9.889 -
   9.890 -
   9.891 -
   9.892 -
   9.893 -
   9.894 -        for atom1 in xrange(0, aton_count):
   9.895 -
   9.896 -
   9.897 -            graf[atom1] = {}
   9.898 -
   9.899 -            cost[atom1] = {}
   9.900 -
   9.901 -            for atom2 in xrange(0, aton_count):
   9.902 -
   9.903 -
   9.904 -                if atom1 == atom2:
   9.905 -                    graf[atom1][atom2] = 1
   9.906 -                    break
   9.907 -
   9.908 -
   9.909 -                distances = []
   9.910 -
   9.911 -                for ID in IDs:
   9.912 -                    distances.append(self.d[ID][atom1][atom2])
   9.913 -
   9.914 -
   9.915 -                if distances.count(None):
   9.916 -                    graf[atom1][atom2] = 0
   9.917 -                    graf[atom2][atom1] = 0
   9.918 -
   9.919 -                else:
   9.920 -
   9.921 -                    spreading = max(distances) - min(distances)
   9.922 -
   9.923 -                    if spreading > self.delta:
   9.924 -
   9.925 -                        graf[atom1][atom2] = 0
   9.926 -                        graf[atom2][atom1] = 0
   9.927 -
   9.928 -                    else:
   9.929 -                        graf[atom1][atom2] = 1
   9.930 -                        graf[atom2][atom1] = 1
   9.931 -
   9.932 -                        cost[atom1][atom2] = -spreading
   9.933 -                        cost[atom2][atom1] = -spreading
   9.934 -
   9.935 -
   9.936 -
   9.937 -        return [graf, cost]
   9.938 -
   9.939 -
   9.940 -
   9.941 -
   9.942 -
   9.943 -
   9.944 -
   9.945 -    def find_alternative_cores(self, cores):
   9.946 -        """
   9.947 -        gather different cores and returns it
   9.948 -
   9.949 -        Arguments:
   9.950 -            cores - list of lists of core atoms
   9.951 -
   9.952 -        Result:
   9.953 -            returns list with real different cores
   9.954 -        """
   9.955 -
   9.956 -        new_cores = [] # list with added cores
   9.957 -
   9.958 -        for core in cores:
   9.959 -
   9.960 -            if self.alternative_cores_count:
   9.961 -                if len(new_cores) >= self.alternative_cores_count:
   9.962 -                    break # there are enough cores :)
   9.963 -
   9.964 -
   9.965 -
   9.966 -
   9.967 -            ok = 1
   9.968 -
   9.969 -            if self.alternative_core_new_atoms:
   9.970 -
   9.971 -                # compare this core with all added cores
   9.972 -
   9.973 -                for one_core in new_cores:
   9.974 -                    new_atoms = 0
   9.975 -
   9.976 -                    for atom in core:
   9.977 -                        if one_core.count(atom) == 0:
   9.978 -                            new_atoms += 1
   9.979 -
   9.980 -                    if new_atoms / float(len(core)) < self.alternative_core_new_atoms:
   9.981 -                        ok = 0
   9.982 -                        break
   9.983 -
   9.984 -
   9.985 -            # if this core is good
   9.986 -            if ok == 1:
   9.987 -                new_cores.append(core)
   9.988 -
   9.989 -
   9.990 -
   9.991 -        return new_cores
   9.992 -
   9.993 -
   9.994 -
   9.995 -
   9.996 -
   9.997 -
   9.998 -
   9.999 -
  9.1000 -
  9.1001 -
  9.1002 -
  9.1003 -
  9.1004 -
  9.1005 -
  9.1006 -
  9.1007 -
  9.1008 -
  9.1009 -
  9.1010 -
  9.1011 -

    10.1 --- a/blocks3d/GeometricalCore_config.py	Tue Feb 22 17:16:56 2011 +0300
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,52 +0,0 @@
    10.4 -# -*- coding: cp1251 -*-
    10.5 -
    10.6 -#################
    10.7 -# CLASS VARS
    10.8 -#################
    10.9 -
   10.10 -
   10.11 -# delta - maximum spread of distances, in angstroms
   10.12 -delta = 2.0
   10.13 -
   10.14 -
   10.15 -
   10.16 -# save files downloaded from the Internet into the pdb/ folder
   10.17 -save = 1
   10.18 -
   10.19 -
   10.20 -
   10.21 -# min size of returning cores
   10.22 -min_size = 20
   10.23 -
   10.24 -
   10.25 -
   10.26 -# min part of new atoms in new alternative core
   10.27 -alternative_core_new_atoms = 0.5
   10.28 -
   10.29 -
   10.30 -# max number of cores (including main core)
   10.31 -alternative_cores_count = 5
   10.32 -
   10.33 -
   10.34 -
   10.35 -# maximum allowed fraction of block positions that are missing from at least one of the PDB files
   10.36 -# !!! can be changed only in this file !!!
   10.37 -max_absent = 0.15
   10.38 -
   10.39 -
   10.40 -# pdb download url (XXXX is pdb code place)
   10.41 -pdb_url = 'http://www.pdb.org/pdb/files/XXXX.pdb'
   10.42 -
   10.43 -# time in sec. for BRON-KERBOSH algorithm
   10.44 -timeout = 10
   10.45 -
   10.46 -
   10.47 -#####################
   10.48 -# PROGRAM VARS
   10.49 -#####################
   10.50 -
   10.51 -
   10.52 -# output file format (msf, fasta, list)
   10.53 -# output_format = 'msf'
   10.54 -
   10.55 -

    11.1 --- a/blocks3d/Kliki.py	Tue Feb 22 17:16:56 2011 +0300
    11.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.3 @@ -1,567 +0,0 @@
    11.4 -# -*- coding: cp1251 -*-
    11.5 -
    11.6 -#########################################################################
    11.7 -# Bron-Kerbosch algorithm: search for the maximal subgraph/subgraphs
    11.8 -#########################################################################
    11.9 -
   11.10 -
   11.11 -
   11.12 -
   11.13 -"""
   11.14 -returns cliques in order of decreasing number of elements in them
   11.15 -
   11.16 -Usage:
   11.17 -
   11.18 -from Kliki import Kliki
   11.19 -l = Kliki(graf)
   11.20 -l.kliki # the cliques will be here
   11.21 -"""
   11.22 -
   11.23 -
   11.24 -import time
   11.25 -from clon import clon
   11.26 -
   11.27 -
   11.28 -
   11.29 -class Kliki:
   11.30 -
   11.31 -
   11.32 -
   11.33 -    """
   11.34 -    graf[vertex1][vertex2] = 1 - connected, 0 - not connected
   11.35 -
   11.36 -    kliki - list of cliques; a clique is a list of the numbers of the elements that form a complete subgraph
   11.37 -
   11.38 -    compsub - complete subgraph for the current step
   11.39 -    """
   11.40 -
   11.41 -
   11.42 -
   11.43 -
   11.44 -
   11.45 -
   11.46 -
   11.47 -
   11.48 -    def __init__ (self, graf, cost = None, limit_count=0, min_size=0, timeout=10):
   11.49 -
   11.50 -        """
   11.51 -        главная функция класса
   11.52 -
   11.53 -        graf - словарь словарей.
   11.54 -            Если graf[1][2] == 1, то 1 соединен с 2
   11.55 -            Если graf[1][2] == 0, то 1 не соединен с 2
   11.56 -
   11.57 -            graf[N][N] = 1 для всех N
   11.58 -
   11.59 -        cost - устроен аналогично graf (необязательный)
   11.60 -            хранит цены связей между элементами.
   11.61 -            чем больше цена, тем выше ценится эта связь
   11.62 -            используется для fast_algorithm и для сортировки клик
   11.63 -
   11.64 -
   11.65 -        limit_count - максимальное число клик, которые хотят
   11.66 -            Если задать 0, то возвратятся все клики
   11.67 -
   11.68 -        min_size - min size of returning klika
   11.69 -
   11.70 -        timeout - time in sec. for BRON-KERBOSH algorithm
   11.71 -
   11.72 -        """
   11.73 -
   11.74 -
   11.75 -
   11.76 -        self.graf = graf
   11.77 -        self.cost = cost
   11.78 -
   11.79 -
   11.80 -        self.kliki = []
   11.81 -
   11.82 -        self.timeout = timeout
   11.83 -
   11.84 -
   11.85 -
   11.86 -        connections = {} # index - atom, value - count of connections
   11.87 -
   11.88 -        for atom in self.graf.keys():
   11.89 -            connections[atom] = self.graf[atom].values().count(1)
   11.90 -
   11.91 -
   11.92 -
   11.93 -
   11.94 -
   11.95 -        # delete atoms which have not enough number of connections
   11.96 -        deleted = 1
   11.97 -
   11.98 -        while deleted:
   11.99 -
  11.100 -            deleted = 0
  11.101 -
  11.102 -            for atom, c in connections.items():
  11.103 -
  11.104 -                if c < min_size:
  11.105 -
  11.106 -                    del connections[atom]
  11.107 -
  11.108 -                    for atom1, connect in graf[atom].items():
  11.109 -                        if connect == 1 and connections.has_key(atom1):
  11.110 -                            connections[atom1] -= 1
  11.111 -                            deleted = 1
  11.112 -
  11.113 -
  11.114 -
  11.115 -
  11.116 -
  11.117 -        bank_l = {}
  11.118 -
  11.119 -        for atom, c in connections.items():
  11.120 -
  11.121 -            if not bank_l.has_key(c):
  11.122 -                bank_l[c] = []
  11.123 -
  11.124 -            bank_l[c].append(atom)
  11.125 -
  11.126 -
  11.127 -        keys = []
  11.128 -
  11.129 -        if len(bank_l.keys()):
  11.130 -            for c in xrange(min(bank_l.keys()), max(bank_l.keys())+1):
  11.131 -                if bank_l.has_key(c):
  11.132 -                    keys.extend(bank_l[c])
  11.133 -
  11.134 -
  11.135 -
  11.136 -
  11.137 -        # RUN BRON-KERBOSH
  11.138 -
  11.139 -        if self.timeout != 0:
  11.140 -
  11.141 -            self.bron_kerbosh(keys[:])
  11.142 -
  11.143 -        if len(self.kliki) == 0:
  11.144 -            self.fast_algorithm(keys[:]) # run fast algorithm
  11.145 -
  11.146 -
  11.147 -
  11.148 -
  11.149 -
  11.150 -        # упорядочим полученные клики по убыванию числа элементов в них
  11.151 -
  11.152 -
  11.153 -
  11.154 -
  11.155 -
  11.156 -##        max_l=0                  # максимальный размер
  11.157 -##        min_l=len(self.kliki[0]) # минимальный размер
  11.158 -
  11.159 -        bank_l = {}
  11.160 -
  11.161 -        for klika in self.kliki:
  11.162 -            klika.sort()
  11.163 -            l = len(klika) # длина текущей клики
  11.164 -
  11.165 -            if l >= min_size:
  11.166 -##                max_l = max(max_l,len(klika))
  11.167 -##                min_l = min(min_l,len(klika))
  11.168 -
  11.169 -                if not bank_l.has_key(l):
  11.170 -                    bank_l[l] = []
  11.171 -
  11.172 -                bank_l[l].append(klika)
  11.173 -
  11.174 -
  11.175 -
  11.176 -        kliki=[]
  11.177 -
  11.178 -        #print self.cost
  11.179 -
  11.180 -        if len(bank_l.keys()):
  11.181 -
  11.182 -            r = range(min(bank_l.keys()), max(bank_l.keys())+1)
  11.183 -            r.reverse()
  11.184 -
  11.185 -            for l in r:
  11.186 -                if (bank_l.has_key(l)):
  11.187 -
  11.188 -                    k = bank_l[l] # все клики длины l
  11.189 -
  11.190 -
  11.191 -                    if self.cost:
  11.192 -
  11.193 -    ##                    print l
  11.194 -
  11.195 -                        # отсортируем их по убыванию общей цены cost
  11.196 -
  11.197 -
  11.198 -
  11.199 -                        costs = []
  11.200 -
  11.201 -                        for klika in k:
  11.202 -
  11.203 -                            c = 0
  11.204 -
  11.205 -                            for i in klika:
  11.206 -
  11.207 -                                if not self.cost.has_key(i):
  11.208 -                                    continue
  11.209 -
  11.210 -                                for j in klika:
  11.211 -
  11.212 -                                    if j == i:
  11.213 -                                        break
  11.214 -
  11.215 -
  11.216 -                                    if not self.cost[i].has_key(j):
  11.217 -                                        continue
  11.218 -
  11.219 -                                    c += self.cost[i][j]
  11.220 -
  11.221 -                            costs.append(c)
  11.222 -
  11.223 -                        costs1 = costs[:]
  11.224 -
  11.225 -                        costs1.sort(reverse=1)
  11.226 -
  11.227 -                        k1 = []
  11.228 -
  11.229 -                        for c in costs1:
  11.230 -
  11.231 -                            n = costs.index(c)
  11.232 -
  11.233 -                            k1.append(k[n][:])
  11.234 -
  11.235 -                            del k[n]
  11.236 -                            del costs[n]
  11.237 -
  11.238 -                        k = k1
  11.239 -
  11.240 -                    kliki.extend(k)
  11.241 -
  11.242 -##        kliki.reverse()
  11.243 -
  11.244 -        if limit_count:
  11.245 -            if len(kliki) > limit_count: # наложим ограничение на число клик
  11.246 -                kliki = kliki[:limit_count]
  11.247 -
  11.248 -
  11.249 -        self.kliki = kliki[:]
  11.250 -
  11.251 -
  11.252 -
  11.253 -
  11.254 -
  11.255 -
  11.256 -
  11.257 -
  11.258 -
  11.259 -
  11.260 -    def bron_kerbosh (self, keys):
  11.261 -
  11.262 -        """
  11.263 -        алгоритм Брона-Кербоша поиска максимального полного подграфа
  11.264 -        http://ru.wikipedia.org/wiki/Алгоритм_Брона_-_Кербоша
  11.265 -
  11.266 -        compsub - список атомов ядра
  11.267 -        candidates - список нерассмотренных кандидатов в ядро
  11.268 -        used -  список рассмотренных кандидатов в ядро
  11.269 -        """
  11.270 -
  11.271 -        depth = 0 # глубина "рекурсии"
  11.272 -        list_candidates = [keys] # список значений candidates всех "рекурсий"
  11.273 -        list_used = [[]] #список значений used всех "рекурсий"
  11.274 -
  11.275 -        compsub = [] # единый compsub
  11.276 -
  11.277 -
  11.278 -        print 'Bron and Kerbosh algorithm started'
  11.279 -
  11.280 -        start_time = time.time()
  11.281 -
  11.282 -        # ПОКА...
  11.283 -        while 1:
  11.284 -
  11.285 -            if depth == -1:
  11.286 -                break # ВСЕ! Все рекурсии (итерации) пройдены
  11.287 -
  11.288 -
  11.289 -
  11.290 -
  11.291 -
  11.292 -            # создадим candidates и used из списка
  11.293 -
  11.294 -            #print depth
  11.295 -
  11.296 -            candidates = list_candidates[depth][:]
  11.297 -            used = list_used[depth][:]
  11.298 -
  11.299 -
  11.300 -
  11.301 -
  11.302 -            # ПОКА candidates НЕ пусто
  11.303 -            if len(candidates)==0:
  11.304 -                depth -= 1
  11.305 -
  11.306 -                if compsub:
  11.307 -                    compsub.pop()
  11.308 -                continue
  11.309 -
  11.310 -
  11.311 -
  11.312 -
  11.313 -
  11.314 -
  11.315 -            # И used НЕ содержит вершины, СОЕДИНЕННОЙ СО ВСЕМИ вершинами из candidates
  11.316 -            # (все из used НЕ соединены хотя бы с 1 из candidates)
  11.317 -
  11.318 -            used_candidates = 0
  11.319 -
  11.320 -            for used1 in used:
  11.321 -                for candidates1 in candidates:
  11.322 -                    if self.graf[used1][candidates1] == 0:
  11.323 -                        break
  11.324 -                else:
  11.325 -                    used_candidates = 1
  11.326 -
  11.327 -            if used_candidates:
  11.328 -                depth -= 1
  11.329 -
  11.330 -                if compsub:
  11.331 -                    compsub.pop()
  11.332 -                continue
  11.333 -
  11.334 -
  11.335 -
  11.336 -
  11.337 -
  11.338 -
  11.339 -
  11.340 -            # Выбираем вершину v из candidates и добавляем ее в compsub
  11.341 -            v = candidates[0]
  11.342 -            compsub.append(v)
  11.343 -
  11.344 -
  11.345 -
  11.346 -
  11.347 -
  11.348 -            # Формируем new_candidates и new_used, удаляя из candidates и used вершины, НЕ соединенные с v
  11.349 -            # (то есть, оставляя только соединенные с v)
  11.350 -            new_candidates = []
  11.351 -            for candidates1 in candidates:
  11.352 -                if self.graf[candidates1][v] == 1 and candidates1 != v:
  11.353 -                    new_candidates.append(candidates1)
  11.354 -
  11.355 -
  11.356 -            new_used = []
  11.357 -            for used1 in used:
  11.358 -                if self.graf[used1][v] == 1 and used1 != v:
  11.359 -                    new_used.append(used1)
  11.360 -
  11.361 -
  11.362 -
  11.363 -
  11.364 -            # Удаляем v из candidates и помещаем в used
  11.365 -            del list_candidates[depth][0]
  11.366 -            list_used[depth].append(v)
  11.367 -
  11.368 -
  11.369 -            # ЕСЛИ new_candidates и new_used пусты
  11.370 -            if len(new_candidates) == 0 and len(new_used) == 0:
  11.371 -                # compsub - клика
  11.372 -                self.kliki.append(compsub[:])
  11.373 -
  11.374 -            else:
  11.375 -                # ИНАЧЕ рекурсивно вызываем bron_kerbosh(new_candidates, new_used)
  11.376 -
  11.377 -                depth += 1
  11.378 -
  11.379 -
  11.380 -                # TIMEOUT check start
  11.381 -                if self.timeout != -1:
  11.382 -
  11.383 -                    if time.time() - start_time > self.timeout:
  11.384 -
  11.385 -                        self.kliki = []
  11.386 -                        return
  11.387 -                # TIMEOUT check end
  11.388 -
  11.389 -
  11.390 -
  11.391 -
  11.392 -                if depth >= len(list_candidates):
  11.393 -                    list_candidates.append([])
  11.394 -                    list_used.append([])
  11.395 -
  11.396 -
  11.397 -                list_candidates[depth] = new_candidates[:]
  11.398 -                list_used[depth] = new_used[:]
  11.399 -
  11.400 -                continue
  11.401 -
  11.402 -
  11.403 -            # Удаляем v из compsub
  11.404 -            if compsub:
  11.405 -                compsub.pop()
  11.406 -
  11.407 -
  11.408 -
  11.409 -
  11.410 -
  11.411 -
  11.412 -
  11.413 -
  11.414 -
  11.415 -
  11.416 -    def fast_algorithm (self, keys):
  11.417 -
  11.418 -        """
  11.419 -        Fast algorithm
  11.420 -        """
  11.421 -
  11.422 -
  11.423 -        self.kliki = []
  11.424 -
  11.425 -        print 'Fast algorithm started'
  11.426 -
  11.427 -
  11.428 -
  11.429 -        while 1:
  11.430 -            # try find new klika
  11.431 -
  11.432 -
  11.433 -            # exclude previous klika atoms
  11.434 -            excluded = {}
  11.435 -
  11.436 -            for klika in self.kliki:
  11.437 -
  11.438 -                for i in klika:
  11.439 -
  11.440 -                    excluded[i] = 1
  11.441 -
  11.442 -            keys1 = []
  11.443 -
  11.444 -            for i in keys:
  11.445 -
  11.446 -                if not excluded.has_key(i):
  11.447 -                    keys1.append(i)
  11.448 -
  11.449 -            if len(keys1) == 0:
  11.450 -                break
  11.451 -
  11.452 -            while 1:
  11.453 -
  11.454 -                # exclude some atoms
  11.455 -
  11.456 -
  11.457 -                connections = {} # index - atom, value -  connections value
  11.458 -
  11.459 -                for i in keys1:
  11.460 -
  11.461 -                    connections[i] = 0
  11.462 -
  11.463 -                    for j in keys1:
  11.464 -
  11.465 -                        if i != j and self.graf[i][j]:
  11.466 -                            connections[i] += 1
  11.467 -
  11.468 -
  11.469 -                if max(connections.values()) == min(connections.values()):
  11.470 -                    # all atoms are equal
  11.471 -                    break
  11.472 -
  11.473 -
  11.474 -                exclude_connect = min(connections.values()) # excluded atoms connections
  11.475 -
  11.476 -
  11.477 -                if self.cost:
  11.478 -
  11.479 -
  11.480 -                    cost_sum = {} # index - atom, value - cost sum
  11.481 -
  11.482 -                    for i in keys1:
  11.483 -
  11.484 -                        cost_sum[i] = 0
  11.485 -
  11.486 -                        if connections[i] == exclude_connect:
  11.487 -
  11.488 -                            for j in keys1:
  11.489 -
  11.490 -                                if i != j and self.graf[i][j]:
  11.491 -
  11.492 -                                    cost_sum[i] += self.cost[i][j]
  11.493 -
  11.494 -                    exclude_cost = min(cost_sum.values()) # excluded atoms cost sum
  11.495 -
  11.496 -
  11.497 -
  11.498 -                    keys2 = []
  11.499 -
  11.500 -                    for i in keys1:
  11.501 -
  11.502 -                        if connections[i] == exclude_connect:
  11.503 -
  11.504 -                            if cost_sum[i] == exclude_cost:
  11.505 -
  11.506 -                                continue
  11.507 -
  11.508 -                        keys2.append(i)
  11.509 -
  11.510 -                    keys1 = clon(keys2)
  11.511 -
  11.512 -                else:
  11.513 -
  11.514 -                    keys2 = []
  11.515 -
  11.516 -                    for i in keys1:
  11.517 -
  11.518 -                        if connections[i] == exclude_connect:
  11.519 -
  11.520 -                            continue
  11.521 -
  11.522 -                        keys2.append(i)
  11.523 -
  11.524 -                    keys1 = clon(keys2)
  11.525 -
  11.526 -
  11.527 -
  11.528 -            # try add other atoms
  11.529 -
  11.530 -
  11.531 -            while 1:
  11.532 -
  11.533 -
  11.534 -                for i in keys:
  11.535 -
  11.536 -                    if i in keys1:
  11.537 -                        continue
  11.538 -
  11.539 -
  11.540 -                    for j in keys1:
  11.541 -
  11.542 -
  11.543 -                        if not self.graf[i][j]:
  11.544 -                            break
  11.545 -
  11.546 -                    else:
  11.547 -                        # add atom i
  11.548 -                        keys1.append(i)
  11.549 -
  11.550 -
  11.551 -                        break
  11.552 -
  11.553 -
  11.554 -                else:
  11.555 -                    # no new atoms
  11.556 -                    break
  11.557 -
  11.558 -
  11.559 -
  11.560 -            # keys1 is klika
  11.561 -
  11.562 -            self.kliki.append(keys1[:])
  11.563 -
  11.564 -
  11.565 -
  11.566 -
  11.567 -
  11.568 -
  11.569 -
  11.570 -

    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/blocks3d/blocks3d.py	Tue Feb 22 19:25:27 2011 +0300
    12.3 @@ -0,0 +1,107 @@
    12.4 +#!/usr/bin/python
    12.5 +"""
    12.6 +Blocks3D
    12.7 +version 2.0
    12.8 +"""
    12.9 +
   12.10 +import argparse
   12.11 +import os
   12.12 +import json
   12.13 +
   12.14 +from allpy import config
   12.15 +from protein_pdb import Alignment, Block, Monomer, Sequence
   12.16 +from allpy import markup
   12.17 +from allpy import fileio
   12.18 +from allpy.argparse_validators import f_nng, part, timeout, pos, i_nng
   12.19 +
   12.20 +r = argparse.FileType('r')
   12.21 +w = argparse.FileType('w')
   12.22 +c = config
   12.23 +
   12.24 +html_template = os.path.join(os.path.dirname(__file__),
   12.25 +    'html.htm')
   12.26 +
   12.27 +p = argparse.ArgumentParser(
   12.28 +description='Blocks3D',
   12.29 +epilog='''1) Distance spreading [angstrom]
   12.30 +2) -1 timeout means running Bron-Kerbosh algorithm without timeout''',
   12.31 +formatter_class=argparse.ArgumentDefaultsHelpFormatter,
   12.32 +)
   12.33 +
   12.34 +p.add_argument('-v','--version',action='version',version='%(prog)s 2.0')
   12.35 +p.add_argument('-i',help='Input alignment file',metavar='FILE',type=r,required=True)
   12.36 +p.add_argument('-c',help='PDB names conformity file',metavar='FILE',type=r)
   12.37 +p.add_argument('-o',help='Output text file',metavar='FILE',type=w)
   12.38 +p.add_argument('-H',help='Output html file',metavar='FILE',type=w)
   12.39 +p.add_argument('-d',help='Distance spreading',metavar='float',type=f_nng,default=c.delta)
   12.40 +p.add_argument('-m',help='Min block width',metavar='int',type=pos,default=c.min_width)
   12.41 +p.add_argument('-t',help='Bron-Kerbosh (couple cores) timeout (-1 - unlimited)',metavar='int',type=timeout,default=c.timeout)
   12.42 +p.add_argument('-T',help='Bron-Kerbosh (blocks) timeout (-1 - unlimited)',metavar='int',type=timeout,default=c.timeout_2)
   12.43 +
   12.44 +args = p.parse_args()
   12.45 +
   12.46 +if not args.o and not args.H:
   12.47 +    print 'Error: no output file provided'
   12.48 +    exit()
   12.49 +
   12.50 +try:
   12.51 +    alignment = Alignment().append_file(args.i, format='fasta')
   12.52 +except:
   12.53 +    alignment = Alignment().append_file(args.i, format='msf')
   12.54 +
   12.55 +block = Block.from_alignment(alignment)
   12.56 +for sequence in block.sequences:
   12.57 +    sequence.auto_pdb()
   12.58 +
   12.59 +blocks = list(block.blocks3d(max_delta=args.d,
   12.60 +    timeout=args.t, timeout_2=args.T,
   12.61 +    min_width=args.m))
   12.62 +
   12.63 +column2pos = {}
   12.64 +for i, column in enumerate(block.columns):
   12.65 +    column2pos[column] = i
   12.66 +
   12.67 +if args.H:
   12.68 +    # html-file
   12.69 +    js_blocks = []
   12.70 +    js_fasta_dict = {}
   12.71 +    for b in blocks:
   12.72 +        p_from = column2pos[b.columns[0]]
   12.73 +        p_to = column2pos[b.columns[-1]]
   12.74 +        js_block = {}
   12.75 +        js_block['start'] = p_from
   12.76 +        js_block['end'] = p_to
   12.77 +        js_block['IDs'] = []
   12.78 +        for sequence in b.sequences:
   12.79 +            js_block['IDs'].append(sequence.name)
   12.80 +        js_block['cores'] = []
   12.81 +        js_blocks.append(js_block)
   12.82 +    for sequence in block.sequences:
   12.83 +        js_fasta_dict[sequence.name] = str(sequence)
   12.84 +    self_js_test = ("blocks = json('%(blocks)s');" +\
   12.85 +        "fasta_dict=json('%(fasta_dict)s');") %\
   12.86 +        {'blocks': json.dumps(js_blocks),
   12.87 +         'fasta_dict': json.dumps(js_fasta_dict)}
   12.88 +    t = open(html_template).read()
   12.89 +    t = t.replace('self_js_text', self_js_test)
   12.90 +    args.H.write(t)
   12.91 +
   12.92 +if args.o:
   12.93 +    t = args.o
   12.94 +    # text output
   12.95 +    t.write("Block|from|to(exclusive)|gaps\n")
   12.96 +    for i, b in enumerate(blocks):
   12.97 +        p_from = column2pos[b.columns[0]]
   12.98 +        p_to = column2pos[b.columns[-1]] + 1
   12.99 +        width = p_to - p_from
  12.100 +        gaps = width - len(b.columns)
  12.101 +        t.write("%(i)i|%(from)i|%(to)i|%(gaps)i\n" %\
  12.102 +            {'i': i, 'from': p_from,
  12.103 +            'to': p_to, 'gaps': gaps})
  12.104 +    t.write('#\n')
  12.105 +    t.write("Block|Sequence\n")
  12.106 +    for i, b in enumerate(blocks):
  12.107 +        for sequence in b.sequences:
  12.108 +            t.write("%(i)i|%(sequence)s\n" %\
  12.109 +                {'i': i, 'sequence': sequence.name})
  12.110 +

    13.1 --- a/blocks3d/clon.py	Tue Feb 22 17:16:56 2011 +0300
    13.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.3 @@ -1,38 +0,0 @@
    13.4 -# -*- coding: cp1251 -*-
    13.5 -
    13.6 -####################################
    13.7 -#   Create new clon of list or dict
    13.8 -####################################
    13.9 -
   13.10 -
   13.11 -def clon (obj):
   13.12 -    t = type(obj)
   13.13 -
   13.14 -
   13.15 -    if t == list or t == tuple:
   13.16 -
   13.17 -        r = []
   13.18 -
   13.19 -        for i in obj:
   13.20 -            r.append(clon(i))
   13.21 -
   13.22 -        if t == tuple:
   13.23 -            r = tuple(r)
   13.24 -
   13.25 -        return r
   13.26 -
   13.27 -
   13.28 -    if t == dict:
   13.29 -
   13.30 -        r = {}
   13.31 -
   13.32 -        for key, value in obj.items():
   13.33 -            r[key] = clon(value)
   13.34 -
   13.35 -
   13.36 -        return r
   13.37 -
   13.38 -    return obj
   13.39 -
   13.40 -
   13.41 -

    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/blocks3d/protein_pdb.py	Tue Feb 22 19:25:27 2011 +0300
    14.3 @@ -0,0 +1,18 @@
    14.4 +
    14.5 +import sys
    14.6 +
    14.7 +from allpy import protein, structure
    14.8 +import protein_pdb
    14.9 +
   14.10 +class Sequence(protein.Sequence, structure.SequenceMixin):
   14.11 +    types = protein_pdb
   14.12 +
   14.13 +class Alignment(protein.Alignment, structure.AlignmentMixin):
   14.14 +    types = protein_pdb
   14.15 +
   14.16 +class Block(protein.Block, structure.BlockMixin):
   14.17 +    types = protein_pdb
   14.18 +
   14.19 +class Monomer(protein.Monomer):
   14.20 +    types = protein_pdb
   14.21 +

    15.1 --- a/blocks3d/www/input/blocks.js	Tue Feb 22 17:16:56 2011 +0300
    15.2 +++ b/blocks3d/www/input/blocks.js	Tue Feb 22 19:25:27 2011 +0300
    15.3 @@ -2,9 +2,10 @@
    15.4  /*
    15.5  list of blocks
    15.6  block = {
    15.7 -    start: integer
    15.8 -    end: integer
    15.9 +    int start; (inclusive)
   15.10 +    int end; (inclusive)
   15.11      IDs: list of IDs
   15.12 +    cores: []
   15.13  }
   15.14  */
   15.15  

    16.1 --- a/geometrical_core/argparse_validators.py	Tue Feb 22 17:16:56 2011 +0300
    16.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.3 @@ -1,63 +0,0 @@
    16.4 -
    16.5 -import argparse
    16.6 -
    16.7 -def f_nng(string):
    16.8 -    """ Validates nonnegative (>=0) float """
    16.9 -    try:
   16.10 -        value = float(string)
   16.11 -    except:
   16.12 -        msg = "%r is wrong float" % string
   16.13 -        raise argparse.ArgumentTypeError(msg)
   16.14 -    if value < 0:
   16.15 -        msg = "%r is negative" % string
   16.16 -        raise argparse.ArgumentTypeError(msg)
   16.17 -    return value
   16.18 -
   16.19 -def part(string):
   16.20 -    """ Validates 0.0 <= float <= 1.0 """
   16.21 -    try:
   16.22 -        value = float(string)
   16.23 -    except:
   16.24 -        msg = "%r is wrong float" % string
   16.25 -        raise argparse.ArgumentTypeError(msg)
   16.26 -    if not (0.0 <= value <= 1.0):
   16.27 -        msg = "%r is not float, representing part, ie in [0, 1]" % string
   16.28 -        raise argparse.ArgumentTypeError(msg)
   16.29 -    return value
   16.30 -
   16.31 -def timeout(string):
   16.32 -    """ Validates int >= -1 """
   16.33 -    try:
   16.34 -        value = int(string)
   16.35 -    except:
   16.36 -        msg = "%r is wrong integer" % string
   16.37 -        raise argparse.ArgumentTypeError(msg)
   16.38 -    if value < -1:
   16.39 -        msg = "integer %r is less than -1" % string
   16.40 -        raise argparse.ArgumentTypeError(msg)
   16.41 -    return value
   16.42 -
   16.43 -def pos(string):
   16.44 -    """ Validates positive integer """
   16.45 -    try:
   16.46 -        value = int(string)
   16.47 -    except:
   16.48 -        msg = "%r is wrong integer" % string
   16.49 -        raise argparse.ArgumentTypeError(msg)
   16.50 -    if value < 1:
   16.51 -        msg = "%r is not positive integer" % string
   16.52 -        raise argparse.ArgumentTypeError(msg)
   16.53 -    return value
   16.54 -
   16.55 -def i_nng(string):
   16.56 -    """ Validates int >= 0 """
   16.57 -    try:
   16.58 -        value = int(string)
   16.59 -    except:
   16.60 -        msg = "%r is wrong integer" % string
   16.61 -        raise argparse.ArgumentTypeError(msg)
   16.62 -    if value < 0:
   16.63 -        msg = "integer %r is less than 0" % string
   16.64 -        raise argparse.ArgumentTypeError(msg)
   16.65 -    return value
   16.66 -

    17.1 --- a/geometrical_core/geometrical-core	Tue Feb 22 17:16:56 2011 +0300
    17.2 +++ b/geometrical_core/geometrical-core	Tue Feb 22 19:25:27 2011 +0300
    17.3 @@ -12,7 +12,8 @@
    17.4  from allpy import markup
    17.5  from allpy import fileio
    17.6  from spt import gcs_to_spt
    17.7 -from argparse_validators import f_nng, part, timeout, pos, i_nng
    17.8 +
    17.9 +from allpy.argparse_validators import f_nng, part, timeout, pos, i_nng
   17.10  
   17.11  r = argparse.FileType('r')
   17.12  w = argparse.FileType('w')

    18.1 --- a/geometrical_core/protein_pdb.py	Tue Feb 22 17:16:56 2011 +0300
    18.2 +++ b/geometrical_core/protein_pdb.py	Tue Feb 22 19:25:27 2011 +0300
    18.3 @@ -2,7 +2,7 @@
    18.4  import sys
    18.5  
    18.6  from allpy import protein, structure
    18.7 -protein_pdb = sys.modules[__name__]
    18.8 +import protein_pdb
    18.9  
   18.10  class Sequence(protein.Sequence, structure.SequenceMixin):
   18.11      types = protein_pdb
   18.12 @@ -15,3 +15,4 @@
   18.13  
   18.14  class Monomer(protein.Monomer):
   18.15      types = protein_pdb
   18.16 +