allpy

diff allpy/base.py @ 261:d60628e29b24
Moved contents of block.py to base.py without any changes.
author: Daniil Alexeyevsky <me.dendik@gmail.com>
date: Tue, 14 Dec 2010 21:02:49 +0300
parents: aae821828b03
children: e361a7b7d9aa
     1.1 --- a/allpy/base.py	Tue Dec 14 20:51:53 2010 +0300
     1.2 +++ b/allpy/base.py	Tue Dec 14 21:02:49 2010 +0300
     1.3 @@ -1,3 +1,10 @@
     1.4 +import sys
     1.5 +import os
     1.6 +from tempfile import NamedTemporaryFile
     1.7 +
     1.8 +import config
     1.9 +from graph import Graph
    1.10 +from Bio.PDB import Superimposer
    1.11  from fasta import save_fasta
    1.12  import data.codes
    1.13  
    1.14 @@ -338,7 +345,161 @@
    1.15          for m in self.body[sequence]])
    1.16  
    1.17  class Block(object):
    1.18 -    """ """
    1.19 -    pass
    1.20 +    """ Block of alignment
    1.21 +    
    1.22 +    Mandatory data:
    1.23 +    *   self.alignment -- alignment object, which the block belongs to
    1.24 +    *   self.sequences - set of sequence objects that contain monomers
    1.25 +        and/or gaps, that constitute the block
    1.26 +    *   self.positions -- list of positions of the alignment.body that
    1.27 +        are included in the block; position[i+1] is always to the right from position[i]
    1.28 +    
    1.29 +    Don't change self.sequences -- it may be a link to other block.sequences
    1.30 +    
    1.31 +    How to create a new block:
    1.32 +    >>> import alignment
    1.33 +    >>> import block
    1.34 +    >>> proj = alignment.Alignment(open("test.fasta"))
    1.35 +    >>> block1 = block.Block(proj)
    1.36 +    """
    1.37 +    
    1.38 +    def __init__(self, alignment, sequences=None, positions=None):
    1.39 +        """ Builds new block from alignment
    1.40 +        
    1.41 +        if sequences==None, all sequences are used
    1.42 +        if positions==None, all positions are used
    1.43 +        """
    1.44 +        if sequences == None:
    1.45 +            sequences = set(alignment.sequences) # copy
    1.46 +        if positions == None:
    1.47 +            positions = range(len(alignment))
    1.48 +        self.alignment = alignment
    1.49 +        self.sequences = sequences
    1.50 +        self.positions = positions
    1.51 +    
    1.52 +    def save_fasta(self, out_file, long_line=70, gap='-'):
    1.53 +        """ Saves alignment to given file in fasta-format 
    1.54 +        
    1.55 +        No changes in the names, descriptions or order of the sequences
    1.56 +        are made.
    1.57 +        """
    1.58 +        for sequence in self.sequences:
    1.59 +            alignment_monomers = self.alignment.body[sequence]
    1.60 +            block_monomers = [alignment_monomers[i] for i in self.positions]
    1.61 +            string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
    1.62 +            save_fasta(out_file, string, sequence.name, sequence.description, long_line)
    1.63 +    
    1.64 +    def geometrical_cores(self, max_delta=config.delta, 
    1.65 +    timeout=config.timeout, minsize=config.minsize, 
    1.66 +    ac_new_atoms=config.ac_new_atoms,
    1.67 +    ac_count=config.ac_count):
    1.68 +        """ Returns length-sorted list of blocks, representing GCs
    1.69 +        
    1.70 +        max_delta -- threshold of distance spreading
    1.71 +        timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
    1.72 +        minsize -- min size of each core
    1.73 +        ac_new_atoms -- min part or new atoms in new alternative core
    1.74 +            current GC is compared with each of already selected GCs
    1.75 +            if difference is less then ac_new_atoms, current GC is skipped
    1.76 +            difference = part of new atoms in current core
    1.77 +        ac_count -- max number of cores (including main core)
    1.78 +            -1 means infinity
    1.79 +        If more than one pdb chain for some sequence provided, consider all of them
    1.80 +        cost is calculated as 1 / (delta + 1) 
    1.81 +            delta in [0, +inf) => cost in (0, 1]
    1.82 +        """
    1.83 +        nodes = self.positions
    1.84 +        lines = {}
    1.85 +        for i in self.positions:
    1.86 +            for j in self.positions:
    1.87 +                if i < j:
    1.88 +                    distances = []
    1.89 +                    for sequence in self.sequences:
    1.90 +                        for chain in sequence.pdb_chains:
    1.91 +                            m1 = self.alignment.body[sequence][i]
    1.92 +                            m2 = self.alignment.body[sequence][j]
    1.93 +                            if m1 and m2:
    1.94 +                                r1 = sequence.pdb_residues[chain][m1]
    1.95 +                                r2 = sequence.pdb_residues[chain][m2]
    1.96 +                                ca1 = r1['CA']
    1.97 +                                ca2 = r2['CA']
    1.98 +                                d = ca1 - ca2 # Bio.PDB feature
    1.99 +                                distances.append(d)
   1.100 +                    if len(distances) >= 2:
   1.101 +                        delta = max(distances) - min(distances)
   1.102 +                        if delta <= max_delta:
   1.103 +                            lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
   1.104 +        graph = Graph(nodes, lines)
   1.105 +        cliques = graph.cliques(timeout=timeout, minsize=minsize)
   1.106 +        GCs = []
   1.107 +        for clique in cliques:
   1.108 +            for GC in GCs:
   1.109 +                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
   1.110 +                    break
   1.111 +            else:
   1.112 +                GCs.append(Block(self.alignment, self.sequences, clique))
   1.113 +                if ac_count != -1 and len(GCs) >= ac_count:
   1.114 +                    break
   1.115 +        return GCs
   1.116 +    
   1.117 +    def xstring(self, x='X', gap='-'):
   1.118 +        """ Returns string consisting of gap chars and chars x at self.positions
   1.119 +        
   1.120 +        Length of returning string = length of alignment
   1.121 +        """
   1.122 +        monomers = [False] * len(self.alignment)
   1.123 +        for i in self.positions:
   1.124 +            monomers[i] = True
   1.125 +        return ''.join([x if m else gap for m in monomers])
   1.126 +    
   1.127 +    def save_xstring(self, out_file, name, description='', x='X', gap='-', long_line=70):
   1.128 +        """ Save xstring and name in fasta format """
   1.129 +        save_fasta(out_file, self.xstring(x=x, gap=gap), name, description, long_line)
   1.130 +    
   1.131 +    def monomers(self, sequence):
   1.132 +        """ Iterates monomers of this sequence from this block """
   1.133 +        alignment_sequence = self.alignment.body[sequence]
   1.134 +        return (alignment_sequence[i] for i in self.positions)
   1.135 +    
   1.136 +    def ca_atoms(self, sequence, pdb_chain):
   1.137 +        """ Iterates Ca-atom of monomers of this sequence from this block  """
   1.138 +        return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())
   1.139 +    
   1.140 +    def sequences_chains(self):
   1.141 +        """ Iterates pairs (sequence, chain) """
   1.142 +        for sequence in self.alignment.sequences:
   1.143 +            if sequence in self.sequences:
   1.144 +                for chain in sequence.pdb_chains:
   1.145 +                    yield (sequence, chain)
   1.146 +    
   1.147 +    def superimpose(self):
   1.148 +        """ Superimpose all pdb_chains in this block """
   1.149 +        sequences_chains = list(self.sequences_chains())
   1.150 +        if len(sequences_chains) >= 1:
   1.151 +            sup = Superimposer()
   1.152 +            fixed_sequence, fixed_chain = sequences_chains.pop()
   1.153 +            fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)
   1.154 +            for sequence, chain in sequences_chains:
   1.155 +                moving_atoms =  self.ca_atoms(sequence, chain)
   1.156 +                sup.set_atoms(fixed_atoms, moving_atoms)
   1.157 +                # Apply rotation/translation to the moving atoms
   1.158 +                sup.apply(moving_atoms)
   1.159 +    
   1.160 +    def pdb_save(self, out_file):
   1.161 +        """ Save all sequences 
   1.162 +        
   1.163 +        Returns {(sequence, chain): CHAIN}
   1.164 +        CHAIN is chain letter in new file
   1.165 +        """
   1.166 +        tmp_file = NamedTemporaryFile(delete=False)
   1.167 +        tmp_file.close()
   1.168 +        
   1.169 +        for sequence, chain in self.sequences_chains():
   1.170 +            sequence.pdb_save(tmp_file.name, chain)
   1.171 +            # TODO: read from tmp_file.name
   1.172 +            # change CHAIN
   1.173 +            # add to out_file
   1.174 +        
   1.175 +        os.unlink(NamedTemporaryFile)
   1.176  
   1.177  # vim: set ts=4 sts=4 sw=4 et:
author	Daniil Alexeyevsky <me.dendik@gmail.com>
date	Tue, 14 Dec 2010 21:02:49 +0300
parents	aae821828b03
children	e361a7b7d9aa