Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/diff/9ea5099309b8/allpy/base.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 20:17:02 2013
Кодировка:
allpy: allpy/base.py diff

allpy

diff allpy/base.py @ 274:9ea5099309b8

Moved PDB parts of allpy.base.Sequence to allpy.pdb.Sequence
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Wed, 15 Dec 2010 19:19:11 +0300
parents bda87d4c525e
children 4a2341bc90b1
line diff
     1.1 --- a/allpy/base.py	Wed Dec 15 18:58:54 2010 +0300
     1.2 +++ b/allpy/base.py	Wed Dec 15 19:19:11 2010 +0300
     1.3 @@ -6,9 +6,7 @@
     1.4  
     1.5  import config
     1.6  from graph import Graph
     1.7 -from Bio.PDB import Superimposer, CaPPBuilder, PDBIO
     1.8  from Bio.PDB.DSSP import make_dssp_dict
     1.9 -from allpy.pdb import std_id, pdb_id_parse, get_structure
    1.10  from fasta import save_fasta
    1.11  import data.codes
    1.12  
    1.13 @@ -127,46 +125,24 @@
    1.14          return self.type is other
    1.15  
    1.16  class Sequence(list):
    1.17 -    """ Sequence of Monomers
    1.18 +    """Sequence of Monomers.
    1.19  
    1.20 -    list of monomer objects (aminoacids or nucleotides)
    1.21 +    This behaves like list of monomer objects. In addition to standard list
    1.22 +    behaviour, Sequence has the following attributes:
    1.23  
    1.24 -    Mandatory data:
    1.25 +    *   name -- str with the name of the sequence
    1.26 +    *   description -- str with description of the sequence
    1.27 +    *   source -- str denoting source of the sequence
    1.28  
    1.29 -    *   name -- str with the name of sequence
    1.30 -    *   description -- str with description of the sequence
    1.31 +    Any of them may be empty (i.e. hold empty string)
    1.32 +    """
    1.33  
    1.34 -    Optional (may be empty):
    1.35 -
    1.36 -    *   source -- source of sequence
    1.37 -    *   pdb_chain -- Bio.PDB.Chain
    1.38 -    *   pdb_file -- file object
    1.39 -
    1.40 -    *   pdb_residues -- {Monomer: Bio.PDB.Residue}
    1.41 -    *   pdb_secstr -- {Monomer: 'Secondary structure'}
    1.42 -            Code   Secondary structure
    1.43 -            H      alpha-helix
    1.44 -            B      Isolated beta-bridge residue
    1.45 -            E      Strand
    1.46 -            G      3-10 helix
    1.47 -            I      pi-helix
    1.48 -            T      Turn
    1.49 -            S      Bend
    1.50 -            -      Other
    1.51 -
    1.52 -
    1.53 -    ?TODO: global pdb_structures
    1.54 -    """
    1.55      def __init__(self, monomers=None, name='', description=""):
    1.56          if not monomers:
    1.57              monomers = []
    1.58          self.name = name
    1.59          self.description = description
    1.60          self.monomers = monomers
    1.61 -        self.pdb_chains = []
    1.62 -        self.pdb_files = {}
    1.63 -        self.pdb_residues = {}
    1.64 -        self.pdb_secstr = {}
    1.65  
    1.66      def __len__(self):
    1.67          return len(self.monomers)
    1.68 @@ -186,34 +162,6 @@
    1.69      def __ne__(self, other):
    1.70          return not (self == other)
    1.71  
    1.72 -    def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
    1.73 -        """ Reads Pdb chain from file
    1.74 -
    1.75 -        and align each Monomer with PDB.Residue (TODO)
    1.76 -        """
    1.77 -        name = std_id(pdb_id, pdb_chain, pdb_model)
    1.78 -        structure = get_structure(pdb_file, name)
    1.79 -        chain = structure[pdb_model][pdb_chain]
    1.80 -        self.pdb_chains.append(chain)
    1.81 -        self.pdb_residues[chain] = {}
    1.82 -        self.pdb_secstr[chain] = {}
    1.83 -        pdb_sequence = Sequence.from_pdb_chain(chain)
    1.84 -        a = alignment.Alignment.from_sequences(self, pdb_sequence)
    1.85 -        a.muscle_align()
    1.86 -        for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):
    1.87 -            if pdb_sequence.pdb_has(chain, pdb_monomer):
    1.88 -                residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
    1.89 -                self.pdb_residues[chain][monomer] = residue
    1.90 -        self.pdb_files[chain] = pdb_file
    1.91 -
    1.92 -    def pdb_unload(self):
    1.93 -        """ Delete all pdb-connected links """
    1.94 -        #~ gc.get_referrers(self.pdb_chains[0])
    1.95 -        self.pdb_chains = []
    1.96 -        self.pdb_residues = {}
    1.97 -        self.pdb_secstr = {} # FIXME
    1.98 -        self.pdb_files = {} # FIXME
    1.99 -
   1.100      @classmethod
   1.101      def from_string(cls, string, name='', description=''):
   1.102          """Create sequences from string of one-letter codes."""
   1.103 @@ -221,90 +169,6 @@
   1.104          monomers = [monomer(letter) for letter in string]
   1.105          return cls(monomers, name, description)
   1.106  
   1.107 -    @staticmethod
   1.108 -    def from_pdb_chain(chain):
   1.109 -        """ Returns Sequence with Monomers with link to Bio.PDB.Residue
   1.110 -
   1.111 -        chain is Bio.PDB.Chain
   1.112 -        """
   1.113 -        cappbuilder = CaPPBuilder()
   1.114 -        peptides = cappbuilder.build_peptides(chain)
   1.115 -        sequence = Sequence()
   1.116 -        sequence.pdb_chains = [chain]
   1.117 -        sequence.pdb_residues[chain] = {}
   1.118 -        sequence.pdb_secstr[chain] = {}
   1.119 -        for peptide in peptides:
   1.120 -            for ca_atom in peptide.get_ca_list():
   1.121 -                residue = ca_atom.get_parent()
   1.122 -                monomer = AminoAcidType.from_pdb_residue(residue).instance()
   1.123 -                sequence.pdb_residues[chain][monomer] = residue
   1.124 -                sequence.monomers.append(monomer)
   1.125 -        return sequence
   1.126 -
   1.127 -    def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
   1.128 -        """ Adds pdb information to each monomer
   1.129 -
   1.130 -        Returns if information has been successfully added
   1.131 -        TODO: conformity_file
   1.132 -
   1.133 -        id-format lava flow
   1.134 -        """
   1.135 -        if not conformity_info:
   1.136 -            path = os.path.join(pdb_directory, self.name)
   1.137 -            if os.path.exists(path) and os.path.getsize(path):
   1.138 -                match = pdb_id_parse(self.name)
   1.139 -                self.pdb_chain_add(open(path), match['code'],
   1.140 -                match['chain'], match['model'])
   1.141 -            else:
   1.142 -                match = pdb_id_parse(self.name)
   1.143 -                if match:
   1.144 -                    code = match['code']
   1.145 -                    pdb_filename = config.pdb_dir % code
   1.146 -                    if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
   1.147 -                        url = config.pdb_url % code
   1.148 -                        print "Download %s" % url
   1.149 -                        pdb_file = open(pdb_filename, 'w')
   1.150 -                        data = urllib2.urlopen(url).read()
   1.151 -                        pdb_file.write(data)
   1.152 -                        pdb_file.close()
   1.153 -                        print "Save %s" % pdb_filename
   1.154 -                    pdb_file = open(pdb_filename)
   1.155 -                    self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
   1.156 -
   1.157 -    def pdb_save(self, out_filename, pdb_chain):
   1.158 -        """ Saves pdb_chain to out_file """
   1.159 -        class GlySelect(Select):
   1.160 -            def accept_chain(self, chain):
   1.161 -                if chain == pdb_chain:
   1.162 -                    return 1
   1.163 -                else:
   1.164 -                    return 0
   1.165 -        io = PDBIO()
   1.166 -        structure = chain.get_parent()
   1.167 -        io.set_structure(structure)
   1.168 -        io.save(out_filename, GlySelect())
   1.169 -
   1.170 -
   1.171 -    def pdb_add_sec_str(self, pdb_chain):
   1.172 -        """ Add secondary structure data """
   1.173 -        tmp_file = NamedTemporaryFile(delete=False)
   1.174 -        tmp_file.close()
   1.175 -        pdb_file = self.pdb_files[pdb_chain].name
   1.176 -        os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
   1.177 -        dssp, keys = make_dssp_dict(tmp_file.name)
   1.178 -        for monomer in self.monomers:
   1.179 -            if self.pdb_has(pdb_chain, monomer):
   1.180 -                residue = self.pdb_residues[pdb_chain][monomer]
   1.181 -                try:
   1.182 -                    d = dssp[(pdb_chain.get_id(), residue.get_id())]
   1.183 -                    self.pdb_secstr[pdb_chain][monomer] = d[1]
   1.184 -                except:
   1.185 -                    print "No dssp information about %s at %s" % (monomer, pdb_chain)
   1.186 -        os.unlink(tmp_file.name)
   1.187 -
   1.188 -    def pdb_has(self, chain, monomer):
   1.189 -        return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
   1.190 -
   1.191      def secstr_has(self, chain, monomer):
   1.192          return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
   1.193