allpy

changeset 153:0c7f6117481b
idea implemented: pdb and sec_str data moved from monomer to sequence
author: boris <bnagaev@gmail.com>
date: Tue, 26 Oct 2010 21:53:23 +0400
parents: ff68a1bef5eb
children: be4834043074
files: lib/block.py lib/monomer.py lib/project.py lib/sequence.py
diffstat: 4 files changed, 39 insertions(+), 28 deletions(-) [+]
[-]

lib/block.py 8

lib/monomer.py 17

lib/project.py 2

lib/sequence.py 40
     1.1 --- a/lib/block.py	Tue Oct 26 13:32:19 2010 +0400
     1.2 +++ b/lib/block.py	Tue Oct 26 21:53:23 2010 +0400
     1.3 @@ -87,8 +87,10 @@
     1.4                              m1 = self.project.alignment[sequence][i]
     1.5                              m2 = self.project.alignment[sequence][j]
     1.6                              if m1 and m2:
     1.7 -                                ca1 = m1.pdb_residues[chain]['CA']
     1.8 -                                ca2 = m2.pdb_residues[chain]['CA']
     1.9 +                                r1 = sequence.pdb_residues[chain][m1]
    1.10 +                                r2 = sequence.pdb_residues[chain][m2]
    1.11 +                                ca1 = r1['CA']
    1.12 +                                ca2 = r2['CA']
    1.13                                  d = ca1 - ca2 # Bio.PDB feature
    1.14                                  distances.append(d)
    1.15                      if len(distances) >= 2:
    1.16 @@ -129,7 +131,7 @@
    1.17      
    1.18      def ca_atoms(self, sequence, pdb_chain):
    1.19          """ Iterates Ca-atom of monomers of this sequence from this block  """
    1.20 -        return (monomer.pdb_residues[pdb_chain] for monomer in self.monomers())
    1.21 +        return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())
    1.22      
    1.23      def sequences_chains(self):
    1.24          """ Iterates pairs (sequence, chain) """

     2.1 --- a/lib/monomer.py	Tue Oct 26 13:32:19 2010 +0400
     2.2 +++ b/lib/monomer.py	Tue Oct 26 21:53:23 2010 +0400
     2.3 @@ -1,5 +1,4 @@
     2.4  #!/usr/bin/python
     2.5 -# -*- coding: utf-8 -*- 
     2.6  
     2.7  from allpy_data.AAdict import AAdict
     2.8  import Bio.PDB
     2.9 @@ -49,28 +48,12 @@
    2.10      """ Monomer 
    2.11      
    2.12      type -- link to MonomerType object
    2.13 -    pdb_residues -- dictionary like {Bio.PDB.Chain: Bio.PDB.Residue}
    2.14 -    pdb_secstr -- dictionary like {Bio.PDB.Chain: 'Secondary structure'}
    2.15 -        Code   Secondary structure
    2.16 -        H      α-helix
    2.17 -        B      Isolated β-bridge residue
    2.18 -        E      Strand
    2.19 -        G      3-10 helix
    2.20 -        I      π-helix
    2.21 -        T      Turn
    2.22 -        S      Bend
    2.23 -        -      Other
    2.24      
    2.25      Idea: move pdb_residues and pdb_secstr to Sequence object
    2.26      """
    2.27      def __init__(self, monomer_type):
    2.28          self.type = monomer_type
    2.29 -        self.pdb_residues = {}
    2.30 -        self.pdb_secstr = {}
    2.31          
    2.32 -    def pdb_residue_add(self, chain, residue):
    2.33 -        self.pdb_residues[chain] = residue
    2.34 -    
    2.35      def __eq__(self, other):
    2.36          return self.type == other.type
    2.37  

     3.1 --- a/lib/project.py	Tue Oct 26 13:32:19 2010 +0400
     3.2 +++ b/lib/project.py	Tue Oct 26 21:53:23 2010 +0400
     3.3 @@ -245,7 +245,7 @@
     3.4      def secstr(self, secuence, pdb_chain, gap='-'):
     3.5          """ Returns string representing secondary structure """
     3.6          return ''.join([
     3.7 -        (m.pdb_secstr[pdb_chain] if m and pdb_chain in m.pdb_secstr else gap) 
     3.8 +        (secuence.pdb_secstr[pdb_chain][m] if secuence.pdb_has(pdb_chain, m) else gap) 
     3.9          for m in self.alignment[secuence]])
    3.10      
    3.11      def save_secstr(self, out_file, secuence, pdb_chain, 

     4.1 --- a/lib/sequence.py	Tue Oct 26 13:32:19 2010 +0400
     4.2 +++ b/lib/sequence.py	Tue Oct 26 21:53:23 2010 +0400
     4.3 @@ -1,4 +1,6 @@
     4.4  #!/usr/bin/python
     4.5 +# -*- coding: utf-8 -*- 
     4.6 +
     4.7  from monomer import AminoAcidType
     4.8  from Bio.PDB import PDBParser, CaPPBuilder, PDBIO
     4.9  from Bio.PDB.DSSP import make_dssp_dict
    4.10 @@ -22,6 +24,19 @@
    4.11      *   pdb_chains -- list of Bio.PDB.Chain's
    4.12      *   pdb_files -- dictionary like {Bio.PDB.Chain: file_obj}
    4.13      
    4.14 +    *   pdb_residues -- dictionary like {Bio.PDB.Chain: {Monomer: Bio.PDB.Residue}}
    4.15 +    *   pdb_secstr -- dictionary like {Bio.PDB.Chain: {Monomer: 'Secondary structure'}}
    4.16 +            Code   Secondary structure
    4.17 +            H      α-helix
    4.18 +            B      Isolated β-bridge residue
    4.19 +            E      Strand
    4.20 +            G      3-10 helix
    4.21 +            I      π-helix
    4.22 +            T      Turn
    4.23 +            S      Bend
    4.24 +            -      Other
    4.25 +    
    4.26 +    
    4.27      ?TODO: global pdb_structures 
    4.28      """
    4.29      def __init__(self, monomers=None, name='', description=""):
    4.30 @@ -32,7 +47,9 @@
    4.31          self.monomers = monomers 
    4.32          self.pdb_chains = []
    4.33          self.pdb_files = {}
    4.34 -    
    4.35 +        self.pdb_residues = {}
    4.36 +        self.pdb_secstr = {}
    4.37 +        
    4.38      def __len__(self):
    4.39          return len(self.monomers)
    4.40      
    4.41 @@ -60,12 +77,15 @@
    4.42          structure = PDBParser().get_structure(name, pdb_file)
    4.43          chain = structure[pdb_model][pdb_chain]
    4.44          self.pdb_chains.append(chain)
    4.45 +        self.pdb_residues[chain] = {}
    4.46 +        self.pdb_secstr[chain] = {}
    4.47          pdb_sequence = Sequence.from_pdb_chain(chain)
    4.48          alignment = project.Project.from_sequences(self, pdb_sequence)
    4.49          alignment.muscle_align()
    4.50          for monomer, pdb_monomer in alignment.column(sequence=pdb_sequence, original=self):
    4.51 -            if pdb_monomer and chain in pdb_monomer.pdb_residues:
    4.52 -                monomer.pdb_residue_add(chain, pdb_monomer.pdb_residues[chain])
    4.53 +            if pdb_sequence.pdb_has(chain, pdb_monomer):
    4.54 +                residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
    4.55 +                self.pdb_residues[chain][monomer] = residue
    4.56          self.pdb_files[chain] = pdb_file
    4.57          
    4.58      @staticmethod
    4.59 @@ -87,11 +107,13 @@
    4.60          peptides = cappbuilder.build_peptides(chain)
    4.61          sequence = Sequence()
    4.62          sequence.pdb_chains = [chain]
    4.63 +        sequence.pdb_residues[chain] = {}
    4.64 +        sequence.pdb_secstr[chain] = {}
    4.65          for peptide in peptides:
    4.66              for ca_atom in peptide.get_ca_list():
    4.67                  residue = ca_atom.get_parent()
    4.68                  monomer = AminoAcidType.from_pdb_residue(residue).instance()
    4.69 -                monomer.pdb_residue_add(chain, residue)
    4.70 +                sequence.pdb_residues[chain][monomer] = residue
    4.71                  sequence.monomers.append(monomer)
    4.72          return sequence
    4.73      
    4.74 @@ -147,12 +169,16 @@
    4.75          os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
    4.76          dssp, keys = make_dssp_dict(tmp_file.name)
    4.77          for monomer in self.monomers:
    4.78 -            if pdb_chain in monomer.pdb_residues:
    4.79 -                residue = monomer.pdb_residues[pdb_chain]
    4.80 +            if self.pdb_has(pdb_chain, monomer):
    4.81 +                residue = self.pdb_residues[pdb_chain][monomer]
    4.82                  try:
    4.83                      d = dssp[(pdb_chain.get_id(), residue.get_id())]
    4.84 -                    monomer.pdb_secstr[pdb_chain] = d[1]
    4.85 +                    self.pdb_secstr[pdb_chain][monomer] = d[1]
    4.86                  except:
    4.87                      print "No dssp information about %s at %s" % (monomer, pdb_chain)
    4.88          os.unlink(tmp_file.name)
    4.89      
    4.90 +    def pdb_has(self, chain, monomer):
    4.91 +        return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
    4.92 +    
    4.93 +