allpy

changeset 585:7eeae2471e69
structure.py: improve set_pdb_chain() * improve controls (asserts) * fix documentation
author: boris (kodomo) <bnagaev@gmail.com>
date: Sat, 26 Mar 2011 00:34:30 +0300
parents: ab3f5300bf4e
children: 27e89e4a9346
files: allpy/config.py allpy/structure.py
diffstat: 2 files changed, 48 insertions(+), 13 deletions(-) [+]
[-]

allpy/config.py 6

allpy/structure.py 55 allpy/config.py 6 allpy/structure.py 55
allpy/config.py 6
allpy/structure.py 55
     1.1 --- a/allpy/config.py	Sat Mar 26 00:18:15 2011 +0300
     1.2 +++ b/allpy/config.py	Sat Mar 26 00:34:30 2011 +0300
     1.3 @@ -2,7 +2,11 @@
     1.4  delta = 2.0 # for geometrical core building
     1.5  minsize = 20 # min size of returning cores
     1.6  maxabsent = 0.15 # deprecated?
     1.7 -max_mutations = 2
     1.8 +
     1.9 +# defaults for set_pdb_chain
    1.10 +min_continuous_match = 20
    1.11 +min_match = 5
    1.12 +max_waste_in_pdb = 5
    1.13  
    1.14  # pdb download url (XXXX is pdb code place)
    1.15  pdb_url = 'http://www.pdb.org/pdb/files/%s.pdb'

     2.1 --- a/allpy/structure.py	Sat Mar 26 00:18:15 2011 +0300
     2.2 +++ b/allpy/structure.py	Sat Mar 26 00:34:30 2011 +0300
     2.3 @@ -65,12 +65,27 @@
     2.4      """
     2.5  
     2.6      def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain='A', pdb_model=0,
     2.7 -            max_mutations=config.max_mutations, xyz_only=False):
     2.8 +            xyz_only=False,
     2.9 +            min_continuous_match=config.min_continuous_match,
    2.10 +            min_match=config.min_match,
    2.11 +            max_waste_in_pdb=config.max_waste_in_pdb):
    2.12          """ Read Pdb chain from file
    2.13  
    2.14 -        max_mutations -- max number of monomer type mismatches
    2.15 +        and align each Monomer with PDB.Residue
    2.16  
    2.17 -        and align each Monomer with PDB.Residue
    2.18 +            * xyz_only -- if True PDB.Residue objects are not stored
    2.19 +            * min_continuous_match
    2.20 +            * min_match
    2.21 +            * max_waste_in_pdb
    2.22 +
    2.23 +        The sequence is aligned with pdb_sequence using muscle.
    2.24 +        The alignment is split into continuous_blocks
    2.25 +        of min length _min_continuous_match_.
    2.26 +        Monomers from continuous_blocks are used to get pdb information.
    2.27 +        The number of them should be at least _min_match_.
    2.28 +        The number of other monomers with structure from pdb_sequence
    2.29 +        should be at most _max_waste_in_pdb_.
    2.30 +
    2.31          raise AssertionError
    2.32          """
    2.33          structure = get_structure(pdb_file, self.name)
    2.34 @@ -84,15 +99,31 @@
    2.35          a.append_sequence(pdb_sequence)
    2.36          a.process(processors.Muscle())
    2.37          mutations = 0
    2.38 -        for monomer, pdb_monomer in a.columns_as_lists():
    2.39 -            if monomer and hasattr(pdb_monomer, 'pdb_residue'):
    2.40 -                if monomer == pdb_monomer:
    2.41 -                    monomer.ca_xyz = pdb_monomer.pdb_residue['CA'].get_vector()
    2.42 -                    if not xyz_only:
    2.43 -                        monomer.pdb_residue = pdb_monomer.pdb_residue
    2.44 -                else:
    2.45 -                    mutations += 1
    2.46 -        assert mutations <= max_mutations
    2.47 +        Block = self.types.Block
    2.48 +        matches = Block.from_alignment(a, columns=[])
    2.49 +        for column in a.columns:
    2.50 +            if self in column and pdb_sequence in column:
    2.51 +                monomer = column[self]
    2.52 +                pdb_monomer = column[pdb_sequence]
    2.53 +                if hasattr(pdb_monomer, 'pdb_residue'):
    2.54 +                    if monomer == pdb_monomer:
    2.55 +                        matches.columns.append(column)
    2.56 +        match_blocks = matches.continuous_blocks(min_continuous_match)
    2.57 +        for block in match_blocks:
    2.58 +            for monomer, pdb_monomer in block.columns_as_lists():
    2.59 +                monomer.ca_xyz = pdb_monomer.pdb_residue['CA'].get_vector()
    2.60 +                if not xyz_only:
    2.61 +                    monomer.pdb_residue = pdb_monomer.pdb_residue
    2.62 +        matched_pdb = []
    2.63 +        for block in match_blocks:
    2.64 +            for column in block.columns:
    2.65 +                matched_pdb.append(column[pdb_sequence])
    2.66 +        central_start = pdb_sequence.index(matched_pdb[0])
    2.67 +        central_stop = pdb_sequence.index(matched_pdb[-1]) + 1
    2.68 +        central_pdb = pdb_sequence[central_start:central_stop]
    2.69 +        central_pdb = [m for m in central_pdb if hasattr(m, 'pdb_residue')]
    2.70 +        assert len(set(central_pdb) - set(matched_pdb)) <= max_waste_in_pdb
    2.71 +        assert len(matched_pdb) >= min_match
    2.72  
    2.73      def pdb_unload(self):
    2.74          """ Delete all pdb-connected links """