allpy
changeset 585:7eeae2471e69
structure.py: improve set_pdb_chain()
* improve checks (asserts)
* fix documentation
author | boris (kodomo) <bnagaev@gmail.com> |
---|---|
date | Sat, 26 Mar 2011 00:34:30 +0300 |
parents | ab3f5300bf4e |
children | 27e89e4a9346 |
files | allpy/config.py allpy/structure.py |
diffstat | 2 files changed, 48 insertions(+), 13 deletions(-) [+] |
line diff
1.1 --- a/allpy/config.py Sat Mar 26 00:18:15 2011 +0300 1.2 +++ b/allpy/config.py Sat Mar 26 00:34:30 2011 +0300 1.3 @@ -2,7 +2,11 @@ 1.4 delta = 2.0 # for geometrical core building 1.5 minsize = 20 # min size of returning cores 1.6 maxabsent = 0.15 # deprecated? 1.7 -max_mutations = 2 1.8 + 1.9 +# defaults for set_pdb_chain 1.10 +min_continuous_match = 20 1.11 +min_match = 5 1.12 +max_waste_in_pdb = 5 1.13 1.14 # pdb download url (XXXX is pdb code place) 1.15 pdb_url = 'http://www.pdb.org/pdb/files/%s.pdb'
2.1 --- a/allpy/structure.py Sat Mar 26 00:18:15 2011 +0300 2.2 +++ b/allpy/structure.py Sat Mar 26 00:34:30 2011 +0300 2.3 @@ -65,12 +65,27 @@ 2.4 """ 2.5 2.6 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain='A', pdb_model=0, 2.7 - max_mutations=config.max_mutations, xyz_only=False): 2.8 + xyz_only=False, 2.9 + min_continuous_match=config.min_continuous_match, 2.10 + min_match=config.min_match, 2.11 + max_waste_in_pdb=config.max_waste_in_pdb): 2.12 """ Read Pdb chain from file 2.13 2.14 - max_mutations -- max number of monomer type mismatches 2.15 + and align each Monomer with PDB.Residue 2.16 2.17 - and align each Monomer with PDB.Residue 2.18 + * xyz_only -- if True PDB.Residue objects are not stores 2.19 + * min_continuous_match 2.20 + * min_match 2.21 + * max_waste_in_pdb 2.22 + 2.23 + The sequence is aligned with pdb_sequence using muscle. 2.24 + The alignment is splitted to continuous_blocks 2.25 + of min length _min_continuous_match_. 2.26 + Monomers from continuous_blocks are used to get pdb information. 2.27 + The number of them should be more than _min_match_. 2.28 + The number of other monomers with structure from pdb_sequence 2.29 + should be less than _max_waste_in_pdb_. 
2.30 + 2.31 raise AssertionError 2.32 """ 2.33 structure = get_structure(pdb_file, self.name) 2.34 @@ -84,15 +99,31 @@ 2.35 a.append_sequence(pdb_sequence) 2.36 a.process(processors.Muscle()) 2.37 mutations = 0 2.38 - for monomer, pdb_monomer in a.columns_as_lists(): 2.39 - if monomer and hasattr(pdb_monomer, 'pdb_residue'): 2.40 - if monomer == pdb_monomer: 2.41 - monomer.ca_xyz = pdb_monomer.pdb_residue['CA'].get_vector() 2.42 - if not xyz_only: 2.43 - monomer.pdb_residue = pdb_monomer.pdb_residue 2.44 - else: 2.45 - mutations += 1 2.46 - assert mutations <= max_mutations 2.47 + Block = self.types.Block 2.48 + matches = Block.from_alignment(a, columns=[]) 2.49 + for column in a.columns: 2.50 + if self in column and pdb_sequence in column: 2.51 + monomer = column[self] 2.52 + pdb_monomer = column[pdb_sequence] 2.53 + if hasattr(pdb_monomer, 'pdb_residue'): 2.54 + if monomer == pdb_monomer: 2.55 + matches.columns.append(column) 2.56 + match_blocks = matches.continuous_blocks(min_continuous_match) 2.57 + for block in match_blocks: 2.58 + for monomer, pdb_monomer in block.columns_as_lists(): 2.59 + monomer.ca_xyz = pdb_monomer.pdb_residue['CA'].get_vector() 2.60 + if not xyz_only: 2.61 + monomer.pdb_residue = pdb_monomer.pdb_residue 2.62 + matched_pdb = [] 2.63 + for block in match_blocks: 2.64 + for column in block.columns: 2.65 + matched_pdb.append(column[pdb_sequence]) 2.66 + central_start = pdb_sequence.index(matched_pdb[0]) 2.67 + central_stop = pdb_sequence.index(matched_pdb[-1]) + 1 2.68 + central_pdb = pdb_sequence[central_start:central_stop] 2.69 + central_pdb = [m for m in central_pdb if hasattr(m, 'pdb_residue')] 2.70 + assert len(set(central_pdb) - set(matched_pdb)) <= max_waste_in_pdb 2.71 + assert len(matched_pdb) >= min_match 2.72 2.73 def pdb_unload(self): 2.74 """ Delete all pdb-connected links """