Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/raw-rev/7eeae2471e69
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 08:03:52 2012
Кодировка:

# HG changeset patch
# User boris (kodomo)
# Date 1301088870 -10800
# Node ID 7eeae2471e69a6200e9933ae332374a84403d2b6
# Parent ab3f5300bf4ec68bacac1df4d1a0b6b161f39116
structure.py: improve set_pdb_chain()

* improve controls (asserts)
* fix documentation

diff -r ab3f5300bf4e -r 7eeae2471e69 allpy/config.py
--- a/allpy/config.py Sat Mar 26 00:18:15 2011 +0300
+++ b/allpy/config.py Sat Mar 26 00:34:30 2011 +0300
@@ -2,7 +2,11 @@
delta = 2.0 # for geometrical core building
minsize = 20 # min size of returning cores
maxabsent = 0.15 # deprecated?
-max_mutations = 2
+
+# defaults for set_pdb_chain
+min_continuous_match = 20
+min_match = 5
+max_waste_in_pdb = 5

# pdb download url (XXXX is pdb code place)
pdb_url = 'http://www.pdb.org/pdb/files/%s.pdb'
diff -r ab3f5300bf4e -r 7eeae2471e69 allpy/structure.py
--- a/allpy/structure.py Sat Mar 26 00:18:15 2011 +0300
+++ b/allpy/structure.py Sat Mar 26 00:34:30 2011 +0300
@@ -65,12 +65,27 @@
"""

def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain='A', pdb_model=0,
- max_mutations=config.max_mutations, xyz_only=False):
+ xyz_only=False,
+ min_continuous_match=config.min_continuous_match,
+ min_match=config.min_match,
+ max_waste_in_pdb=config.max_waste_in_pdb):
""" Read Pdb chain from file

- max_mutations -- max number of monomer type mismatches
+ and align each Monomer with PDB.Residue

- and align each Monomer with PDB.Residue
+ * xyz_only -- if True, PDB.Residue objects are not stored
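+ * min_continuous_match -- min length of a continuous block of matches
+ * min_match -- min number of matched monomers
+ * max_waste_in_pdb -- max number of unmatched pdb monomers with structure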
+
+ The sequence is aligned with pdb_sequence using muscle.
+ The alignment is split into continuous_blocks
+ of min length _min_continuous_match_.
+ Monomers from continuous_blocks are used to get pdb information.
+ The number of them should be at least _min_match_.
+ The number of other monomers with structure from pdb_sequence
+ should be at most _max_waste_in_pdb_.
+
raise AssertionError
"""
structure = get_structure(pdb_file, self.name)
@@ -84,15 +99,31 @@
a.append_sequence(pdb_sequence)
a.process(processors.Muscle())
mutations = 0
- for monomer, pdb_monomer in a.columns_as_lists():
- if monomer and hasattr(pdb_monomer, 'pdb_residue'):
- if monomer == pdb_monomer:
- monomer.ca_xyz = pdb_monomer.pdb_residue['CA'].get_vector()
- if not xyz_only:
- monomer.pdb_residue = pdb_monomer.pdb_residue
- else:
- mutations += 1
- assert mutations <= max_mutations
+ Block = self.types.Block
+ matches = Block.from_alignment(a, columns=[])
+ for column in a.columns:
+ if self in column and pdb_sequence in column:
+ monomer = column[self]
+ pdb_monomer = column[pdb_sequence]
+ if hasattr(pdb_monomer, 'pdb_residue'):
+ if monomer == pdb_monomer:
+ matches.columns.append(column)
+ match_blocks = matches.continuous_blocks(min_continuous_match)
+ for block in match_blocks:
+ for monomer, pdb_monomer in block.columns_as_lists():
+ monomer.ca_xyz = pdb_monomer.pdb_residue['CA'].get_vector()
+ if not xyz_only:
+ monomer.pdb_residue = pdb_monomer.pdb_residue
+ matched_pdb = []
+ for block in match_blocks:
+ for column in block.columns:
+ matched_pdb.append(column[pdb_sequence])
+ central_start = pdb_sequence.index(matched_pdb[0])
+ central_stop = pdb_sequence.index(matched_pdb[-1]) + 1
+ central_pdb = pdb_sequence[central_start:central_stop]
+ central_pdb = [m for m in central_pdb if hasattr(m, 'pdb_residue')]
+ assert len(set(central_pdb) - set(matched_pdb)) <= max_waste_in_pdb
+ assert len(matched_pdb) >= min_match

def pdb_unload(self):
""" Delete all pdb-connected links """