allpy
changeset 620:b1d9fb2a7e2a
Merge with backout of 612:3a27920ff4ed and 613:71910cefe37a
These commits contain a testing version of the algorithm (612) and of its parameters (613).
author | boris (kodomo) <bnagaev@gmail.com> |
---|---|
date | Fri, 29 Apr 2011 17:02:41 +0400 |
parents | 52f311cae778 b17985965fa7 |
children | 4bc2075d338a |
files | allpy/config.py debian/geometrical-core.install |
diffstat | 13 files changed, 240 insertions(+), 51 deletions(-) [+] |
line diff
1.1 --- a/.hgignore Fri Apr 29 17:01:29 2011 +0400 1.2 +++ b/.hgignore Fri Apr 29 17:02:41 2011 +0400 1.3 @@ -26,8 +26,9 @@ 1.4 debian/python-allpy/ 1.5 debian/blocks3d/ 1.6 debian/blocks3d-wt/ 1.7 -debian/geometrical-core/ 1.8 +debian/geometrical-core2/ 1.9 1.10 +debian/bin/ 1.11 debian/*.substvars 1.12 debian/*.debhelper* 1.13 debian/*stamp*
2.1 --- a/.hgtags Fri Apr 29 17:01:29 2011 +0400 2.2 +++ b/.hgtags Fri Apr 29 17:02:41 2011 +0400 2.3 @@ -1,2 +1,5 @@ 2.4 c51ef42eb5e5c2c98dac3c99271777905fb4da76 first run 2.5 c1b67c10f8f3db62cce4b70c292a5882e91bf5b6 0.0 2.6 +292b74612a42f2a1f270ad8184cc0883ad215564 1.1.0 2.7 +364232e428887b5b7ec2f697c3aaa83c434449d3 1.2.0 2.8 +b730512ecae51c89e8d268cd524270bbc50afc8a 1.3.0
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/NEWS Fri Apr 29 17:02:41 2011 +0400 3.3 @@ -0,0 +1,4 @@ 3.4 +1.3.0 (2011-04-15) 3.5 + 3.6 + * new: started NEWS file 3.7 + * new: started tracking version numbers
4.1 --- a/README.devel Fri Apr 29 17:01:29 2011 +0400 4.2 +++ b/README.devel Fri Apr 29 17:02:41 2011 +0400 4.3 @@ -1,13 +1,42 @@ 4.4 +Using library in-place 4.5 +====================== 4.6 + 4.7 To use the library in-place, without installing, you have to add it's 4.8 directory to PYTHONPATH. 4.9 4.10 -e.g, type this in your shell: 4.11 +e.g, type this in your shell:: 4.12 4.13 - export PYTHONPATH=`pwd` 4.14 + export PYTHONPATH=`pwd` 4.15 4.16 -alternatively, you can type: 4.17 +alternatively, you can type:: 4.18 4.19 . ./SETPATH 4.20 4.21 which adds the right path to PYTHONPATH and also adds 'utils' directory 4.22 -to PATH 4.23 +to PATH. 4.24 + 4.25 +NEWS file 4.26 +========= 4.27 + 4.28 +NEWS file contains news that are important to the library users, not to the 4.29 +devels. It must contain a word abot every change in any interfaces. It may 4.30 +contain a word about bugfix, if that bugfix was important and could cause users 4.31 +some grief. It must not contain anything else. 4.32 + 4.33 +The file grows upwards (like blog, so that all the most important things are at 4.34 +the top, like) 4.35 + 4.36 +Example file format:: 4.37 + 4.38 + 1.3.1 (2011-05-01) 4.39 + * bugfix: the Moon was going to fall 4.40 + 4.41 + 1.3.0 (2011-04-15) 4.42 + 4.43 + * new: new feature description 4.44 + * new: new feature description 4.45 + * change(!): critical library interface change 4.46 + * change: change in the library interface 4.47 + * bugfix: short bug summary, see #ticket 4.48 + * bugfix: important bug that could destroy the Earth but did not hit trac 4.49 +
5.1 --- a/allpy/config.py Fri Apr 29 17:01:29 2011 +0400 5.2 +++ b/allpy/config.py Fri Apr 29 17:02:41 2011 +0400 5.3 @@ -23,6 +23,6 @@ 5.4 ac_count = 5 5.5 5.6 # blocks3d 5.7 -min_width = 3 5.8 +min_width = 4 5.9 timeout_2 = 10 # bron-kerbosh for blocks 5.10
6.1 --- a/allpy/structure.py Fri Apr 29 17:01:29 2011 +0400 6.2 +++ b/allpy/structure.py Fri Apr 29 17:02:41 2011 +0400 6.3 @@ -316,6 +316,53 @@ 6.4 columns.append(column) 6.5 return columns 6.6 6.7 + def superimpose(self, gc, extra_columns=False): 6.8 + """ Superimpose monomers from this block at gc positions 6.9 + 6.10 + * gc -- collection of columns to use as for superimposition 6.11 + * extra_columns -- allow columns with gaps or without structure 6.12 + """ 6.13 + gc = list(gc) 6.14 + if len(self.sequences) >= 1: 6.15 + sequences = copy(self.sequences) 6.16 + main_sequence = sequences.pop() 6.17 + for sequence in sequences: 6.18 + fixed_gc = [] 6.19 + moving_gc = [] 6.20 + for column in gc: 6.21 + if main_sequence in column and sequence in column: 6.22 + if hasattr(column[main_sequence], 'pdb_residue') \ 6.23 + and hasattr(column[sequence], 'pdb_residue'): 6.24 + fixed_gc.append(column[main_sequence].pdb_residue['CA']) 6.25 + moving_gc.append(column[sequence].pdb_residue['CA']) 6.26 + else: 6.27 + assert extra_columns 6.28 + else: 6.29 + assert extra_columns 6.30 + sup = Superimposer() 6.31 + sup.set_atoms(fixed_gc, moving_gc) 6.32 + moving = sequence.pdb_chain.get_atoms() 6.33 + sup.apply(moving) 6.34 + 6.35 + def save_pdb(self, out_file): 6.36 + """ Save all sequences 6.37 + 6.38 + return {sequence: (new_chain, new_model)} 6.39 + """ 6.40 + map = {} 6.41 + chains = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 6.42 + chain_index = -1 6.43 + model = 0 6.44 + for sequence in self.sequences: 6.45 + chain_index += 1 6.46 + if chain_index >= len(chains): 6.47 + chain_index = 0 6.48 + model += 1 6.49 + chain = chains[chain_index] 6.50 + sequence.save_pdb(out_file, chain, model) 6.51 + map[sequence] = (chain, model) 6.52 + return map 6.53 + 6.54 class BlockMixin(base.Block, AlignmentMixin): 6.55 """Mixin to add 3D properties to blocks. 
6.56 6.57 @@ -576,39 +623,4 @@ 6.58 result.append(column[sequence]) 6.59 return result 6.60 6.61 - def superimpose(self, gc): 6.62 - """ Superimpose monomers from this block at gc positions """ 6.63 - gc = list(gc) 6.64 - if len(self.sequences) >= 1: 6.65 - sequences = copy(self.sequences) 6.66 - main_sequence = sequences.pop() 6.67 - fixed_gc = [column[main_sequence].pdb_residue['CA'] \ 6.68 - for column in gc] 6.69 - for sequence in sequences: 6.70 - moving_gc = [column[sequence].pdb_residue['CA'] \ 6.71 - for column in gc] 6.72 - sup = Superimposer() 6.73 - sup.set_atoms(fixed_gc, moving_gc) 6.74 - moving = sequence.pdb_chain.get_atoms() 6.75 - sup.apply(moving) 6.76 - 6.77 - def save_pdb(self, out_file): 6.78 - """ Save all sequences 6.79 - 6.80 - return {sequence: (new_chain, new_model)} 6.81 - """ 6.82 - map = {} 6.83 - chains = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 6.84 - chain_index = -1 6.85 - model = 0 6.86 - for sequence in self.sequences: 6.87 - chain_index += 1 6.88 - if chain_index >= len(chains): 6.89 - chain_index = 0 6.90 - model += 1 6.91 - chain = chains[chain_index] 6.92 - sequence.save_pdb(out_file, chain, model) 6.93 - map[sequence] = (chain, model) 6.94 - return map 6.95 - 6.96 # vim: set ts=4 sts=4 sw=4 et:
7.1 --- a/debian/changelog Fri Apr 29 17:01:29 2011 +0400 7.2 +++ b/debian/changelog Fri Apr 29 17:02:41 2011 +0400 7.3 @@ -1,3 +1,27 @@ 7.4 +allpy (1.3.0-2) unstable; urgency=low 7.5 + 7.6 + * renamed geometrical-core to geometrical-core2 by request of Boris, 7.7 + since the new version may yet be inferior to the old one in some aspects 7.8 + 7.9 + -- Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru> Fri, 15 Apr 2011 22:31:11 +0400 7.10 + 7.11 +allpy (1.3.0-1) unstable; urgency=low 7.12 + 7.13 + * geometrical-core depends on particular version of python-allpy (>= 1.0.0) 7.14 + 7.15 + -- Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru> Fri, 15 Apr 2011 22:07:18 +0400 7.16 + 7.17 +allpy (1.3.0) unstable; urgency=low 7.18 + 7.19 + * New allpy release. 7.20 + Version tracking an compact history starts with this version. The question 7.21 + of related programs and utils versioning is yet undecided. 7.22 + * New geometrical-core requires two packages with local install. 7.23 + Added sed script to add /usr/share/geometrical-core to sys.path. 7.24 + The required packages go there. 7.25 + 7.26 + -- Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru> Fri, 15 Apr 2011 21:21:40 +0400 7.27 + 7.28 allpy (0.0-6) unstable; urgency=medium 7.29 7.30 * blocks3d-wt: reorganized apache configs for more stable operation
8.1 --- a/debian/control.in Fri Apr 29 17:01:29 2011 +0400 8.2 +++ b/debian/control.in Fri Apr 29 17:02:41 2011 +0400 8.3 @@ -10,9 +10,9 @@ 8.4 Depends: ${python:Depends}, ${misc:Depends} 8.5 Description: Python library for working with alignments 8.6 8.7 -Package: geometrical-core 8.8 +Package: geometrical-core2 8.9 Architecture: all 8.10 -Depends: ${python:Depends}, ${misc:Depends} 8.11 +Depends: python-allpy (>= 1.0.0), ${python:Depends}, ${misc:Depends} 8.12 Description: Find geometrical core in a multiple alignment 8.13 8.14 Package: blocks3d
9.1 --- a/debian/geometrical-core.install Fri Apr 29 17:01:29 2011 +0400 9.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 9.3 @@ -1,1 +0,0 @@ 9.4 -geometrical_core/geometrical-core usr/bin
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/debian/geometrical-core2.install Fri Apr 29 17:02:41 2011 +0400 10.3 @@ -0,0 +1,2 @@ 10.4 +debian/bin/geometrical-core2 usr/bin 10.5 +geometrical_core/*.py usr/share/geometrical-core2
11.1 --- a/debian/rules Fri Apr 29 17:01:29 2011 +0400 11.2 +++ b/debian/rules Fri Apr 29 17:02:41 2011 +0400 11.3 @@ -11,6 +11,17 @@ 11.4 build/python-allpy:: 11.5 make -C docs html 11.6 11.7 +build/geometrical-core2:: 11.8 + -mkdir debian/bin 11.9 + fix_path='import sys\nsys.path.append("/usr/share/geometrical-core2")'; \ 11.10 + cat geometrical_core/geometrical-core \ 11.11 + | sed "1 { x; s@^@\n$${fix_path}\n@; x }; /^$$/ x" \ 11.12 + > debian/bin/geometrical-core2 11.13 + chmod +x debian/bin/geometrical-core2 11.14 + 11.15 +clean:: 11.16 + rm -rf debian/bin 11.17 + 11.18 # Manually build blocks3d-wt to avoid build-dependency on qmake 11.19 b3dwt = blocks3d/wt 11.20 b3dwt_cpp = $(wildcard $(b3dwt)/*.C)
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 12.2 +++ b/pair_cores/rasmol_output.py Fri Apr 29 17:02:41 2011 +0400 12.3 @@ -0,0 +1,85 @@ 12.4 +#!/usr/bin/python 12.5 + 12.6 +import os 12.7 +import sys 12.8 +from gzip import GzipFile 12.9 +from copy import copy 12.10 + 12.11 +from protein_pdb import Alignment, Block, Monomer, Sequence 12.12 +from allpy import structure 12.13 + 12.14 +def load(pdb_code): 12.15 + path = '/home/students/y08/bnagaev/pdb/%s/pdb%s.ent.gz' % (pdb_code[1:3], pdb_code) 12.16 + try: 12.17 + return GzipFile(path) 12.18 + except: 12.19 + return structure.download_pdb(pdb_code) 12.20 + 12.21 +def monomer2str(sequence, m): 12.22 + code = structure.pdb_id_parse(sequence.name)['code'] 12.23 + model = structure.pdb_id_parse(sequence.name)['model'] 12.24 + chain = structure.pdb_id_parse(sequence.name)['chain'] 12.25 + resi = m.pdb_residue.get_id()[1] 12.26 + return '/%s/%i/%s/%i' % (code, model, chain, resi) 12.27 + 12.28 +def columns_for_align(columns, sequence, sequence2): 12.29 + cc = [c for c in columns if sequence in c and sequence2 in c and hasattr(c[sequence], 'pdb_residue') and hasattr(c[sequence2], 'pdb_residue')] 12.30 + str = ' or '.join(monomer2str(sequence, column[sequence]) for column in cc) 12.31 + return '((%s) and name ca)' % str 12.32 + 12.33 +aln_file = sys.argv[1] 12.34 +blocks_file = sys.argv[2] 12.35 +out_pdb = sys.argv[3] 12.36 + 12.37 +alignment = Alignment().append_file(open(aln_file)) 12.38 +blocks = alignment.blocks_from_file(open(blocks_file)) 12.39 + 12.40 +for sequence in alignment.sequences: 12.41 + sequence.auto_pdb(pdb_getter=load) 12.42 + 12.43 +columns = set() 12.44 +for b in blocks: 12.45 + columns |= set(b.columns) 12.46 +columns = list(columns) 12.47 + 12.48 +_sequences = copy(alignment.sequences) 12.49 +base_seq = _sequences.pop() 12.50 + 12.51 +alignment.superimpose(columns, extra_columns=True) 12.52 +map = alignment.save_pdb(open(out_pdb, 'w')) 12.53 + 12.54 +for sequence, chain_model in map.items(): 
12.55 + print 'define str_%s *:%s' % (sequence.name, chain_model[0]) 12.56 + 12.57 +print 'select all' 12.58 +print 'color chain' 12.59 +print 'restrict none' 12.60 +print 'background [230, 240, 250]' 12.61 +print 'select none' 12.62 + 12.63 +n = 0 12.64 + 12.65 +column2pos = alignment.column2pos() 12.66 + 12.67 +for block in blocks: 12.68 + selection = [] 12.69 + for b in block.continuous_blocks(): 12.70 + n += 1 12.71 + selections = [] 12.72 + for sequence in b.sequences: 12.73 + try: 12.74 + chain = map[sequence][0] 12.75 + resi_from = b.columns[0][sequence].pdb_residue.get_id()[1] 12.76 + resi_to = b.columns[-1][sequence].pdb_residue.get_id()[1] 12.77 + selections.append('%i-%i:%s' % (resi_from, resi_to, chain)) 12.78 + except: 12.79 + pass 12.80 + print '' 12.81 + print 'backbone' 12.82 + print 'define block_%i %s' % (n, ','.join(selections)) 12.83 + print 'select block_%i' % n 12.84 + print 'echo block_%i %s %i-%i' % (n, ', '.join([b.sequences[0].name, b.sequences[1].name]), 12.85 + column2pos[b.columns[0]], column2pos[b.columns[-1]]) 12.86 + print 'backbone 100' 12.87 + print 'pause' 12.88 +
13.1 --- a/pair_cores/score.py Fri Apr 29 17:01:29 2011 +0400 13.2 +++ b/pair_cores/score.py Fri Apr 29 17:02:41 2011 +0400 13.3 @@ -4,13 +4,15 @@ 13.4 import sys 13.5 13.6 from allpy.graph import Graph 13.7 +from allpy import config 13.8 from protein_pdb import Alignment, Block, Monomer, Sequence 13.9 13.10 def cn2(n): 13.11 return n*(n-1)/2 13.12 13.13 -def score(in_file, pair_cores_file, debug=False): 13.14 +def score(in_file, pair_cores_file, debug=False, out_blocks=None): 13.15 alignment = Alignment().append_file(open(in_file)) 13.16 + assert os.path.exists(pair_cores_file) and os.path.getsize(pair_cores_file) 13.17 pair_cores = alignment.blocks_from_file(open(pair_cores_file)) 13.18 13.19 C = [] 13.20 @@ -20,15 +22,27 @@ 13.21 for column in alignment.columns: 13.22 column2blocks[column] = [] 13.23 for b in pair_cores: 13.24 - for column in b.columns[1:-1]: 13.25 - column2blocks[column].append(b) 13.26 + if len(b.columns) >= config.min_width: 13.27 + for column in b.columns[1:-1]: 13.28 + column2blocks[column].append(b) 13.29 + new_blocks = {} 13.30 for column in alignment.columns: 13.31 if len(column) >= 2: 13.32 sequence_graph = Graph() 13.33 for b in column2blocks[column]: 13.34 assert len(b.sequences) == 2 13.35 sequence_graph.set_edge(b.sequences[0], b.sequences[1]) 13.36 - c = len(sequence_graph.connected_components()) 13.37 + cc = sequence_graph.connected_components() 13.38 + c = len(cc) 13.39 + if out_blocks: 13.40 + for sequences in cc: 13.41 + for s1 in sequences: 13.42 + for s2 in sequences: 13.43 + if s1 != s2: 13.44 + new_blocks.setdefault(frozenset([s1,s2]), 13.45 + Block.from_alignment(alignment, 13.46 + columns=[],sequences=[s1,s2])) 13.47 + new_blocks[frozenset([s1,s2])].columns.append(column) 13.48 singles = len(column) - len(sequence_graph) 13.49 c += singles 13.50 C.append(c) 13.51 @@ -36,15 +50,20 @@ 13.52 if debug: 13.53 print C[-1], L[-1] 13.54 13.55 + if out_blocks: 13.56 + alignment.blocks_to_file(open(out_blocks, 'w'), 
new_blocks.values()) 13.57 + 13.58 score = sum([i*j for (i,j) in zip(C,L)]) / float(sum(L)) 13.59 - print("%(score)f %(n)i %(name)s" % {'score': score, 13.60 - 'n': len(alignment.sequences), 'name': in_file}) 13.61 + print("%(score)f %(n)i %(columns)i %(name)s" % {'score': score, 13.62 + 'n': len(alignment.sequences), 'name': in_file, 13.63 + 'columns': len(L)}) 13.64 13.65 try: 13.66 in_file = sys.argv[1] 13.67 pair_cores_file = sys.argv[2] 13.68 debug = len(sys.argv) >= 4 and sys.argv[3] == 'debug' 13.69 - score(in_file, pair_cores_file, debug) 13.70 + out_blocks = len(sys.argv) >= 5 and sys.argv[4] 13.71 + score(in_file, pair_cores_file, debug, out_blocks) 13.72 except: 13.73 pass 13.74