allpy
changeset 593:6c6ae5013ab6
add pair_cores program package
* pair_cores
* score
* mkhtml
author | boris (kodomo) <bnagaev@gmail.com> |
---|---|
date | Sat, 26 Mar 2011 10:48:11 +0300 |
parents | 2f851b66a2c7 |
children | 2a5210791f43 |
files | pair_cores/html.htm pair_cores/html.py pair_cores/mkhtml.py pair_cores/pair_cores.py pair_cores/protein_pdb.py pair_cores/score.py |
diffstat | 6 files changed, 139 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/pair_cores/html.htm Sat Mar 26 10:48:11 2011 +0300 1.3 @@ -0,0 +1,1 @@ 1.4 +../blocks3d/html.htm 1.5 \ No newline at end of file
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/pair_cores/html.py Sat Mar 26 10:48:11 2011 +0300 2.3 @@ -0,0 +1,1 @@ 2.4 +../blocks3d/html.py 2.5 \ No newline at end of file
#!/usr/bin/python
"""pair_cores/mkhtml.py -- render the blocks of an alignment as HTML.

Usage: mkhtml.py FASTA_FILE BLOCKS_FILE

Loads the alignment from FASTA_FILE, loads the blocks described in
BLOCKS_FILE and writes the HTML report to standard output, using the
template file named by ``html.html_template``.

NOTE(review): reconstructed from a flattened changeset diff; the layout of
the statements is inferred from the code itself.
"""

import os
import json
import sys

from protein_pdb import Alignment, Block, Monomer, Sequence
from html import html_template

if len(sys.argv) < 3:
    # Fail with a usage line instead of a bare IndexError.
    sys.exit('usage: %s FASTA_FILE BLOCKS_FILE' % sys.argv[0])

fasta_file = sys.argv[1]
blocks_file = sys.argv[2]

# Keep the input files open until the report has been written (the loaders
# may read lazily), but make sure every handle is closed afterwards.
with open(fasta_file) as fasta_in:
    alignment = Alignment().append_file(fasta_in)
    with open(blocks_file) as blocks_in:
        blocks = alignment.blocks_from_file(blocks_in)
        with open(html_template) as template_in:
            template = template_in.read()
        alignment.blocks_to_html(sys.stdout, blocks, template)
#!/usr/bin/python
"""pair_cores/pair_cores.py -- build pair cores for an alignment.

Reads an alignment (-i), drops sequences with a repeated name and sequences
for which ``auto_pdb`` fails, strips every monomer that has no ``ca_xyz``
attribute, then writes the cleaned alignment (-o), the pair-core blocks (-b)
and, optionally, an HTML report (-H).

NOTE(review): reconstructed from a flattened changeset diff; indentation is
inferred from the statements themselves.
"""

from gzip import GzipFile
import os
import sys
from copy import deepcopy
import argparse
from copy import copy

from protein_pdb import Alignment, Block, Monomer, Sequence
from allpy import processors
import allpy.base
from html import html_template


def remove_monomers_without_structure(block):
    """Remove from *block* every monomer that has no ``ca_xyz`` attribute.

    The block is modified in place: the monomers are popped from each
    sequence in ``block.sequences`` and the matching entries are popped
    from each column mapping in ``block.columns``.
    """
    monomers_with_structure = set()
    for sequence in block.sequences:
        for monomer in sequence:
            if hasattr(monomer, 'ca_xyz'):
                monomers_with_structure.add(monomer)

    for sequence in block.sequences:
        to_delete = [
            i for i, monomer in enumerate(sequence)
            if monomer not in monomers_with_structure
        ]
        # Pop from the end so the remaining indices stay valid.  The pops
        # must not live inside ``assert``: assertions are stripped under
        # ``python -O`` and nothing would be removed.
        for i in reversed(to_delete):
            sequence.pop(i)

    for column in block.columns:
        # Iterate over a snapshot: entries are removed from the column
        # while walking it, which a live dict view does not permit.
        for s, m in list(column.items()):
            if m not in monomers_with_structure:
                column.pop(s)


r = argparse.FileType('r')
w = argparse.FileType('w')

p = argparse.ArgumentParser(
    description='PairCores',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

p.add_argument('-v', '--version', action='version', version='%(prog)s 2.0')
p.add_argument('-i', help='Input alignment file', metavar='FILE', type=r,
               required=True)
p.add_argument('-d', help='Distance spreading', metavar='float', type=float,
               default=2.0)
p.add_argument('-o', help='Output alignment file', metavar='FILE', type=w,
               required=True)
p.add_argument('-b', help='Output pair_cores file', metavar='FILE', type=w,
               required=True)
p.add_argument('-H', help='Output HTML file', metavar='FILE', type=w)

args = p.parse_args()

try:
    alignment = Alignment().append_file(args.i)
except Exception as error:
    # Same exception type as before, but say what went wrong.
    raise Exception('cannot read alignment from %s: %s' % (args.i.name, error))

# Keep only the first sequence for each name.  The sequence objects
# themselves must be kept (not their names): the steps below call methods
# on them.
seen_names = set()
unique_sequences = []
for sequence in alignment.sequences:
    if sequence.name not in seen_names:
        seen_names.add(sequence.name)
        unique_sequences.append(sequence)
if len(unique_sequences) < len(alignment.sequences):
    alignment.sequences = unique_sequences

# Best effort: a sequence whose structure cannot be loaded is skipped, not
# fatal.  ``except Exception`` lets Ctrl-C and SystemExit through.
bad_sequences = set()
for sequence in copy(alignment.sequences):
    try:
        sequence.auto_pdb(xyz_only=True)
    except Exception as error:
        sys.stderr.write('skipping sequence %s: %s\n' % (sequence.name, error))
        bad_sequences.add(sequence)
if bad_sequences:
    # Filter instead of using a set difference so the order of the
    # remaining sequences is preserved.
    alignment.sequences = [
        s for s in alignment.sequences if s not in bad_sequences
    ]

if len(alignment.sequences) < 2:
    raise Exception(
        'need at least 2 sequences with structure, got %i'
        % len(alignment.sequences)
    )

block = Block.from_alignment(alignment)

remove_monomers_without_structure(block)

block.to_file(args.o)

blocks = block.pair_core_parts(max_delta=args.d, timeout=0)
block.blocks_to_file(args.b, blocks)

if args.H:
    with open(html_template) as template_file:
        template = template_file.read()
    block.blocks_to_html(args.H, blocks, template)
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/pair_cores/protein_pdb.py Sat Mar 26 10:48:11 2011 +0300 5.3 @@ -0,0 +1,1 @@ 5.4 +../blocks3d/protein_pdb.py 5.5 \ No newline at end of file
#!/usr/bin/python
"""pair_cores/score.py -- score a pair-cores file against its alignment.

Usage: score.py ALIGNMENT_FILE PAIR_CORES_FILE

For every alignment column with m > 1 entries, m*(m-1)/2 is added to the
maximum score, and the number of distinct sequence sets among the blocks
covering that column is added to the score.  Prints
``ratio = score / max_score ALIGNMENT_FILE PAIR_CORES_FILE``.

NOTE(review): reconstructed from a flattened changeset diff; indentation is
inferred from the statements themselves.
"""

import os
import sys

from protein_pdb import Alignment, Block, Monomer, Sequence

if len(sys.argv) < 3:
    # Fail with a usage line instead of a bare IndexError.
    sys.exit('usage: %s ALIGNMENT_FILE PAIR_CORES_FILE' % sys.argv[0])

in_file = sys.argv[1]
pair_cores_file = sys.argv[2]

# Read everything while the files are open, then close them.
with open(in_file) as alignment_in:
    alignment = Alignment().append_file(alignment_in)
    with open(pair_cores_file) as pair_cores_in:
        pair_cores = list(alignment.blocks_from_file(pair_cores_in))

score = 0
max_score = 0

# Map each alignment column to the blocks that include it.
column2blocks = {}
for column in alignment.columns:
    column2blocks[column] = []
for b in pair_cores:
    for column in b.columns:
        column2blocks[column].append(b)

for column in alignment.columns:
    m = len(column)
    if m > 1:
        # m*(m-1) is always even, so floor division is exact.
        max_score += m * (m - 1) // 2
        # NOTE(review): the flattened diff does not show whether this line
        # was inside the ``if``; assumed so, next to the max_score update.
        pairs_set = set(frozenset(b.sequences) for b in column2blocks[column])
        score += len(pairs_set)

# With no column of more than one entry max_score is 0; report a ratio of
# 0.0 instead of dying with ZeroDivisionError.
if max_score:
    ratio = float(score) / max_score
else:
    ratio = 0.0

print("%f = %i / %i %s %s" % (ratio, score, max_score, in_file, pair_cores_file))