allpy
changeset 595:688348824267
pair_cores: improve score.py
add score_all.py for convenience
apply transitivity considerations
author | boris (kodomo) <bnagaev@gmail.com> |
---|---|
date | Fri, 01 Apr 2011 17:14:26 +0400 |
parents | 2a5210791f43 |
children | 211330654df7 |
files | pair_cores/score.py pair_cores/score_all.py |
diffstat | 2 files changed, 51 insertions(+), 22 deletions(-) [+] |
line diff
```
1.1  --- a/pair_cores/score.py Fri Apr 01 17:05:13 2011 +0400
1.2  +++ b/pair_cores/score.py Fri Apr 01 17:14:26 2011 +0400
1.3  @@ -3,33 +3,42 @@
1.4   import os
1.5   import sys
1.6
1.7  +from allpy.graph import Graph
1.8   from protein_pdb import Alignment, Block, Monomer, Sequence
1.9
1.10 -in_file = sys.argv[1]
1.11 -pair_cores_file = sys.argv[2]
1.12 +def cn2(n):
1.13 +    return n*(n-1)/2
1.14
1.15 -alignment = Alignment().append_file(open(in_file))
1.16 -pair_cores = alignment.blocks_from_file(open(pair_cores_file))
1.17 +def score(in_file, pair_cores_file):
1.18 +    alignment = Alignment().append_file(open(in_file))
1.19 +    pair_cores = alignment.blocks_from_file(open(pair_cores_file))
1.20
1.21 -score = 0.0
1.22 -max_score = 0.0
1.23 +    score = 0.0
1.24 +    max_score = 0.0
1.25
1.26 -column2blocks = {}
1.27 -for column in alignment.columns:
1.28 -    column2blocks[column] = []
1.29 -for b in pair_cores:
1.30 -    for column in b.columns:
1.31 -        column2blocks[column].append(b)
1.32 -for column in alignment.columns:
1.33 -    pairs_set = set()
1.34 -    for b in column2blocks[column]:
1.35 -        pairs_set.add(frozenset(b.sequences))
1.36 -    m = len(column)
1.37 -    if m > 1:
1.38 -        c = m*(m-1) / 2
1.39 -        max_score += c
1.40 -    score += len(pairs_set)
1.41 +    column2blocks = {}
1.42 +    for column in alignment.columns:
1.43 +        column2blocks[column] = []
1.44 +    for b in pair_cores:
1.45 +        for column in b.columns:
1.46 +            column2blocks[column].append(b)
1.47 +    for column in alignment.columns:
1.48 +        sequence_graph = Graph()
1.49 +        pairs_set = set()
1.50 +        for b in column2blocks[column]:
1.51 +            assert len(b.sequences) == 2
1.52 +            sequence_graph.set_edge(b.sequences[0], b.sequences[1])
1.53 +        for g in sequence_graph.connected_components():
1.54 +            assert len(g) > 1
1.55 +            score += cn2(len(g))
1.56 +        max_score += cn2(len(column))
1.57
1.58 -print("%f = %i / %i %s %s" % (float(score)/max_score, score, max_score, in_file, pair_cores_file))
1.59 +    print("%f = %i / %i %s %s" % (float(score)/max_score, score, max_score, in_file, pair_cores_file))
1.60
1.61 +try:
1.62 +    in_file = sys.argv[1]
1.63 +    pair_cores_file = sys.argv[2]
1.64 +    score(in_file, pair_cores_file)
1.65 +except:
1.66 +    pass
1.67
```
```
2.1  --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2  +++ b/pair_cores/score_all.py Fri Apr 01 17:14:26 2011 +0400
2.3  @@ -0,0 +1,20 @@
2.4  +
2.5  +import sys
2.6  +import os
2.7  +
2.8  +from score import score
2.9  +
2.10 +Dir = sys.argv[1]
2.11 +db = sys.argv[2]
2.12 +d = sys.argv[3]
2.13 +
2.14 +for i in os.listdir(Dir):
2.15 +    i = os.path.join(Dir, i)
2.16 +    in_file = os.path.join(i, 'structure_only.fasta')
2.17 +    pair_cores_file = os.path.join(i, '%s-%s.blocks'%(db, d))
2.18 +    try:
2.19 +        score(in_file, pair_cores_file)
2.20 +    except:
2.21 +        pass
2.22 +
2.23 +
```