Документ взят из кэша поисковой машины. Адрес оригинального документа: http://kodomo.fbb.msu.ru/hg/allpy/rev/688348824267
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:46:06 2012
Кодировка:
allpy: 688348824267

allpy

changeset 595:688348824267

pair_cores: improve score.py; add score_all.py for convenience; apply transitivity considerations
author boris (kodomo) <bnagaev@gmail.com>
date Fri, 01 Apr 2011 17:14:26 +0400
parents 2a5210791f43
children 211330654df7
files pair_cores/score.py pair_cores/score_all.py
diffstat 2 files changed, 51 insertions(+), 22 deletions(-) [+]
line diff
     1.1 --- a/pair_cores/score.py	Fri Apr 01 17:05:13 2011 +0400
     1.2 +++ b/pair_cores/score.py	Fri Apr 01 17:14:26 2011 +0400
     1.3 @@ -3,33 +3,42 @@
     1.4  import os
     1.5  import sys
     1.6  
     1.7 +from allpy.graph import Graph
     1.8  from protein_pdb import Alignment, Block, Monomer, Sequence
     1.9  
    1.10 -in_file = sys.argv[1]
    1.11 -pair_cores_file = sys.argv[2]
    1.12 +def cn2(n):
    1.13 +    return n*(n-1)/2
    1.14  
    1.15 -alignment = Alignment().append_file(open(in_file))
    1.16 -pair_cores = alignment.blocks_from_file(open(pair_cores_file))
    1.17 +def score(in_file, pair_cores_file):
    1.18 +    alignment = Alignment().append_file(open(in_file))
    1.19 +    pair_cores = alignment.blocks_from_file(open(pair_cores_file))
    1.20  
    1.21 -score = 0.0
    1.22 -max_score = 0.0
    1.23 +    score = 0.0
    1.24 +    max_score = 0.0
    1.25  
    1.26 -column2blocks = {}
    1.27 -for column in alignment.columns:
    1.28 -    column2blocks[column] = []
    1.29 -for b in pair_cores:
    1.30 -    for column in b.columns:
    1.31 -        column2blocks[column].append(b)
    1.32 -for column in alignment.columns:
    1.33 -    pairs_set = set()
    1.34 -    for b in column2blocks[column]:
    1.35 -        pairs_set.add(frozenset(b.sequences))
    1.36 -    m = len(column)
    1.37 -    if m > 1:
    1.38 -        c = m*(m-1) / 2
    1.39 -        max_score += c
    1.40 -        score += len(pairs_set)
    1.41 +    column2blocks = {}
    1.42 +    for column in alignment.columns:
    1.43 +        column2blocks[column] = []
    1.44 +    for b in pair_cores:
    1.45 +        for column in b.columns:
    1.46 +            column2blocks[column].append(b)
    1.47 +    for column in alignment.columns:
    1.48 +        sequence_graph = Graph()
    1.49 +        pairs_set = set()
    1.50 +        for b in column2blocks[column]:
    1.51 +            assert len(b.sequences) == 2
    1.52 +            sequence_graph.set_edge(b.sequences[0], b.sequences[1])
    1.53 +        for g in sequence_graph.connected_components():
    1.54 +            assert len(g) > 1
    1.55 +            score += cn2(len(g))
    1.56 +        max_score += cn2(len(column))
    1.57  
    1.58 -print("%f = %i / %i     %s %s" % (float(score)/max_score, score, max_score, in_file, pair_cores_file))
    1.59 +    print("%f = %i / %i     %s %s" % (float(score)/max_score, score, max_score, in_file, pair_cores_file))
    1.60  
    1.61 +try:
    1.62 +    in_file = sys.argv[1]
    1.63 +    pair_cores_file = sys.argv[2]
    1.64 +    score(in_file, pair_cores_file)
    1.65 +except:
    1.66 +    pass
    1.67  
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/pair_cores/score_all.py	Fri Apr 01 17:14:26 2011 +0400
     2.3 @@ -0,0 +1,20 @@
     2.4 +
     2.5 +import sys
     2.6 +import os
     2.7 +
     2.8 +from score import score
     2.9 +
    2.10 +Dir = sys.argv[1]
    2.11 +db = sys.argv[2]
    2.12 +d = sys.argv[3]
    2.13 +
    2.14 +for i in os.listdir(Dir):
    2.15 +    i = os.path.join(Dir, i)
    2.16 +    in_file = os.path.join(i, 'structure_only.fasta')
    2.17 +    pair_cores_file = os.path.join(i, '%s-%s.blocks'%(db, d))
    2.18 +    try:
    2.19 +        score(in_file, pair_cores_file)
    2.20 +    except:
    2.21 +        pass
    2.22 +
    2.23 +