allpy
changeset 648:f4f658d86ca1
Some testing done towards optimising the time consumption of blocks_finder:remove_contained_preblocks.
author | Boris Burkov <BurkovBA@gmail.com> |
---|---|
date | Fri, 10 Jun 2011 15:33:16 +0400 |
parents | b35116e13f35 |
children | e3f06e45de03 |
files | sequence_based_blocks_search/blocks_finder.py |
diffstat | 1 files changed, 24 insertions(+), 3 deletions(-) [+] |
line diff
1.1 --- a/sequence_based_blocks_search/blocks_finder.py Wed Jun 08 15:15:57 2011 +0400 1.2 +++ b/sequence_based_blocks_search/blocks_finder.py Fri Jun 10 15:33:16 2011 +0400 1.3 @@ -35,7 +35,13 @@ 1.4 return self.term1 - len(self.sequences)*self.term2 + self.term3 1.5 1.6 def __repr__(self): 1.7 - return "preblock:%s, %s, %s, %s"%(self.first_column_index, self.last_column_index, self.calculate_score(), self.sequences) 1.8 + representation_of_sequences = "[" 1.9 + for sequence in self.sequences: 1.10 + if sequence.name: representation_of_sequences += ""+str(sequence.name)+", " 1.11 + else: representation_of_sequences += ""+repr(sequence.name)+", " 1.12 + if representation_of_sequences!="[": representation_of_sequences=representation_of_sequences[:-2]+"]" 1.13 + else: representation_of_sequences+="]" 1.14 + return "<Preblock %s, %s, %s, %s>"%(self.first_column_index, self.last_column_index, self.calculate_score(), representation_of_sequences) 1.15 1.16 1.17 class BreakCycleException(Exception): 1.18 @@ -91,10 +97,15 @@ 1.19 #add one inconserved column to the old ones, merge all together 1.20 for preblock in candidate_preblocks: preblock.term3 += math.log(0.3) - math.log(1) 1.21 candidate_preblocks+=addition_to_candidate_preblocks 1.22 - #remove from candidates those preblocks that share the same set of sequences with others, but are shorter and have smaller score 1.23 + #remove from candidates those preblocks that share the same set of sequences with others, but are shorter and have lower scores 1.24 print index_of_column, len(candidate_preblocks), 1.25 - remove_contained_preblocks(candidate_preblocks) 1.26 + if len(candidate_preblocks)>1000: 1.27 + import cProfile 1.28 + cProfile.runctx('remove_contained_preblocks(candidate_preblocks)',globals(),locals()) 1.29 + else: 1.30 + remove_contained_preblocks(candidate_preblocks) 1.31 print len(candidate_preblocks) 1.32 + remove_bad_candidates(candidate_preblocks, index_of_column, 0.5, 10) 1.33 #for those preblocks ending 
with a conserved position and having the score above threshold, add new links 1.34 for preblock in candidate_preblocks: 1.35 if preblock.alignment.columns[preblock.last_column_index] is column and preblock.calculate_score() > calculate_threshold(len(preblock.sequences), len(alignment.sequences), functional_groups): 1.36 @@ -154,6 +165,16 @@ 1.37 counter-=1 1.38 1.39 1.40 +def remove_bad_candidates(preblocks, index_of_column, relative_weight_threshold, distance_threshold): 1.41 + for preblock in preblocks: 1.42 + if preblock.calculate_score() > calculate_threshold(len(preblock.sequences), len(preblock.alignment.sequences), functional_groups): 1.43 + pass 1.44 + else: 1.45 + if preblock.calculate_score() < calculate_threshold(len(preblock.sequences), len(preblock.alignment.sequences), functional_groups)\ 1.46 + and index_of_column - preblock.first_column_index > distance_threshold: 1.47 + preblocks.remove(preblock) 1.48 + 1.49 + 1.50 def calculate_threshold(number_of_sequences_in_preblock, number_of_sequences_in_alignment, functional_groups): 1.51 """ 1.52 We seek to find the probability, that given n randomly generated