allpy
changeset 194:1048b7a728e7
repeats: preorder traversal for repeat_group.
work in progress
author | boris <bnagaev@gmail.com> |
---|---|
date | Thu, 18 Nov 2010 23:51:20 +0300 |
parents | 50633fcbbdc4 |
children | 596bdc5897bf |
files | repeats/repeat_joiner.py |
diffstat | 1 file changed, 71 insertions(+), 4 deletions(-) [+] |
line diff
1.1 --- a/repeats/repeat_joiner.py Thu Nov 18 21:42:16 2010 +0300 1.2 +++ b/repeats/repeat_joiner.py Thu Nov 18 23:51:20 2010 +0300 1.3 @@ -2,8 +2,12 @@ 1.4 1.5 1.6 Throuot: [from, to) 1.7 +from < to 1.8 +ori: True for '+'-strand, - for '-'-strand 1.9 """ 1.10 1.11 +import Queue 1.12 + 1.13 from bx.intervals.intersection import Intersecter, Interval 1.14 1.15 import config 1.16 @@ -14,11 +18,17 @@ 1.17 class Repeat(Interval): 1.18 """ 1.19 chromosome 1.20 + ori 1.21 repeat_pair 1.22 + 1.23 + optional (in result_group): 1.24 + group_start -- position of real_start() of this repeat in group 1.25 + group_ori -- if position of real_start() of this repeat in the group < position of real_end() 1.26 """ 1.27 1.28 - def __init__(self, repeat_joiner, chromosome_name, pos_from, pos_to): 1.29 + def __init__(self, repeat_joiner, chromosome_name, pos_from, pos_to, ori=True): 1.30 Interval.__init__(self, pos_from, pos_to) 1.31 + self.ori = ori 1.32 if chromosome_name not in repeat_pair: 1.33 repeat_pair[chromosome_name] = Chromosome() 1.34 repeat_pair[chromosome_name].insert_interval(self) 1.35 @@ -38,6 +48,18 @@ 1.36 """ visit thick edge at first and then all thin edges """ 1.37 return [self.thick_edge()] + self.thin_edges(min_intersection) 1.38 1.39 + def real_start(self): 1.40 + if self.ori: 1.41 + return self.start 1.42 + else: 1.43 + return self.end - 1 1.44 + 1.45 + def real_end(self): 1.46 + if self.ori: 1.47 + return self.end 1.48 + else: 1.49 + return self.start - 1 1.50 + 1.51 class RepeatPair(tuple): 1.52 """ 1.53 (repeat, repeat) 1.54 @@ -46,17 +68,62 @@ 1.55 repeat1.repeat_pair = self 1.56 repeat2.repeat_pair = self 1.57 1.58 +class ResultGroup(list): 1.59 + """ 1.60 + list of repeats 1.61 + 1.62 + start -- value to add to each result_repeat.start 1.63 + to make minimal position = 0 1.64 + """ 1.65 + 1.66 + def __init__(self): 1.67 + self.start = 0 1.68 + 1.69 class RepeatJoiner(dict): 1.70 """ 1.71 dictionary {<chromosome name>: chromosome} 1.72 + 1.73 + 
repeat_groups -- list of result_groups 1.74 """ 1.75 + def __init__(self): 1.76 + self.repeat_groups = [] 1.77 1.78 def _treat_repeat(self, used): 1.79 + def expand_repeat(repeat): 1.80 + repeat_group = RepeatGroup() 1.81 + self.repeat_groups.append(repeat_group) 1.82 + q = Queue.Queue() 1.83 + q.put(repeat) 1.84 + repeat.group_ori = True 1.85 + repeat.group_start = repeat.real_start() 1.86 + while not q.empty(): 1.87 + r = q.get() 1.88 + if r not in used: 1.89 + used.add(r) 1.90 + repeat_group.append(repeat) 1.91 + for r1 in r.edges(): 1.92 + if r1 not in used: 1.93 + q.put(r1) 1.94 + if r1.thick_edge() == r: 1.95 + r1.group_start = r.group_start 1.96 + r1.group_ori = r.group_ori 1.97 + else: 1.98 + r1.group_ori = r.group_ori ^ r1.ori ^ r.ori 1.99 + delta = r1.real_start() - r.real_start()) 1.100 + if r1.group_ori: 1.101 + tr1.group_start = r.group_start + delta 1.102 + else: 1.103 + tr1.group_start = r.group_start - delta 1.104 + 1.105 + 1.106 + 1.107 + 1.108 + 1.109 + 1.110 def treat_repeat(repeat): 1.111 if repeat not in used: 1.112 - used.add(repeat) 1.113 - pass 1.114 - return treat_repeat 1.115 + expand_repeat(repeat) 1.116 + return treat_repeat 1.117 1.118 1.119 def full_repeaits(self):