Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/1048b7a728e7
Дата изменения: Unknown
Дата индексирования: Mon Oct 1 23:54:24 2012
Кодировка:
allpy: 1048b7a728e7

allpy

changeset 194:1048b7a728e7

repeats: preorder traversal for repeat_group. work in progress
author boris <bnagaev@gmail.com>
date Thu, 18 Nov 2010 23:51:20 +0300
parents 50633fcbbdc4
children 596bdc5897bf
files repeats/repeat_joiner.py
diffstat 1 files changed, 71 insertions(+), 4 deletions(-) [+]
line diff
     1.1 --- a/repeats/repeat_joiner.py	Thu Nov 18 21:42:16 2010 +0300
     1.2 +++ b/repeats/repeat_joiner.py	Thu Nov 18 23:51:20 2010 +0300
     1.3 @@ -2,8 +2,12 @@
     1.4  
     1.5  
     1.6  Throughout: [from, to)
     1.7 +from < to
     1.8 +ori: True for '+'-strand, False for '-'-strand
     1.9  """
    1.10  
    1.11 +import Queue
    1.12 +
    1.13  from bx.intervals.intersection import Intersecter, Interval
    1.14  
    1.15  import config
    1.16 @@ -14,11 +18,17 @@
    1.17  class Repeat(Interval):
    1.18      """
    1.19      chromosome
    1.20 +    ori
    1.21      repeat_pair
    1.22 +
    1.23 +    optional (in result_group):
    1.24 +        group_start -- position of real_start() of this repeat in group
    1.25 +        group_ori -- True if the position of real_start() of this repeat in the group < the position of real_end()
    1.26      """
    1.27      
    1.28 -    def __init__(self, repeat_joiner, chromosome_name, pos_from, pos_to):
    1.29 +    def __init__(self, repeat_joiner, chromosome_name, pos_from, pos_to, ori=True):
    1.30          Interval.__init__(self, pos_from, pos_to)
    1.31 +        self.ori = ori
    1.32          if chromosome_name not in repeat_pair:
    1.33              repeat_pair[chromosome_name] = Chromosome()
    1.34          repeat_pair[chromosome_name].insert_interval(self)
    1.35 @@ -38,6 +48,18 @@
    1.36          """ visit thick edge at first and then all thin edges """
    1.37          return [self.thick_edge()] + self.thin_edges(min_intersection)
    1.38  
    1.39 +    def real_start(self):
    1.40 +        if self.ori:
    1.41 +            return self.start
    1.42 +        else:
    1.43 +            return self.end - 1
    1.44 +
    1.45 +    def real_end(self):
    1.46 +        if self.ori:
    1.47 +            return self.end
    1.48 +        else:
    1.49 +            return self.start - 1
    1.50 +
    1.51  class RepeatPair(tuple):
    1.52      """
    1.53      (repeat, repeat)
    1.54 @@ -46,17 +68,62 @@
    1.55          repeat1.repeat_pair = self
    1.56          repeat2.repeat_pair = self
    1.57  
    1.58 +class ResultGroup(list):
    1.59 +    """
    1.60 +    list of repeats
    1.61 +    
    1.62 +    start -- value to add to each result_repeat.start
    1.63 +        to make minimal position = 0
    1.64 +    """
    1.65 +    
    1.66 +    def __init__(self):
    1.67 +        self.start = 0
    1.68 +
    1.69  class RepeatJoiner(dict):
    1.70      """
    1.71      dictionary {<chromosome name>: chromosome} 
    1.72 +
    1.73 +    repeat_groups -- list of result_groups
    1.74      """
    1.75 +    def __init__(self):
    1.76 +        self.repeat_groups = []
    1.77  
    1.78      def _treat_repeat(self, used):
    1.79 +        def expand_repeat(repeat):
    1.80 +            repeat_group = RepeatGroup()
    1.81 +            self.repeat_groups.append(repeat_group)
    1.82 +            q = Queue.Queue()
    1.83 +            q.put(repeat)
    1.84 +            repeat.group_ori = True
    1.85 +            repeat.group_start = repeat.real_start()
    1.86 +            while not q.empty():
    1.87 +                r = q.get()
    1.88 +                if r not in used:
    1.89 +                    used.add(r)
    1.90 +                    repeat_group.append(repeat) 
    1.91 +                    for r1 in r.edges():
    1.92 +                        if r1 not in used:
    1.93 +                            q.put(r1)
    1.94 +                            if r1.thick_edge() == r:
    1.95 +                                r1.group_start = r.group_start
    1.96 +                                r1.group_ori = r.group_ori
    1.97 +                            else:
    1.98 +                                r1.group_ori = r.group_ori ^ r1.ori ^ r.ori
    1.99 +                                delta = r1.real_start() - r.real_start())   
   1.100 +                                if r1.group_ori:
   1.101 +                                    tr1.group_start = r.group_start + delta
   1.102 +                                else:
   1.103 +                                    tr1.group_start = r.group_start - delta
   1.104 +
   1.105 +
   1.106 +
   1.107 +
   1.108 +
   1.109 +
   1.110          def treat_repeat(repeat):
   1.111              if repeat not in used:
   1.112 -                used.add(repeat)
   1.113 -                pass
   1.114 -    return treat_repeat
   1.115 +                expand_repeat(repeat)
   1.116 +        return treat_repeat
   1.117  
   1.118  
   1.119      def full_repeaits(self):