Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/0c7f6117481b/lib/graph.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:38:25 2014
Кодировка:
allpy: lib/graph.py annotate

allpy

annotate lib/graph.py @ 153:0c7f6117481b

idea implemented: pdb and sec_str data moved from monomer to sequence
author boris <bnagaev@gmail.com>
date Tue, 26 Oct 2010 21:53:23 +0400
parents db9d116e979f
children
rev   line source
bnagaev@116 1 # -*- coding: utf-8 -*-
bnagaev@116 2
bnagaev@121 3 from datetime import datetime, timedelta
bnagaev@116 4 from copy import copy
bnagaev@116 5
bnagaev@116 6 class TimeoutError(Exception):
bnagaev@116 7 pass
bnagaev@116 8
bnagaev@116 9
bnagaev@116 10
bnagaev@116 11 class Graph(object):
bnagaev@147 12 """ Undirected weighted graph
bnagaev@147 13
bnagaev@116 14 Data:
bnagaev@116 15 nodes -- set of elements
bnagaev@116 16 lines -- {line: cost}.
bnagaev@116 17 line is frozenset([e1, e2])
bnagaev@116 18 cost is float in (0, 1] or 1 (if all lines are equal)
bnagaev@116 19
bnagaev@116 20 >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1})
bnagaev@116 21 >>> g.fast_cliques()
bnagaev@116 22 Fast algorithm started
bnagaev@116 23 [frozenset([1, 2]), frozenset([3])]
bnagaev@119 24 >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1, frozenset([1,1]): 1})
bnagaev@119 25 >>> g.fast_cliques()
bnagaev@119 26 Fast algorithm started
bnagaev@119 27 [frozenset([1, 2]), frozenset([3])]
bnagaev@116 28 >>> g = Graph(set([1,2,3,4]), {frozenset([1,2]): 0.98, frozenset([1,3]): 0.98,
bnagaev@119 29 ... frozenset([2,3]): 0.1, frozenset([1,1]): 1})
bnagaev@116 30 >>> g.fast_cliques()
bnagaev@116 31 Fast algorithm started
bnagaev@116 32 [frozenset([1, 2, 3]), frozenset([4])]
bnagaev@116 33 >>> g.bron_kerbosh()
bnagaev@116 34 Bron and Kerbosh algorithm started
bnagaev@116 35 [frozenset([1, 2, 3]), frozenset([4])]
bnagaev@119 36 >>> g.cliques()
bnagaev@119 37 Bron and Kerbosh algorithm started
bnagaev@119 38 [frozenset([1, 2, 3])]
bnagaev@116 39 """
bnagaev@116 40
bnagaev@116 41 def __init__(self, nodes=None, lines=None):
bnagaev@116 42 if not nodes:
bnagaev@116 43 nodes = set()
bnagaev@116 44 if not lines:
bnagaev@116 45 lines = dict()
bnagaev@121 46 self.nodes = set(nodes) # copy
bnagaev@119 47 self.lines = {}
bnagaev@119 48 for line, cost in lines.items():
bnagaev@119 49 if len(line) == 2 and line.issubset(self.nodes):
bnagaev@119 50 self.lines[line] = cost
bnagaev@119 51
bnagaev@117 52 @staticmethod
bnagaev@117 53 def line(k1, k2):
bnagaev@150 54 """ Construct object, representing line of graph """
bnagaev@117 55 return frozenset([k1, k2])
bnagaev@116 56
bnagaev@116 57 def bounded(self, k1, k2):
bnagaev@150 58 """ Return if these two nodes of the graph are bounded with line """
bnagaev@117 59 return k1 == k2 or Graph.line(k1, k2) in self.lines
bnagaev@116 60
bnagaev@116 61 def count_one(self, node):
bnagaev@147 62 """ Returns number of connections of this node """
bnagaev@117 63 return len([node1 for node1 in self.nodes if self.bounded(node, node1)]) - 1
bnagaev@116 64
bnagaev@116 65 def cost_one(self, node):
bnagaev@147 66 """ Returns sum of costs of all connections of this node """
bnagaev@121 67 return sum([self.lines.get(Graph.line(node, node1), 0)
bnagaev@121 68 for node1 in self.nodes if node != node1])
bnagaev@116 69
bnagaev@116 70 def count_all(self):
bnagaev@147 71 """ Returns {node: number of connections of this node} """
bnagaev@116 72 c = dict([(node, 0) for node in self.nodes])
bnagaev@116 73 for line in self.lines:
bnagaev@116 74 for node in line:
bnagaev@116 75 c[node] += 1
bnagaev@116 76 return c
bnagaev@116 77
bnagaev@116 78
bnagaev@116 79 def drop_node(self, node):
bnagaev@147 80 """ Remove node and all involved lines """
bnagaev@116 81 for node1 in self.nodes:
bnagaev@117 82 self.lines.pop(Graph.line(node, node1), None)
bnagaev@116 83 self.nodes.discard(node)
bnagaev@116 84
bnagaev@116 85 def add_node(self, node, parent_graph):
bnagaev@147 86 """ Add node and corresponding lines from parent_graph
bnagaev@147 87
bnagaev@118 88 Added lines should be contained in self graph
bnagaev@118 89 (takes care of hanging lines)
bnagaev@118 90 """
bnagaev@116 91 self.nodes.add(node)
bnagaev@116 92 for node1 in self.nodes:
bnagaev@117 93 line = Graph.line(node, node1)
bnagaev@116 94 if line in parent_graph.lines:
bnagaev@117 95 self.lines[line] = parent_graph.lines[line]
bnagaev@116 96
bnagaev@116 97 def drop_nodes(self, nodes):
bnagaev@147 98 """ Run drop_node for each of given nodes
bnagaev@150 99
bnagaev@118 100 Returns if nodes was not empty (ugly beauty)
bnagaev@118 101 """
bnagaev@116 102 for node in nodes:
bnagaev@116 103 self.drop_node(node)
bnagaev@116 104 return bool(nodes)
bnagaev@116 105
bnagaev@116 106 def drop_if_count(self, minsize):
bnagaev@147 107 """ Run drop_node for each node, that has less than minsize lines """
bnagaev@116 108 while True:
bnagaev@121 109 if not self.drop_nodes([node for (node, count)
bnagaev@121 110 in self.count_all().items() if count < minsize]):
bnagaev@116 111 break
bnagaev@116 112
bnagaev@116 113 def bron_kerbosh(self, timeout=-1, minsize=1):
bnagaev@147 114 """ Bron and Kerboch algorithm implementation
bnagaev@147 115
bnagaev@116 116 returns list of cliques
bnagaev@116 117 clique is frozenset
bnagaev@116 118 if timeout=-1, it means infinity
bnagaev@116 119 if timeout has happened, raises TimeoutError
bnagaev@116 120
bnagaev@116 121 lava flow
bnagaev@116 122 """
bnagaev@116 123 print 'Bron and Kerbosh algorithm started'
bnagaev@116 124 cliques = []
bnagaev@116 125
bnagaev@116 126 depth = 0
bnagaev@116 127 list_candidates = [copy(self.nodes)]
bnagaev@116 128 list_used = [set()]
bnagaev@116 129 compsub = []
bnagaev@116 130
bnagaev@116 131 start_time = datetime.now()
bnagaev@121 132 timeout_timedelta = timedelta(timeout)
bnagaev@121 133
bnagaev@116 134 while True: # ????????...
bnagaev@116 135 if depth == -1:
bnagaev@116 136 break # ??????! ?????? ???????????????? (????????????????) ????????????????
bnagaev@116 137 candidates = copy(list_candidates[depth])
bnagaev@116 138 used = copy(list_used[depth])
bnagaev@116 139 if not candidates: # ???????? candidates ???? ??????????
bnagaev@116 140 depth -= 1
bnagaev@116 141 if compsub:
bnagaev@116 142 compsub.pop()
bnagaev@116 143 continue
bnagaev@116 144
bnagaev@116 145 # ?? used ???? ???????????????? ??????????????, ?????????????????????? ???? ?????????? ?????????????????? ???? candidates
bnagaev@116 146 # (?????? ???? used ???? ?????????????????? ???????? ???? ?? 1 ???? candidates)
bnagaev@116 147 used_candidates = False
bnagaev@116 148
bnagaev@116 149 for used1 in used:
bnagaev@116 150 for candidates1 in candidates:
bnagaev@116 151 if not self.bounded(used1, candidates1):
bnagaev@116 152 break
bnagaev@116 153 else:
bnagaev@116 154 used_candidates = True
bnagaev@116 155
bnagaev@116 156 if used_candidates:
bnagaev@116 157 depth -= 1
bnagaev@116 158
bnagaev@116 159 if compsub:
bnagaev@116 160 compsub.pop()
bnagaev@116 161 continue
bnagaev@116 162
bnagaev@116 163 # ???????????????? ?????????????? v ???? candidates ?? ?????????????????? ???? ?? compsub
bnagaev@116 164 v = candidates.pop()
bnagaev@116 165 candidates.add(v)
bnagaev@116 166 compsub.append(v)
bnagaev@116 167 # ?????????????????? new_candidates ?? new_used, ???????????? ???? candidates ?? used ??????????????, ???? ?????????????????????? ?? v
bnagaev@116 168 # (???? ????????, ???????????????? ???????????? ?????????????????????? ?? v)
bnagaev@116 169 new_candidates = set()
bnagaev@116 170 for candidates1 in candidates:
bnagaev@116 171 if self.bounded(candidates1, v) and candidates1 != v:
bnagaev@116 172 new_candidates.add(candidates1)
bnagaev@116 173
bnagaev@116 174 new_used = set()
bnagaev@116 175 for used1 in used:
bnagaev@116 176 if self.bounded(used1, v) and used1 != v:
bnagaev@116 177 new_used.add(used1)
bnagaev@116 178
bnagaev@116 179 # ?????????????? v ???? candidates ?? ???????????????? ?? used
bnagaev@116 180 list_candidates[depth].remove(v)
bnagaev@116 181 list_used[depth].add(v)
bnagaev@116 182 # ???????? new_candidates ?? new_used ??????????
bnagaev@116 183 if not new_candidates and not new_used:
bnagaev@116 184 # compsub ??? ??????????
bnagaev@116 185 if len(compsub) >= minsize:
bnagaev@116 186 cliques.append(frozenset(compsub))
bnagaev@116 187 else:
bnagaev@116 188 # ?????????? ???????????????????? ???????????????? bron_kerbosh(new_candidates, new_used)
bnagaev@116 189 depth += 1
bnagaev@116 190
bnagaev@116 191 # TIMEOUT check start
bnagaev@116 192 if timeout != -1:
bnagaev@121 193 if datetime.now() - start_time > timeout_timedelta:
bnagaev@116 194 raise TimeoutError
bnagaev@116 195 # TIMEOUT check end
bnagaev@116 196
bnagaev@116 197 if depth >= len(list_candidates):
bnagaev@116 198 list_candidates.append(set())
bnagaev@116 199 list_used.append(set())
bnagaev@116 200
bnagaev@116 201 list_candidates[depth] = copy(new_candidates)
bnagaev@116 202 list_used[depth] = copy(new_used)
bnagaev@116 203
bnagaev@116 204 continue
bnagaev@116 205
bnagaev@116 206 # ?????????????? v ???? compsub
bnagaev@116 207 if compsub:
bnagaev@116 208 compsub.pop()
bnagaev@116 209
bnagaev@116 210 return cliques
bnagaev@116 211
bnagaev@116 212
bnagaev@116 213 def fast_cliques(self, minsize=1):
bnagaev@147 214 """ returns list of cliques
bnagaev@147 215
bnagaev@116 216 clique is frozenset
bnagaev@116 217 """
bnagaev@116 218 print 'Fast algorithm started'
bnagaev@116 219 cliques = []
bnagaev@116 220
bnagaev@116 221 while True:
bnagaev@117 222 graph = Graph(self.nodes, self.lines)
bnagaev@116 223 for clique in cliques:
bnagaev@116 224 graph.drop_nodes(clique)
bnagaev@119 225 if not graph.nodes:
bnagaev@116 226 break
bnagaev@116 227
bnagaev@116 228 while True:
bnagaev@116 229 # drop nodes, while its is possible
bnagaev@119 230 if len(graph.nodes) == 1:
bnagaev@119 231 break
bnagaev@116 232 c = graph.count_all()
bnagaev@116 233 min_count = min(c.values())
bnagaev@116 234 bad_nodes = [node for (node, count) in c.items() if count == min_count]
bnagaev@119 235 if len(bad_nodes) == len(graph.nodes) and min_count != 0:
bnagaev@116 236 break
bnagaev@116 237
bnagaev@116 238 costs = dict([(node, graph.cost_one(node)) for node in bad_nodes])
bnagaev@116 239 min_cost = min(costs.values())
bnagaev@116 240 for node, cost in costs.items():
bnagaev@116 241 if cost == min_cost:
bnagaev@116 242 graph.drop_node(node)
bnagaev@119 243 break
bnagaev@116 244
bnagaev@116 245 while True:
bnagaev@116 246 # add nodes, while its is possible
bnagaev@116 247 candidats = {}
bnagaev@116 248 for node in self.nodes:
bnagaev@117 249 c = len([i for i in graph.nodes if self.bounded(node, i)])
bnagaev@117 250 if c == len(self.nodes):
bnagaev@117 251 graph1 = Graph(graph.nodes, graph.lines)
bnagaev@116 252 graph1.add_node(node, self)
bnagaev@116 253 candidats[node] = graph1.cost_one(node)
bnagaev@116 254 if not candidats:
bnagaev@116 255 break
bnagaev@116 256
bnagaev@116 257 max_cost = max(candidats.values())
bnagaev@116 258 node = [node for (node, cost) in candidats.items() if cost == max_cost][0]
bnagaev@116 259 graph.add_node(node, self)
bnagaev@116 260
bnagaev@116 261 cliques.append(frozenset(graph.nodes))
bnagaev@116 262
bnagaev@116 263 return cliques
bnagaev@116 264
bnagaev@116 265
bnagaev@116 266 def cliques(self, timeout=-1, minsize=1):
bnagaev@147 267 """ returns length-sorted list of cliques
bnagaev@147 268
bnagaev@116 269 clique is frozenset
bnagaev@116 270
bnagaev@116 271 can change self!
bnagaev@116 272
bnagaev@116 273 try to execute bron_kerbosh
bnagaev@116 274 if it raises TimeoutError, executes fast_cliques
bnagaev@116 275 """
bnagaev@116 276
bnagaev@117 277 self.drop_if_count(minsize)
bnagaev@116 278
bnagaev@116 279 try:
bnagaev@117 280 cliques = self.bron_kerbosh(timeout, minsize)
bnagaev@117 281 cliques.sort(key=lambda clique: len(clique), reverse=True)
bnagaev@116 282 except TimeoutError:
bnagaev@117 283 cliques = self.fast_cliques(minsize)
bnagaev@117 284 return cliques
bnagaev@116 285
bnagaev@116 286 if __name__ == "__main__":
bnagaev@116 287 import doctest
bnagaev@116 288 doctest.testmod()