allpy: c84a7840f9b8 lib/block.py

allpy

view lib/block.py @ 125:c84a7840f9b8

Backed out changeset 9f96bc38bc3d There can be more than one GC!!

author	boris <bnagaev@gmail.com>
date	Sat, 23 Oct 2010 23:50:50 +0400
parents	9f96bc38bc3d
children	69e1650feb6b

line source

1 #!/usr/bin/python

3 import sys

5 import project

6 import sequence

7 import monomer

8 import config

9 from graph import Graph

class Block(object):
    """
    A selection of sequences and alignment positions of a project.

    Mandatory data:
    *   self.project -- project object, which the block belongs to
    *   self.sequences -- set of sequence objects that contain monomers
        and/or gaps, that constitute the block
    *   self.positions -- sorted list of positions of the project.alignment
        that are included in the block

    How to create a new block:
    >>> import project
    >>> import block
    >>> proj = project.Project(open("test.fasta"))
    >>> block1 = block.Block(proj)
    """

    def __init__(self, project, sequences=None, positions=None):
        """
        Builds new block from project

        project -- project object the block belongs to
        sequences -- sequences of the block;
            if None, project.sequences is used
        positions -- alignment positions of the block;
            if None, all positions range(len(project)) are used
        """
        if sequences is None:
            sequences = project.sequences
        if positions is None:
            # list() keeps self.positions a real list on Python 3 as well
            positions = list(range(len(project)))
        self.project = project
        self.sequences = sequences
        self.positions = positions

    def save_fasta(self, out_file, long_line=60):
        """
        Saves alignment to given file in fasta-format

        out_file -- object with a write() method
        long_line -- maximum number of characters per sequence line;
            longer sequences are split into several lines.
            To prevent splitting, set long_line=None

        Monomers are written as their type.code1, gaps as '-'.
        No changes in the names, descriptions or order of the sequences
        are made.
        """
        for seq in self.sequences:
            # header is built from the sequence's name and description fields
            out_file.write(">%(name)s %(description)s \n" % seq.__dict__)
            alignment_monomers = self.project.alignment[seq]
            block_monomers = [alignment_monomers[i] for i in self.positions]
            string = ''.join([m.type.code1 if m else '-'
                              for m in block_monomers])
            if long_line:
                # Step by long_line so that a length which is an exact
                # multiple of long_line does not produce a trailing empty line
                for start in range(0, len(string), long_line):
                    out_file.write("%s \n" % string[start:start + long_line])
            else:
                out_file.write("%s \n" % string)

    def _ca_distances(self, i, j):
        """
        Returns list of CA-CA distances between alignment positions i and j

        One distance is collected per (sequence, pdb chain) pair for which
        both positions hold a monomer (i.e. neither is a gap).
        """
        distances = []
        for seq in self.sequences:
            for chain in seq.pdb_chains:
                m1 = self.project.alignment[seq][i]
                m2 = self.project.alignment[seq][j]
                if m1 and m2:
                    ca1 = m1.pdb_residues[chain]['CA']
                    ca2 = m2.pdb_residues[chain]['CA']
                    # Bio.PDB feature: atom - atom gives the distance
                    distances.append(ca1 - ca2)
        return distances

    def geometrical_core(self, max_delta=config.delta,
                         timeout=config.timeout, minsize=config.minsize):
        """
        Returns the result of the clique search on the graph of positions,
        representing geometrical core

        max_delta -- threshold of distance spreading
        timeout -- passed to Graph.cliques
        minsize -- passed to Graph.cliques

        If more than one pdb chain for some sequence provided,
        consider all of them.
        Two positions are connected when at least two distances between
        them are known and delta = max(distances) - min(distances) does
        not exceed max_delta.
        cost is calculated as 1 / (delta + 1)
        delta in [0, +inf) => cost in (0, 1]
        """
        nodes = self.positions
        lines = {}
        for i in self.positions:
            for j in self.positions:
                if i >= j:
                    # every unordered pair is considered once
                    continue
                distances = self._ca_distances(i, j)
                if len(distances) < 2:
                    # spreading is undefined for fewer than two measurements
                    continue
                delta = max(distances) - min(distances)
                if delta <= max_delta:
                    # weight by the measured spreading, as documented above
                    # (not by the constant threshold max_delta)
                    lines[Graph.line(i, j)] = 1.0 / (1.0 + delta)
        graph = Graph(nodes, lines)
        return graph.cliques(timeout=timeout, minsize=minsize)

    def xstring(self, x):
        """
        Returns string consisting of '-' and chars x at self.positions

        x -- string written at every position included in the block
        Length of returning string = length of project
        (when x is a single character)
        """
        included = [False] * len(self.project)
        for position in self.positions:
            included[position] = True
        return ''.join([x if flag else '-' for flag in included])