Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/596bdc5897bf/allpy/sequence.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 07:40:28 2013
Кодировка:
allpy: 596bdc5897bf allpy/sequence.py

allpy

view allpy/sequence.py @ 195:596bdc5897bf

repeats: function repeat_joiner.full_repeats() seems completed
author boris <bnagaev@gmail.com>
date Fri, 19 Nov 2010 00:17:11 +0300
parents a016cad5f5a0
children 346a6cd4fc1d
line source
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
4 from monomer import AminoAcidType
5 from Bio.PDB import CaPPBuilder, PDBIO
6 from Bio.PDB.DSSP import make_dssp_dict
7 from allpy_pdb import std_id, pdb_id_parse, get_structure
8 import alignment
9 import sys
10 import config
11 import os.path
12 import urllib2
13 from tempfile import NamedTemporaryFile
14 import os
17 class Sequence(object):
18 """ Sequence of Monomers
20 Mandatory data:
21 * name -- str with the name of sequence
22 * description -- str with description of the sequence
23 * monomers -- list of monomer objects (aminoacids or nucleotides)
25 Optional (may be empty):
26 * pdb_chains -- list of Bio.PDB.Chain's
27 * pdb_files -- dictionary like {Bio.PDB.Chain: file_obj}
29 * pdb_residues -- dictionary like {Bio.PDB.Chain: {Monomer: Bio.PDB.Residue}}
30 * pdb_secstr -- dictionary like {Bio.PDB.Chain: {Monomer: 'Secondary structure'}}
31 Code Secondary structure
32 H alpha-helix
33 B Isolated beta-bridge residue
34 E Strand
35 G 3-10 helix
36 I pi-helix
37 T Turn
38 S Bend
39 - Other
42 ?TODO: global pdb_structures
43 """
44 def __init__(self, monomers=None, name='', description=""):
45 if not monomers:
46 monomers = []
47 self.name = name
48 self.description = description
49 self.monomers = monomers
50 self.pdb_chains = []
51 self.pdb_files = {}
52 self.pdb_residues = {}
53 self.pdb_secstr = {}
55 def __len__(self):
56 return len(self.monomers)
58 def __str__(self):
59 """ Returns sequence in one-letter code """
60 return ''.join([monomer.type.code1 for monomer in self.monomers])
62 def __eq__(self, other):
63 """ Returns if all corresponding monomers of this sequences are equal
65 If lengths of sequences are not equal, returns False
66 """
67 return len(self) == len(other) and \
68 all([a==b for a, b in zip(self.monomers, other.monomers)])
70 def __ne__(self, other):
71 return not (self == other)
73 def pdb_chain_add(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
74 """ Reads Pdb chain from file
76 and align each Monomer with PDB.Residue (TODO)
77 """
78 name = std_id(pdb_id, pdb_chain, pdb_model)
79 structure = get_structure(pdb_file, name)
80 chain = structure[pdb_model][pdb_chain]
81 self.pdb_chains.append(chain)
82 self.pdb_residues[chain] = {}
83 self.pdb_secstr[chain] = {}
84 pdb_sequence = Sequence.from_pdb_chain(chain)
85 a = alignment.Alignment.from_sequences(self, pdb_sequence)
86 a.muscle_align()
87 for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):
88 if pdb_sequence.pdb_has(chain, pdb_monomer):
89 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
90 self.pdb_residues[chain][monomer] = residue
91 self.pdb_files[chain] = pdb_file
93 def pdb_unload(self):
94 """ Delete all pdb-connected links """
95 #~ gc.get_referrers(self.pdb_chains[0])
96 self.pdb_chains = []
97 self.pdb_residues = {}
98 self.pdb_secstr = {} # FIXME
99 self.pdb_files = {} # FIXME
101 @staticmethod
102 def from_str(fasta_str, name='', description='', monomer_kind=AminoAcidType):
103 """ Import data from one-letter code
105 monomer_kind is class, inherited from MonomerType
106 """
107 monomers = [monomer_kind.from_code1(aa).instance() for aa in fasta_str]
108 return Sequence(monomers, name, description)
110 @staticmethod
111 def from_pdb_chain(chain):
112 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
114 chain is Bio.PDB.Chain
115 """
116 cappbuilder = CaPPBuilder()
117 peptides = cappbuilder.build_peptides(chain)
118 sequence = Sequence()
119 sequence.pdb_chains = [chain]
120 sequence.pdb_residues[chain] = {}
121 sequence.pdb_secstr[chain] = {}
122 for peptide in peptides:
123 for ca_atom in peptide.get_ca_list():
124 residue = ca_atom.get_parent()
125 monomer = AminoAcidType.from_pdb_residue(residue).instance()
126 sequence.pdb_residues[chain][monomer] = residue
127 sequence.monomers.append(monomer)
128 return sequence
130 def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
131 """ Adds pdb information to each monomer
133 Returns if information has been successfully added
134 TODO: conformity_file
136 id-format lava flow
137 """
138 if not conformity_info:
139 path = os.path.join(pdb_directory, self.name)
140 if os.path.exists(path) and os.path.getsize(path):
141 match = pdb_id_parse(self.name)
142 self.pdb_chain_add(open(path), match['code'],
143 match['chain'], match['model'])
144 else:
145 match = pdb_id_parse(self.name)
146 if match:
147 code = match['code']
148 pdb_filename = config.pdb_dir % code
149 if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
150 url = config.pdb_url % code
151 print "Download %s" % url
152 pdb_file = open(pdb_filename, 'w')
153 data = urllib2.urlopen(url).read()
154 pdb_file.write(data)
155 pdb_file.close()
156 print "Save %s" % pdb_filename
157 pdb_file = open(pdb_filename)
158 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
160 def pdb_save(self, out_filename, pdb_chain):
161 """ Saves pdb_chain to out_file """
162 class GlySelect(Select):
163 def accept_chain(self, chain):
164 if chain == pdb_chain:
165 return 1
166 else:
167 return 0
168 io = PDBIO()
169 structure = chain.get_parent()
170 io.set_structure(structure)
171 io.save(out_filename, GlySelect())
174 def pdb_add_sec_str(self, pdb_chain):
175 """ Add secondary structure data """
176 tmp_file = NamedTemporaryFile(delete=False)
177 tmp_file.close()
178 pdb_file = self.pdb_files[pdb_chain].name
179 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
180 dssp, keys = make_dssp_dict(tmp_file.name)
181 for monomer in self.monomers:
182 if self.pdb_has(pdb_chain, monomer):
183 residue = self.pdb_residues[pdb_chain][monomer]
184 try:
185 d = dssp[(pdb_chain.get_id(), residue.get_id())]
186 self.pdb_secstr[pdb_chain][monomer] = d[1]
187 except:
188 print "No dssp information about %s at %s" % (monomer, pdb_chain)
189 os.unlink(tmp_file.name)
191 def pdb_has(self, chain, monomer):
192 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
194 def secstr_has(self, chain, monomer):
195 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
197 @staticmethod
198 def file_slice(file, n_from, n_to, fasta_name='', name='', description='', monomer_kind=AminoAcidType):
199 """ Build and return sequence, consisting of part of sequence from file
201 Does not control gaps
202 """
203 inside = False
204 number_used = 0
205 s = ''
206 for line in file:
207 line = line.split()
208 if not inside:
209 if line.startswith('>%s' % fasta_name):
210 inside = True
211 else:
212 n = len(line)
213 s += line[(n_from - number_user):(n_to - number_user)]
214 return Sequence.from_str(s, name, description, monomer_kind)