Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/0ffdb88c13bd/lib/sequence.py
Дата изменения: Unknown
Дата индексирования: Sat Mar 1 22:45:46 2014
Кодировка:
allpy: 0ffdb88c13bd lib/sequence.py

allpy

view lib/sequence.py @ 161:0ffdb88c13bd

fix fasta.long_line
author boris (netbook) <bnagaev@gmail.com>
date Thu, 28 Oct 2010 19:33:15 +0400
parents 2ed0c183867a
children 2deb7942fbed
line source
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
4 from monomer import AminoAcidType
5 from Bio.PDB import CaPPBuilder, PDBIO
6 from Bio.PDB.DSSP import make_dssp_dict
7 from allpy_pdb import std_id, pdb_id_parse, get_structure
8 import project
9 import sys
10 import config
11 import os.path
12 import urllib2
13 from tempfile import NamedTemporaryFile
14 import os
17 class Sequence(object):
18 """ Sequence of Monomers
20 Mandatory data:
21 * name -- str with the name of sequence
22 * description -- str with description of the sequence
23 * monomers -- list of monomer objects (aminoacids or nucleotides)
24 * pdb_chains -- list of Bio.PDB.Chain's
25 * pdb_files -- dictionary like {Bio.PDB.Chain: file_obj}
27 * pdb_residues -- dictionary like {Bio.PDB.Chain: {Monomer: Bio.PDB.Residue}}
28 * pdb_secstr -- dictionary like {Bio.PDB.Chain: {Monomer: 'Secondary structure'}}
29 Code Secondary structure
30 H ??-helix
31 B Isolated ??-bridge residue
32 E Strand
33 G 3-10 helix
34 I ??-helix
35 T Turn
36 S Bend
37 - Other
40 ?TODO: global pdb_structures
41 """
42 def __init__(self, monomers=None, name='', description=""):
43 if not monomers:
44 monomers = []
45 self.name = name
46 self.description = description
47 self.monomers = monomers
48 self.pdb_chains = []
49 self.pdb_files = {}
50 self.pdb_residues = {}
51 self.pdb_secstr = {}
53 def __len__(self):
54 return len(self.monomers)
56 def __str__(self):
57 """ Returns sequence in one-letter code """
58 return ''.join([monomer.type.code1 for monomer in self.monomers])
60 def __eq__(self, other):
61 """ Returns if all corresponding monomers of this sequences are equal
63 If lengths of sequences are not equal, returns False
64 """
65 return len(self) == len(other) and \
66 all([a==b for a, b in zip(self.monomers, other.monomers)])
68 def __ne__(self, other):
69 return not (self == other)
71 def pdb_chain_add(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
72 """ Reads Pdb chain from file
74 and align each Monomer with PDB.Residue (TODO)
75 """
76 name = std_id(pdb_id, pdb_chain, pdb_model)
77 structure = get_structure(pdb_file, name)
78 chain = structure[pdb_model][pdb_chain]
79 self.pdb_chains.append(chain)
80 self.pdb_residues[chain] = {}
81 self.pdb_secstr[chain] = {}
82 pdb_sequence = Sequence.from_pdb_chain(chain)
83 alignment = project.Project.from_sequences(self, pdb_sequence)
84 alignment.muscle_align()
85 for monomer, pdb_monomer in alignment.column(sequence=pdb_sequence, original=self):
86 if pdb_sequence.pdb_has(chain, pdb_monomer):
87 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
88 self.pdb_residues[chain][monomer] = residue
89 self.pdb_files[chain] = pdb_file
91 def pdb_unload(self):
92 """ Delete all pdb-connected links """
93 #~ gc.get_referrers(self.pdb_chains[0])
94 self.pdb_chains = []
95 self.pdb_residues = {}
96 self.pdb_secstr = {} # FIXME
97 self.pdb_files = {} # FIXME
99 @staticmethod
100 def from_str(fasta_str, name='', description='', monomer_kind=AminoAcidType):
101 """ Import data from one-letter code
103 monomer_kind is class, inherited from MonomerType
104 """
105 monomers = [monomer_kind.from_code1(aa).instance() for aa in fasta_str]
106 return Sequence(monomers, name, description)
108 @staticmethod
109 def from_pdb_chain(chain):
110 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
112 chain is Bio.PDB.Chain
113 """
114 cappbuilder = CaPPBuilder()
115 peptides = cappbuilder.build_peptides(chain)
116 sequence = Sequence()
117 sequence.pdb_chains = [chain]
118 sequence.pdb_residues[chain] = {}
119 sequence.pdb_secstr[chain] = {}
120 for peptide in peptides:
121 for ca_atom in peptide.get_ca_list():
122 residue = ca_atom.get_parent()
123 monomer = AminoAcidType.from_pdb_residue(residue).instance()
124 sequence.pdb_residues[chain][monomer] = residue
125 sequence.monomers.append(monomer)
126 return sequence
128 def pdb_auto_add(self, conformity_info=None, pdb_directory='./pdb'):
129 """ Adds pdb information to each monomer
131 Returns if information has been successfully added
132 TODO: conformity_file
134 id-format lava flow
135 """
136 if not conformity_info:
137 path = os.path.join(pdb_directory, self.name)
138 if os.path.exists(path):
139 match = pdb_id_parse(self.name)
140 self.pdb_chain_add(open(path), match['code'],
141 match['chain'], match['model'])
142 else:
143 match = pdb_id_parse(self.name)
144 if match:
145 code = match['code']
146 pdb_filename = config.pdb_dir % code
147 if not os.path.exists(pdb_filename):
148 url = config.pdb_url % code
149 print "Download %s" % url
150 pdb_file = open(pdb_filename, 'w')
151 data = urllib2.urlopen(url).read()
152 pdb_file.write(data)
153 pdb_file.close()
154 print "Save %s" % pdb_filename
155 pdb_file = open(pdb_filename)
156 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
158 def pdb_save(self, out_filename, pdb_chain):
159 """ Saves pdb_chain to out_file """
160 class GlySelect(Select):
161 def accept_chain(self, chain):
162 if chain == pdb_chain:
163 return 1
164 else:
165 return 0
166 io = PDBIO()
167 structure = chain.get_parent()
168 io.set_structure(structure)
169 io.save(out_filename, GlySelect())
172 def pdb_add_sec_str(self, pdb_chain):
173 """ Add secondary structure data """
174 tmp_file = NamedTemporaryFile(delete=False)
175 tmp_file.close()
176 pdb_file = self.pdb_files[pdb_chain].name
177 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
178 dssp, keys = make_dssp_dict(tmp_file.name)
179 for monomer in self.monomers:
180 if self.pdb_has(pdb_chain, monomer):
181 residue = self.pdb_residues[pdb_chain][monomer]
182 try:
183 d = dssp[(pdb_chain.get_id(), residue.get_id())]
184 self.pdb_secstr[pdb_chain][monomer] = d[1]
185 except:
186 print "No dssp information about %s at %s" % (monomer, pdb_chain)
187 os.unlink(tmp_file.name)
189 def pdb_has(self, chain, monomer):
190 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
192 def secstr_has(self, chain, monomer):
193 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]