Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/675b402094be/lib/sequence.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 02:17:21 2013
Кодировка:
allpy: 675b402094be lib/sequence.py

allpy

view lib/sequence.py @ 151:675b402094be

day commit -- a lot of changes fasta.py: universal save_fasta() determine_long_line -- for determine length of fasta sequence string in user input everywhere: standart long_line=60 --> 70 blocK.sequences_chains: returns sequences in order as in project added monomer pdb_secstr to store secondary structure pdb adding: some improvements and fixes fix in from_pdb_chain: use all peptides, not only first Sequence.pdb_files added to store information about pdb file for each chain dssp bindings to get secondary structure /sec_str -- tool to map secondary structure on each sequence of alignment
author boris (netbook) <bnagaev@gmail.com>
date Tue, 26 Oct 2010 00:40:36 +0400
parents f7dead025719
children 0c7f6117481b
line source
1 #!/usr/bin/python
2 from monomer import AminoAcidType
3 from Bio.PDB import PDBParser, CaPPBuilder, PDBIO
4 from Bio.PDB.DSSP import make_dssp_dict
5 from allpy_pdb import std_id, pdb_id_parse
6 import project
7 import sys
8 import config
9 import os.path
10 import urllib2
11 from tempfile import NamedTemporaryFile
12 import os
15 class Sequence(object):
16 """ Sequence of Monomers
18 Mandatory data:
19 * name -- str with the name of sequence
20 * description -- str with description of the sequence
21 * monomers -- list of monomer objects (aminoacids or nucleotides)
22 * pdb_chains -- list of Bio.PDB.Chain's
23 * pdb_files -- dictionary like {Bio.PDB.Chain: file_obj}
25 ?TODO: global pdb_structures
26 """
27 def __init__(self, monomers=None, name='', description=""):
28 if not monomers:
29 monomers = []
30 self.name = name
31 self.description = description
32 self.monomers = monomers
33 self.pdb_chains = []
34 self.pdb_files = {}
36 def __len__(self):
37 return len(self.monomers)
39 def __str__(self):
40 """ Returns sequence in one-letter code """
41 return ''.join([monomer.type.code1 for monomer in self.monomers])
43 def __eq__(self, other):
44 """ Returns if all corresponding monomers of this sequences are equal
46 If lengths of sequences are not equal, returns False
47 """
48 return len(self) == len(other) and \
49 all([a==b for a, b in zip(self.monomers, other.monomers)])
51 def __ne__(self, other):
52 return not (self == other)
54 def pdb_chain_add(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
55 """ Reads Pdb chain from file
57 and align each Monomer with PDB.Residue (TODO)
58 """
59 name = std_id(pdb_id, pdb_chain, pdb_model)
60 structure = PDBParser().get_structure(name, pdb_file)
61 chain = structure[pdb_model][pdb_chain]
62 self.pdb_chains.append(chain)
63 pdb_sequence = Sequence.from_pdb_chain(chain)
64 alignment = project.Project.from_sequences(self, pdb_sequence)
65 alignment.muscle_align()
66 for monomer, pdb_monomer in alignment.column(sequence=pdb_sequence, original=self):
67 if pdb_monomer and chain in pdb_monomer.pdb_residues:
68 monomer.pdb_residue_add(chain, pdb_monomer.pdb_residues[chain])
69 self.pdb_files[chain] = pdb_file
71 @staticmethod
72 def from_str(fasta_str, name='', description='', monomer_kind=AminoAcidType):
73 """ Import data from one-letter code
75 monomer_kind is class, inherited from MonomerType
76 """
77 monomers = [monomer_kind.from_code1(aa).instance() for aa in fasta_str]
78 return Sequence(monomers, name, description)
80 @staticmethod
81 def from_pdb_chain(chain):
82 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
84 chain is Bio.PDB.Chain
85 """
86 cappbuilder = CaPPBuilder()
87 peptides = cappbuilder.build_peptides(chain)
88 sequence = Sequence()
89 sequence.pdb_chains = [chain]
90 for peptide in peptides:
91 for ca_atom in peptide.get_ca_list():
92 residue = ca_atom.get_parent()
93 monomer = AminoAcidType.from_pdb_residue(residue).instance()
94 monomer.pdb_residue_add(chain, residue)
95 sequence.monomers.append(monomer)
96 return sequence
98 def pdb_auto_add(self, conformity_info=None, pdb_directory='./pdb'):
99 """ Adds pdb information to each monomer
101 Returns if information has been successfully added
102 TODO: conformity_file
104 id-format lava flow
105 """
106 if not conformity_info:
107 path = os.path.join(pdb_directory, self.name)
108 if os.path.exists(path):
109 match = pdb_id_parse(self.name)
110 self.pdb_chain_add(open(path), match['code'],
111 match['chain'], match['model'])
112 else:
113 match = pdb_id_parse(self.name)
114 if match:
115 code = match['code']
116 pdb_filename = config.pdb_dir % code
117 if not os.path.exists(pdb_filename):
118 url = config.pdb_url % code
119 print "Download %s" % url
120 pdb_file = open(pdb_filename, 'w')
121 data = urllib2.urlopen(url).read()
122 pdb_file.write(data)
123 pdb_file.close()
124 print "Save %s" % pdb_filename
125 pdb_file = open(pdb_filename)
126 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
128 def pdb_save(self, out_filename, pdb_chain):
129 """ Saves pdb_chain to out_file """
130 class GlySelect(Select):
131 def accept_chain(self, chain):
132 if chain == pdb_chain:
133 return 1
134 else:
135 return 0
136 io = PDBIO()
137 structure = chain.get_parent()
138 io.set_structure(structure)
139 io.save(out_filename, GlySelect())
142 def pdb_add_sec_str(self, pdb_chain):
143 """ Add secondary structure data """
144 tmp_file = NamedTemporaryFile(delete=False)
145 tmp_file.close()
146 pdb_file = self.pdb_files[pdb_chain].name
147 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
148 dssp, keys = make_dssp_dict(tmp_file.name)
149 for monomer in self.monomers:
150 if pdb_chain in monomer.pdb_residues:
151 residue = monomer.pdb_residues[pdb_chain]
152 try:
153 d = dssp[(pdb_chain.get_id(), residue.get_id())]
154 monomer.pdb_secstr[pdb_chain] = d[1]
155 except:
156 print "No dssp information about %s at %s" % (monomer, pdb_chain)
157 os.unlink(tmp_file.name)