allpy
diff allpy/base.py @ 274:9ea5099309b8
Moved PDB parts of allpy.base.Sequence to allpy.pdb.Sequence
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Wed, 15 Dec 2010 19:19:11 +0300 |
parents | bda87d4c525e |
children | 4a2341bc90b1 |
line diff
1.1 --- a/allpy/base.py Wed Dec 15 18:58:54 2010 +0300 1.2 +++ b/allpy/base.py Wed Dec 15 19:19:11 2010 +0300 1.3 @@ -6,9 +6,7 @@ 1.4 1.5 import config 1.6 from graph import Graph 1.7 -from Bio.PDB import Superimposer, CaPPBuilder, PDBIO 1.8 from Bio.PDB.DSSP import make_dssp_dict 1.9 -from allpy.pdb import std_id, pdb_id_parse, get_structure 1.10 from fasta import save_fasta 1.11 import data.codes 1.12 1.13 @@ -127,46 +125,24 @@ 1.14 return self.type is other 1.15 1.16 class Sequence(list): 1.17 - """ Sequence of Monomers 1.18 + """Sequence of Monomers. 1.19 1.20 - list of monomer objects (aminoacids or nucleotides) 1.21 + This behaves like list of monomer objects. In addition to standard list 1.22 + behaviour, Sequence has the following attributes: 1.23 1.24 - Mandatory data: 1.25 + * name -- str with the name of the sequence 1.26 + * description -- str with description of the sequence 1.27 + * source -- str denoting source of the sequence 1.28 1.29 - * name -- str with the name of sequence 1.30 - * description -- str with description of the sequence 1.31 + Any of them may be empty (i.e. hold empty string) 1.32 + """ 1.33 1.34 - Optional (may be empty): 1.35 - 1.36 - * source -- source of sequence 1.37 - * pdb_chain -- Bio.PDB.Chain 1.38 - * pdb_file -- file object 1.39 - 1.40 - * pdb_residues -- {Monomer: Bio.PDB.Residue} 1.41 - * pdb_secstr -- {Monomer: 'Secondary structure'} 1.42 - Code Secondary structure 1.43 - H alpha-helix 1.44 - B Isolated beta-bridge residue 1.45 - E Strand 1.46 - G 3-10 helix 1.47 - I pi-helix 1.48 - T Turn 1.49 - S Bend 1.50 - - Other 1.51 - 1.52 - 1.53 - ?TODO: global pdb_structures 1.54 - """ 1.55 def __init__(self, monomers=None, name='', description=""): 1.56 if not monomers: 1.57 monomers = [] 1.58 self.name = name 1.59 self.description = description 1.60 self.monomers = monomers 1.61 - self.pdb_chains = [] 1.62 - self.pdb_files = {} 1.63 - self.pdb_residues = {} 1.64 - self.pdb_secstr = {} 1.65 1.66 def __len__(self): 1.67 return len(self.monomers) 1.68 @@ -186,34 +162,6 @@ 1.69 def __ne__(self, other): 1.70 return not (self == other) 1.71 1.72 - def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0): 1.73 - """ Reads Pdb chain from file 1.74 - 1.75 - and align each Monomer with PDB.Residue (TODO) 1.76 - """ 1.77 - name = std_id(pdb_id, pdb_chain, pdb_model) 1.78 - structure = get_structure(pdb_file, name) 1.79 - chain = structure[pdb_model][pdb_chain] 1.80 - self.pdb_chains.append(chain) 1.81 - self.pdb_residues[chain] = {} 1.82 - self.pdb_secstr[chain] = {} 1.83 - pdb_sequence = Sequence.from_pdb_chain(chain) 1.84 - a = alignment.Alignment.from_sequences(self, pdb_sequence) 1.85 - a.muscle_align() 1.86 - for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self): 1.87 - if pdb_sequence.pdb_has(chain, pdb_monomer): 1.88 - residue = pdb_sequence.pdb_residues[chain][pdb_monomer] 1.89 - self.pdb_residues[chain][monomer] = residue 1.90 - self.pdb_files[chain] = pdb_file 1.91 - 1.92 - def pdb_unload(self): 1.93 - """ Delete all pdb-connected links """ 1.94 - #~ gc.get_referrers(self.pdb_chains[0]) 1.95 - self.pdb_chains = [] 1.96 - self.pdb_residues = {} 1.97 - self.pdb_secstr = {} # FIXME 1.98 - self.pdb_files = {} # FIXME 1.99 - 1.100 @classmethod 1.101 def from_string(cls, string, name='', description=''): 1.102 """Create sequences from string of one-letter codes.""" 1.103 @@ -221,90 +169,6 @@ 1.104 monomers = [monomer(letter) for letter in string] 1.105 return cls(monomers, name, description) 1.106 1.107 - @staticmethod 1.108 - def from_pdb_chain(chain): 1.109 - """ Returns Sequence with Monomers with link to Bio.PDB.Residue 1.110 - 1.111 - chain is Bio.PDB.Chain 1.112 - """ 1.113 - cappbuilder = CaPPBuilder() 1.114 - peptides = cappbuilder.build_peptides(chain) 1.115 - sequence = Sequence() 1.116 - sequence.pdb_chains = [chain] 1.117 - sequence.pdb_residues[chain] = {} 1.118 - sequence.pdb_secstr[chain] = {} 1.119 - for peptide in peptides: 1.120 - for ca_atom in peptide.get_ca_list(): 1.121 - residue = ca_atom.get_parent() 1.122 - monomer = AminoAcidType.from_pdb_residue(residue).instance() 1.123 - sequence.pdb_residues[chain][monomer] = residue 1.124 - sequence.monomers.append(monomer) 1.125 - return sequence 1.126 - 1.127 - def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'): 1.128 - """ Adds pdb information to each monomer 1.129 - 1.130 - Returns if information has been successfully added 1.131 - TODO: conformity_file 1.132 - 1.133 - id-format lava flow 1.134 - """ 1.135 - if not conformity_info: 1.136 - path = os.path.join(pdb_directory, self.name) 1.137 - if os.path.exists(path) and os.path.getsize(path): 1.138 - match = pdb_id_parse(self.name) 1.139 - self.pdb_chain_add(open(path), match['code'], 1.140 - match['chain'], match['model']) 1.141 - else: 1.142 - match = pdb_id_parse(self.name) 1.143 - if match: 1.144 - code = match['code'] 1.145 - pdb_filename = config.pdb_dir % code 1.146 - if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename): 1.147 - url = config.pdb_url % code 1.148 - print "Download %s" % url 1.149 - pdb_file = open(pdb_filename, 'w') 1.150 - data = urllib2.urlopen(url).read() 1.151 - pdb_file.write(data) 1.152 - pdb_file.close() 1.153 - print "Save %s" % pdb_filename 1.154 - pdb_file = open(pdb_filename) 1.155 - self.pdb_chain_add(pdb_file, code, match['chain'], match['model']) 1.156 - 1.157 - def pdb_save(self, out_filename, pdb_chain): 1.158 - """ Saves pdb_chain to out_file """ 1.159 - class GlySelect(Select): 1.160 - def accept_chain(self, chain): 1.161 - if chain == pdb_chain: 1.162 - return 1 1.163 - else: 1.164 - return 0 1.165 - io = PDBIO() 1.166 - structure = chain.get_parent() 1.167 - io.set_structure(structure) 1.168 - io.save(out_filename, GlySelect()) 1.169 - 1.170 - 1.171 - def pdb_add_sec_str(self, pdb_chain): 1.172 - """ Add secondary structure data """ 1.173 - tmp_file = NamedTemporaryFile(delete=False) 1.174 - tmp_file.close() 1.175 - pdb_file = self.pdb_files[pdb_chain].name 1.176 - os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name}) 1.177 - dssp, keys = make_dssp_dict(tmp_file.name) 1.178 - for monomer in self.monomers: 1.179 - if self.pdb_has(pdb_chain, monomer): 1.180 - residue = self.pdb_residues[pdb_chain][monomer] 1.181 - try: 1.182 - d = dssp[(pdb_chain.get_id(), residue.get_id())] 1.183 - self.pdb_secstr[pdb_chain][monomer] = d[1] 1.184 - except: 1.185 - print "No dssp information about %s at %s" % (monomer, pdb_chain) 1.186 - os.unlink(tmp_file.name) 1.187 - 1.188 - def pdb_has(self, chain, monomer): 1.189 - return chain in self.pdb_residues and monomer in self.pdb_residues[chain] 1.190 - 1.191 def secstr_has(self, chain, monomer): 1.192 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain] 1.193