allpy

changeset 385:d7fc6865ce58
refactoring of pdb-related code * remove (transiently) secstr from pdb.py * refactoring of pdb loading * create module protein_pdb with 4 classes with both properties * make geometrical-core work (with errors and without saving to fasta)
author: boris <bnagaev@gmail.com>
date: Wed, 02 Feb 2011 15:50:20 +0300
parents: 01f9e9c3493e
children: 06659228cd6f
files: allpy/config.py allpy/pdb.py geometrical_core/geometrical-core geometrical_core/protein_pdb.py
diffstat: 4 files changed, 154 insertions(+), 243 deletions(-) [+]
[-]

allpy/config.py 2

allpy/pdb.py 242

geometrical_core/geometrical-core 136

geometrical_core/protein_pdb.py 17 allpy/config.py 2 allpy/pdb.py 242 geometrical_core/geometrical-core 136 geometrical_core/protein_pdb.py 17
allpy/config.py 2
allpy/pdb.py 242
     1.1 --- a/allpy/config.py	Wed Feb 02 15:37:15 2011 +0300
     1.2 +++ b/allpy/config.py	Wed Feb 02 15:50:20 2011 +0300
     1.3 @@ -5,7 +5,7 @@
     1.4  
     1.5  # pdb download url (XXXX is pdb code place)
     1.6  pdb_url = 'http://www.pdb.org/pdb/files/%s.pdb'
     1.7 -pdb_dir = '/tmp/%s.pdb'
     1.8 +pdb_path = '/tmp/%s.pdb'
     1.9  timeout = 10 # time in sec. for BRON-KERBOSH algorithm
    1.10  
    1.11  

     2.1 --- a/allpy/pdb.py	Wed Feb 02 15:37:15 2011 +0300
     2.2 +++ b/allpy/pdb.py	Wed Feb 02 15:50:20 2011 +0300
     2.3 @@ -17,23 +17,14 @@
     2.4  from Bio.PDB.DSSP import make_dssp_dict
     2.5  
     2.6  import base
     2.7 +import processors
     2.8 +import config
     2.9  from graph import Graph
    2.10  
    2.11  
    2.12  # for pdb-codes
    2.13  re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)
    2.14  
    2.15 -#~ # for files
    2.16 -#~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")
    2.17 -
    2.18 -def std_id(pdb_id, pdb_chain, pdb_model=None):
    2.19 -    if pdb_model:
    2.20 -        return "%s_%s_%s" % \
    2.21 -        (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model)
    2.22 -    else:
    2.23 -        return "%s_%s" % \
    2.24 -        (pdb_id.lower().strip(), pdb_chain.upper().strip())
    2.25 -
    2.26  def pdb_id_parse(ID):
    2.27      match = re1.search(ID)
    2.28      if not match:
    2.29 @@ -49,32 +40,6 @@
    2.30  def get_structure(file, name):
    2.31      return PDBParser().get_structure(name, file)
    2.32  
    2.33 -#~ def std_id_parse(ID):
    2.34 -    #~ """
    2.35 -    #~ Parse standart ID to pdb_code, chain and model
    2.36 -    #~ """
    2.37 -    #~ if '.ent' in ID.lower() or '.pdb' in ID.lower():
    2.38 -        #~ # it is file
    2.39 -        #~ parseO = self.re2.search(ID) # files
    2.40 -    #~ else:
    2.41 -        #~ parseO = self.re1.search(ID.lower()) # pdb codes
    2.42 -    #~ if not parseO:
    2.43 -        #~ return None
    2.44 -    #~ parse = parseO.groups()
    2.45 -    #~ if len(parse) < 2:
    2.46 -        #~ return None
    2.47 -    #~ code = parse[1]
    2.48 -    #~ chain = ''
    2.49 -    #~ model = None
    2.50 -    #~ if len(parse) >= 4:
    2.51 -        #~ chain = parse[3]
    2.52 -        #~ if chain:
    2.53 -            #~ chain = chain.upper()
    2.54 -        #~ if len(parse) >= 6:
    2.55 -            #~ if parse[5]:
    2.56 -                #~ model = parse[5]
    2.57 -    #~ return code, chain, model
    2.58 -
    2.59  class SequenceMixin(base.Sequence):
    2.60      """Mixin for adding PDB data to a Sequence.
    2.61  
    2.62 @@ -85,79 +50,57 @@
    2.63      Attributes:
    2.64  
    2.65      *   pdb_chain -- Bio.PDB.Chain
    2.66 -    *   pdb_file -- file object
    2.67 -
    2.68      *   pdb_residues -- {Monomer: Bio.PDB.Residue}
    2.69 -    *   pdb_secstr -- {Monomer: 'Secondary structure'}
    2.70 -            Code   Secondary structure
    2.71 -            H      alpha-helix
    2.72 -            B      Isolated beta-bridge residue
    2.73 -            E      Strand
    2.74 -            G      3-10 helix
    2.75 -            I      pi-helix
    2.76 -            T      Turn
    2.77 -            S      Bend
    2.78 -            -      Other
    2.79 -
    2.80  
    2.81      ?TODO: global pdb_structures
    2.82      """
    2.83  
    2.84 -    def __init__(self, *args, **kw):
    2.85 -        self.pdb_chains = []
    2.86 -        self.pdb_files = {}
    2.87 -        self.pdb_residues = {}
    2.88 -        self.pdb_secstr = {}
    2.89 -
    2.90 -    def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
    2.91 +    def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain='A', pdb_model=0):
    2.92          """ Reads Pdb chain from file
    2.93  
    2.94          and align each Monomer with PDB.Residue (TODO)
    2.95          """
    2.96 -        name = std_id(pdb_id, pdb_chain, pdb_model)
    2.97 -        structure = get_structure(pdb_file, name)
    2.98 +        structure = get_structure(pdb_file, self.name)
    2.99          chain = structure[pdb_model][pdb_chain]
   2.100 -        self.pdb_chains.append(chain)
   2.101 -        self.pdb_residues[chain] = {}
   2.102 -        self.pdb_secstr[chain] = {}
   2.103 -        pdb_sequence = Sequence.from_pdb_chain(chain)
   2.104 -        a = alignment.Alignment.from_sequences(self, pdb_sequence)
   2.105 -        a.muscle_align()
   2.106 -        for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):
   2.107 -            if pdb_sequence.pdb_has(chain, pdb_monomer):
   2.108 -                residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
   2.109 -                self.pdb_residues[chain][monomer] = residue
   2.110 -        self.pdb_files[chain] = pdb_file
   2.111 +        self.pdb_chain = chain
   2.112 +        self.pdb_residues = {}
   2.113 +        pdb_sequence = self.__class__.from_pdb_chain(chain)
   2.114 +        Alignment = self.types.Alignment
   2.115 +        a = Alignment()
   2.116 +        a.append_sequence(self)
   2.117 +        a.append_sequence(pdb_sequence)
   2.118 +        a.process(processors.Muscle())
   2.119 +        for monomer, pdb_monomer in a.columns_as_lists():
   2.120 +            residue = pdb_sequence.pdb_residues[pdb_monomer]
   2.121 +            self.pdb_residues[monomer] = residue
   2.122  
   2.123      def pdb_unload(self):
   2.124          """ Delete all pdb-connected links """
   2.125 -        #~ gc.get_referrers(self.pdb_chains[0])
   2.126 -        self.pdb_chains = []
   2.127 -        self.pdb_residues = {}
   2.128 -        self.pdb_secstr = {} # FIXME
   2.129 -        self.pdb_files = {} # FIXME
   2.130 +        del self.pdb_chain
   2.131 +        del self.pdb_residues
   2.132  
   2.133 -    @staticmethod
   2.134 -    def from_pdb_chain(chain):
   2.135 +    @classmethod
   2.136 +    def from_pdb_chain(cls, chain):
   2.137          """ Returns Sequence with Monomers with link to Bio.PDB.Residue
   2.138  
   2.139          chain is Bio.PDB.Chain
   2.140          """
   2.141          cappbuilder = CaPPBuilder()
   2.142          peptides = cappbuilder.build_peptides(chain)
   2.143 +        Sequence = cls
   2.144 +        Monomer = Sequence.types.Monomer
   2.145          sequence = Sequence()
   2.146 -        sequence.pdb_chains = [chain]
   2.147 -        sequence.pdb_residues[chain] = {}
   2.148 -        sequence.pdb_secstr[chain] = {}
   2.149 +        sequence.pdb_chain = chain
   2.150 +        sequence.pdb_residues = {}
   2.151          for peptide in peptides:
   2.152              for ca_atom in peptide.get_ca_list():
   2.153                  residue = ca_atom.get_parent()
   2.154 -                monomer = AminoAcidType.from_pdb_residue(residue).instance()
   2.155 -                sequence.pdb_residues[chain][monomer] = residue
   2.156 -                sequence.monomers.append(monomer)
   2.157 +                monomer = Monomer.from_code3(residue.get_resname())
   2.158 +                sequence.pdb_residues[monomer] = residue
   2.159 +                sequence.append(monomer)
   2.160          return sequence
   2.161  
   2.162 -    def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
   2.163 +    def auto_pdb(self, conformity=None, dir='/tmp', mask='%s.pdb'):
   2.164          """ Adds pdb information to each monomer
   2.165  
   2.166          Returns if information has been successfully added
   2.167 @@ -165,31 +108,31 @@
   2.168  
   2.169          id-format lava flow
   2.170          """
   2.171 -        if not conformity_info:
   2.172 -            path = os.path.join(pdb_directory, self.name)
   2.173 -            if os.path.exists(path) and os.path.getsize(path):
   2.174 -                match = pdb_id_parse(self.name)
   2.175 -                self.pdb_chain_add(open(path), match['code'],
   2.176 -                match['chain'], match['model'])
   2.177 -            else:
   2.178 -                match = pdb_id_parse(self.name)
   2.179 -                if match:
   2.180 -                    code = match['code']
   2.181 -                    pdb_filename = config.pdb_dir % code
   2.182 -                    if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
   2.183 -                        url = config.pdb_url % code
   2.184 -                        print "Download %s" % url
   2.185 -                        pdb_file = open(pdb_filename, 'w')
   2.186 -                        data = urllib2.urlopen(url).read()
   2.187 -                        pdb_file.write(data)
   2.188 -                        pdb_file.close()
   2.189 -                        print "Save %s" % pdb_filename
   2.190 -                    pdb_file = open(pdb_filename)
   2.191 -                    self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
   2.192 +        match = pdb_id_parse(self.name)
   2.193 +        if not match:
   2.194 +            return False
   2.195 +        code = match['code']
   2.196 +        chain = match['chain']
   2.197 +        model = match['model']
   2.198 +        user_path = os.path.join(dir, mask % self.name)
   2.199 +        path = user_path
   2.200 +        if not os.path.exists(path) or not os.path.getsize(path):
   2.201 +            path = config.pdb_path % code
   2.202 +        if not os.path.exists(path) or not os.path.getsize(path):
   2.203 +            url = config.pdb_url % code
   2.204 +            print "Download %s" % url
   2.205 +            path = user_path
   2.206 +            data = urllib2.urlopen(url).read()
   2.207 +            pdb_file = open(path, 'w')
   2.208 +            pdb_file.write(data)
   2.209 +            pdb_file.close()
   2.210 +            print "Save %s" % path
   2.211 +        self.set_pdb_chain(open(path), code, chain, model)
   2.212 +        return True
   2.213  
   2.214      def pdb_save(self, out_filename, pdb_chain):
   2.215          """ Saves pdb_chain to out_file """
   2.216 -        class GlySelect(Select):
   2.217 +        class MySelect(Select):
   2.218              def accept_chain(self, chain):
   2.219                  if chain == pdb_chain:
   2.220                      return 1
   2.221 @@ -198,32 +141,11 @@
   2.222          io = PDBIO()
   2.223          structure = chain.get_parent()
   2.224          io.set_structure(structure)
   2.225 -        io.save(out_filename, GlySelect())
   2.226 +        io.save(out_filename, MySelect())
   2.227  
   2.228 -
   2.229 -    def pdb_add_sec_str(self, pdb_chain):
   2.230 -        """ Add secondary structure data """
   2.231 -        tmp_file = NamedTemporaryFile(delete=False)
   2.232 -        tmp_file.close()
   2.233 -        pdb_file = self.pdb_files[pdb_chain].name
   2.234 -        os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
   2.235 -        dssp, keys = make_dssp_dict(tmp_file.name)
   2.236 -        for monomer in self.monomers:
   2.237 -            if self.pdb_has(pdb_chain, monomer):
   2.238 -                residue = self.pdb_residues[pdb_chain][monomer]
   2.239 -                try:
   2.240 -                    d = dssp[(pdb_chain.get_id(), residue.get_id())]
   2.241 -                    self.pdb_secstr[pdb_chain][monomer] = d[1]
   2.242 -                except:
   2.243 -                    print "No dssp information about %s at %s" % (monomer, pdb_chain)
   2.244 -        os.unlink(tmp_file.name)
   2.245 -
   2.246 -    def pdb_has(self, chain, monomer):
   2.247 -        return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
   2.248 -
   2.249 -    def secstr_has(self, chain, monomer):
   2.250 -        return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
   2.251 -
   2.252 +    def ca_atoms(self, sequence):
   2.253 +        """ Iterates Ca-atom of monomers of this sequence """
   2.254 +        return (pdb_residues[monomer]['CA'] for monomer in self)
   2.255  
   2.256  class AlignmentMixin(base.Alignment):
   2.257      """Mixin to add 3D properties to alignments.
   2.258 @@ -232,12 +154,7 @@
   2.259      created. This class is intended to be subclassed together with one of
   2.260      Alignment classes.
   2.261      """
   2.262 -
   2.263 -    def secstr(self, sequence, pdb_chain, gap='-'):
   2.264 -        """ Returns string representing secondary structure """
   2.265 -        return ''.join([
   2.266 -        (sequence.pdb_secstr[pdb_chain][m] if sequence.secstr_has(pdb_chain, m) else gap)
   2.267 -        for m in self.body[sequence]])
   2.268 +    pass
   2.269  
   2.270  class BlockMixin(base.Block):
   2.271      """Mixin to add 3D properties to blocks.
   2.272 @@ -249,9 +166,9 @@
   2.273  
   2.274      def geometrical_cores(self, max_delta=config.delta,
   2.275      timeout=config.timeout, minsize=config.minsize,
   2.276 -    ac_new_atoms=config.ac_new_atoms,
   2.277 -    ac_count=config.ac_count):
   2.278 -        """ Returns length-sorted list of blocks, representing GCs
   2.279 +    ac_new_atoms=config.ac_new_atoms, ac_count=config.ac_count):
   2.280 +        """ Returns length-sorted list of GCs
   2.281 +        GC is set of columns
   2.282  
   2.283          * max_delta -- threshold of distance spreading
   2.284          * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
   2.285 @@ -263,50 +180,43 @@
   2.286          * ac_count -- max number of cores (including main core)
   2.287              -1 means infinity
   2.288  
   2.289 -        If more than one pdb chain for some sequence provided, consider all of them
   2.290          cost is calculated as 1 / (delta + 1)
   2.291 -
   2.292              delta in [0, +inf) => cost in (0, 1]
   2.293          """
   2.294 -        nodes = self.positions
   2.295 +        nodes = self.columns
   2.296          lines = {}
   2.297 -        for i in self.positions:
   2.298 -            for j in self.positions:
   2.299 +        for i, column1 in enumerate(self.columns):
   2.300 +            for j, column2 in enumerate(self.columns):
   2.301                  if i < j:
   2.302                      distances = []
   2.303                      for sequence in self.sequences:
   2.304 -                        for chain in sequence.pdb_chains:
   2.305 -                            m1 = self.alignment.body[sequence][i]
   2.306 -                            m2 = self.alignment.body[sequence][j]
   2.307 -                            if m1 and m2:
   2.308 -                                r1 = sequence.pdb_residues[chain][m1]
   2.309 -                                r2 = sequence.pdb_residues[chain][m2]
   2.310 -                                ca1 = r1['CA']
   2.311 -                                ca2 = r2['CA']
   2.312 -                                d = ca1 - ca2 # Bio.PDB feature
   2.313 -                                distances.append(d)
   2.314 +                        if sequence in column1 and sequence in column2:
   2.315 +                            m1 = column1[sequence]
   2.316 +                            m2 = column2[sequence]
   2.317 +                            r1 = sequence.pdb_residues[m1]
   2.318 +                            r2 = sequence.pdb_residues[m2]
   2.319 +                            ca1 = r1['CA']
   2.320 +                            ca2 = r2['CA']
   2.321 +                            d = ca1 - ca2 # Bio.PDB feature
   2.322 +                            distances.append(d)
   2.323                      if len(distances) >= 2:
   2.324                          delta = max(distances) - min(distances)
   2.325                          if delta <= max_delta:
   2.326 -                            lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
   2.327 +                            line = Graph.line(column1, column2)
   2.328 +                            lines[line] = 1.0 / (1.0 + max_delta)
   2.329          graph = Graph(nodes, lines)
   2.330          cliques = graph.cliques(timeout=timeout, minsize=minsize)
   2.331          GCs = []
   2.332          for clique in cliques:
   2.333              for GC in GCs:
   2.334 -                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
   2.335 +                new_nodes = clique - GC
   2.336 +                if len(new) < ac_new_atoms * len(clique):
   2.337                      break
   2.338 +            else:
   2.339 +                GCs.append(clique)
   2.340 +        return GCs
   2.341  
   2.342 -    def ca_atoms(self, sequence, pdb_chain):
   2.343 -        """ Iterates Ca-atom of monomers of this sequence from this block  """
   2.344 -        return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())
   2.345  
   2.346 -    def sequences_chains(self):
   2.347 -        """ Iterates pairs (sequence, chain) """
   2.348 -        for sequence in self.alignment.sequences:
   2.349 -            if sequence in self.sequences:
   2.350 -                for chain in sequence.pdb_chains:
   2.351 -                    yield (sequence, chain)
   2.352  
   2.353      def superimpose(self):
   2.354          """ Superimpose all pdb_chains in this block """

     3.1 --- a/geometrical_core/geometrical-core	Wed Feb 02 15:37:15 2011 +0300
     3.2 +++ b/geometrical_core/geometrical-core	Wed Feb 02 15:50:20 2011 +0300
     3.3 @@ -4,13 +4,13 @@
     3.4  version 2.0
     3.5  """
     3.6  
     3.7 -from allpy import config, alignment, block
     3.8 -Block = block.Block
     3.9 -Alignment = alignment.Alignment
    3.10  import argparse
    3.11  import os
    3.12  from tempfile import NamedTemporaryFile
    3.13  
    3.14 +from allpy import config
    3.15 +from protein_pdb import Alignment, Block, Monomer, Sequence
    3.16 +
    3.17  r = argparse.FileType('r')
    3.18  w = argparse.FileType('w')
    3.19  c = config
    3.20 @@ -81,7 +81,7 @@
    3.21  2) -1 timeout means running Bron-Kerbosh algorithm without timeout
    3.22  3) Alternative core new aa part: read documentation for more information
    3.23  4) Superposition core identifier: main core is 0, first alternative is 1 etc. ''',
    3.24 -formatter_class=argparse.ArgumentDefaultsHelpFormatter, 
    3.25 +formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    3.26  #~ argument_default=argparse.SUPPRESS,
    3.27  )
    3.28  
    3.29 @@ -102,80 +102,64 @@
    3.30  
    3.31  tmp_file = None
    3.32  
    3.33 -try:
    3.34 -    args = p.parse_args()
    3.35 -    
    3.36 -    if not args.l and not args.f and not args.g and not args.p and not args.s:
    3.37 -        print 'Error: no output file provided'
    3.38 -        exit()
    3.39 -    if not (args.p and args.s) and not (not args.p and not args.s):
    3.40 -        print 'Error: provide both pdb and spt file or none of them'
    3.41 -        exit()
    3.42 -    
    3.43 -    try:
    3.44 -        alignment = Alignment(args.i)
    3.45 -    except:
    3.46 -        args.i.close()
    3.47 -        tmp_file = NamedTemporaryFile(delete=False)
    3.48 -        tmp_file.close()
    3.49 -        os.system('seqret %(msf)s %(fasta)s' % \
    3.50 -        {'msf': args.i.name, 'fasta': tmp_file.name})
    3.51 -        args.i = open(tmp_file.name)
    3.52 -        alignment = Alignment(args.i)
    3.53 -    
    3.54 -    block = Block(alignment)
    3.55 -    GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t, 
    3.56 -        minsize=args.t, ac_new_atoms=args.n, ac_count=args.a)
    3.57 -    
    3.58 -    if not GCs:
    3.59 -        print 'No cores! Try to change parameters'
    3.60 -        exit()
    3.61 -    
    3.62 -    if args.l:
    3.63 -        l = args.l
    3.64 -        
    3.65 -        l.write('Geometrical core positions for alignment %s' % args.i.name)
    3.66 -        l.write('\n\n')
    3.67 -        l.write('First alignment position is 0')
    3.68 -        
    3.69 -        for i, GC in enumerate(GCs):
    3.70 -            l.write('\n\n')
    3.71 -            if i == 0:
    3.72 -                l.write('Geometrical core:')
    3.73 -            else:
    3.74 -                l.write('Alternative geometrical core %i:' % i)
    3.75 -            l.write('\n')
    3.76 -            l.write(', '.join(str(n) for n in GC.positions))
    3.77 -        l.close()
    3.78 -    
    3.79 -    if args.g and not args.f:
    3.80 -        args.f = args.g
    3.81 -    
    3.82 -    if args.f:
    3.83 -        args.i.seek(0)
    3.84 -        f = args.f
    3.85 -        f.write(args.i.read()) # write sequences
    3.86 -        
    3.87 -        # write GCs
    3.88 -        for i, GC in enumerate(GCs):
    3.89 -            f.write('\n\n')
    3.90 -            if i == 0:
    3.91 -                GC.save_xstring(f, 'GC', 'Main geometrical core')
    3.92 -            else:
    3.93 -                GC.save_xstring(f, 'AGC_%i' % i, 'Alternative geometrical core %i' % i)
    3.94 -        f.close()
    3.95 +#try:
    3.96 +args = p.parse_args()
    3.97  
    3.98 -        
    3.99 -    if args.g:
   3.100 -        args.g.close()
   3.101 -        os.system('seqret %(fasta)s msf::%(msf)s' % \
   3.102 -        {'fasta': args.f.name, 'msf': args.g.name})
   3.103 -    
   3.104 -    
   3.105 -except Exception, t:
   3.106 -    print t
   3.107 +if not args.l and not args.f and not args.g and not args.p and not args.s:
   3.108 +    print 'Error: no output file provided'
   3.109 +    exit()
   3.110 +if not (args.p and args.s) and not (not args.p and not args.s):
   3.111 +    print 'Error: provide both pdb and spt file or none of them'
   3.112      exit()
   3.113  
   3.114 +try:
   3.115 +    alignment = Alignment().append_file(args.i)
   3.116 +except:
   3.117 +    args.i.close()
   3.118 +    tmp_file = NamedTemporaryFile('r', delete=False)
   3.119 +    os.system('seqret %(msf)s %(fasta)s' % \
   3.120 +    {'msf': args.i.name, 'fasta': tmp_file.name})
   3.121 +    args.i = tmp_file
   3.122 +    alignment = Alignment().append_file(args.i)
   3.123 +
   3.124 +block = Block.from_alignment(alignment)
   3.125 +for sequence in block.sequences:
   3.126 +    sequence.auto_pdb()
   3.127 +GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t,
   3.128 +    minsize=args.t, ac_new_atoms=args.n, ac_count=args.a)
   3.129 +
   3.130 +column2pos = {}
   3.131 +for i, column in enumerate(block.columns):
   3.132 +    column2pos[column] = i
   3.133 +
   3.134 +if not GCs:
   3.135 +    print 'No cores! Try to change parameters'
   3.136 +    exit()
   3.137 +
   3.138 +if args.l:
   3.139 +    l = args.l
   3.140 +
   3.141 +    l.write('Geometrical core positions for alignment %s' % args.i.name)
   3.142 +    l.write('\n\n')
   3.143 +    l.write('First alignment position is 0')
   3.144 +
   3.145 +    for i, GC in enumerate(GCs):
   3.146 +        GC = [column2pos[column] for column in GC]
   3.147 +        GC.sort()
   3.148 +        l.write('\n\n')
   3.149 +        if i == 0:
   3.150 +            l.write('Geometrical core:')
   3.151 +        else:
   3.152 +            l.write('Alternative geometrical core %i:' % i)
   3.153 +        l.write('\n')
   3.154 +        l.write(', '.join(str(n) for n in GC))
   3.155 +    l.close()
   3.156 +
   3.157 +if args.g and not args.f:
   3.158 +    args.f = args.g
   3.159 +
   3.160 +    # FIXME to fasta and to msf
   3.161 +
   3.162  if tmp_file:
   3.163 -    os.unlink(tmp_file)
   3.164 +    os.unlink(tmp_file.name)
   3.165  

     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/geometrical_core/protein_pdb.py	Wed Feb 02 15:50:20 2011 +0300
     4.3 @@ -0,0 +1,17 @@
     4.4 +
     4.5 +import sys
     4.6 +
     4.7 +from allpy import protein, pdb
     4.8 +protein_pdb = sys.modules[__name__]
     4.9 +
    4.10 +class Sequence(protein.Sequence, pdb.SequenceMixin):
    4.11 +    types = protein_pdb
    4.12 +
    4.13 +class Alignment(protein.Alignment, pdb.AlignmentMixin):
    4.14 +    types = protein_pdb
    4.15 +
    4.16 +class Block(protein.Block, pdb.BlockMixin):
    4.17 +    types = protein_pdb
    4.18 +
    4.19 +class Monomer(protein.Monomer):
    4.20 +    types = protein_pdb