allpy

changeset 175:fb3164f03984
Project --> Alignment
author: boris <bnagaev@gmail.com>
date: Wed, 03 Nov 2010 19:12:23 +0300
parents: afe1db2a19a2
children: 6dfc736f37a9
files: geometrical_core/geometrical_core.py lib/block.py lib/project.py lib/sequence.py sec_str/sec_str.py test/test.py
diffstat: 6 files changed, 47 insertions(+), 47 deletions(-) [+]
[-]

geometrical_core/geometrical_core.py 10

lib/block.py 36

lib/project.py 28

lib/sequence.py 4

sec_str/sec_str.py 12

test/test.py 4
     1.1 --- a/geometrical_core/geometrical_core.py	Wed Nov 03 19:08:20 2010 +0300
     1.2 +++ b/geometrical_core/geometrical_core.py	Wed Nov 03 19:12:23 2010 +0300
     1.3 @@ -3,9 +3,9 @@
     1.4  version 2.0
     1.5  """
     1.6  
     1.7 -from allpy.lib import config, project, block
     1.8 +from allpy.lib import config, alignment, block
     1.9  Block = block.Block
    1.10 -Project = project.Project
    1.11 +Alignment = alignment.Alignment
    1.12  import argparse
    1.13  import os
    1.14  from tempfile import NamedTemporaryFile
    1.15 @@ -112,7 +112,7 @@
    1.16          exit()
    1.17      
    1.18      try:
    1.19 -        project = Project(args.i)
    1.20 +        alignment = Alignment(args.i)
    1.21      except:
    1.22          args.i.close()
    1.23          tmp_file = NamedTemporaryFile(delete=False)
    1.24 @@ -120,9 +120,9 @@
    1.25          os.system('seqret %(msf)s %(fasta)s' % \
    1.26          {'msf': args.i.name, 'fasta': tmp_file.name})
    1.27          args.i = open(tmp_file.name)
    1.28 -        project = Project(args.i)
    1.29 +        alignment = Alignment(args.i)
    1.30      
    1.31 -    block = Block(project)
    1.32 +    block = Block(alignment)
    1.33      GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t, 
    1.34          minsize=args.t, ac_new_atoms=args.n, ac_count=args.a)
    1.35      

     2.1 --- a/lib/block.py	Wed Nov 03 19:08:20 2010 +0300
     2.2 +++ b/lib/block.py	Wed Nov 03 19:12:23 2010 +0300
     2.3 @@ -2,7 +2,7 @@
     2.4  
     2.5  import sys
     2.6  
     2.7 -import project
     2.8 +import alignment
     2.9  import sequence
    2.10  import monomer
    2.11  import config
    2.12 @@ -16,32 +16,32 @@
    2.13      """ Block of alignment
    2.14      
    2.15      Mandatory data:
    2.16 -    *   self.project -- project object, which the block belongs to
    2.17 +    *   self.alignment -- alignment object, which the block belongs to
    2.18      *   self.sequences - set of sequence objects that contain monomers
    2.19          and/or gaps, that constitute the block
    2.20 -    *   self.positions -- list of positions of the project.body that
    2.21 +    *   self.positions -- list of positions of the alignment.body that
    2.22          are included in the block; position[i+1] is always to the right from position[i]
    2.23      
    2.24      Don't change self.sequences -- it may be a link to other block.sequences
    2.25      
    2.26      How to create a new block:
    2.27 -    >>> import project
    2.28 +    >>> import alignment
    2.29      >>> import block
    2.30 -    >>> proj = project.Project(open("test.fasta"))
    2.31 +    >>> proj = alignment.Alignment(open("test.fasta"))
    2.32      >>> block1 = block.Block(proj)
    2.33      """
    2.34      
    2.35 -    def __init__(self, project, sequences=None, positions=None):
    2.36 -        """ Builds new block from project
    2.37 +    def __init__(self, alignment, sequences=None, positions=None):
    2.38 +        """ Builds new block from alignment
    2.39          
    2.40          if sequences==None, all sequences are used
    2.41          if positions==None, all positions are used
    2.42          """
    2.43          if sequences == None:
    2.44 -            sequences = set(project.sequences) # copy
    2.45 +            sequences = set(alignment.sequences) # copy
    2.46          if positions == None:
    2.47 -            positions = range(len(project))
    2.48 -        self.project = project
    2.49 +            positions = range(len(alignment))
    2.50 +        self.alignment = alignment
    2.51          self.sequences = sequences
    2.52          self.positions = positions
    2.53      
    2.54 @@ -52,7 +52,7 @@
    2.55          are made.
    2.56          """
    2.57          for sequence in self.sequences:
    2.58 -            alignment_monomers = self.project.body[sequence]
    2.59 +            alignment_monomers = self.alignment.body[sequence]
    2.60              block_monomers = [alignment_monomers[i] for i in self.positions]
    2.61              string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
    2.62              save_fasta(out_file, string, sequence.name, sequence.description, long_line)
    2.63 @@ -84,8 +84,8 @@
    2.64                      distances = []
    2.65                      for sequence in self.sequences:
    2.66                          for chain in sequence.pdb_chains:
    2.67 -                            m1 = self.project.body[sequence][i]
    2.68 -                            m2 = self.project.body[sequence][j]
    2.69 +                            m1 = self.alignment.body[sequence][i]
    2.70 +                            m2 = self.alignment.body[sequence][j]
    2.71                              if m1 and m2:
    2.72                                  r1 = sequence.pdb_residues[chain][m1]
    2.73                                  r2 = sequence.pdb_residues[chain][m2]
    2.74 @@ -105,7 +105,7 @@
    2.75                  if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
    2.76                      break
    2.77              else:
    2.78 -                GCs.append(Block(self.project, self.sequences, clique))
    2.79 +                GCs.append(Block(self.alignment, self.sequences, clique))
    2.80                  if ac_count != -1 and len(GCs) >= ac_count:
    2.81                      break
    2.82          return GCs
    2.83 @@ -113,9 +113,9 @@
    2.84      def xstring(self, x='X', gap='-'):
    2.85          """ Returns string consisting of gap chars and chars x at self.positions
    2.86          
    2.87 -        Length of returning string = length of project
    2.88 +        Length of returning string = length of alignment
    2.89          """
    2.90 -        monomers = [False] * len(self.project)
    2.91 +        monomers = [False] * len(self.alignment)
    2.92          for i in self.positions:
    2.93              monomers[i] = True
    2.94          return ''.join([x if m else gap for m in monomers])
    2.95 @@ -126,7 +126,7 @@
    2.96      
    2.97      def monomers(self, sequence):
    2.98          """ Iterates monomers of this sequence from this block """
    2.99 -        alignment_sequence = self.project.body[sequence]
   2.100 +        alignment_sequence = self.alignment.body[sequence]
   2.101          return (alignment_sequence[i] for i in self.positions)
   2.102      
   2.103      def ca_atoms(self, sequence, pdb_chain):
   2.104 @@ -135,7 +135,7 @@
   2.105      
   2.106      def sequences_chains(self):
   2.107          """ Iterates pairs (sequence, chain) """
   2.108 -        for sequence in self.project.sequences:
   2.109 +        for sequence in self.alignment.sequences:
   2.110              if sequence in self.sequences:
   2.111                  for chain in sequence.pdb_chains:
   2.112                      yield (sequence, chain)

     3.1 --- a/lib/project.py	Wed Nov 03 19:08:20 2010 +0300
     3.2 +++ b/lib/project.py	Wed Nov 03 19:12:23 2010 +0300
     3.3 @@ -15,13 +15,13 @@
     3.4  import block
     3.5  from fasta import save_fasta
     3.6  
     3.7 -class Project(object):
     3.8 +class Alignment(object):
     3.9      """ Alignment
    3.10      
    3.11      Mandatory data:
    3.12      *   sequences -- list of Sequence objects. Sequences don't contain gaps
    3.13           - see sequence.py module
    3.14 -    *   alignment -- dict 
    3.15 +    *   body -- dict 
    3.16          {<Sequence object>:[<Monomer object>,None,<Monomer object>]}
    3.17          keys are the Sequence objects, values are the lists, which
    3.18          contain monomers of those sequences or None for gaps in the
    3.19 @@ -32,10 +32,10 @@
    3.20      def __init__(self, *args):
    3.21          """overloaded constructor
    3.22  
    3.23 -        Project() -> new empty Project
    3.24 -        Project(sequences, body) -> new Project with sequences and
    3.25 +        Alignment() -> new empty Alignment
    3.26 +        Alignment(sequences, body) -> new Alignment with sequences and
    3.27              body initialized from arguments
    3.28 -        Project(fasta_file) -> new Project, read body and sequences
    3.29 +        Alignment(fasta_file) -> new Alignment, read body and sequences
    3.30               from fasta file 
    3.31  
    3.32          """
    3.33 @@ -46,7 +46,7 @@
    3.34              self.sequences=[]
    3.35              self.body={}
    3.36          else:
    3.37 -            self.sequences, self.body = Project.from_fasta(args[0])
    3.38 +            self.sequences, self.body = Alignment.from_fasta(args[0])
    3.39  
    3.40      def __len__(self):
    3.41          """ Returns width, ie length of each sequence with gaps """
    3.42 @@ -61,7 +61,7 @@
    3.43  
    3.44          For every (row, column) in alignment the percentage of the exactly
    3.45          same residue in the same column in the alignment is calculated.
    3.46 -        The data structure is just like the Project.body, but istead of 
     3.47 +        The data structure is just like the Alignment.body, but instead of 
    3.48          monomers it contains float percentages.
    3.49          """
    3.50          # Oh, God, that's awful! Absolutely not understandable.
    3.51 @@ -101,8 +101,8 @@
    3.52          
    3.53          monomer_kind is class, inherited from MonomerType
    3.54          
    3.55 -        >>> import project
    3.56 -        >>> sequences,body=project.Project.from_fasta(open("test.fasta"))       
    3.57 +        >>> import alignment
    3.58 +        >>> sequences,body=alignment.Alignment.from_fasta(open("test.fasta"))       
    3.59          """
    3.60          import re
    3.61  
    3.62 @@ -156,13 +156,13 @@
    3.63          
    3.64          Add None's to right end to make equal lengthes of alignment sequences 
    3.65          """
    3.66 -        project = Project()
    3.67 -        project.sequences = sequences
    3.68 +        alignment = Alignment()
    3.69 +        alignment.sequences = sequences
    3.70          max_length = max(len(sequence) for sequence in sequences)
    3.71          for sequence in sequences:
    3.72              gaps_count = max_length - len(sequence)
    3.73 -            project.body[sequence] = sequence.monomers + [None] * gaps_count
    3.74 -        return project
    3.75 +            alignment.body[sequence] = sequence.monomers + [None] * gaps_count
    3.76 +        return alignment
    3.77      
    3.78      def save_fasta(self, out_file, long_line=70, gap='-'):
    3.79          """ Saves alignment to given file
    3.80 @@ -181,7 +181,7 @@
    3.81          self.save_fasta(tmp_file)
    3.82          tmp_file.close()
    3.83          os.system("muscle -in %(tmp)s -out %(tmp)s" % {'tmp': tmp_file.name})
    3.84 -        sequences, body = Project.from_fasta(open(tmp_file.name))
    3.85 +        sequences, body = Alignment.from_fasta(open(tmp_file.name))
    3.86          for sequence in self.sequences:
    3.87              try:
    3.88                  new_sequence = [i for i in sequences if sequence==i][0]

     4.1 --- a/lib/sequence.py	Wed Nov 03 19:08:20 2010 +0300
     4.2 +++ b/lib/sequence.py	Wed Nov 03 19:12:23 2010 +0300
     4.3 @@ -5,7 +5,7 @@
     4.4  from Bio.PDB import CaPPBuilder, PDBIO
     4.5  from Bio.PDB.DSSP import make_dssp_dict
     4.6  from allpy_pdb import std_id, pdb_id_parse, get_structure
     4.7 -import project
     4.8 +import alignment
     4.9  import sys
    4.10  import config
    4.11  import os.path
    4.12 @@ -82,7 +82,7 @@
    4.13          self.pdb_residues[chain] = {}
    4.14          self.pdb_secstr[chain] = {}
    4.15          pdb_sequence = Sequence.from_pdb_chain(chain)
    4.16 -        alignment = project.Project.from_sequences(self, pdb_sequence)
    4.17 +        alignment = alignment.Alignment.from_sequences(self, pdb_sequence)
    4.18          alignment.muscle_align()
    4.19          for monomer, pdb_monomer in alignment.column(sequence=pdb_sequence, original=self):
    4.20              if pdb_sequence.pdb_has(chain, pdb_monomer):

     5.1 --- a/sec_str/sec_str.py	Wed Nov 03 19:08:20 2010 +0300
     5.2 +++ b/sec_str/sec_str.py	Wed Nov 03 19:12:23 2010 +0300
     5.3 @@ -3,9 +3,9 @@
     5.4  
     5.5  """
     5.6  
     5.7 -from allpy.lib import config, project, block
     5.8 +from allpy.lib import config, alignment, block
     5.9  Block = block.Block
    5.10 -Project = project.Project
    5.11 +Alignment = alignment.Alignment
    5.12  import argparse
    5.13  import sys
    5.14  from allpy.lib.fasta import determine_long_line
    5.15 @@ -29,7 +29,7 @@
    5.16  tmp_file = None
    5.17  
    5.18  try:
    5.19 -    project = Project(args.i)
    5.20 +    alignment = Alignment(args.i)
    5.21  except:
    5.22      args.i.close()
    5.23      tmp_file = NamedTemporaryFile(delete=False)
    5.24 @@ -37,11 +37,11 @@
    5.25      os.system('seqret %(msf)s %(fasta)s' % \
    5.26      {'msf': args.i.name, 'fasta': tmp_file.name})
    5.27      args.i = open(tmp_file.name)
    5.28 -    project = Project(args.i)
    5.29 +    alignment = Alignment(args.i)
    5.30  args.i.seek(0)
    5.31  long_line = determine_long_line(args.i)
    5.32  
    5.33 -block = Block(project)
    5.34 +block = Block(alignment)
    5.35  
    5.36  args.i.seek(0)
    5.37  f = args.f
    5.38 @@ -52,7 +52,7 @@
    5.39          sequence.pdb_auto_add()
    5.40          chain = sequence.pdb_chains[0]
    5.41          sequence.pdb_add_sec_str(chain)
    5.42 -        project.save_secstr(f, sequence, chain, "%s_ss" % sequence.name, long_line=long_line)
    5.43 +        alignment.save_secstr(f, sequence, chain, "%s_ss" % sequence.name, long_line=long_line)
    5.44          sequence.pdb_unload()
    5.45      except Exception, e:
    5.46          print "Warning: can'not process %s" % sequence.name

     6.1 --- a/test/test.py	Wed Nov 03 19:08:20 2010 +0300
     6.2 +++ b/test/test.py	Wed Nov 03 19:12:23 2010 +0300
     6.3 @@ -1,9 +1,9 @@
     6.4  import sys
     6.5  
     6.6 -from allpy.lib.project import Project
     6.7 +from allpy.lib.alignment import Alignment
     6.8  from allpy.lib.block import Block
     6.9  
    6.10 -p = Project(open('test.fasta'))
    6.11 +p = Alignment(open('test.fasta'))
    6.12  print "alignment length: %i" % len(p)
    6.13  print "sequence: %s" % str(p.sequences[0])
    6.14