allpy

changeset 395:5e666e28c348
merge
author: boris <bnagaev@gmail.com>
date: Thu, 03 Feb 2011 13:18:05 +0300
parents: ce77534f6594 7184863832b9
children: 3a802644d798
diffstat: 2 files changed, 47 insertions(+), 19 deletions(-) [+]
[-]

allpy/base.py 38

test/freqs.py 28 allpy/base.py 38 test/freqs.py 28
allpy/base.py 38
test/freqs.py 28
     1.1 --- a/allpy/base.py	Thu Feb 03 12:24:54 2011 +0300
     1.2 +++ b/allpy/base.py	Thu Feb 03 13:18:05 2011 +0300
     1.3 @@ -158,34 +158,31 @@
     1.4          If sequence is too short, pad it with gaps on the right.
     1.5          """
     1.6          self.sequences.append(sequence)
     1.7 -        for i, monomer in enumerate(sequence):
     1.8 -            self._column_at(i)[sequence] = monomer
     1.9 +        self._pad_to_width(len(sequence))
    1.10 +        for column, monomer in zip(self.columns, sequence):
    1.11 +            column[sequence] = monomer
    1.12          return self
    1.13  
    1.14      def append_row_from_string(self, string,
    1.15              name='', description='', source='', gaps=default_gaps):
    1.16          """Add row from a string of one-letter codes and gaps. Return self."""
    1.17          Sequence = self.types.Sequence
    1.18 -        not_gap = lambda (i, char): char not in gaps
    1.19          without_gaps = util.remove_each(string, gaps)
    1.20          sequence = Sequence.from_string(without_gaps, name, description, source)
    1.21 -        # The following line has some simple magic:
    1.22 -        # 1. attach natural numbers to monomers
    1.23 -        # 2. delete gaps
    1.24 -        # 3. attach numbers again
    1.25 -        # This way we have a pair of numbers attached to monomer:
    1.26 -        # - it's position in alignment (the first attached number, j)
    1.27 -        # - it's position in sequence (the second attached number, i)
    1.28 -        for i, (j, char) in enumerate(filter(not_gap, enumerate(string))):
    1.29 -            self._column_at(j)[sequence] = sequence[i]
    1.30 +        self._pad_to_width(len(string))
    1.31 +        non_gap_columns = [column
    1.32 +            for column, char in zip(self.columns, string)
    1.33 +            if char not in gaps
    1.34 +        ]
    1.35 +        for monomer, column in zip(sequence, non_gap_columns):
    1.36 +            column[sequence] = monomer
    1.37          self.sequences.append(sequence)
    1.38          return self
    1.39  
    1.40 -    def _column_at(self, n):
    1.41 -        """Return column by index. Create new columns if required."""
    1.42 -        for i in range(len(self.columns), n + 1):
    1.43 +    def _pad_to_width(self, n):
    1.44 +        """Pad alignment with empty columns on the right to width n."""
    1.45 +        for i in range(len(self.columns), n):
    1.46              self.columns.append(Column())
    1.47 -        return self.columns[n]
    1.48  
    1.49      def append_file(self, file, format='fasta', gaps=default_gaps):
    1.50          """Append sequences from file to alignment. Return self.
    1.51 @@ -351,9 +348,12 @@
    1.52          not_gap = lambda (a,b): a != None
    1.53          for sequence, new_row in zip(self.sequences, new.rows_as_lists()):
    1.54              assert len(sequence) == len(new_row.sequence)
    1.55 -            zipped = zip(sequence, filter(not_gap, enumerate(new_row)))
    1.56 -            for monomer, (i, _) in zipped:
    1.57 -                self._column_at(i)[sequence] = monomer
    1.58 +            non_gap_columns = [column
    1.59 +                for column, monomer in zip(self.columns, new_row)
    1.60 +                if monomer
    1.61 +            ]
    1.62 +            for monomer, column in zip(sequence, non_gap_columns):
    1.63 +                column[sequence] = monomer
    1.64  
    1.65      def _replace_contents(self, new, copy_descriptions, copy_contents):
    1.66          """Replace alignment contents with those of other alignment."""

     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/test/freqs.py	Thu Feb 03 13:18:05 2011 +0300
     2.3 @@ -0,0 +1,28 @@
     2.4 +"""Read alignment on stdin. Print CSV table of letter frequences on stdout.
     2.5 +"""
     2.6 +from allpy import protein
     2.7 +from allpy.data import codes
     2.8 +import sys
     2.9 +
    2.10 +sys.stderr.write(__doc__)
    2.11 +
    2.12 +def freq(monomer):
    2.13 +	amount = freqs.get(monomer)
    2.14 +	if amount:
    2.15 +		return 100.0 * amount / width
    2.16 +	return ""
    2.17 +
    2.18 +aln = protein.Alignment().append_file(sys.stdin)
    2.19 +monomers = [code1 for code1, modified, _, _ in codes.protein if not modified]
    2.20 +monomers += ["-"]
    2.21 +width = len(aln.sequences)
    2.22 +print ", ".join(map(str, monomers))
    2.23 +for column in aln.columns_as_lists():
    2.24 +	freqs = {}
    2.25 +	for monomer in column:
    2.26 +		if monomer:
    2.27 +			monomer = monomer.code1
    2.28 +		else:
    2.29 +			monomer = "-"
    2.30 +		freqs[monomer] = freqs.get(monomer, 0) + 1
    2.31 +	print ", ".join(map(str, map(freq, monomers)))