Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/raw-rev/5e666e28c348
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 07:10:55 2012
Кодировка:

# HG changeset patch
# User boris
# Date 1296728285 -10800
# Node ID 5e666e28c34858341cc923c4ed29c2516067bf23
# Parent ce77534f659458eb3497c64046a2192581636608
# Parent 7184863832b9dc9d71324ae6ecfeeb26c0c38476
merge

diff -r ce77534f6594 -r 5e666e28c348 allpy/base.py
--- a/allpy/base.py Thu Feb 03 12:24:54 2011 +0300
+++ b/allpy/base.py Thu Feb 03 13:18:05 2011 +0300
@@ -158,34 +158,31 @@
If sequence is too short, pad it with gaps on the right.
"""
self.sequences.append(sequence)
- for i, monomer in enumerate(sequence):
- self._column_at(i)[sequence] = monomer
+ self._pad_to_width(len(sequence))
+ for column, monomer in zip(self.columns, sequence):
+ column[sequence] = monomer
return self

def append_row_from_string(self, string,
name='', description='', source='', gaps=default_gaps):
"""Add row from a string of one-letter codes and gaps. Return self."""
Sequence = self.types.Sequence
- not_gap = lambda (i, char): char not in gaps
without_gaps = util.remove_each(string, gaps)
sequence = Sequence.from_string(without_gaps, name, description, source)
- # The following line has some simple magic:
- # 1. attach natural numbers to monomers
- # 2. delete gaps
- # 3. attach numbers again
- # This way we have a pair of numbers attached to monomer:
- # - it's position in alignment (the first attached number, j)
- # - it's position in sequence (the second attached number, i)
- for i, (j, char) in enumerate(filter(not_gap, enumerate(string))):
- self._column_at(j)[sequence] = sequence[i]
+ self._pad_to_width(len(string))
+ non_gap_columns = [column
+ for column, char in zip(self.columns, string)
+ if char not in gaps
+ ]
+ for monomer, column in zip(sequence, non_gap_columns):
+ column[sequence] = monomer
self.sequences.append(sequence)
return self

- def _column_at(self, n):
- """Return column by index. Create new columns if required."""
- for i in range(len(self.columns), n + 1):
+ def _pad_to_width(self, n):
+ """Pad alignment with empty columns on the right to width n."""
+ for i in range(len(self.columns), n):
self.columns.append(Column())
- return self.columns[n]

def append_file(self, file, format='fasta', gaps=default_gaps):
"""Append sequences from file to alignment. Return self.
@@ -351,9 +348,12 @@
not_gap = lambda (a,b): a != None
for sequence, new_row in zip(self.sequences, new.rows_as_lists()):
assert len(sequence) == len(new_row.sequence)
- zipped = zip(sequence, filter(not_gap, enumerate(new_row)))
- for monomer, (i, _) in zipped:
- self._column_at(i)[sequence] = monomer
+ non_gap_columns = [column
+ for column, monomer in zip(self.columns, new_row)
+ if monomer
+ ]
+ for monomer, column in zip(sequence, non_gap_columns):
+ column[sequence] = monomer

def _replace_contents(self, new, copy_descriptions, copy_contents):
"""Replace alignment contents with those of other alignment."""
diff -r ce77534f6594 -r 5e666e28c348 test/freqs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/freqs.py Thu Feb 03 13:18:05 2011 +0300
@@ -0,0 +1,28 @@
+"""Read alignment on stdin. Print CSV table of letter frequences on stdout.
+"""
+from allpy import protein
+from allpy.data import codes
+import sys
+
+sys.stderr.write(__doc__)
+
+def freq(monomer):
+ amount = freqs.get(monomer)
+ if amount:
+ return 100.0 * amount / width
+ return ""
+
+aln = protein.Alignment().append_file(sys.stdin)
+monomers = [code1 for code1, modified, _, _ in codes.protein if not modified]
+monomers += ["-"]
+width = len(aln.sequences)
+print ", ".join(map(str, monomers))
+for column in aln.columns_as_lists():
+ freqs = {}
+ for monomer in column:
+ if monomer:
+ monomer = monomer.code1
+ else:
+ monomer = "-"
+ freqs[monomer] = freqs.get(monomer, 0) + 1
+ print ", ".join(map(str, map(freq, monomers)))