allpy
changeset 395:5e666e28c348
merge
author | boris <bnagaev@gmail.com> |
---|---|
date | Thu, 03 Feb 2011 13:18:05 +0300 |
parents | ce77534f6594 7184863832b9 |
children | 3a802644d798 |
files | |
diffstat | 2 files changed, 47 insertions(+), 19 deletions(-) [+] |
line diff
1.1 --- a/allpy/base.py Thu Feb 03 12:24:54 2011 +0300 1.2 +++ b/allpy/base.py Thu Feb 03 13:18:05 2011 +0300 1.3 @@ -158,34 +158,31 @@ 1.4 If sequence is too short, pad it with gaps on the right. 1.5 """ 1.6 self.sequences.append(sequence) 1.7 - for i, monomer in enumerate(sequence): 1.8 - self._column_at(i)[sequence] = monomer 1.9 + self._pad_to_width(len(sequence)) 1.10 + for column, monomer in zip(self.columns, sequence): 1.11 + column[sequence] = monomer 1.12 return self 1.13 1.14 def append_row_from_string(self, string, 1.15 name='', description='', source='', gaps=default_gaps): 1.16 """Add row from a string of one-letter codes and gaps. Return self.""" 1.17 Sequence = self.types.Sequence 1.18 - not_gap = lambda (i, char): char not in gaps 1.19 without_gaps = util.remove_each(string, gaps) 1.20 sequence = Sequence.from_string(without_gaps, name, description, source) 1.21 - # The following line has some simple magic: 1.22 - # 1. attach natural numbers to monomers 1.23 - # 2. delete gaps 1.24 - # 3. attach numbers again 1.25 - # This way we have a pair of numbers attached to monomer: 1.26 - # - it's position in alignment (the first attached number, j) 1.27 - # - it's position in sequence (the second attached number, i) 1.28 - for i, (j, char) in enumerate(filter(not_gap, enumerate(string))): 1.29 - self._column_at(j)[sequence] = sequence[i] 1.30 + self._pad_to_width(len(string)) 1.31 + non_gap_columns = [column 1.32 + for column, char in zip(self.columns, string) 1.33 + if char not in gaps 1.34 + ] 1.35 + for monomer, column in zip(sequence, non_gap_columns): 1.36 + column[sequence] = monomer 1.37 self.sequences.append(sequence) 1.38 return self 1.39 1.40 - def _column_at(self, n): 1.41 - """Return column by index. Create new columns if required.""" 1.42 - for i in range(len(self.columns), n + 1): 1.43 + def _pad_to_width(self, n): 1.44 + """Pad alignment with empty columns on the right to width n.""" 1.45 + for i in range(len(self.columns), n): 1.46 self.columns.append(Column()) 1.47 - return self.columns[n] 1.48 1.49 def append_file(self, file, format='fasta', gaps=default_gaps): 1.50 """Append sequences from file to alignment. Return self. 1.51 @@ -351,9 +348,12 @@ 1.52 not_gap = lambda (a,b): a != None 1.53 for sequence, new_row in zip(self.sequences, new.rows_as_lists()): 1.54 assert len(sequence) == len(new_row.sequence) 1.55 - zipped = zip(sequence, filter(not_gap, enumerate(new_row))) 1.56 - for monomer, (i, _) in zipped: 1.57 - self._column_at(i)[sequence] = monomer 1.58 + non_gap_columns = [column 1.59 + for column, monomer in zip(self.columns, new_row) 1.60 + if monomer 1.61 + ] 1.62 + for monomer, column in zip(sequence, non_gap_columns): 1.63 + column[sequence] = monomer 1.64 1.65 def _replace_contents(self, new, copy_descriptions, copy_contents): 1.66 """Replace alignment contents with those of other alignment."""
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/test/freqs.py Thu Feb 03 13:18:05 2011 +0300 2.3 @@ -0,0 +1,28 @@ 2.4 +"""Read alignment on stdin. Print CSV table of letter frequences on stdout. 2.5 +""" 2.6 +from allpy import protein 2.7 +from allpy.data import codes 2.8 +import sys 2.9 + 2.10 +sys.stderr.write(__doc__) 2.11 + 2.12 +def freq(monomer): 2.13 + amount = freqs.get(monomer) 2.14 + if amount: 2.15 + return 100.0 * amount / width 2.16 + return "" 2.17 + 2.18 +aln = protein.Alignment().append_file(sys.stdin) 2.19 +monomers = [code1 for code1, modified, _, _ in codes.protein if not modified] 2.20 +monomers += ["-"] 2.21 +width = len(aln.sequences) 2.22 +print ", ".join(map(str, monomers)) 2.23 +for column in aln.columns_as_lists(): 2.24 + freqs = {} 2.25 + for monomer in column: 2.26 + if monomer: 2.27 + monomer = monomer.code1 2.28 + else: 2.29 + monomer = "-" 2.30 + freqs[monomer] = freqs.get(monomer, 0) + 1 2.31 + print ", ".join(map(str, map(freq, monomers)))