Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/raw-rev/38888b46c342
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 07:27:55 2012
Кодировка:

# HG changeset patch
# User Daniil Alexeyevsky
# Date 1292535920 -10800
# Node ID 38888b46c342b9f51d12d4cd92947da07c7ad2c9
# Parent 2a6c2cffa891feca550fd7f18a9ad461600150c2
Added usecases from wiki to test/ directory

diff -r 2a6c2cffa891 -r 38888b46c342 test/usecase1.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/usecase1.py Fri Dec 17 00:45:20 2010 +0300
@@ -0,0 +1,35 @@
+from allpy import protein
+
+# Create sequences from string representation of sequence body
+sequence_1 = allpy.protein.Sequence.from_string("mkstf", name="E2E4")
+sequence_2 = allpy.protein.Sequence.from_string("mstkfff")
+
+# Create alignment from sequences
+alignment = protein.Alignment()
+alignment.append_sequence(sequence_1)
+alignment.append_sequence(sequence_2)
+alignment.realign("muscle")
+
+# For each sequence, print number of gaps and non-gaps in alignment
+for row in alignment.rows():
+ gaps = 0
+ monomers = 0
+ for column in alignment.columns:
+ if column in row:
+ monomers += 1
+ else:
+ gaps += 1
+ print "%s: %s gaps, %s non-gaps" % (row.sequence.name, gaps, monomers)
+
+# Print number of gaps in each column
+gaps = []
+for column in alignment.columns:
+ column_gaps = 0
+ for sequence in alignment.sequences:
+ if sequence not in column:
+ column_gaps += 1
+ gaps.append(column_gaps)
+print " ".join(map(str, gaps))
+
+# Write alignment to file
+alingment.to_fasta(open("new_file.fasta", "w"))
diff -r 2a6c2cffa891 -r 38888b46c342 test/usecase2.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/usecase2.py Fri Dec 17 00:45:20 2010 +0300
@@ -0,0 +1,72 @@
+# Fragments are in pair_repeat.fasta
+from allpy import dna
+width = 15
+treshold = 14
+
+def my_column_mark(column, threshold):
+ """Helper to mark column (given as dict) by identity."""
+ count = {}
+ for sequence, monomer in column:
+ code = monomer.code1
+ count[code] = count.get(code, 0) + 1
+ for code in count:
+ if count[code] > threshold:
+ return "+"
+ return "-"
+
+def my_pair_mark(column_as_list):
+ """Helper to mark column of 2 sequences (given as list) by identity."""
+ if column[0] is None or column[1] is None:
+ return "-"
+ if column[0].code1 == column[0].code1:
+ return "+"
+ return "-"
+
+def find_runs(markup):
+ """Fund long positive runs.
+
+ This obscure and probably broken function has nothing to do with allpy,
+ so it's presence in the example is unnecessary.
+ """
+ position = 0
+ count = 0
+ plus_positions=[]
+ for i in range(len(column_marks)):
+ position += 1
+ if position < width :
+ if column_marks[i]=="+":
+ count += 1
+ column_window_marks += "-"
+ continue
+ if position > width:
+ if column_marks[i-width] == "+":
+ count -=1
+ if count >= treshold:
+ plus_positions.append(position)
+ if len(positions)==0:
+ raise Exception("No blocks in alignment")
+ blocks=[]
+ start = positions[0]-width + 1
+ stop = positions[0]
+
+ for p in positions[1:]:
+ if p == stop +1:
+ stop = p
+ cuntinue
+ blocks.append((start,stop))
+ start = p - width + 1
+ stop = p
+
+def main():
+ alignment = dna.Alignment.from_fasta(open("pair_repeat.fasta"))
+ if alignment.height != 2:
+ raise Exception("Input must have exactly 2 sequences!")
+ alignment.realign("needle", gap_open = 0)
+ markup = alignment.map_columns(my_pair_mark)
+ print find_runs(markup)
+
+try:
+ main()
+except Exception, e:
+ print "An error has occured:", e
+
diff -r 2a6c2cffa891 -r 38888b46c342 test/usecase3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/usecase3.py Fri Dec 17 00:45:20 2010 +0300
@@ -0,0 +1,36 @@
+from allpy import protein
+alignment = protein.Alignment.from_fasta(open("aln.fasta"))
+#conservative = [(10,20), (40,50)]
+conservative = [(0,6),(18,37)]
+
+def ranges_to_markup(ranges):
+ """Convert list of ranges to line of markup.
+
+ This has nothing to do with allpy.
+ """
+ markup = ["-"] * len(alignment.columns)
+ for begin, end in ranges:
+ for i in range(begin, end+1):
+ markup[i] = "+"
+ return "".join(markup)
+
+def markup_to_blocks(markup):
+ """Convert markup line to a bunch of blocks, one for each sequential run."""
+ current = None
+ blocks = {}
+ for mark, column in zip(markup, alignment.columns):
+ if mark != current:
+ block = protein.Block.from_alignment(alignment, columns=[])
+ blocks[mark] = blocks.get(mark, []) + [block]
+ current = mark
+ blocks[mark][-1].columns.append(column)
+ return blocks
+
+def main():
+ markup = ranges_to_markup(conservative)
+ blocks = markup_to_blocks(markup)
+ for block in blocks["-"]:
+ block.flush_left()
+ alignment.to_fasta(open("output.fasta", "w"))
+
+main()