allpy
changeset 709:22587e0d54b6
Automated merge with ssh://kodomo.fbb.msu.ru/allpy
author | Andrei <aba@belozersky.msu.ru> |
---|---|
date | Thu, 07 Jul 2011 21:22:53 +0400 |
parents | 8b03bd2f919a 55bb5aa2c929 |
children | 9481d408ca93 |
files | |
diffstat | 3 files changed, 136 insertions(+), 61 deletions(-) [+] |
line diff
1.1 --- a/allpy/homology.py Thu Jul 07 19:32:44 2011 +0400 1.2 +++ b/allpy/homology.py Thu Jul 07 21:22:53 2011 +0400 1.3 @@ -1,4 +1,6 @@ 1.4 from allpy import protein 1.5 +from allpy import markups 1.6 + 1.7 1.8 class MonomerHomology(object): 1.9 """ Essentially, alignment is a set of monomer homology classes 1.10 @@ -36,8 +38,9 @@ 1.11 + .write_class(file,list_of_monomer_ids,monomer_homology_class_id) 1.12 + .write_block(file, block) 1.13 + .compare_with(class) 1.14 - .highest_blocks() 1.15 - .alignment_blocks(alignment) 1.16 + + .highest_blocks() 1.17 + + .alignment_blocks(alignment) 1.18 + .case_homology 1.19 """ 1.20 def __init__(self, next_class_id = 1): 1.21 self.classes = {} 1.22 @@ -347,4 +350,61 @@ 1.23 block = protein.Block.from_alignment(alignment, sequences = block_sequences, columns = block_columns) 1.24 blocks.append(block) 1.25 1.26 - return blocks 1.27 \ No newline at end of file 1.28 + return blocks 1.29 + 1.30 + @staticmethod 1.31 + def case_homology(in_file_name,out_file_name, case): 1.32 + """ Makes homology file from case sensitive input alignment 1.33 + RETURN number of classes 1.34 + """ 1.35 + try: 1.36 + f = open(in_file_name) 1.37 + except: 1.38 + raise Exception("Failed to open file %s" % fasta_file_name) 1.39 + exit() 1.40 + try: 1.41 + alignment = protein.Alignment().append_file(f) 1.42 + f.close() 1.43 + except: 1.44 + raise Exception("Failed to cteate alignment from file %s!" % fasta_file_name) 1.45 + exit() 1.46 + try: 1.47 + g = open(out_file_name, 'w') 1.48 + except: 1.49 + raise Exception("Failed to open output file %s!" % out_file_name) 1.50 + exit() 1.51 + 1.52 + # MARKUPING 1.53 + markups.AlignmentNumberMarkup(alignment) 1.54 + 1.55 + for sequence in alignment.sequences: 1.56 + markups.SequenceNumberMarkup(sequence) 1.57 + if case: 1.58 + markups.SequenceCaseMarkup(sequence) 1.59 + 1.60 + 1.61 + #letters = ''.join(v[0] for v in seq.markups['case'].as_list()) 1.62 + 1.63 + # WRITING CLASSES 1.64 + next_class_id = 1 1.65 + 1.66 + for column in alignment.columns: 1.67 + column_number = alignment.markups['number'][column] 1.68 + column_class = [] 1.69 + 1.70 + for sequence in column: 1.71 + monomer = column[sequence] 1.72 + monomer_id = (sequence.name, monomer.number) 1.73 + if case: 1.74 + if monomer.case == "lower": 1.75 + MonomerHomology.write_monomer(g,monomer_id,next_class_id,column_number) 1.76 + next_class_id += 1 1.77 + continue 1.78 + column_class.append(monomer_id) 1.79 + 1.80 + if len(column_class) > 0: 1.81 + MonomerHomology.write_class(g,column_class,next_class_id, column_number) 1.82 + next_class_id += 1 1.83 + 1.84 + g.close() 1.85 + return next_class_id - 1
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/utils/make_homologies.py Thu Jul 07 21:22:53 2011 +0400 2.3 @@ -0,0 +1,72 @@ 2.4 +import sys 2.5 + 2.6 +from allpy import protein 2.7 +from allpy.homology import MonomerHomology 2.8 +from allpy import markups 2.9 + 2.10 +import optparse 2.11 +import sys 2.12 + 2.13 + 2.14 +if len(sys.argv) == 1: 2.15 + print("Makes homology files from (case sensitive by request) input alignments") 2.16 + print("Type 'python make_homologies.py -h' for parameters") 2.17 + exit() 2.18 + 2.19 +parser = optparse.OptionParser() 2.20 + 2.21 +parser.add_option("-c", "--case", action="store_true", help="Case sensitive interpretation of input alignment (False if -c missed)", dest="case") 2.22 +parser.add_option("-n", "--file_names", help="File with alignment fasta files names", dest="file_names") 2.23 + 2.24 +parser.add_option("-d", "--base_in", help="Name of directory with alignment files (default=\"\")", dest="base_in", default="") 2.25 +parser.add_option("-s", "--suffix", help="Suffix for out files", dest="suffix", default = "hom") 2.26 +parser.add_option("-o", "--base_out", help="Name of directory for outfiles (default=\"\")", dest="base_out", default="") 2.27 + 2.28 +# FOR ONE INPUT ALIGNMENT 2.29 +#parser.add_option("-i", "--in_file", help="File with an alignment in fasta", dest="in_file") 2.30 +#parser.add_option("-o", "--result", help="Output file with monomer homology classes (default: result.xls)", dest="out_file", default = "result.xls") 2.31 + 2.32 +options, args = parser.parse_args() 2.33 +vars().update(vars(options)) 2.34 + 2.35 + 2.36 +try: 2.37 + f = open(file_names) 2.38 +except: 2.39 + raise Exception("File %s does not exists! Use python make_homologies.py -h for parameters" % file_names) 2.40 + 2.41 + 2.42 + 2.43 +alignment_names = [] 2.44 +for line in f: 2.45 + line = line.strip() 2.46 + if len(line) == 0: 2.47 + continue 2.48 + line = line.strip().split("#")[0] 2.49 + if len(line) == 0: 2.50 + continue 2.51 + alignment_names.append(line.split()[0]) 2.52 + 2.53 + 2.54 +if len(base_in) > 0: 2.55 + if base_in[-1]!= "/": 2.56 + base_in = base_in + "/" 2.57 + 2.58 +if len(base_out) > 0: 2.59 + if base_out[-1]!= "/": 2.60 + base_out = base_out + "/" 2.61 + 2.62 + 2.63 +print("Wait...") 2.64 + 2.65 +for alignment_name in alignment_names: 2.66 + in_file_name = base_in + alignment_name 2.67 + out_file_name = base_out + alignment_name.partition(".fasta")[0] + ".xls" 2.68 + 2.69 + classes_number = MonomerHomology.case_homology(in_file_name,out_file_name, case) 2.70 + print ("File %s: %s monomer homology classes stored" % (out_file_name,classes_number)) 2.71 + 2.72 +print("...Done") 2.73 + 2.74 + 2.75 +
3.1 --- a/utils/make_homology.py Thu Jul 07 19:32:44 2011 +0400 3.2 +++ b/utils/make_homology.py Thu Jul 07 21:22:53 2011 +0400 3.3 @@ -5,63 +5,6 @@ 3.4 import optparse 3.5 import sys 3.6 3.7 -def case_homology(fasta_file_name,homology_file_name, case): 3.8 - """ Makes homology file from case sensitive input alignment 3.9 - RETURN number of classes 3.10 - """ 3.11 - try: 3.12 - f = open(in_file) 3.13 - except: 3.14 - raise Exception("Failed to open file %s!" % fasta_file_name) 3.15 - exit() 3.16 - try: 3.17 - alignment = protein.Alignment().append_file(f) 3.18 - f.close() 3.19 - except: 3.20 - raise Exception("Failed to cteate alignment from file %s!" % fasta_file_name) 3.21 - exit() 3.22 - try: 3.23 - g = open(homology_file_name, 'w') 3.24 - except: 3.25 - raise Exception("Failed to open output file %s!" % homology_file_name) 3.26 - exit() 3.27 - 3.28 -# MARKUPING 3.29 - markups.AlignmentNumberMarkup(alignment) 3.30 - 3.31 - for sequence in alignment.sequences: 3.32 - markups.SequenceNumberMarkup(sequence) 3.33 - if case: 3.34 - markups.SequenceCaseMarkup(sequence) 3.35 - 3.36 - 3.37 -#letters = ''.join(v[0] for v in seq.markups['case'].as_list()) 3.38 - 3.39 -# WRITING CLASSES 3.40 - next_class_id = 1 3.41 - 3.42 - for column in alignment.columns: 3.43 - column_number = alignment.markups['number'][column] 3.44 - column_class = [] 3.45 - 3.46 - for sequence in column: 3.47 - monomer = column[sequence] 3.48 - monomer_id = (sequence.name, monomer.number) 3.49 - if case: 3.50 - if monomer.case == "lower": 3.51 - MonomerHomology.write_monomer(g,monomer_id,next_class_id,column_number) 3.52 - next_class_id += 1 3.53 - continue 3.54 - column_class.append(monomer_id) 3.55 - 3.56 - if len(column_class) > 0: 3.57 - MonomerHomology.write_class(g,column_class,next_class_id, column_number) 3.58 - next_class_id += 1 3.59 - 3.60 - g.close() 3.61 - return next_class_id - 1 3.62 - 3.63 -####################################################################################### 3.64 if len(sys.argv) == 1: 3.65 print("Makes homology file from case sensitive input alignment") 3.66 print("Type 'python test_homology.py -h' for parameters") 3.67 @@ -78,7 +21,7 @@ 3.68 3.69 print("Wait...") 3.70 3.71 -classes_number = case_homology(in_file,out_file, case) 3.72 +classes_number = MonomerHomology.case_homology(in_file,out_file, case) 3.73 3.74 print ("%s monomer homology classes stored" % classes_number) 3.75 print("...Done")