allpy
changeset 696:be07cbd25fd3
The util makes residue holomogy file from fasta alignment. May take in account case of letters
author | Andrei <aba@belozersky.msu.ru> |
---|---|
date | Wed, 06 Jul 2011 11:42:41 +0400 |
parents | fd3481013cd5 |
children | 53d142099931 |
files | utils/make_homology.py |
diffstat | 1 files changed, 85 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/utils/make_homology.py Wed Jul 06 11:42:41 2011 +0400 1.3 @@ -0,0 +1,85 @@ 1.4 +from allpy import protein 1.5 +from allpy.homology import ResidueHomology 1.6 +from allpy import markups 1.7 + 1.8 +import optparse 1.9 +import sys 1.10 + 1.11 +def case_homology(fasta_file_name,homology_file_name, case): 1.12 + """ Makes homology file from case sensitive input alignment 1.13 + RETURN number of classes 1.14 + """ 1.15 + try: 1.16 + f = open(in_file) 1.17 + except: 1.18 + raise Exception("Failed to open file %s!" % fasta_file_name) 1.19 + exit() 1.20 + try: 1.21 + alignment = protein.Alignment().append_file(f) 1.22 + f.close() 1.23 + except: 1.24 + raise Exception("Failed to cteate alignment from file %s!" % fasta_file_name) 1.25 + exit() 1.26 + try: 1.27 + g = open(homology_file_name, 'w') 1.28 + except: 1.29 + raise Exception("Failed to open output file %s!" % homology_file_name) 1.30 + exit() 1.31 + 1.32 +# MARKUPING 1.33 + markups.AlignmentNumberMarkup(alignment) 1.34 + 1.35 + for sequence in alignment.sequences: 1.36 + markups.SequenceNumberMarkup(sequence) 1.37 + if case: 1.38 + markups.SequenceCaseMarkup(sequence) 1.39 + 1.40 + 1.41 +#letters = ''.join(v[0] for v in seq.markups['case'].as_list()) 1.42 + 1.43 +# WRITING CLASSES 1.44 + next_class_id = 1 1.45 + 1.46 + for column in alignment.columns: 1.47 + column_number = alignment.markups['number'][column] 1.48 + column_class = [] 1.49 + 1.50 + for sequence in column: 1.51 + monomer = column[sequence] 1.52 + monomer_id = (sequence.name, monomer.number) 1.53 + if case: 1.54 + if monomer.case == "lower": 1.55 + ResidueHomology.write_monomer(g,monomer_id,next_class_id,column_number) 1.56 + next_class_id += 1 1.57 + continue 1.58 + column_class.append(monomer_id) 1.59 + 1.60 + if len(column_class) > 0: 1.61 + ResidueHomology.write_class(g,column_class,next_class_id, column_number) 1.62 + next_class_id += 1 1.63 + 1.64 + g.close() 1.65 + return next_class_id - 1 1.66 + 1.67 +####################################################################################### 1.68 +if len(sys.argv) == 1: 1.69 + print("Makes homology file from case sensitive input alignment") 1.70 + print("Type 'python test_homology.py -h' for parameters") 1.71 + exit() 1.72 + 1.73 +parser = optparse.OptionParser() 1.74 +parser.add_option("-i", "--in_file", help="File with alignment in fasta", dest="in_file") 1.75 +parser.add_option("-o", "--result", help="Output file with residue homology classes (default: result.xls)", dest="out_file", default = "result.xls") 1.76 +parser.add_option("-c", "--case", action="store_true", help="Case sensitive interpretation of input alignment (False if -c missed)", dest="case") 1.77 + 1.78 +options, args = parser.parse_args() 1.79 +vars().update(vars(options)) 1.80 + 1.81 + 1.82 +print("Wait...") 1.83 + 1.84 +classes_number = case_homology(in_file,out_file, case) 1.85 + 1.86 +print ("%s residue homology classes stored" % classes_number) 1.87 +print("...Done") 1.88 +