Документ взят из кэша поисковой машины. Адрес оригинального документа: http://kodomo.fbb.msu.ru/hg/allpy/rev/22587e0d54b6
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 01:09:09 2012
Кодировка:
allpy: 22587e0d54b6

allpy

changeset 709:22587e0d54b6

Automated merge with ssh://kodomo.fbb.msu.ru/allpy
author Andrei <aba@belozersky.msu.ru>
date Thu, 07 Jul 2011 21:22:53 +0400
parents 8b03bd2f919a 55bb5aa2c929
children 9481d408ca93
files
diffstat 3 files changed, 136 insertions(+), 61 deletions(-) [+]
line diff
     1.1 --- a/allpy/homology.py	Thu Jul 07 19:32:44 2011 +0400
     1.2 +++ b/allpy/homology.py	Thu Jul 07 21:22:53 2011 +0400
     1.3 @@ -1,4 +1,6 @@
     1.4  from allpy import protein
     1.5 +from allpy import markups
     1.6 +
     1.7  
     1.8  class  MonomerHomology(object):
     1.9      """ Essentially, alignment is a set of monomer homology classes
    1.10 @@ -36,8 +38,9 @@
    1.11         + .write_class(file,list_of_monomer_ids,monomer_homology_class_id)
    1.12         + .write_block(file, block)
    1.13         + .compare_with(class)
    1.14 -         .highest_blocks()
    1.15 -         .alignment_blocks(alignment)
    1.16 +       + .highest_blocks()
    1.17 +       + .alignment_blocks(alignment)
    1.18 +         .case_homology
    1.19      """
    1.20      def __init__(self, next_class_id = 1):
    1.21           self.classes = {}
    1.22 @@ -347,4 +350,61 @@
    1.23              block = protein.Block.from_alignment(alignment, sequences = block_sequences, columns = block_columns)
    1.24              blocks.append(block)
    1.25  
    1.26 -        return blocks
    1.27 \ No newline at end of file
    1.28 +        return blocks
    1.29 +
    1.30 +    @staticmethod
    1.31 +    def case_homology(in_file_name,out_file_name, case):
    1.32 +        """ Makes homology file from  case sensitive input alignment
    1.33 +        RETURN number of classes
    1.34 +        """
    1.35 +        try: 
    1.36 +            f = open(in_file_name) 
    1.37 +        except:
    1.38 +            raise Exception("Failed to open file %s" % fasta_file_name)
    1.39 +            exit()
    1.40 +        try:
    1.41 +            alignment = protein.Alignment().append_file(f) 
    1.42 +            f.close()
    1.43 +        except:
    1.44 +            raise Exception("Failed to cteate alignment from file %s!" % fasta_file_name)
    1.45 +            exit()
    1.46 +        try:  
    1.47 +            g = open(out_file_name, 'w')
    1.48 +        except:
    1.49 +            raise Exception("Failed to open output file %s!" % out_file_name)
    1.50 +            exit()
    1.51 +    
    1.52 +    # MARKUPING    
    1.53 +        markups.AlignmentNumberMarkup(alignment)
    1.54 +    
    1.55 +        for sequence in alignment.sequences: 
    1.56 +            markups.SequenceNumberMarkup(sequence)
    1.57 +            if case:
    1.58 +                markups.SequenceCaseMarkup(sequence)
    1.59 +        
    1.60 +    
    1.61 +    #letters = ''.join(v[0] for v in seq.markups['case'].as_list())
    1.62 +    
    1.63 +    # WRITING CLASSES    
    1.64 +        next_class_id = 1
    1.65 +    
    1.66 +        for column in alignment.columns: 
    1.67 +            column_number = alignment.markups['number'][column]
    1.68 +            column_class = []
    1.69 +    
    1.70 +            for sequence in column:
    1.71 +                monomer = column[sequence]
    1.72 +                monomer_id = (sequence.name, monomer.number) 
    1.73 +                if case:
    1.74 +                    if monomer.case == "lower":              
    1.75 +                        MonomerHomology.write_monomer(g,monomer_id,next_class_id,column_number)
    1.76 +                        next_class_id += 1
    1.77 +                        continue
    1.78 +                column_class.append(monomer_id) 
    1.79 +    
    1.80 +            if len(column_class) > 0:
    1.81 +                MonomerHomology.write_class(g,column_class,next_class_id, column_number)
    1.82 +                next_class_id += 1
    1.83 +    
    1.84 +        g.close()    
    1.85 +        return next_class_id - 1
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/utils/make_homologies.py	Thu Jul 07 21:22:53 2011 +0400
     2.3 @@ -0,0 +1,72 @@
     2.4 +import sys 
     2.5 +
     2.6 +from allpy import protein
     2.7 +from allpy.homology import MonomerHomology
     2.8 +from allpy import markups
     2.9 +
    2.10 +import optparse
    2.11 +import sys
    2.12 +
    2.13 +
    2.14 +if len(sys.argv) == 1:                                                     
    2.15 +    print("Makes homology files from (case sensitive by request) input alignments")
    2.16 +    print("Type 'python make_homologies.py -h' for parameters")                                                                                                       
    2.17 +    exit()                                                                                                                                             
    2.18 +                                                                                                                                                       
    2.19 +parser = optparse.OptionParser()                                                                                                                       
    2.20 +
    2.21 +parser.add_option("-c", "--case", action="store_true", help="Case sensitive interpretation of input alignment (False if -c missed)", dest="case")
    2.22 +parser.add_option("-n", "--file_names", help="File with alignment fasta files names", dest="file_names")
    2.23 +
    2.24 +parser.add_option("-d", "--base_in", help="Name of directory with alignment files (default=\"\")", dest="base_in", default="")
    2.25 +parser.add_option("-s", "--suffix", help="Suffix for out files", dest="suffix", default = "hom")                                                           
    2.26 +parser.add_option("-o", "--base_out", help="Name of directory for outfiles (default=\"\")", dest="base_out", default="")
    2.27 +
    2.28 +# FOR ONE INPUT ALIGNMENT
    2.29 +#parser.add_option("-i", "--in_file", help="File with an alignment in fasta", dest="in_file")                                                           
    2.30 +#parser.add_option("-o", "--result", help="Output file with monomer homology classes (default: result.xls)", dest="out_file", default = "result.xls")
    2.31 +
    2.32 +options, args = parser.parse_args()                                                                                                                    
    2.33 +vars().update(vars(options))                              
    2.34 +                                                                                        
    2.35 +
    2.36 +try: 
    2.37 +    f = open(file_names)
    2.38 +except:
    2.39 +    raise Exception("File %s does not exists! Use python make_homologies.py -h for parameters" % file_names)
    2.40 +
    2.41 +
    2.42 +
    2.43 +alignment_names = []
    2.44 +for line in f:
    2.45 +    line = line.strip()
    2.46 +    if len(line) == 0:
    2.47 +        continue
    2.48 +    line = line.strip().split("#")[0]
    2.49 +    if len(line) == 0:
    2.50 +        continue
    2.51 +    alignment_names.append(line.split()[0])
    2.52 +
    2.53 +
    2.54 +if len(base_in) > 0:
    2.55 +    if base_in[-1]!= "/":
    2.56 +        base_in = base_in + "/"
    2.57 +
    2.58 +if len(base_out) > 0:
    2.59 +    if base_out[-1]!= "/":
    2.60 +        base_out = base_out + "/"
    2.61 +
    2.62 +
    2.63 +print("Wait...")
    2.64 +
    2.65 +for alignment_name in alignment_names:
    2.66 +    in_file_name = base_in + alignment_name
    2.67 +    out_file_name = base_out + alignment_name.partition(".fasta")[0] + ".xls"
    2.68 +
    2.69 +    classes_number = MonomerHomology.case_homology(in_file_name,out_file_name, case)
    2.70 +    print ("File %s: %s monomer homology classes stored" % (out_file_name,classes_number))
    2.71 +
    2.72 +print("...Done")
    2.73 +
    2.74 +
    2.75 +
     3.1 --- a/utils/make_homology.py	Thu Jul 07 19:32:44 2011 +0400
     3.2 +++ b/utils/make_homology.py	Thu Jul 07 21:22:53 2011 +0400
     3.3 @@ -5,63 +5,6 @@
     3.4  import optparse
     3.5  import sys
     3.6  
     3.7 -def case_homology(fasta_file_name,homology_file_name, case):
     3.8 -    """ Makes homology file from  case sensitive input alignment
     3.9 -    RETURN number of classes
    3.10 -    """
    3.11 -    try: 
    3.12 -        f = open(in_file) 
    3.13 -    except:
    3.14 -        raise Exception("Failed to open file %s!" % fasta_file_name)
    3.15 -        exit()
    3.16 -    try:
    3.17 -        alignment = protein.Alignment().append_file(f) 
    3.18 -        f.close()
    3.19 -    except:
    3.20 -        raise Exception("Failed to cteate alignment from file %s!" % fasta_file_name)
    3.21 -        exit()
    3.22 -    try:  
    3.23 -        g = open(homology_file_name, 'w')
    3.24 -    except:
    3.25 -        raise Exception("Failed to open output file %s!" % homology_file_name)
    3.26 -        exit()
    3.27 -
    3.28 -# MARKUPING    
    3.29 -    markups.AlignmentNumberMarkup(alignment)
    3.30 -
    3.31 -    for sequence in alignment.sequences: 
    3.32 -        markups.SequenceNumberMarkup(sequence)
    3.33 -        if case:
    3.34 -            markups.SequenceCaseMarkup(sequence)
    3.35 -    
    3.36 -
    3.37 -#letters = ''.join(v[0] for v in seq.markups['case'].as_list())
    3.38 -
    3.39 -# WRITING CLASSES    
    3.40 -    next_class_id = 1
    3.41 -
    3.42 -    for column in alignment.columns: 
    3.43 -        column_number = alignment.markups['number'][column]
    3.44 -        column_class = []
    3.45 -
    3.46 -        for sequence in column:
    3.47 -            monomer = column[sequence]
    3.48 -            monomer_id = (sequence.name, monomer.number) 
    3.49 -            if case:
    3.50 -                if monomer.case == "lower":              
    3.51 -                    MonomerHomology.write_monomer(g,monomer_id,next_class_id,column_number)
    3.52 -                    next_class_id += 1
    3.53 -                    continue
    3.54 -            column_class.append(monomer_id) 
    3.55 -
    3.56 -        if len(column_class) > 0:
    3.57 -            MonomerHomology.write_class(g,column_class,next_class_id, column_number)
    3.58 -            next_class_id += 1
    3.59 -
    3.60 -    g.close()    
    3.61 -    return next_class_id - 1
    3.62 -    
    3.63 -#######################################################################################
    3.64  if len(sys.argv) == 1:                                                     
    3.65      print("Makes homology file from  case sensitive input alignment")
    3.66      print("Type 'python test_homology.py -h' for parameters")                                                                                                       
    3.67 @@ -78,7 +21,7 @@
    3.68                                                                                                                                                         
    3.69  print("Wait...")                                                                                                                                       
    3.70  
    3.71 -classes_number = case_homology(in_file,out_file, case)
    3.72 +classes_number = MonomerHomology.case_homology(in_file,out_file, case)
    3.73  
    3.74  print ("%s monomer homology classes stored" % classes_number)
    3.75  print("...Done")