allpy
changeset 175:fb3164f03984
Project --> Alignment
author | boris <bnagaev@gmail.com> |
---|---|
date | Wed, 03 Nov 2010 19:12:23 +0300 |
parents | afe1db2a19a2 |
children | 6dfc736f37a9 |
files | geometrical_core/geometrical_core.py lib/block.py lib/project.py lib/sequence.py sec_str/sec_str.py test/test.py |
diffstat | 6 files changed, 47 insertions(+), 47 deletions(-) [+] |
line diff
1.1 --- a/geometrical_core/geometrical_core.py Wed Nov 03 19:08:20 2010 +0300 1.2 +++ b/geometrical_core/geometrical_core.py Wed Nov 03 19:12:23 2010 +0300 1.3 @@ -3,9 +3,9 @@ 1.4 version 2.0 1.5 """ 1.6 1.7 -from allpy.lib import config, project, block 1.8 +from allpy.lib import config, alignment, block 1.9 Block = block.Block 1.10 -Project = project.Project 1.11 +Alignment = alignment.Alignment 1.12 import argparse 1.13 import os 1.14 from tempfile import NamedTemporaryFile 1.15 @@ -112,7 +112,7 @@ 1.16 exit() 1.17 1.18 try: 1.19 - project = Project(args.i) 1.20 + alignment = Alignment(args.i) 1.21 except: 1.22 args.i.close() 1.23 tmp_file = NamedTemporaryFile(delete=False) 1.24 @@ -120,9 +120,9 @@ 1.25 os.system('seqret %(msf)s %(fasta)s' % \ 1.26 {'msf': args.i.name, 'fasta': tmp_file.name}) 1.27 args.i = open(tmp_file.name) 1.28 - project = Project(args.i) 1.29 + alignment = Alignment(args.i) 1.30 1.31 - block = Block(project) 1.32 + block = Block(alignment) 1.33 GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t, 1.34 minsize=args.t, ac_new_atoms=args.n, ac_count=args.a) 1.35
2.1 --- a/lib/block.py Wed Nov 03 19:08:20 2010 +0300 2.2 +++ b/lib/block.py Wed Nov 03 19:12:23 2010 +0300 2.3 @@ -2,7 +2,7 @@ 2.4 2.5 import sys 2.6 2.7 -import project 2.8 +import alignment 2.9 import sequence 2.10 import monomer 2.11 import config 2.12 @@ -16,32 +16,32 @@ 2.13 """ Block of alignment 2.14 2.15 Mandatory data: 2.16 - * self.project -- project object, which the block belongs to 2.17 + * self.alignment -- alignment object, which the block belongs to 2.18 * self.sequences - set of sequence objects that contain monomers 2.19 and/or gaps, that constitute the block 2.20 - * self.positions -- list of positions of the project.body that 2.21 + * self.positions -- list of positions of the alignment.body that 2.22 are included in the block; position[i+1] is always to the right from position[i] 2.23 2.24 Don't change self.sequences -- it may be a link to other block.sequences 2.25 2.26 How to create a new block: 2.27 - >>> import project 2.28 + >>> import alignment 2.29 >>> import block 2.30 - >>> proj = project.Project(open("test.fasta")) 2.31 + >>> proj = alignment.Alignment(open("test.fasta")) 2.32 >>> block1 = block.Block(proj) 2.33 """ 2.34 2.35 - def __init__(self, project, sequences=None, positions=None): 2.36 - """ Builds new block from project 2.37 + def __init__(self, alignment, sequences=None, positions=None): 2.38 + """ Builds new block from alignment 2.39 2.40 if sequences==None, all sequences are used 2.41 if positions==None, all positions are used 2.42 """ 2.43 if sequences == None: 2.44 - sequences = set(project.sequences) # copy 2.45 + sequences = set(alignment.sequences) # copy 2.46 if positions == None: 2.47 - positions = range(len(project)) 2.48 - self.project = project 2.49 + positions = range(len(alignment)) 2.50 + self.alignment = alignment 2.51 self.sequences = sequences 2.52 self.positions = positions 2.53 2.54 @@ -52,7 +52,7 @@ 2.55 are made. 2.56 """ 2.57 for sequence in self.sequences: 2.58 - alignment_monomers = self.project.body[sequence] 2.59 + alignment_monomers = self.alignment.body[sequence] 2.60 block_monomers = [alignment_monomers[i] for i in self.positions] 2.61 string = ''.join([m.type.code1 if m else '-' for m in block_monomers]) 2.62 save_fasta(out_file, string, sequence.name, sequence.description, long_line) 2.63 @@ -84,8 +84,8 @@ 2.64 distances = [] 2.65 for sequence in self.sequences: 2.66 for chain in sequence.pdb_chains: 2.67 - m1 = self.project.body[sequence][i] 2.68 - m2 = self.project.body[sequence][j] 2.69 + m1 = self.alignment.body[sequence][i] 2.70 + m2 = self.alignment.body[sequence][j] 2.71 if m1 and m2: 2.72 r1 = sequence.pdb_residues[chain][m1] 2.73 r2 = sequence.pdb_residues[chain][m2] 2.74 @@ -105,7 +105,7 @@ 2.75 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique): 2.76 break 2.77 else: 2.78 - GCs.append(Block(self.project, self.sequences, clique)) 2.79 + GCs.append(Block(self.alignment, self.sequences, clique)) 2.80 if ac_count != -1 and len(GCs) >= ac_count: 2.81 break 2.82 return GCs 2.83 @@ -113,9 +113,9 @@ 2.84 def xstring(self, x='X', gap='-'): 2.85 """ Returns string consisting of gap chars and chars x at self.positions 2.86 2.87 - Length of returning string = length of project 2.88 + Length of returning string = length of alignment 2.89 """ 2.90 - monomers = [False] * len(self.project) 2.91 + monomers = [False] * len(self.alignment) 2.92 for i in self.positions: 2.93 monomers[i] = True 2.94 return ''.join([x if m else gap for m in monomers]) 2.95 @@ -126,7 +126,7 @@ 2.96 2.97 def monomers(self, sequence): 2.98 """ Iterates monomers of this sequence from this block """ 2.99 - alignment_sequence = self.project.body[sequence] 2.100 + alignment_sequence = self.alignment.body[sequence] 2.101 return (alignment_sequence[i] for i in self.positions) 2.102 2.103 def ca_atoms(self, sequence, pdb_chain): 2.104 @@ -135,7 +135,7 @@ 2.105 2.106 def sequences_chains(self): 2.107 """ Iterates pairs (sequence, chain) """ 2.108 - for sequence in self.project.sequences: 2.109 + for sequence in self.alignment.sequences: 2.110 if sequence in self.sequences: 2.111 for chain in sequence.pdb_chains: 2.112 yield (sequence, chain)
3.1 --- a/lib/project.py Wed Nov 03 19:08:20 2010 +0300 3.2 +++ b/lib/project.py Wed Nov 03 19:12:23 2010 +0300 3.3 @@ -15,13 +15,13 @@ 3.4 import block 3.5 from fasta import save_fasta 3.6 3.7 -class Project(object): 3.8 +class Alignment(object): 3.9 """ Alignment 3.10 3.11 Mandatory data: 3.12 * sequences -- list of Sequence objects. Sequences don't contain gaps 3.13 - see sequence.py module 3.14 - * alignment -- dict 3.15 + * body -- dict 3.16 {<Sequence object>:[<Monomer object>,None,<Monomer object>]} 3.17 keys are the Sequence objects, values are the lists, which 3.18 contain monomers of those sequences or None for gaps in the 3.19 @@ -32,10 +32,10 @@ 3.20 def __init__(self, *args): 3.21 """overloaded constructor 3.22 3.23 - Project() -> new empty Project 3.24 - Project(sequences, body) -> new Project with sequences and 3.25 + Alignment() -> new empty Alignment 3.26 + Alignment(sequences, body) -> new Alignment with sequences and 3.27 body initialized from arguments 3.28 - Project(fasta_file) -> new Project, read body and sequences 3.29 + Alignment(fasta_file) -> new Alignment, read body and sequences 3.30 from fasta file 3.31 3.32 """ 3.33 @@ -46,7 +46,7 @@ 3.34 self.sequences=[] 3.35 self.body={} 3.36 else: 3.37 - self.sequences, self.body = Project.from_fasta(args[0]) 3.38 + self.sequences, self.body = Alignment.from_fasta(args[0]) 3.39 3.40 def __len__(self): 3.41 """ Returns width, ie length of each sequence with gaps """ 3.42 @@ -61,7 +61,7 @@ 3.43 3.44 For every (row, column) in alignment the percentage of the exactly 3.45 same residue in the same column in the alignment is calculated. 3.46 - The data structure is just like the Project.body, but istead of 3.47 + The data structure is just like the Alignment.body, but istead of 3.48 monomers it contains float percentages. 3.49 """ 3.50 # Oh, God, that's awful! Absolutely not understandable. 3.51 @@ -101,8 +101,8 @@ 3.52 3.53 monomer_kind is class, inherited from MonomerType 3.54 3.55 - >>> import project 3.56 - >>> sequences,body=project.Project.from_fasta(open("test.fasta")) 3.57 + >>> import alignment 3.58 + >>> sequences,body=alignment.Alignment.from_fasta(open("test.fasta")) 3.59 """ 3.60 import re 3.61 3.62 @@ -156,13 +156,13 @@ 3.63 3.64 Add None's to right end to make equal lengthes of alignment sequences 3.65 """ 3.66 - project = Project() 3.67 - project.sequences = sequences 3.68 + alignment = Alignment() 3.69 + alignment.sequences = sequences 3.70 max_length = max(len(sequence) for sequence in sequences) 3.71 for sequence in sequences: 3.72 gaps_count = max_length - len(sequence) 3.73 - project.body[sequence] = sequence.monomers + [None] * gaps_count 3.74 - return project 3.75 + alignment.body[sequence] = sequence.monomers + [None] * gaps_count 3.76 + return alignment 3.77 3.78 def save_fasta(self, out_file, long_line=70, gap='-'): 3.79 """ Saves alignment to given file 3.80 @@ -181,7 +181,7 @@ 3.81 self.save_fasta(tmp_file) 3.82 tmp_file.close() 3.83 os.system("muscle -in %(tmp)s -out %(tmp)s" % {'tmp': tmp_file.name}) 3.84 - sequences, body = Project.from_fasta(open(tmp_file.name)) 3.85 + sequences, body = Alignment.from_fasta(open(tmp_file.name)) 3.86 for sequence in self.sequences: 3.87 try: 3.88 new_sequence = [i for i in sequences if sequence==i][0]
4.1 --- a/lib/sequence.py Wed Nov 03 19:08:20 2010 +0300 4.2 +++ b/lib/sequence.py Wed Nov 03 19:12:23 2010 +0300 4.3 @@ -5,7 +5,7 @@ 4.4 from Bio.PDB import CaPPBuilder, PDBIO 4.5 from Bio.PDB.DSSP import make_dssp_dict 4.6 from allpy_pdb import std_id, pdb_id_parse, get_structure 4.7 -import project 4.8 +import alignment 4.9 import sys 4.10 import config 4.11 import os.path 4.12 @@ -82,7 +82,7 @@ 4.13 self.pdb_residues[chain] = {} 4.14 self.pdb_secstr[chain] = {} 4.15 pdb_sequence = Sequence.from_pdb_chain(chain) 4.16 - alignment = project.Project.from_sequences(self, pdb_sequence) 4.17 + alignment = alignment.Alignment.from_sequences(self, pdb_sequence) 4.18 alignment.muscle_align() 4.19 for monomer, pdb_monomer in alignment.column(sequence=pdb_sequence, original=self): 4.20 if pdb_sequence.pdb_has(chain, pdb_monomer):
5.1 --- a/sec_str/sec_str.py Wed Nov 03 19:08:20 2010 +0300 5.2 +++ b/sec_str/sec_str.py Wed Nov 03 19:12:23 2010 +0300 5.3 @@ -3,9 +3,9 @@ 5.4 5.5 """ 5.6 5.7 -from allpy.lib import config, project, block 5.8 +from allpy.lib import config, alignment, block 5.9 Block = block.Block 5.10 -Project = project.Project 5.11 +Alignment = alignment.Alignment 5.12 import argparse 5.13 import sys 5.14 from allpy.lib.fasta import determine_long_line 5.15 @@ -29,7 +29,7 @@ 5.16 tmp_file = None 5.17 5.18 try: 5.19 - project = Project(args.i) 5.20 + alignment = Alignment(args.i) 5.21 except: 5.22 args.i.close() 5.23 tmp_file = NamedTemporaryFile(delete=False) 5.24 @@ -37,11 +37,11 @@ 5.25 os.system('seqret %(msf)s %(fasta)s' % \ 5.26 {'msf': args.i.name, 'fasta': tmp_file.name}) 5.27 args.i = open(tmp_file.name) 5.28 - project = Project(args.i) 5.29 + alignment = Alignment(args.i) 5.30 args.i.seek(0) 5.31 long_line = determine_long_line(args.i) 5.32 5.33 -block = Block(project) 5.34 +block = Block(alignment) 5.35 5.36 args.i.seek(0) 5.37 f = args.f 5.38 @@ -52,7 +52,7 @@ 5.39 sequence.pdb_auto_add() 5.40 chain = sequence.pdb_chains[0] 5.41 sequence.pdb_add_sec_str(chain) 5.42 - project.save_secstr(f, sequence, chain, "%s_ss" % sequence.name, long_line=long_line) 5.43 + alignment.save_secstr(f, sequence, chain, "%s_ss" % sequence.name, long_line=long_line) 5.44 sequence.pdb_unload() 5.45 except Exception, e: 5.46 print "Warning: can'not process %s" % sequence.name
6.1 --- a/test/test.py Wed Nov 03 19:08:20 2010 +0300 6.2 +++ b/test/test.py Wed Nov 03 19:12:23 2010 +0300 6.3 @@ -1,9 +1,9 @@ 6.4 import sys 6.5 6.6 -from allpy.lib.project import Project 6.7 +from allpy.lib.alignment import Alignment 6.8 from allpy.lib.block import Block 6.9 6.10 -p = Project(open('test.fasta')) 6.11 +p = Alignment(open('test.fasta')) 6.12 print "alignment length: %i" % len(p) 6.13 print "sequence: %s" % str(p.sequences[0]) 6.14