seqsearch.search.hmmer
Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Written by Lucas Sinclair.
MIT Licensed.
Contact at www.sinclair.bio
"""

# Built-in modules #
import warnings, multiprocessing

# Internal modules #
from seqsearch.databases.pfam import pfam
from seqsearch.databases.tigrfam import tigrfam

# First party modules #
from fasta import FASTA
from autopaths.file_path import FilePath

# Third party modules #
from seqsearch import sh

# Warnings #
warnings.filterwarnings("ignore", "Bio.SearchIO")
warnings.filterwarnings("ignore", "BiopythonWarning")

###############################################################################
class HmmQuery(object):
    """
    An `hmmsearch` job.

    Wraps one invocation of the `hmmsearch` executable: a FASTA file of
    query sequences is searched against an HMM database and the
    per-sequence hits are written as a table to `self.out_path`.

    Typical use is to build the object, call `run()` and then read `hits`.
    """

    short_name   = 'hmmsearch'
    long_name    = 'HMMER 3.1b2 (February 2015)'
    executable   = 'hmmsearch'
    url          = 'http://hmmer.org/'
    license      = 'GPLv3'
    dependencies = []

    def __bool__(self):
        """Truth value of the job is the truth value of its output path."""
        return bool(self.out_path)

    # Python 2 looks up `__nonzero__`, Python 3 looks up `__bool__` #
    __nonzero__ = __bool__

    def __repr__(self):
        return '<%s object on %s>' % (self.__class__.__name__, self.query)

    def __init__(self, query_path,        # The input sequences
                 db_path    = pfam.hmm_db, # The database to search
                 seq_type   = 'prot',      # The seq type of the query_path file ('prot' or 'nucl')
                 e_value    = 0.001,       # The search threshold
                 params     = None,        # Add extra params for the command line
                 out_path   = None,        # Where the results will be dropped
                 executable = None,        # If you want a specific binary give the path
                 cpus       = None):       # The number of threads to use
        # Save attributes #
        self.query      = FASTA(query_path)
        self.params     = params if params else {}
        # NOTE(review): `e_value` is only stored here; the `command` property
        # below does not forward it to the command line.
        self.e_value    = e_value
        self.seq_type   = seq_type
        self.executable = FilePath(executable)
        # Cores to use (capped at 32 when auto-detected) #
        if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
        else:            self.cpus = cpus
        # Auto detect database short name #
        if   db_path == 'pfam':    self.db = pfam.hmm_db
        elif db_path == 'tigrfam': self.db = tigrfam.hmm_db
        else:                      self.db = FilePath(db_path)
        # Output: default next to the query, or inside a given directory #
        if out_path is None:
            self.out_path = FilePath(self.query.prefix_path + '.hmmout')
        elif out_path.endswith('/'):
            self.out_path = FilePath(out_path + self.query.prefix + '.hmmout')
        else:
            self.out_path = FilePath(out_path)

    @property
    def command(self):
        """
        The full command line as a list of strings, executable first.

        A real list is returned (not a lazy `map` object) so that callers
        can index and slice it under Python 3.
        """
        # Executable #
        if self.executable: cmd = [self.executable.path]
        else:               cmd = ["hmmsearch"]
        # Essentials #
        cmd += ('-o', '/dev/null',          # direct output to file <f>, not stdout
                '--tblout', self.out_path,  # parsable table of per-sequence hits
                '--seed', 1,                # set RNG seed to <n>
                '--notextw',                # unlimited ASCII text output line width
                '--acc',                    # prefer accessions over names in output
                self.db,
                self.query)
        # Options #
        for k, v in self.params.items(): cmd += [k, v]
        # Return #
        return [str(item) for item in cmd]

    def run(self, cpus=None):
        """
        Simply run the HMM search locally.

        `cpus` overrides the thread count chosen at construction time.
        Returns False (after emitting a RuntimeWarning) when the query file
        is empty; otherwise returns None once the command has been called.
        """
        # Number of threads #
        if cpus is None: cpus = self.cpus
        # Checks #
        assert self.query.exists
        assert self.db.exists
        # Check if query is not empty #
        if self.query.count_bytes == 0:
            message = "Hmm search on a file with no sequences. File at '%s'"
            warnings.warn(message % self.query, RuntimeWarning)
            return False
        # Build the command once, then insert the thread option right
        # after the executable #
        cmd = list(self.command)
        sh.Command(cmd[0])(['--cpu', str(cpus)] + cmd[1:])

    @property
    def hits(self):
        """
        The search results parsed by `Bio.SearchIO` in 'hmmer3-tab' format.

        Raises an Exception when `self.out_path` evaluates as false.
        """
        if not self.out_path:
            raise Exception("You can't access results from HMMER before running the algorithm.")
        # Imported lazily so that Biopython is only needed to read results #
        from Bio import SearchIO
        # NOTE(review): `SearchIO.read` is documented for files holding a
        # single query; confirm that matches the tables produced here.
        return SearchIO.read(self.out_path, 'hmmer3-tab')
class
HmmQuery:
class HmmQuery(object):
    """
    An `hmmsearch` job.

    Wraps one invocation of the `hmmsearch` executable: a FASTA file of
    query sequences is searched against an HMM database and the
    per-sequence hits are written as a table to `self.out_path`.

    Typical use is to build the object, call `run()` and then read `hits`.
    """

    short_name   = 'hmmsearch'
    long_name    = 'HMMER 3.1b2 (February 2015)'
    executable   = 'hmmsearch'
    url          = 'http://hmmer.org/'
    license      = 'GPLv3'
    dependencies = []

    def __bool__(self):
        """Truth value of the job is the truth value of its output path."""
        return bool(self.out_path)

    # Python 2 looks up `__nonzero__`, Python 3 looks up `__bool__` #
    __nonzero__ = __bool__

    def __repr__(self):
        return '<%s object on %s>' % (self.__class__.__name__, self.query)

    def __init__(self, query_path,        # The input sequences
                 db_path    = pfam.hmm_db, # The database to search
                 seq_type   = 'prot',      # The seq type of the query_path file ('prot' or 'nucl')
                 e_value    = 0.001,       # The search threshold
                 params     = None,        # Add extra params for the command line
                 out_path   = None,        # Where the results will be dropped
                 executable = None,        # If you want a specific binary give the path
                 cpus       = None):       # The number of threads to use
        # Save attributes #
        self.query      = FASTA(query_path)
        self.params     = params if params else {}
        # NOTE(review): `e_value` is only stored here; the `command` property
        # below does not forward it to the command line.
        self.e_value    = e_value
        self.seq_type   = seq_type
        self.executable = FilePath(executable)
        # Cores to use (capped at 32 when auto-detected) #
        if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
        else:            self.cpus = cpus
        # Auto detect database short name #
        if   db_path == 'pfam':    self.db = pfam.hmm_db
        elif db_path == 'tigrfam': self.db = tigrfam.hmm_db
        else:                      self.db = FilePath(db_path)
        # Output: default next to the query, or inside a given directory #
        if out_path is None:
            self.out_path = FilePath(self.query.prefix_path + '.hmmout')
        elif out_path.endswith('/'):
            self.out_path = FilePath(out_path + self.query.prefix + '.hmmout')
        else:
            self.out_path = FilePath(out_path)

    @property
    def command(self):
        """
        The full command line as a list of strings, executable first.

        A real list is returned (not a lazy `map` object) so that callers
        can index and slice it under Python 3.
        """
        # Executable #
        if self.executable: cmd = [self.executable.path]
        else:               cmd = ["hmmsearch"]
        # Essentials #
        cmd += ('-o', '/dev/null',          # direct output to file <f>, not stdout
                '--tblout', self.out_path,  # parsable table of per-sequence hits
                '--seed', 1,                # set RNG seed to <n>
                '--notextw',                # unlimited ASCII text output line width
                '--acc',                    # prefer accessions over names in output
                self.db,
                self.query)
        # Options #
        for k, v in self.params.items(): cmd += [k, v]
        # Return #
        return [str(item) for item in cmd]

    def run(self, cpus=None):
        """
        Simply run the HMM search locally.

        `cpus` overrides the thread count chosen at construction time.
        Returns False (after emitting a RuntimeWarning) when the query file
        is empty; otherwise returns None once the command has been called.
        """
        # Number of threads #
        if cpus is None: cpus = self.cpus
        # Checks #
        assert self.query.exists
        assert self.db.exists
        # Check if query is not empty #
        if self.query.count_bytes == 0:
            message = "Hmm search on a file with no sequences. File at '%s'"
            warnings.warn(message % self.query, RuntimeWarning)
            return False
        # Build the command once, then insert the thread option right
        # after the executable #
        cmd = list(self.command)
        sh.Command(cmd[0])(['--cpu', str(cpus)] + cmd[1:])

    @property
    def hits(self):
        """
        The search results parsed by `Bio.SearchIO` in 'hmmer3-tab' format.

        Raises an Exception when `self.out_path` evaluates as false.
        """
        if not self.out_path:
            raise Exception("You can't access results from HMMER before running the algorithm.")
        # Imported lazily so that Biopython is only needed to read results #
        from Bio import SearchIO
        # NOTE(review): `SearchIO.read` is documented for files holding a
        # single query; confirm that matches the tables produced here.
        return SearchIO.read(self.out_path, 'hmmer3-tab')
An hmmsearch
job.
HmmQuery( query_path, db_path=<FilePath object "/Users/sinclair/databases/pfam/unzipped/Pfam-A.hmm">, seq_type='prot', e_value=0.001, params=None, out_path=None, executable=None, cpus=None)
def __init__(self, query_path,        # The input sequences
             db_path    = pfam.hmm_db, # The database to search
             seq_type   = 'prot',      # The seq type of the query_path file ('prot' or 'nucl')
             e_value    = 0.001,       # The search threshold
             params     = None,        # Add extra params for the command line
             out_path   = None,        # Where the results will be dropped
             executable = None,        # If you want a specific binary give the path
             cpus       = None):       # The number of threads to use
    """
    Record the inputs of the search and work out where results will go.

    `db_path` may be a path or one of the short names 'pfam' / 'tigrfam'.
    `out_path` may be None (output goes next to the query with a '.hmmout'
    suffix), a directory ending in '/' (the query prefix plus '.hmmout' is
    appended), or a complete file path.
    """
    # Save attributes #
    self.query      = FASTA(query_path)
    self.params     = params if params else {}
    self.e_value    = e_value
    self.seq_type   = seq_type
    self.executable = FilePath(executable)
    # Cores to use (capped at 32 when auto-detected) #
    if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
    else:            self.cpus = cpus
    # Auto detect database short name #
    if   db_path == 'pfam':    self.db = pfam.hmm_db
    elif db_path == 'tigrfam': self.db = tigrfam.hmm_db
    else:                      self.db = FilePath(db_path)
    # Output #
    if out_path is None:
        self.out_path = FilePath(self.query.prefix_path + '.hmmout')
    elif out_path.endswith('/'):
        self.out_path = FilePath(out_path + self.query.prefix + '.hmmout')
    else:
        self.out_path = FilePath(out_path)
def
run(self, cpus=None):
def run(self, cpus=None):
    """
    Simply run the HMM search locally.

    `cpus` overrides the thread count stored on the instance.
    Returns False (after emitting a RuntimeWarning) when the query file
    is empty; otherwise returns None once the command has been called.
    """
    # Number of threads #
    if cpus is None: cpus = self.cpus
    # Checks #
    assert self.query.exists
    assert self.db.exists
    # Check if query is not empty #
    if self.query.count_bytes == 0:
        message = "Hmm search on a file with no sequences. File at '%s'"
        warnings.warn(message % self.query, RuntimeWarning)
        return False
    # Materialise the command once: under Python 3 a `map` object cannot
    # be indexed or sliced, and the property need not be evaluated twice #
    cmd = list(self.command)
    # Do it, inserting the thread option right after the executable #
    sh.Command(cmd[0])(['--cpu', str(cpus)] + cmd[1:])
Simply run the HMM search locally.