seqsearch.search.hmmer

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

  1#!/usr/bin/env python3
  2# -*- coding: utf-8 -*-
  3
  4"""
  5Written by Lucas Sinclair.
  6MIT Licensed.
  7Contact at www.sinclair.bio
  8"""
  9
 10# Built-in modules #
 11import warnings, multiprocessing
 12
 13# Internal modules #
 14from seqsearch.databases.pfam    import pfam
 15from seqsearch.databases.tigrfam import tigrfam
 16
 17# First party modules #
 18from fasta import FASTA
 19from autopaths.file_path import FilePath
 20
 21# Third party modules #
 22from seqsearch import sh
 23
 24# Warnings #
 25warnings.filterwarnings("ignore", "Bio.SearchIO")
 26warnings.filterwarnings("ignore", "BiopythonWarning")
 27
 28###############################################################################
 29class HmmQuery(object):
 30    """An `hmmsearch` job."""
 31
 32    short_name = 'hmmsearch'
 33    long_name  = 'HMMER 3.1b2 (February 2015)'
 34    executable = 'hmmsearch'
 35    url        = 'http://hmmer.org/'
 36    license    = 'GPLv3'
 37    dependencies = []
 38
 39    def __nonzero__(self): return bool(self.out_path)
 40
 41    def __repr__(self):
 42        return '<%s object on %s>' % (self.__class__.__name__, self.query)
 43
 44    def __init__(self, query_path,                    # The input sequences
 45                 db_path      = pfam.hmm_db,          # The database to search
 46                 seq_type     = 'prot' or 'nucl',     # The seq type of the query_path file
 47                 e_value      = 0.001,                # The search threshold
 48                 params       = None,                 # Add extra params for the command line
 49                 out_path     = None,                 # Where the results will be dropped
 50                 executable   = None,                 # If you want a specific binary give the path
 51                 cpus         = None):                # The number of threads to use
 52        # Save attributes #
 53        self.query      = FASTA(query_path)
 54        self.db         = FilePath(db_path)
 55        self.params     = params if params else {}
 56        self.e_value    = e_value
 57        self.seq_type   = seq_type
 58        self.executable = FilePath(executable)
 59        # Cores to use #
 60        if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
 61        else:            self.cpus = cpus
 62        # Auto detect database short name #
 63        if db_path == 'pfam':    self.db = pfam.hmm_db
 64        if db_path == 'tigrfam': self.db = tigrfam.hmm_db
 65        # Output #
 66        if out_path is None:
 67            self.out_path = FilePath(self.query.prefix_path + '.hmmout')
 68        elif out_path.endswith('/'):
 69            self.out_path = FilePath(out_path + self.query.prefix + '.hmmout')
 70        else:
 71            self.out_path = FilePath(out_path)
 72
 73    @property
 74    def command(self):
 75        # Executable #
 76        if self.executable: cmd = [self.executable.path]
 77        else:               cmd = ["hmmsearch"]
 78        # Essentials #
 79        cmd += ('-o',        '/dev/null',   # direct output to file <f>, not stdout
 80                '--tblout',  self.out_path, # parsable table of per-sequence hits
 81                '--seed',    1,             # set RNG seed to <n>
 82                '--notextw',                # unlimited ASCII text output line width
 83                '--acc',                    # prefer accessions over names in output
 84                self.db,
 85                self.query)
 86        # Options #
 87        for k,v in self.params.items(): cmd += [k, v]
 88        # Return #
 89        return map(str, cmd)
 90
 91    def run(self, cpus=None):
 92        """Simply run the HMM search locally."""
 93        # Number of threads #
 94        if cpus is None: cpus = self.cpus
 95        # Checks #
 96        assert self.query.exists
 97        assert self.db.exists
 98        # Check if query is not empty #
 99        if self.query.count_bytes == 0:
100            message = "Hmm search on a file with no sequences. File at '%s'"
101            warnings.warn(message % self.query, RuntimeWarning)
102            return False
103        # Do it #
104        sh.Command(self.command[0])(['--cpu', str(cpus)] + self.command[1:])
105
106    @property
107    def hits(self):
108        if not self.out_path:
109            raise Exception("You can't access results from HMMER before running the algorithm.")
110        from Bio import SearchIO
111        return SearchIO.read(self.out_path, 'hmmer3-tab')
class HmmQuery:
 30class HmmQuery(object):
 31    """An `hmmsearch` job."""
 32
 33    short_name = 'hmmsearch'
 34    long_name  = 'HMMER 3.1b2 (February 2015)'
 35    executable = 'hmmsearch'
 36    url        = 'http://hmmer.org/'
 37    license    = 'GPLv3'
 38    dependencies = []
 39
 40    def __nonzero__(self): return bool(self.out_path)
 41
 42    def __repr__(self):
 43        return '<%s object on %s>' % (self.__class__.__name__, self.query)
 44
 45    def __init__(self, query_path,                    # The input sequences
 46                 db_path      = pfam.hmm_db,          # The database to search
 47                 seq_type     = 'prot' or 'nucl',     # The seq type of the query_path file
 48                 e_value      = 0.001,                # The search threshold
 49                 params       = None,                 # Add extra params for the command line
 50                 out_path     = None,                 # Where the results will be dropped
 51                 executable   = None,                 # If you want a specific binary give the path
 52                 cpus         = None):                # The number of threads to use
 53        # Save attributes #
 54        self.query      = FASTA(query_path)
 55        self.db         = FilePath(db_path)
 56        self.params     = params if params else {}
 57        self.e_value    = e_value
 58        self.seq_type   = seq_type
 59        self.executable = FilePath(executable)
 60        # Cores to use #
 61        if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
 62        else:            self.cpus = cpus
 63        # Auto detect database short name #
 64        if db_path == 'pfam':    self.db = pfam.hmm_db
 65        if db_path == 'tigrfam': self.db = tigrfam.hmm_db
 66        # Output #
 67        if out_path is None:
 68            self.out_path = FilePath(self.query.prefix_path + '.hmmout')
 69        elif out_path.endswith('/'):
 70            self.out_path = FilePath(out_path + self.query.prefix + '.hmmout')
 71        else:
 72            self.out_path = FilePath(out_path)
 73
 74    @property
 75    def command(self):
 76        # Executable #
 77        if self.executable: cmd = [self.executable.path]
 78        else:               cmd = ["hmmsearch"]
 79        # Essentials #
 80        cmd += ('-o',        '/dev/null',   # direct output to file <f>, not stdout
 81                '--tblout',  self.out_path, # parsable table of per-sequence hits
 82                '--seed',    1,             # set RNG seed to <n>
 83                '--notextw',                # unlimited ASCII text output line width
 84                '--acc',                    # prefer accessions over names in output
 85                self.db,
 86                self.query)
 87        # Options #
 88        for k,v in self.params.items(): cmd += [k, v]
 89        # Return #
 90        return map(str, cmd)
 91
 92    def run(self, cpus=None):
 93        """Simply run the HMM search locally."""
 94        # Number of threads #
 95        if cpus is None: cpus = self.cpus
 96        # Checks #
 97        assert self.query.exists
 98        assert self.db.exists
 99        # Check if query is not empty #
100        if self.query.count_bytes == 0:
101            message = "Hmm search on a file with no sequences. File at '%s'"
102            warnings.warn(message % self.query, RuntimeWarning)
103            return False
104        # Do it #
105        sh.Command(self.command[0])(['--cpu', str(cpus)] + self.command[1:])
106
107    @property
108    def hits(self):
109        if not self.out_path:
110            raise Exception("You can't access results from HMMER before running the algorithm.")
111        from Bio import SearchIO
112        return SearchIO.read(self.out_path, 'hmmer3-tab')

An hmmsearch job.

HmmQuery( query_path, db_path=<FilePath object "/Users/sinclair/databases/pfam/unzipped/Pfam-A.hmm">, seq_type='prot', e_value=0.001, params=None, out_path=None, executable=None, cpus=None)
45    def __init__(self, query_path,                    # The input sequences
46                 db_path      = pfam.hmm_db,          # The database to search
47                 seq_type     = 'prot' or 'nucl',     # The seq type of the query_path file
48                 e_value      = 0.001,                # The search threshold
49                 params       = None,                 # Add extra params for the command line
50                 out_path     = None,                 # Where the results will be dropped
51                 executable   = None,                 # If you want a specific binary give the path
52                 cpus         = None):                # The number of threads to use
53        # Save attributes #
54        self.query      = FASTA(query_path)
55        self.db         = FilePath(db_path)
56        self.params     = params if params else {}
57        self.e_value    = e_value
58        self.seq_type   = seq_type
59        self.executable = FilePath(executable)
60        # Cores to use #
61        if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
62        else:            self.cpus = cpus
63        # Auto detect database short name #
64        if db_path == 'pfam':    self.db = pfam.hmm_db
65        if db_path == 'tigrfam': self.db = tigrfam.hmm_db
66        # Output #
67        if out_path is None:
68            self.out_path = FilePath(self.query.prefix_path + '.hmmout')
69        elif out_path.endswith('/'):
70            self.out_path = FilePath(out_path + self.query.prefix + '.hmmout')
71        else:
72            self.out_path = FilePath(out_path)
def run(self, cpus=None):
 92    def run(self, cpus=None):
 93        """Simply run the HMM search locally."""
 94        # Number of threads #
 95        if cpus is None: cpus = self.cpus
 96        # Checks #
 97        assert self.query.exists
 98        assert self.db.exists
 99        # Check if query is not empty #
100        if self.query.count_bytes == 0:
101            message = "Hmm search on a file with no sequences. File at '%s'"
102            warnings.warn(message % self.query, RuntimeWarning)
103            return False
104        # Do it #
105        sh.Command(self.command[0])(['--cpu', str(cpus)] + self.command[1:])

Simply run the HMM search locally.