seqsearch.search.core

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

 1#!/usr/bin/env python3
 2# -*- coding: utf-8 -*-
 3
 4"""
 5Written by Lucas Sinclair.
 6MIT Licensed.
 7Contact at www.sinclair.bio
 8"""
 9
10# Built-in modules #
11import multiprocessing, threading
12
13# First party modules #
14from autopaths.file_path import FilePath
15from fasta import FASTA
16
17###############################################################################
class CoreSearch(object):
    """
    A class to inherit from.
    Contains methods that are common to all search algorithm implementations.
    Currently: BLASTquery and VSEARCHquery inherit from this.

    This class does not define `run()` itself, but `non_block_run()` uses
    `self.run` as its thread target, so inheriting classes must provide it.
    """

    # Appended to the query prefix when an output path has to be derived #
    extension = 'out'

    def __repr__(self):
        """Show the class name together with the query this search runs on."""
        return '<%s object on %s>' % (self.__class__.__name__, self.query)

    def __bool__(self):
        """The truth value of a search object is the one of its `out_path`."""
        return bool(self.out_path)

    def __init__(self,
                 query_path,
                 db_path,
                 seq_type     = 'prot',               # Either 'prot' or 'nucl', the seq type of the query_path file
                 params       = None,                 # Add extra params for the command line
                 algorithm    = "blastn",             # Either "blastn" or "blastp", will be auto-determined with seq_type
                 out_path     = None,                 # Where the results will be dropped
                 executable   = None,                 # If you want a specific binary give the path
                 cpus         = None,                 # The number of threads to use
                 num          = None,                 # When parallelized, the number of this thread
                 _out         = None,                 # Store the stdout at this path
                 _err         = None):                # Store the stderr at this path
        """
        Record the settings of the search and work out the output location.

        * When `out_path` is None, it is built from the query's `prefix_path`
          and the class level `extension`.
        * When `out_path` ends with a slash, it is taken as a directory and
          the query's `prefix` plus the `extension` are appended to it.
        * When `cpus` is None, all available cores are used, capped at 32.
        * When `_out` (or `_err`) is exactly True, the stream is stored next
          to the output file with '.stdout' (or '.stderr') appended.
        """
        # Main input #
        self.query = FASTA(query_path)
        # The database to search against #
        self.db = FilePath(db_path)
        # Other attributes #
        self.seq_type     = seq_type
        self.algorithm    = algorithm
        self.num          = num
        self.params       = params if params else {}
        # The standard output and error #
        self._out         = _out
        self._err         = _err
        # Output defaults #
        if out_path is None:
            self.out_path = self.query.prefix_path + self.extension
        elif out_path.endswith('/'):
            self.out_path = out_path + self.query.prefix + self.extension
        else:
            self.out_path = out_path
        # Make it a file path #
        self.out_path = FilePath(self.out_path)
        # Executable #
        # NOTE(review): `executable` is None by default and is still passed
        # to FilePath here -- confirm what FilePath(None) evaluates to.
        self.executable = FilePath(executable)
        # Cores to use #
        if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
        else:            self.cpus = cpus
        # Save the output somewhere #
        if self._out is True:
            self._out = self.out_path + '.stdout'
        if self._err is True:
            self._err = self.out_path + '.stderr'

    #-------------------------------- RUNNING --------------------------------#
    def non_block_run(self):
        """Special method to run the query in a thread without blocking."""
        self.thread = threading.Thread(target=self.run)
        self.thread.daemon = True # So that they die when we die
        self.thread.start()

    def wait(self):
        """
        If you have run the query in a non-blocking way, call this method to pause
        until the query is finished.

        A KeyboardInterrupt received while waiting is caught and reported
        with a printed message instead of being propagated.
        """
        # We set a large timeout so that we can Ctrl-C the process, but we
        # never exceed `threading.TIMEOUT_MAX`, as the threading documentation
        # states that larger timeouts raise an OverflowError.
        timeout = min(999999999, threading.TIMEOUT_MAX)
        try:
            self.thread.join(timeout)
        except KeyboardInterrupt:
            # We format with '%s' because `num` is None by default and '%i'
            # would raise a TypeError right inside this handler.
            print("Stopped waiting on BLAST thread number %s" % self.num)
class CoreSearch:
class CoreSearch(object):
    """
    A class to inherit from.
    Contains methods that are common to all search algorithm implementations.
    Currently: BLASTquery and VSEARCHquery inherit from this.

    This class does not define `run()` itself, but `non_block_run()` uses
    `self.run` as its thread target, so inheriting classes must provide it.
    """

    # Appended to the query prefix when an output path has to be derived #
    extension = 'out'

    def __repr__(self):
        """Show the class name together with the query this search runs on."""
        return '<%s object on %s>' % (self.__class__.__name__, self.query)

    def __bool__(self):
        """The truth value of a search object is the one of its `out_path`."""
        return bool(self.out_path)

    def __init__(self,
                 query_path,
                 db_path,
                 seq_type     = 'prot',               # Either 'prot' or 'nucl', the seq type of the query_path file
                 params       = None,                 # Add extra params for the command line
                 algorithm    = "blastn",             # Either "blastn" or "blastp", will be auto-determined with seq_type
                 out_path     = None,                 # Where the results will be dropped
                 executable   = None,                 # If you want a specific binary give the path
                 cpus         = None,                 # The number of threads to use
                 num          = None,                 # When parallelized, the number of this thread
                 _out         = None,                 # Store the stdout at this path
                 _err         = None):                # Store the stderr at this path
        """
        Record the settings of the search and work out the output location.

        * When `out_path` is None, it is built from the query's `prefix_path`
          and the class level `extension`.
        * When `out_path` ends with a slash, it is taken as a directory and
          the query's `prefix` plus the `extension` are appended to it.
        * When `cpus` is None, all available cores are used, capped at 32.
        * When `_out` (or `_err`) is exactly True, the stream is stored next
          to the output file with '.stdout' (or '.stderr') appended.
        """
        # Main input #
        self.query = FASTA(query_path)
        # The database to search against #
        self.db = FilePath(db_path)
        # Other attributes #
        self.seq_type     = seq_type
        self.algorithm    = algorithm
        self.num          = num
        self.params       = params if params else {}
        # The standard output and error #
        self._out         = _out
        self._err         = _err
        # Output defaults #
        if out_path is None:
            self.out_path = self.query.prefix_path + self.extension
        elif out_path.endswith('/'):
            self.out_path = out_path + self.query.prefix + self.extension
        else:
            self.out_path = out_path
        # Make it a file path #
        self.out_path = FilePath(self.out_path)
        # Executable #
        # NOTE(review): `executable` is None by default and is still passed
        # to FilePath here -- confirm what FilePath(None) evaluates to.
        self.executable = FilePath(executable)
        # Cores to use #
        if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
        else:            self.cpus = cpus
        # Save the output somewhere #
        if self._out is True:
            self._out = self.out_path + '.stdout'
        if self._err is True:
            self._err = self.out_path + '.stderr'

    #-------------------------------- RUNNING --------------------------------#
    def non_block_run(self):
        """Special method to run the query in a thread without blocking."""
        self.thread = threading.Thread(target=self.run)
        self.thread.daemon = True # So that they die when we die
        self.thread.start()

    def wait(self):
        """
        If you have run the query in a non-blocking way, call this method to pause
        until the query is finished.

        A KeyboardInterrupt received while waiting is caught and reported
        with a printed message instead of being propagated.
        """
        # We set a large timeout so that we can Ctrl-C the process, but we
        # never exceed `threading.TIMEOUT_MAX`, as the threading documentation
        # states that larger timeouts raise an OverflowError.
        timeout = min(999999999, threading.TIMEOUT_MAX)
        try:
            self.thread.join(timeout)
        except KeyboardInterrupt:
            # We format with '%s' because `num` is None by default and '%i'
            # would raise a TypeError right inside this handler.
            print("Stopped waiting on BLAST thread number %s" % self.num)

A base class to inherit from. Contains methods that are common to all search algorithm implementations. Currently, BLASTquery and VSEARCHquery inherit from this.

CoreSearch( query_path, db_path, seq_type='prot', params=None, algorithm='blastn', out_path=None, executable=None, cpus=None, num=None, _out=None, _err=None)
34    def __init__(self,
35                 query_path,
36                 db_path,
37                 seq_type     = 'prot' or 'nucl',     # The seq type of the query_path file
38                 params       = None,                 # Add extra params for the command line
39                 algorithm    = "blastn" or "blastp", # Will be auto-determined with seq_type
40                 out_path     = None,                 # Where the results will be dropped
41                 executable   = None,                 # If you want a specific binary give the path
42                 cpus         = None,                 # The number of threads to use
43                 num          = None,                 # When parallelized, the number of this thread
44                 _out         = None,                 # Store the stdout at this path
45                 _err         = None):                # Store the stderr at this path
46        # Main input #
47        self.query = FASTA(query_path)
48        # The database to search against #
49        self.db = FilePath(db_path)
50        # Other attributes #
51        self.seq_type     = seq_type
52        self.algorithm    = algorithm
53        self.num          = num
54        self.params       = params if params else {}
55        # The standard output and error #
56        self._out         = _out
57        self._err         = _err
58        # Output defaults #
59        if out_path is None:
60            self.out_path = self.query.prefix_path + self.extension
61        elif out_path.endswith('/'):
62            self.out_path = out_path + self.query.prefix + self.extension
63        else:
64            self.out_path = out_path
65        # Make it a file path #
66        self.out_path = FilePath(self.out_path)
67        # Executable #
68        self.executable = FilePath(executable)
69        # Cores to use #
70        if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
71        else:            self.cpus = cpus
72        # Save the output somewhere #
73        if self._out is True:
74            self._out = self.out_path + '.stdout'
75        if self._err is True:
76            self._err = self.out_path + '.stderr'
def non_block_run(self):
79    def non_block_run(self):
80        """Special method to run the query in a thread without blocking."""
81        self.thread = threading.Thread(target=self.run)
82        self.thread.daemon = True # So that they die when we die
83        self.thread.start()

Special method to run the query in a thread without blocking.

def wait(self):
85    def wait(self):
86        """
87        If you have run the query in a non-blocking way, call this method to pause
88        until the query is finished.
89        """
90        try:
91            # We set a large timeout so that we can Ctrl-C the process
92            self.thread.join(999999999)
93        except KeyboardInterrupt:
94            print("Stopped waiting on BLAST thread number %i" % self.num)

If you have run the query in a non-blocking way, call this method to pause until the query is finished.