seqsearch.databases.nt

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

 1#!/usr/bin/env python3
 2# -*- coding: utf-8 -*-
 3
 4"""
 5Written by Lucas Sinclair.
 6MIT Licensed.
 7Contact at www.sinclair.bio
 8"""
 9
10# Built-in modules #
11
12# Internal modules #
13from seqsearch.databases import Database
14from seqsearch.search    import SeqSearch
15
16# First party modules #
17from autopaths.tmp_path import new_temp_dir
18from fasta import FASTA
19
20###############################################################################
class NucleotideDatabase(Database):
    """
    NCBI's nucleotide collection ("nt"): sequences gathered from several
    sources, including GenBank, RefSeq, TPA and PDB.

    To install:

        from seqsearch.databases.nt import nt
        nt.download()
        nt.untargz()
        nt.test()

    The database will be placed in ~/databases/nt
    """

    short_name = "nt"
    long_name  = "The Nucleotide database (NCBI)"
    ftp_url    = "ftp.ncbi.nlm.nih.gov"
    ftp_dir    = "/blast/db/"
    pattern    = 'nt.*.tar.gz'

    def test(self):
        """Query the database with one known sequence as a smoke test."""
        # Scratch directory that will hold the query and the results #
        workdir = new_temp_dir()
        # A randomly chosen sequence (Homo sapiens mRNA for prepro cortistatin) #
        raw = """ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCC
        CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGC
        CTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGG
        AAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCC
        CTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAG
        TTTAATTACAGACCTGAA"""
        # Drop the line breaks and the indentation inherited from the literal #
        seq = raw.replace('\n', '').replace(' ', '')
        # Write the query sequence to a FASTA file #
        query = FASTA(workdir + 'input.fasta')
        query.create()
        query.add_str(seq, "My test sequence")
        query.close()
        # Where the search output should go #
        results = workdir + 'output.blast'
        # Extra parameters handed to the search #
        extras = {'-outfmt': 0, '-evalue': 1e-5, '-perc_identity': 99}
        # Assemble the search against this database #
        job = SeqSearch(query, self.blast_db, 'nucl', 'blast',
                        num_threads = 1,
                        out_path    = results,
                        params      = extras)
        # Run it #
        job.run()
        # Print the outcome along with the scratch directory #
        print("Success", workdir)

79###############################################################################
# The module-level instance that users import (`from seqsearch.databases.nt import nt`) #
# NOTE(review): "nucl" is presumably the sequence type expected by Database -- confirm #
nt = NucleotideDatabase("nucl")
class NucleotideDatabase(seqsearch.databases.Database):
class NucleotideDatabase(Database):
    """
    NCBI's nucleotide collection ("nt"): sequences gathered from several
    sources, including GenBank, RefSeq, TPA and PDB.

    To install:

        from seqsearch.databases.nt import nt
        nt.download()
        nt.untargz()
        nt.test()

    The database will be placed in ~/databases/nt
    """

    short_name = "nt"
    long_name  = "The Nucleotide database (NCBI)"
    ftp_url    = "ftp.ncbi.nlm.nih.gov"
    ftp_dir    = "/blast/db/"
    pattern    = 'nt.*.tar.gz'

    def test(self):
        """Query the database with one known sequence as a smoke test."""
        # Scratch directory that will hold the query and the results #
        workdir = new_temp_dir()
        # A randomly chosen sequence (Homo sapiens mRNA for prepro cortistatin) #
        raw = """ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCC
        CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGC
        CTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGG
        AAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCC
        CTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAG
        TTTAATTACAGACCTGAA"""
        # Drop the line breaks and the indentation inherited from the literal #
        seq = raw.replace('\n', '').replace(' ', '')
        # Write the query sequence to a FASTA file #
        query = FASTA(workdir + 'input.fasta')
        query.create()
        query.add_str(seq, "My test sequence")
        query.close()
        # Where the search output should go #
        results = workdir + 'output.blast'
        # Extra parameters handed to the search #
        extras = {'-outfmt': 0, '-evalue': 1e-5, '-perc_identity': 99}
        # Assemble the search against this database #
        job = SeqSearch(query, self.blast_db, 'nucl', 'blast',
                        num_threads = 1,
                        out_path    = results,
                        params      = extras)
        # Run it #
        job.run()
        # Print the outcome along with the scratch directory #
        print("Success", workdir)

The Nucleotide database is a collection of sequences from several sources, including GenBank, RefSeq, TPA and PDB.

To install:

from seqsearch.databases.nt import nt
nt.download()
nt.untargz()
nt.test()

The database will be placed in ~/databases/nt

def test(self):
43    def test(self):
44        """Search one sequence, and see if it works."""
45        # New directory #
46        directory = new_temp_dir()
47        # A randomly chosen sequence (Homo sapiens mRNA for prepro cortistatin) #
48        seq = """ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCC
49        CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGC
50        CTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGG
51        AAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCC
52        CTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAG
53        TTTAATTACAGACCTGAA"""
54        seq = seq.replace('\n','')
55        seq = seq.replace(' ','')
56        # Make input #
57        input_fasta = FASTA(directory + 'input.fasta')
58        input_fasta.create()
59        input_fasta.add_str(seq, "My test sequence")
60        input_fasta.close()
61        # Make output #
62        out_path = directory + 'output.blast'
63        # Make extras parameters #
64        params = {'-outfmt': 0,
65                  '-evalue': 1e-5,
66                  '-perc_identity': 99}
67        # Make the search #
68        search = SeqSearch(input_fasta,
69                           self.blast_db,
70                           'nucl',
71                           'blast',
72                           num_threads = 1,
73                           out_path    = out_path,
74                           params      = params)
75        # Run it #
76        search.run()
77        # Print result #
78        print("Success", directory)

Search one sequence, and see if it works.