seqsearch.databases.nt
Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Written by Lucas Sinclair.
MIT Licensed.
Contact at www.sinclair.bio
"""

# Built-in modules #

# Internal modules #
from seqsearch.databases import Database
from seqsearch.search import SeqSearch

# First party modules #
from autopaths.tmp_path import new_temp_dir
from fasta import FASTA

###############################################################################
class NucleotideDatabase(Database):
    """
    NCBI's Nucleotide ("nt") database: a collection of sequences gathered
    from several sources, including GenBank, RefSeq, TPA and PDB.

    Installation steps:

        from seqsearch.databases.nt import nt
        nt.download()
        nt.untargz()
        nt.test()

    The files end up in ~/databases/nt
    """

    # Identification of the database #
    short_name = "nt"
    long_name  = "The Nucleotide database (NCBI)"

    # Remote location of the archives and the pattern their names follow #
    ftp_url    = "ftp.ncbi.nlm.nih.gov"
    ftp_dir    = "/blast/db/"
    pattern    = 'nt.*.tar.gz'

    def test(self):
        """Run one query sequence against the database to check it works."""
        # Scratch directory that receives both the query and the result #
        tmp_dir = new_temp_dir()
        # An arbitrary query (Homo sapiens mRNA for prepro cortistatin) #
        raw_seq = """ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCC
                     CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGC
                     CTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGG
                     AAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCC
                     CTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAG
                     TTTAATTACAGACCTGAA"""
        # The literal is wrapped for readability: drop line breaks and padding #
        query_seq = raw_seq.replace('\n', '').replace(' ', '')
        # Write the query to a FASTA file #
        query = FASTA(tmp_dir + 'input.fasta')
        query.create()
        query.add_str(query_seq, "My test sequence")
        query.close()
        # Where the search result goes #
        result_path = tmp_dir + 'output.blast'
        # Extra options handed over to the search #
        blast_params = {
            '-outfmt':        0,
            '-evalue':        1e-5,
            '-perc_identity': 99,
        }
        # Assemble the search, then launch it #
        search = SeqSearch(
            query,
            self.blast_db,
            'nucl',
            'blast',
            num_threads = 1,
            out_path    = result_path,
            params      = blast_params,
        )
        search.run()
        # Report where the files were written #
        print("Success", tmp_dir)

###############################################################################
nt = NucleotideDatabase("nucl")
class NucleotideDatabase(Database):
    """
    NCBI's Nucleotide ("nt") database: a collection of sequences gathered
    from several sources, including GenBank, RefSeq, TPA and PDB.

    Installation steps:

        from seqsearch.databases.nt import nt
        nt.download()
        nt.untargz()
        nt.test()

    The files end up in ~/databases/nt
    """

    # Identification of the database #
    short_name = "nt"
    long_name  = "The Nucleotide database (NCBI)"

    # Remote location of the archives and the pattern their names follow #
    ftp_url    = "ftp.ncbi.nlm.nih.gov"
    ftp_dir    = "/blast/db/"
    pattern    = 'nt.*.tar.gz'

    def test(self):
        """Run one query sequence against the database to check it works."""
        # Scratch directory that receives both the query and the result #
        tmp_dir = new_temp_dir()
        # An arbitrary query (Homo sapiens mRNA for prepro cortistatin) #
        raw_seq = """ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCC
                     CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGC
                     CTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGG
                     AAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCC
                     CTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAG
                     TTTAATTACAGACCTGAA"""
        # The literal is wrapped for readability: drop line breaks and padding #
        query_seq = raw_seq.replace('\n', '').replace(' ', '')
        # Write the query to a FASTA file #
        query = FASTA(tmp_dir + 'input.fasta')
        query.create()
        query.add_str(query_seq, "My test sequence")
        query.close()
        # Where the search result goes #
        result_path = tmp_dir + 'output.blast'
        # Extra options handed over to the search #
        blast_params = {
            '-outfmt':        0,
            '-evalue':        1e-5,
            '-perc_identity': 99,
        }
        # Assemble the search, then launch it #
        search = SeqSearch(
            query,
            self.blast_db,
            'nucl',
            'blast',
            num_threads = 1,
            out_path    = result_path,
            params      = blast_params,
        )
        search.run()
        # Report where the files were written #
        print("Success", tmp_dir)
The Nucleotide database is a collection of sequences from several sources, including GenBank, RefSeq, TPA and PDB.
To install:
from seqsearch.databases.nt import nt
nt.download()
nt.untargz()
nt.test()
The database files will be placed in ~/databases/nt
def test(self):
def test(self):
    """Run one query sequence against the database to check it works."""
    # Scratch directory that receives both the query and the result #
    tmp_dir = new_temp_dir()
    # An arbitrary query (Homo sapiens mRNA for prepro cortistatin) #
    raw_seq = """ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCC
                 CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGC
                 CTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGG
                 AAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCC
                 CTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAG
                 TTTAATTACAGACCTGAA"""
    # The literal is wrapped for readability: drop line breaks and padding #
    query_seq = raw_seq.replace('\n', '').replace(' ', '')
    # Write the query to a FASTA file #
    query = FASTA(tmp_dir + 'input.fasta')
    query.create()
    query.add_str(query_seq, "My test sequence")
    query.close()
    # Where the search result goes #
    result_path = tmp_dir + 'output.blast'
    # Extra options handed over to the search #
    blast_params = {
        '-outfmt':        0,
        '-evalue':        1e-5,
        '-perc_identity': 99,
    }
    # Assemble the search, then launch it #
    search = SeqSearch(
        query,
        self.blast_db,
        'nucl',
        'blast',
        num_threads = 1,
        out_path    = result_path,
        params      = blast_params,
    )
    search.run()
    # Report where the files were written #
    print("Success", tmp_dir)
Search one sequence, and see if it works.