seqsearch.databases.refseq

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

 1#!/usr/bin/env python3
 2# -*- coding: utf-8 -*-
 3
 4"""
 5Written by Lucas Sinclair.
 6MIT Licensed.
 7Contact at www.sinclair.bio
 8"""
 9
10# Built-in modules #
11
12# Internal modules #
13from seqsearch.databases import Database
14
15###############################################################################
class RefSeqBacteriaProtNR(Database):
    """
    Non-redundant RefSeq protein sequences, restricted to bacteria.

    The raw sequences are downloaded by FTP and then formatted as a
    blast database.
    """

    # Short identifier of this database #
    short_name = "refseq_bact_prot_nr"

    # Host and directory from which the raw sequences are fetched by FTP #
    ftp_url = "ftp.ncbi.nlm.nih.gov"
    ftp_dir = "/refseq/release/bacteria/"

    # Pattern describing the file names of interest (presumably applied
    # by the parent class to the files found in `ftp_dir` -- confirm) #
    pattern = "bacteria.nonredundant_protein.*.protein.faa.gz"
27
28###############################################################################
class RefSeqArchaeaProtNR(Database):
    """
    The RefSeq sequences for only archaea, only protein, and only the
    non-redundant version. We will download the raw sequences by FTP
    and format them as a blast database.
    """

    # Short identifier of this database #
    short_name = "refseq_arch_prot_nr"

    # Host and directory from which the raw sequences are fetched by FTP #
    ftp_url    = "ftp.ncbi.nlm.nih.gov"
    ftp_dir    = "/refseq/release/archaea/"

    # Pattern describing the file names of interest (presumably applied
    # by the parent class to the files found in `ftp_dir` -- confirm) #
    pattern    = 'archaea.nonredundant_protein.*.protein.faa.gz'
40
41###############################################################################
# Module-level instances of the two databases defined in this file, #
# each one built with the single argument 'prot' #
refseq_bact_prot_nr = RefSeqBacteriaProtNR("prot")
refseq_arch_prot_nr = RefSeqArchaeaProtNR("prot")
class RefSeqBacteriaProtNR(seqsearch.databases.Database):
class RefSeqBacteriaProtNR(Database):
    """
    Non-redundant RefSeq protein sequences, restricted to bacteria.

    The raw sequences are downloaded by FTP and then formatted as a
    blast database.
    """

    # Short identifier of this database #
    short_name = "refseq_bact_prot_nr"

    # Host and directory from which the raw sequences are fetched by FTP #
    ftp_url = "ftp.ncbi.nlm.nih.gov"
    ftp_dir = "/refseq/release/bacteria/"

    # Pattern describing the file names of interest (presumably applied
    # by the parent class to the files found in `ftp_dir` -- confirm) #
    pattern = "bacteria.nonredundant_protein.*.protein.faa.gz"

The RefSeq sequences for only bacteria, only protein, and only the non-redundant version. We will download the raw sequences by FTP and format them as a blast database.

class RefSeqArchaeaProtNR(seqsearch.databases.Database):
class RefSeqArchaeaProtNR(Database):
    """
    The RefSeq sequences for only archaea, only protein, and only the
    non-redundant version. We will download the raw sequences by FTP
    and format them as a blast database.
    """

    # Short identifier of this database #
    short_name = "refseq_arch_prot_nr"

    # Host and directory from which the raw sequences are fetched by FTP #
    ftp_url    = "ftp.ncbi.nlm.nih.gov"
    ftp_dir    = "/refseq/release/archaea/"

    # Pattern describing the file names of interest (presumably applied
    # by the parent class to the files found in `ftp_dir` -- confirm) #
    pattern    = 'archaea.nonredundant_protein.*.protein.faa.gz'

The RefSeq sequences for only archaea, only protein, and only the non-redundant version. We will download the raw sequences by FTP and format them as a blast database.