seqsearch.databases.silva

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

 1#!/usr/bin/env python3
 2# -*- coding: utf-8 -*-
 3
 4"""
 5Written by Lucas Sinclair.
 6MIT Licensed.
 7Contact at www.sinclair.bio
 8"""
 9
10# Built-in modules #
11import os
12
13# First party modules #
14from seqsearch.databases import Database
15from fasta import FASTA
16from autopaths.auto_paths import AutoPaths
17
18# Third party modules #
19
# Constants #
# Default parent directory for the databases: $HOME when it is set and
# non-empty, otherwise whatever `os.path.expanduser` resolves for the current
# user. A literal '~' is never expanded once concatenated into a path, so it
# must not be used as the fallback value.
home = (os.environ.get('HOME') or os.path.expanduser('~')) + '/'
22
23###############################################################################
class Silva(Database):
    """
    SILVA provides comprehensive, quality checked and regularly updated
    datasets of aligned small (16S/18S, SSU) and large subunit (23S/28S, LSU)
    ribosomal RNA (rRNA) sequences for all three domains of life
    (Bacteria, Archaea and Eukarya).
    SILVA are the official databases of the software package ARB.

    https://www.arb-silva.de

    To install:
        from seqsearch.databases.silva import silva
        silva.download()
        silva.unzip()

    The files are placed in `<base_dir>databases/silva_<version>/`, where
    `base_dir` defaults to the user's home directory.
    """

    view_url   = "https://www.arb-silva.de/no_cache/download/archive/"
    base_url   = "https://www.arb-silva.de/fileadmin/silva_databases/"
    short_name = "silva"

    all_paths = """
    /test.txt
    """

    def __init__(self, version, seq_type, base_dir=None):
        """
        Set up the local paths and remote file names for one SILVA release.

        `version` is the release identifier (e.g. "128"); it is converted to
        a string so that an integer is accepted too.
        `seq_type` is only stored on the instance; nothing in this class
        reads it.
        `base_dir` is the parent directory under which `databases/` is
        created; it defaults to the module-level `home`. A trailing slash is
        appended when it is missing.
        """
        # Attributes #
        self.version    = str(version)
        self.seq_type   = seq_type
        # Shadows the class attribute with e.g. "silva_128" on the instance #
        self.short_name = self.short_name + "_" + self.version
        # Base directory #
        if base_dir is None: base_dir = home
        base_dir = str(base_dir)
        # Paths are built by plain concatenation, so a non-empty base
        # directory has to end with a separator #
        if base_dir and not base_dir.endswith('/'): base_dir += '/'
        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
        self.p        = AutoPaths(self.base_dir, self.all_paths)
        # URL (relative to `base_url`) #
        self.url  = "release_%s/Exports/"  % self.version
        # The database: `_dest` is the gzipped download, the other is the
        # same path without the ".gz" suffix #
        self.nr99_name = "SILVA_%s_SSURef_Nr99_tax_silva.fasta.gz" % self.version
        self.nr99_dest = FASTA(self.base_dir + self.nr99_name)
        self.nr99      = FASTA(self.base_dir + self.nr99_name[:-3])
        # The alignment: same layout as above #
        self.aligned_name = "SILVA_%s_SSURef_Nr99_tax_silva_full_align_trunc.fasta.gz" % self.version
        self.aligned_dest = FASTA(self.base_dir + self.aligned_name)
        self.aligned      = FASTA(self.base_dir + self.aligned_name[:-3])

    def download(self):
        """
        Download the two gzipped FASTA files of this release with `wget`.

        Any previously downloaded archive at the destination paths is
        removed first. Raises ImportError if the `wget` package is missing.
        """
        # Import before touching the disk: a missing dependency must not
        # cost the user the archives that are already present #
        import wget
        self.nr99_dest.directory.create(safe=True)
        targets = ((self.nr99_name,    self.nr99_dest),
                   (self.aligned_name, self.aligned_dest))
        # Remove both old archives before starting any transfer #
        for name, dest in targets: dest.remove(safe=True)
        for name, dest in targets:
            source = self.base_url + self.url + name
            print("\nDownloading", source)
            wget.download(source, out=dest.path)

    def unzip(self):
        """
        Decompress both downloaded archives next to their gzipped versions.

        After each extraction `permissions.only_readable()` is called on the
        result (presumably making the file read-only; see the FASTA class).
        """
        pairs = ((self.nr99_dest,    self.nr99),
                 (self.aligned_dest, self.aligned))
        for archive, extracted in pairs:
            archive.ungzip_to(extracted)
            extracted.permissions.only_readable()
86###############################################################################
# Ready-to-use module-level instance for release 128 (nucleotide sequences) #
silva = Silva(version="128", seq_type="nucl")
class Silva(seqsearch.databases.Database):
class Silva(Database):
    """
    SILVA provides comprehensive, quality checked and regularly updated
    datasets of aligned small (16S/18S, SSU) and large subunit (23S/28S, LSU)
    ribosomal RNA (rRNA) sequences for all three domains of life
    (Bacteria, Archaea and Eukarya).
    SILVA are the official databases of the software package ARB.

    https://www.arb-silva.de

    To install:
        from seqsearch.databases.silva import silva
        silva.download()
        silva.unzip()

    The files are placed in `<base_dir>databases/silva_<version>/`, where
    `base_dir` defaults to the user's home directory.
    """

    view_url   = "https://www.arb-silva.de/no_cache/download/archive/"
    base_url   = "https://www.arb-silva.de/fileadmin/silva_databases/"
    short_name = "silva"

    all_paths = """
    /test.txt
    """

    def __init__(self, version, seq_type, base_dir=None):
        """
        Set up the local paths and remote file names for one SILVA release.

        `version` is the release identifier (e.g. "128"); it is converted to
        a string so that an integer is accepted too.
        `seq_type` is only stored on the instance; nothing in this class
        reads it.
        `base_dir` is the parent directory under which `databases/` is
        created; it defaults to the module-level `home`. A trailing slash is
        appended when it is missing.
        """
        # Attributes #
        self.version    = str(version)
        self.seq_type   = seq_type
        # Shadows the class attribute with e.g. "silva_128" on the instance #
        self.short_name = self.short_name + "_" + self.version
        # Base directory #
        if base_dir is None: base_dir = home
        base_dir = str(base_dir)
        # Paths are built by plain concatenation, so a non-empty base
        # directory has to end with a separator #
        if base_dir and not base_dir.endswith('/'): base_dir += '/'
        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
        self.p        = AutoPaths(self.base_dir, self.all_paths)
        # URL (relative to `base_url`) #
        self.url  = "release_%s/Exports/"  % self.version
        # The database: `_dest` is the gzipped download, the other is the
        # same path without the ".gz" suffix #
        self.nr99_name = "SILVA_%s_SSURef_Nr99_tax_silva.fasta.gz" % self.version
        self.nr99_dest = FASTA(self.base_dir + self.nr99_name)
        self.nr99      = FASTA(self.base_dir + self.nr99_name[:-3])
        # The alignment: same layout as above #
        self.aligned_name = "SILVA_%s_SSURef_Nr99_tax_silva_full_align_trunc.fasta.gz" % self.version
        self.aligned_dest = FASTA(self.base_dir + self.aligned_name)
        self.aligned      = FASTA(self.base_dir + self.aligned_name[:-3])

    def download(self):
        """
        Download the two gzipped FASTA files of this release with `wget`.

        Any previously downloaded archive at the destination paths is
        removed first. Raises ImportError if the `wget` package is missing.
        """
        # Import before touching the disk: a missing dependency must not
        # cost the user the archives that are already present #
        import wget
        self.nr99_dest.directory.create(safe=True)
        targets = ((self.nr99_name,    self.nr99_dest),
                   (self.aligned_name, self.aligned_dest))
        # Remove both old archives before starting any transfer #
        for name, dest in targets: dest.remove(safe=True)
        for name, dest in targets:
            source = self.base_url + self.url + name
            print("\nDownloading", source)
            wget.download(source, out=dest.path)

    def unzip(self):
        """
        Decompress both downloaded archives next to their gzipped versions.

        After each extraction `permissions.only_readable()` is called on the
        result (presumably making the file read-only; see the FASTA class).
        """
        pairs = ((self.nr99_dest,    self.nr99),
                 (self.aligned_dest, self.aligned))
        for archive, extracted in pairs:
            archive.ungzip_to(extracted)
            extracted.permissions.only_readable()

SILVA provides comprehensive, quality checked and regularly updated datasets of aligned small (16S/18S, SSU) and large subunit (23S/28S, LSU) ribosomal RNA (rRNA) sequences for all three domains of life (Bacteria, Archaea and Eukarya). SILVA are the official databases of the software package ARB.

https://www.arb-silva.de

To install:

    from seqsearch.databases.silva import silva
    silva.download()
    silva.unzip()

The database files will be placed in ~/databases/silva_xxx/, where xxx is the release version.

Silva(version, seq_type, base_dir=None)
51    def __init__(self, version, seq_type, base_dir=None):
52        # Attributes #
53        self.version    = version
54        self.seq_type   = seq_type
55        self.short_name = self.short_name + "_" + self.version
56        # Base directory #
57        if base_dir is None: base_dir = home
58        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
59        self.p        = AutoPaths(self.base_dir, self.all_paths)
60        # URL #
61        self.url  = "release_%s/Exports/"  % self.version
62        # The database #
63        self.nr99_name = "SILVA_%s_SSURef_Nr99_tax_silva.fasta.gz" % self.version
64        self.nr99_dest = FASTA(self.base_dir + self.nr99_name)
65        self.nr99      = FASTA(self.base_dir + self.nr99_name[:-3])
66        # The alignment #
67        self.aligned_name = "SILVA_%s_SSURef_Nr99_tax_silva_full_align_trunc.fasta.gz" % self.version
68        self.aligned_dest = FASTA(self.base_dir + self.aligned_name)
69        self.aligned      = FASTA(self.base_dir + self.aligned_name[:-3])
def download(self):
71    def download(self):
72        self.nr99_dest.directory.create(safe=True)
73        self.nr99_dest.remove(safe=True)
74        self.aligned_dest.remove(safe=True)
75        import wget
76        print("\nDownloading", self.base_url + self.url + self.nr99_name)
77        wget.download(self.base_url + self.url + self.nr99_name,    out=self.nr99_dest.path)
78        print("\nDownloading", self.base_url + self.url + self.aligned_name)
79        wget.download(self.base_url + self.url + self.aligned_name, out=self.aligned_dest.path)

Retrieve all files from the SILVA download site (they are fetched over HTTPS with `wget`, not FTP).

def unzip(self):
81    def unzip(self):
82        self.nr99_dest.ungzip_to(self.nr99)
83        self.nr99.permissions.only_readable()
84        self.aligned_dest.ungzip_to(self.aligned)
85        self.aligned.permissions.only_readable()