seqsearch.databases.silva
Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Written by Lucas Sinclair.
MIT Licensed.
Contact at www.sinclair.bio
"""

# Built-in modules #
import os

# First party modules #
from seqsearch.databases import Database
from fasta import FASTA
from autopaths.auto_paths import AutoPaths

# Third party modules #

# Constants #
home = os.environ.get('HOME', '~') + '/'

###############################################################################
class Silva(Database):
    """
    SILVA provides comprehensive, quality checked and regularly updated
    datasets of aligned small (16S/18S, SSU) and large subunit (23S/28S, LSU)
    ribosomal RNA (rRNA) sequences for all three domains of life
    (Bacteria, Archaea and Eukarya).
    SILVA are the official databases of the software package ARB.

    https://www.arb-silva.de

    To install:
        from seqsearch.databases.silva import silva
        silva.download()
        silva.unzip()

    The files are placed in ~/databases/silva_xxx/ where "xxx" is the
    release version (or under `base_dir` when one is given).
    """

    view_url   = "https://www.arb-silva.de/no_cache/download/archive/"
    base_url   = "https://www.arb-silva.de/fileadmin/silva_databases/"
    short_name = "silva"

    all_paths = """
    /test.txt
    """

    def __init__(self, version, seq_type, base_dir=None):
        """
        Prepare the local paths and remote file names for one SILVA release.

        :param version:  The release identifier, e.g. "128". It is inserted
                         in the directory name, the URL and the file names.
        :param seq_type: Stored as-is on the instance (e.g. "nucl"); it is
                         not otherwise used in this class.
        :param base_dir: The directory under which 'databases/silva_<version>/'
                         is placed. Defaults to the module-level `home`.
                         A missing trailing slash is added automatically.
        """
        # Attributes #
        self.version    = version
        self.seq_type   = seq_type
        # Use str() so that a numeric release such as 128 also works #
        self.short_name = self.short_name + "_" + str(self.version)
        # Base directory #
        if base_dir is None: base_dir = home
        # Paths are built by plain concatenation, so make sure the separator
        # is there. An empty string is left alone (stays a relative path). #
        if base_dir and not base_dir.endswith('/'): base_dir += '/'
        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
        self.p        = AutoPaths(self.base_dir, self.all_paths)
        # URL #
        self.url = "release_%s/Exports/" % self.version
        # The database (the [:-3] drops the '.gz' suffix) #
        self.nr99_name = "SILVA_%s_SSURef_Nr99_tax_silva.fasta.gz" % self.version
        self.nr99_dest = FASTA(self.base_dir + self.nr99_name)
        self.nr99      = FASTA(self.base_dir + self.nr99_name[:-3])
        # The alignment (the [:-3] drops the '.gz' suffix) #
        self.aligned_name = "SILVA_%s_SSURef_Nr99_tax_silva_full_align_trunc.fasta.gz" % self.version
        self.aligned_dest = FASTA(self.base_dir + self.aligned_name)
        self.aligned      = FASTA(self.base_dir + self.aligned_name[:-3])

    def download(self):
        """
        Fetch the two gzipped FASTA files of this release (the Nr99 sequences
        and the truncated full alignment) from `base_url` into `base_dir`.
        """
        # Prepare the destination and discard any previous archives #
        self.nr99_dest.directory.create(safe=True)
        self.nr99_dest.remove(safe=True)
        self.aligned_dest.remove(safe=True)
        # Imported here so the module can be loaded without `wget` #
        import wget
        # Both files live in the same remote directory #
        remote_dir = self.base_url + self.url
        for file_name, target in ((self.nr99_name,    self.nr99_dest),
                                  (self.aligned_name, self.aligned_dest)):
            source = remote_dir + file_name
            print("\nDownloading", source)
            wget.download(source, out=target.path)

    def unzip(self):
        """
        Decompress the two downloaded archives next to themselves and then
        call `permissions.only_readable()` on each resulting file.
        """
        self.nr99_dest.ungzip_to(self.nr99)
        self.nr99.permissions.only_readable()
        self.aligned_dest.ungzip_to(self.aligned)
        self.aligned.permissions.only_readable()

###############################################################################
silva = Silva("128", "nucl")
class Silva(Database):
    """
    SILVA provides comprehensive, quality checked and regularly updated
    datasets of aligned small (16S/18S, SSU) and large subunit (23S/28S, LSU)
    ribosomal RNA (rRNA) sequences for all three domains of life
    (Bacteria, Archaea and Eukarya).
    SILVA are the official databases of the software package ARB.

    https://www.arb-silva.de

    To install:
        from seqsearch.databases.silva import silva
        silva.download()
        silva.unzip()

    The files are placed in ~/databases/silva_xxx/ where "xxx" is the
    release version (or under `base_dir` when one is given).
    """

    view_url   = "https://www.arb-silva.de/no_cache/download/archive/"
    base_url   = "https://www.arb-silva.de/fileadmin/silva_databases/"
    short_name = "silva"

    all_paths = """
    /test.txt
    """

    def __init__(self, version, seq_type, base_dir=None):
        """
        Prepare the local paths and remote file names for one SILVA release.

        :param version:  The release identifier, e.g. "128". It is inserted
                         in the directory name, the URL and the file names.
        :param seq_type: Stored as-is on the instance (e.g. "nucl"); it is
                         not otherwise used in this class.
        :param base_dir: The directory under which 'databases/silva_<version>/'
                         is placed. Defaults to the module-level `home`.
                         A missing trailing slash is added automatically.
        """
        # Attributes #
        self.version    = version
        self.seq_type   = seq_type
        # Use str() so that a numeric release such as 128 also works #
        self.short_name = self.short_name + "_" + str(self.version)
        # Base directory #
        if base_dir is None: base_dir = home
        # Paths are built by plain concatenation, so make sure the separator
        # is there. An empty string is left alone (stays a relative path). #
        if base_dir and not base_dir.endswith('/'): base_dir += '/'
        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
        self.p        = AutoPaths(self.base_dir, self.all_paths)
        # URL #
        self.url = "release_%s/Exports/" % self.version
        # The database (the [:-3] drops the '.gz' suffix) #
        self.nr99_name = "SILVA_%s_SSURef_Nr99_tax_silva.fasta.gz" % self.version
        self.nr99_dest = FASTA(self.base_dir + self.nr99_name)
        self.nr99      = FASTA(self.base_dir + self.nr99_name[:-3])
        # The alignment (the [:-3] drops the '.gz' suffix) #
        self.aligned_name = "SILVA_%s_SSURef_Nr99_tax_silva_full_align_trunc.fasta.gz" % self.version
        self.aligned_dest = FASTA(self.base_dir + self.aligned_name)
        self.aligned      = FASTA(self.base_dir + self.aligned_name[:-3])

    def download(self):
        """
        Fetch the two gzipped FASTA files of this release (the Nr99 sequences
        and the truncated full alignment) from `base_url` into `base_dir`.
        """
        # Prepare the destination and discard any previous archives #
        self.nr99_dest.directory.create(safe=True)
        self.nr99_dest.remove(safe=True)
        self.aligned_dest.remove(safe=True)
        # Imported here so the module can be loaded without `wget` #
        import wget
        # Both files live in the same remote directory #
        remote_dir = self.base_url + self.url
        for file_name, target in ((self.nr99_name,    self.nr99_dest),
                                  (self.aligned_name, self.aligned_dest)):
            source = remote_dir + file_name
            print("\nDownloading", source)
            wget.download(source, out=target.path)

    def unzip(self):
        """
        Decompress the two downloaded archives next to themselves and then
        call `permissions.only_readable()` on each resulting file.
        """
        self.nr99_dest.ungzip_to(self.nr99)
        self.nr99.permissions.only_readable()
        self.aligned_dest.ungzip_to(self.aligned)
        self.aligned.permissions.only_readable()
SILVA provides comprehensive, quality checked and regularly updated datasets of aligned small (16S/18S, SSU) and large subunit (23S/28S, LSU) ribosomal RNA (rRNA) sequences for all three domains of life (Bacteria, Archaea and Eukarya). SILVA are the official databases of the software package ARB.
To install: from seqsearch.databases.silva import silva silva.download() silva.unzip()
The downloaded files are placed in ~/databases/silva_xxx/, where "xxx" is the release version (for example ~/databases/silva_128/).
Silva(version, seq_type, base_dir=None)
def __init__(self, version, seq_type, base_dir=None):
    """
    Prepare the local paths and remote file names for one SILVA release.

    :param version:  The release identifier, e.g. "128". It is inserted
                     in the directory name, the URL and the file names.
    :param seq_type: Stored as-is on the instance (e.g. "nucl"); it is
                     not otherwise used in this method.
    :param base_dir: The directory under which 'databases/silva_<version>/'
                     is placed. Defaults to the module-level `home`.
                     A missing trailing slash is added automatically.
    """
    # Attributes #
    self.version    = version
    self.seq_type   = seq_type
    # Use str() so that a numeric release such as 128 also works #
    self.short_name = self.short_name + "_" + str(self.version)
    # Base directory #
    if base_dir is None: base_dir = home
    # Paths are built by plain concatenation, so make sure the separator
    # is there. An empty string is left alone (stays a relative path). #
    if base_dir and not base_dir.endswith('/'): base_dir += '/'
    self.base_dir = base_dir + 'databases/' + self.short_name + '/'
    self.p        = AutoPaths(self.base_dir, self.all_paths)
    # URL #
    self.url = "release_%s/Exports/" % self.version
    # The database (the [:-3] drops the '.gz' suffix) #
    self.nr99_name = "SILVA_%s_SSURef_Nr99_tax_silva.fasta.gz" % self.version
    self.nr99_dest = FASTA(self.base_dir + self.nr99_name)
    self.nr99      = FASTA(self.base_dir + self.nr99_name[:-3])
    # The alignment (the [:-3] drops the '.gz' suffix) #
    self.aligned_name = "SILVA_%s_SSURef_Nr99_tax_silva_full_align_trunc.fasta.gz" % self.version
    self.aligned_dest = FASTA(self.base_dir + self.aligned_name)
    self.aligned      = FASTA(self.base_dir + self.aligned_name[:-3])
def
download(self):
def download(self):
    """
    Fetch the two gzipped FASTA files of this release (the Nr99 sequences
    and the truncated full alignment) from `base_url` into `base_dir`.
    """
    # Prepare the destination and discard any previous archives #
    self.nr99_dest.directory.create(safe=True)
    self.nr99_dest.remove(safe=True)
    self.aligned_dest.remove(safe=True)
    # Imported here so the module can be loaded without `wget` #
    import wget
    # Both files live in the same remote directory #
    remote_dir = self.base_url + self.url
    for file_name, target in ((self.nr99_name,    self.nr99_dest),
                              (self.aligned_name, self.aligned_dest)):
        source = remote_dir + file_name
        print("\nDownloading", source)
        wget.download(source, out=target.path)
Retrieve all files (the Nr99 FASTA file and its truncated full alignment) from the SILVA website over HTTPS.