seqsearch.databases.pr_two
Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Written by Lucas Sinclair.
MIT Licensed.
Contact at www.sinclair.bio
"""

# Built-in modules #
import os

# First party modules #
from seqsearch.databases import Database
from autopaths.auto_paths import AutoPaths
from autopaths.file_path import FilePath

# Third party modules #

# Constants #
# A literal '~' is never expanded by the string concatenation used below,
# so fall back to the expanded home directory when $HOME is not set.
home = os.environ.get('HOME', os.path.expanduser('~')) + '/'

###############################################################################
class PrTwo(Database):
    """
    This is the PR2 database.

    https://figshare.com/articles/PR2_rRNA_gene_database/3803709

    To install:

        from seqsearch.databases.pr_two import pr_two
        pr_two.download()
        pr_two.unzip()
        print(pr_two.tax_depth_freq)

    It will put it in ~/databases/pr_two_11/
    """

    base_url   = "https://ndownloader.figshare.com/articles/3803709/versions/"
    short_name = "pr_two"
    long_name  = 'Protist Ribosomal Reference database (PR2) - SSU rRNA gene database'

    all_paths = """
                /archive.zip
                /pr2_gb203_version_4.5.zip
                /pr2_gb203_version_4.5.fasta
                /pr2_gb203_version_4.5.taxo
                """

    @property
    def rank_names(self):
        """The names of the ranks. Total 9 ranks."""
        return ['Domain',   # 0
                'Kingdom',  # 1
                'Phylum',   # 2
                'Class',    # 3
                'Order',    # 4
                'Family',   # 5
                'Tribe',    # 6
                'Genus',    # 7
                'Species']  # 8

    def __init__(self, version, base_dir=None):
        """
        Set up the paths and the URL for one release of the database.

        :param version: The version identifier. It is appended to `base_url`
                        and to `short_name`. Any value is converted with
                        `str()` so that integers are accepted too.
        :param base_dir: The directory in which `databases/<short_name>/`
                         will be placed. Defaults to the module-level `home`.
        """
        # Attributes #
        self.version    = str(version)
        self.short_name = self.short_name + "_" + self.version
        # Base directory #
        if base_dir is None: base_dir = home
        # Paths are built by concatenation, so make sure a separator is there.
        # An empty string is left alone to keep the path relative.
        if base_dir and not base_dir.endswith('/'): base_dir += '/'
        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
        self.p        = AutoPaths(self.base_dir, self.all_paths)
        # URL #
        self.url = self.base_url + self.version
        # The archive #
        self.dest = self.p.archive
        # The results #
        self.alignment = FilePath(self.base_dir + "pr_two.gb203_v%s.align" % self.version)
        self.taxonomy  = FilePath(self.base_dir + "pr_two.gb203_v%s.tax"   % self.version)
        # The part that mothur will use for naming files #
        self.nickname = "gb203_v%s" % self.version

    def download(self):
        """
        Fetch the archive found at `self.url` and save it to `self.dest`.
        Any previous copy of the archive is removed first.
        """
        self.dest.directory.create(safe=True)
        self.dest.remove()
        print("\nDownloading", self.url)
        # Only imported here so the module loads without this dependency #
        import wget
        wget.download(self.url, out=self.dest.path)

    def unzip(self):
        """
        Extract the downloaded archives into `self.base_dir`, then move the
        fasta and taxo files to `self.alignment` and `self.taxonomy`.
        """
        self.dest.unzip_to(self.base_dir, single=False)
        # NOTE(review): `self.dest` was set to `self.p.archive` in __init__.
        # If `self.p.archive_zip` resolves to the same file, this repeats the
        # previous extraction -- confirm against AutoPaths.
        self.p.archive_zip.unzip_to(self.base_dir, single=False)
        self.p.pr2_zip.unzip_to(self.base_dir, single=False)
        self.p.fasta.move_to(self.alignment)
        self.p.taxo.move_to(self.taxonomy)

###############################################################################
pr_two = PrTwo("11")
class PrTwo(Database):
    """
    This is the PR2 database.

    https://figshare.com/articles/PR2_rRNA_gene_database/3803709

    To install:

        from seqsearch.databases.pr_two import pr_two
        pr_two.download()
        pr_two.unzip()
        print(pr_two.tax_depth_freq)

    It will put it in ~/databases/pr_two_11/
    """

    base_url   = "https://ndownloader.figshare.com/articles/3803709/versions/"
    short_name = "pr_two"
    long_name  = 'Protist Ribosomal Reference database (PR2) - SSU rRNA gene database'

    all_paths = """
                /archive.zip
                /pr2_gb203_version_4.5.zip
                /pr2_gb203_version_4.5.fasta
                /pr2_gb203_version_4.5.taxo
                """

    @property
    def rank_names(self):
        """The names of the ranks. Total 9 ranks."""
        return ['Domain',   # 0
                'Kingdom',  # 1
                'Phylum',   # 2
                'Class',    # 3
                'Order',    # 4
                'Family',   # 5
                'Tribe',    # 6
                'Genus',    # 7
                'Species']  # 8

    def __init__(self, version, base_dir=None):
        """
        Set up the paths and the URL for one release of the database.

        :param version: The version identifier. It is appended to `base_url`
                        and to `short_name`. Any value is converted with
                        `str()` so that integers are accepted too.
        :param base_dir: The directory in which `databases/<short_name>/`
                         will be placed. Defaults to the module-level `home`.
        """
        # Attributes #
        self.version    = str(version)
        self.short_name = self.short_name + "_" + self.version
        # Base directory #
        if base_dir is None: base_dir = home
        # Paths are built by concatenation, so make sure a separator is there.
        # An empty string is left alone to keep the path relative.
        if base_dir and not base_dir.endswith('/'): base_dir += '/'
        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
        self.p        = AutoPaths(self.base_dir, self.all_paths)
        # URL #
        self.url = self.base_url + self.version
        # The archive #
        self.dest = self.p.archive
        # The results #
        self.alignment = FilePath(self.base_dir + "pr_two.gb203_v%s.align" % self.version)
        self.taxonomy  = FilePath(self.base_dir + "pr_two.gb203_v%s.tax"   % self.version)
        # The part that mothur will use for naming files #
        self.nickname = "gb203_v%s" % self.version

    def download(self):
        """
        Fetch the archive found at `self.url` and save it to `self.dest`.
        Any previous copy of the archive is removed first.
        """
        self.dest.directory.create(safe=True)
        self.dest.remove()
        print("\nDownloading", self.url)
        # Only imported here so the module loads without this dependency #
        import wget
        wget.download(self.url, out=self.dest.path)

    def unzip(self):
        """
        Extract the downloaded archives into `self.base_dir`, then move the
        fasta and taxo files to `self.alignment` and `self.taxonomy`.
        """
        self.dest.unzip_to(self.base_dir, single=False)
        # NOTE(review): `self.dest` was set to `self.p.archive` in __init__.
        # If `self.p.archive_zip` resolves to the same file, this repeats the
        # previous extraction -- confirm against AutoPaths.
        self.p.archive_zip.unzip_to(self.base_dir, single=False)
        self.p.pr2_zip.unzip_to(self.base_dir, single=False)
        self.p.fasta.move_to(self.alignment)
        self.p.taxo.move_to(self.taxonomy)
This is the PR2 database.
https://figshare.com/articles/PR2_rRNA_gene_database/3803709
To install:
from seqsearch.databases.pr_two import pr_two
pr_two.download()
pr_two.unzip()
print(pr_two.tax_depth_freq)
It will put it in ~/databases/pr_two_11/
PrTwo(version, base_dir=None)
def __init__(self, version, base_dir=None):
    """
    Set up the paths and the URL for one release of the database.

    :param version: The version identifier. It is appended to `base_url`
                    and to `short_name`. Any value is converted with
                    `str()` so that integers are accepted too.
    :param base_dir: The directory in which `databases/<short_name>/`
                     will be placed. Defaults to the module-level `home`.
    """
    # Attributes #
    self.version    = str(version)
    self.short_name = self.short_name + "_" + self.version
    # Base directory #
    if base_dir is None: base_dir = home
    # Paths are built by concatenation, so make sure a separator is there.
    # An empty string is left alone to keep the path relative.
    if base_dir and not base_dir.endswith('/'): base_dir += '/'
    self.base_dir = base_dir + 'databases/' + self.short_name + '/'
    self.p        = AutoPaths(self.base_dir, self.all_paths)
    # URL #
    self.url = self.base_url + self.version
    # The archive #
    self.dest = self.p.archive
    # The results #
    self.alignment = FilePath(self.base_dir + "pr_two.gb203_v%s.align" % self.version)
    self.taxonomy  = FilePath(self.base_dir + "pr_two.gb203_v%s.tax"   % self.version)
    # The part that mothur will use for naming files #
    self.nickname = "gb203_v%s" % self.version
def download(self):
def download(self):
    """
    Fetch the archive found at `self.url` and save it to `self.dest`.
    Any previous copy of the archive is removed before the transfer.
    """
    archive = self.dest
    # Prepare the destination #
    archive.directory.create(safe=True)
    archive.remove()
    # Tell the user what is being fetched #
    print("\nDownloading", self.url)
    # Only imported at this point, after the destination is prepared #
    import wget
    wget.download(self.url, out=archive.path)
Download the archive from the figshare URL (`self.url`) into `self.dest`, replacing any previously downloaded copy.