seqsearch.databases.pr_two

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

 1#!/usr/bin/env python3
 2# -*- coding: utf-8 -*-
 3
 4"""
 5Written by Lucas Sinclair.
 6MIT Licensed.
 7Contact at www.sinclair.bio
 8"""
 9
10# Built-in modules #
11import os
12
13# First party modules #
14from seqsearch.databases import Database
15from autopaths.auto_paths import AutoPaths
16from autopaths.file_path import FilePath
17
18# Third party modules #
19
20# Constants #
# When $HOME is not set, let `os.path.expanduser` work out the home
# directory: a literal "~" is not expanded by plain string concatenation.
home = os.environ.get('HOME', os.path.expanduser('~')) + '/'
22
23###############################################################################
class PrTwo(Database):
    """
    This is the PR2 database.

    https://figshare.com/articles/PR2_rRNA_gene_database/3803709

    To install:

        from seqsearch.databases.pr_two import pr_two
        pr_two.download()
        pr_two.unzip()
        print(pr_two.tax_depth_freq)

    It will put it in ~/databases/pr_two_11/
    """

    base_url   = "https://ndownloader.figshare.com/articles/3803709/versions/"
    short_name = "pr_two"
    long_name  = 'Protist Ribosomal Reference database (PR2) - SSU rRNA gene database'

    # The files expected in the database directory, handed to `AutoPaths` #
    all_paths = """
    /archive.zip
    /pr2_gb203_version_4.5.zip
    /pr2_gb203_version_4.5.fasta
    /pr2_gb203_version_4.5.taxo
    """

    @property
    def rank_names(self):
        """The names of the ranks. Total 9 ranks."""
        return ['Domain',   # 0
                'Kingdom',  # 1
                'Phylum',   # 2
                'Class',    # 3
                'Order',    # 4
                'Family',   # 5
                'Tribe',    # 6
                'Genus',    # 7
                'Species']  # 8

    def __init__(self, version, base_dir=None):
        """
        Set up the paths and the download URL for one version of the database.

        :param version:  The version that is appended to `base_url` and to
                         `short_name`. It is converted to a string, so both
                         `"11"` and `11` are accepted.
        :param base_dir: The directory that will contain `databases/`.
                         Defaults to the user's home directory. A missing
                         trailing slash is added.
        """
        # Attributes #
        self.version    = str(version)
        self.short_name = self.short_name + "_" + self.version
        # Base directory #
        if base_dir is None: base_dir = home
        # Otherwise "/data" would give "/datadatabases/..." #
        if base_dir and not base_dir.endswith('/'): base_dir = base_dir + '/'
        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
        self.p        = AutoPaths(self.base_dir, self.all_paths)
        # URL #
        self.url = self.base_url + self.version
        # The archive #
        self.dest = self.p.archive
        # The results #
        self.alignment = FilePath(self.base_dir + "pr_two.gb203_v%s.align" % self.version)
        self.taxonomy  = FilePath(self.base_dir + "pr_two.gb203_v%s.tax"   % self.version)
        # The part that mothur will use for naming files #
        self.nickname = "gb203_v%s" % self.version

    def download(self):
        """
        Download `self.url` with `wget` and save the result at `self.dest`.
        Any file already present at `self.dest` is removed first.
        """
        # Prepare the destination #
        self.dest.directory.create(safe=True)
        self.dest.remove()
        # Tell the user #
        print("\nDownloading", self.url)
        # Imported here, so `wget` is only required when downloading #
        import wget
        wget.download(self.url, out=self.dest.path)

    def unzip(self):
        """
        Extract the downloaded archive and the PR2 zip file into
        `self.base_dir`, then move the FASTA file to `self.alignment`
        and the taxonomy file to `self.taxonomy`.
        """
        # NOTE(review): `self.dest` is `self.p.archive`, and `self.p.archive_zip`
        # looks like it may resolve to the same `/archive.zip` file, in which
        # case it is extracted twice -- confirm against AutoPaths.
        self.dest.unzip_to(self.base_dir, single=False)
        self.p.archive_zip.unzip_to(self.base_dir, single=False)
        # Presumably the zip file that was contained in the archive #
        self.p.pr2_zip.unzip_to(self.base_dir, single=False)
        # Rename the results #
        self.p.fasta.move_to(self.alignment)
        self.p.taxo.move_to(self.taxonomy)
96###############################################################################
# The default instance, built for version "11" #
pr_two = PrTwo("11")
class PrTwo(seqsearch.databases.Database):
class PrTwo(Database):
    """
    This is the PR2 database.

    https://figshare.com/articles/PR2_rRNA_gene_database/3803709

    To install:

        from seqsearch.databases.pr_two import pr_two
        pr_two.download()
        pr_two.unzip()
        print(pr_two.tax_depth_freq)

    It will put it in ~/databases/pr_two_11/
    """

    base_url   = "https://ndownloader.figshare.com/articles/3803709/versions/"
    short_name = "pr_two"
    long_name  = 'Protist Ribosomal Reference database (PR2) - SSU rRNA gene database'

    # The files expected in the database directory, handed to `AutoPaths` #
    all_paths = """
    /archive.zip
    /pr2_gb203_version_4.5.zip
    /pr2_gb203_version_4.5.fasta
    /pr2_gb203_version_4.5.taxo
    """

    @property
    def rank_names(self):
        """The names of the ranks. Total 9 ranks."""
        return ['Domain',   # 0
                'Kingdom',  # 1
                'Phylum',   # 2
                'Class',    # 3
                'Order',    # 4
                'Family',   # 5
                'Tribe',    # 6
                'Genus',    # 7
                'Species']  # 8

    def __init__(self, version, base_dir=None):
        """
        Set up the paths and the download URL for one version of the database.

        :param version:  The version that is appended to `base_url` and to
                         `short_name`. It is converted to a string, so both
                         `"11"` and `11` are accepted.
        :param base_dir: The directory that will contain `databases/`.
                         Defaults to the user's home directory. A missing
                         trailing slash is added.
        """
        # Attributes #
        self.version    = str(version)
        self.short_name = self.short_name + "_" + self.version
        # Base directory #
        if base_dir is None: base_dir = home
        # Otherwise "/data" would give "/datadatabases/..." #
        if base_dir and not base_dir.endswith('/'): base_dir = base_dir + '/'
        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
        self.p        = AutoPaths(self.base_dir, self.all_paths)
        # URL #
        self.url = self.base_url + self.version
        # The archive #
        self.dest = self.p.archive
        # The results #
        self.alignment = FilePath(self.base_dir + "pr_two.gb203_v%s.align" % self.version)
        self.taxonomy  = FilePath(self.base_dir + "pr_two.gb203_v%s.tax"   % self.version)
        # The part that mothur will use for naming files #
        self.nickname = "gb203_v%s" % self.version

    def download(self):
        """
        Download `self.url` with `wget` and save the result at `self.dest`.
        Any file already present at `self.dest` is removed first.
        """
        # Prepare the destination #
        self.dest.directory.create(safe=True)
        self.dest.remove()
        # Tell the user #
        print("\nDownloading", self.url)
        # Imported here, so `wget` is only required when downloading #
        import wget
        wget.download(self.url, out=self.dest.path)

    def unzip(self):
        """
        Extract the downloaded archive and the PR2 zip file into
        `self.base_dir`, then move the FASTA file to `self.alignment`
        and the taxonomy file to `self.taxonomy`.
        """
        # NOTE(review): `self.dest` is `self.p.archive`, and `self.p.archive_zip`
        # looks like it may resolve to the same `/archive.zip` file, in which
        # case it is extracted twice -- confirm against AutoPaths.
        self.dest.unzip_to(self.base_dir, single=False)
        self.p.archive_zip.unzip_to(self.base_dir, single=False)
        # Presumably the zip file that was contained in the archive #
        self.p.pr2_zip.unzip_to(self.base_dir, single=False)
        # Rename the results #
        self.p.fasta.move_to(self.alignment)
        self.p.taxo.move_to(self.taxonomy)

This is the PR2 database.

https://figshare.com/articles/PR2_rRNA_gene_database/3803709

To install:

from seqsearch.databases.pr_two import pr_two
pr_two.download()
pr_two.unzip()
print(pr_two.tax_depth_freq)

It will put it in ~/databases/pr_two_11/

PrTwo(version, base_dir=None)
65    def __init__(self, version, base_dir=None):
66        # Attributes #
67        self.version    = version
68        self.short_name = self.short_name + "_" + self.version
69        # Base directory #
70        if base_dir is None: base_dir = home
71        self.base_dir = base_dir + 'databases/' + self.short_name + '/'
72        self.p        = AutoPaths(self.base_dir, self.all_paths)
73        # URL #
74        self.url = self.base_url + self.version
75        # The archive #
76        self.dest = self.p.archive
77        # The results #
78        self.alignment = FilePath(self.base_dir + "pr_two.gb203_v%s.align" % self.version)
79        self.taxonomy  = FilePath(self.base_dir + "pr_two.gb203_v%s.tax"   % self.version)
80        # The part that mothur will use for naming files #
81        self.nickname = "gb203_v%s" % self.version
rank_names

The names of the ranks. Total 9 ranks.

def download(self):
83    def download(self):
84        self.dest.directory.create(safe=True)
85        self.dest.remove()
86        print("\nDownloading", self.url)
87        import wget
88        wget.download(self.url, out=self.dest.path)

Download the archive for this version from figshare (over HTTPS, not FTP) and save it as the local archive file.

def unzip(self):
90    def unzip(self):
91        self.dest.unzip_to(self.base_dir, single=False)
92        self.p.archive_zip.unzip_to(self.base_dir, single=False)
93        self.p.pr2_zip.unzip_to(self.base_dir, single=False)
94        self.p.fasta.move_to(self.alignment)
95        self.p.taxo.move_to(self.taxonomy)