fasta.fastqc

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

  1#!/usr/bin/env python3
  2# -*- coding: utf-8 -*-
  3
  4"""
  5Written by Lucas Sinclair.
  6MIT Licensed.
  7Contact at www.sinclair.bio
  8"""
  9
 10# Built-in modules #
 11import os, shutil, multiprocessing, platform
 12
 13# First party modules #
 14from fasta import FASTQ
 15from autopaths.dir_path       import DirectoryPath
 16from autopaths.tmp_path       import new_temp_dir
 17from plumbing.cache           import property_cached
 18from plumbing.check_cmd_found import check_cmd
 19from plumbing.apt_pkg         import get_apt_packages
 20from plumbing.scraping        import download_from_url
 21
 22# Third party modules #
 23if platform.system() == 'Windows': import pbs3 as sh
 24else: import sh
 25
 26###############################################################################
 27class FastQC:
 28    """
 29    Takes care of running the FastQC program on a given FASTQ file.
 30    See http://www.bioinformatics.babraham.ac.uk/projects/fastqc/
 31    Expects version 0.11.9.
 32    """
 33
 34    def __repr__(self):
 35        msg = '<%s object on "%s">'
 36        return msg % (self.__class__.__name__, self.source.path)
 37
 38    def __init__(self, source, dest=None):
 39        # Source and destination #
 40        self.source = FASTQ(source)
 41        self.dest = DirectoryPath(dest)
 42        # Default case #
 43        if dest is None:
 44            self.dest = DirectoryPath(self.source.prefix_path + '.fastqc')
 45
 46    #---------------------------- Installing ---------------------------------#
 47    apt_packages = ['default-jre']
 48    zip_url = "http://www.bioinformatics.babraham.ac.uk/projects/" \
 49              "fastqc/fastqc_v0.11.9.zip"
 50
 51    @classmethod
 52    def check_installed(cls, exception=True):
 53        """
 54        Try to determine if the FastQC software is installed and
 55        accessible.
 56        """
 57        return check_cmd('fastqc', exception, cls.install.__doc__)
 58
 59    @classmethod
 60    def install(cls, prefix="~/programs/FastQC/"):
 61        """
 62        To automatically download and install the FastQC software on this
 63        computer and for the current user, type these commands in python:
 64
 65            >>> from fasta.fastqc import FastQC
 66            >>> FastQC.install()
 67
 68        If you are on macOS you can just type: "brew install fastqc"
 69        """
 70        # Start with required apt packages #
 71        get_apt_packages(cls.apt_packages, verbose=True)
 72        # Make a temporary directory #
 73        tmp_dir = new_temp_dir()
 74        # Download tarball #
 75        zip_loc = download_from_url(cls.zip_url, tmp_dir, stream=True,
 76                                    progress=True)
 77        # Uncompress #
 78        src_dir = zip_loc.unzip_to(prefix, single=False).sub_directory
 79        # Set executable permissions #
 80        bin_loc = src_dir + 'fastqc'
 81        bin_loc.permissions.make_executable()
 82        # The directory that contains the executable #
 83        bin_dir = src_dir.with_tilda[:-1].replace('~', '$HOME')
 84        # Suggest adding to the $PATH #
 85        print("\nFastQC was installed successfully. You should now "
 86              "add this line to your .bash_profile: \n\n    "
 87              "export PATH=%s:$PATH\n" % bin_dir)
 88
 89    #---------------------------- Running ------------------------------------#
 90    def __call__(self, cpus=None):
 91        # Check it is installed #
 92        self.check_installed()
 93        # Check version #
 94        assert "v0.11.9" in sh.fastqc('-version')
 95        # Number of cores #
 96        if cpus is None: cpus = min(multiprocessing.cpu_count(), 32)
 97        # Make a temporary directory #
 98        self.tmp_dir = new_temp_dir()
 99        # Run it #
100        sh.fastqc(self.source, '-o', self.tmp_dir, '-t', cpus, '--extract')
101        # Get location of results #
102        components = self.source.prefix.split('.')
103        if components[-1] == 'gz':    components.pop()
104        if components[-1] == 'fastq': components.pop()
105        # Reassemble the components #
106        created_name = '.'.join(components) + '_fastqc/'
107        # This will be the name of the directory that fastqc created #
108        created_dir  = self.tmp_dir + created_name
109        # Move results #
110        if self.dest.exists: shutil.rmtree(self.dest)
111        shutil.move(created_dir, self.dest)
112        self.tmp_dir.remove()
113        # Return #
114        return self.results
115
116    #------------------------------- Results ---------------------------------#
117    def __bool__(self):
118        """
119        Return True if the FastQC software was run already and the results are
120        stored on the filesystem. Return False if it was not yet run.
121        """
122        return os.path.exists(self.dest + 'Images/per_base_quality.png')
123
124    @property_cached
125    def results(self):
126        # Check it was run #
127        if not self:
128            msg = "You can't access results from FastQC " \
129                  "before running the tool."
130            raise Exception(msg)
131        # Return the results #
132        return FastQCResults(self.dest)
133
134###############################################################################
class FastQCResults(DirectoryPath):
    """
    The directory holding the report of one FastQC run, with shortcuts to
    two of the plots it contains.
    """

    # Relative location of the files expected inside the directory.
    # Presumably read by the parent class to serve the `p` accessor used
    # below -- confirm in autopaths.
    all_paths = """
                /Images/per_base_quality.png
                /Images/per_sequence_quality.png
                """

    @property
    def per_seq_qual(self):
        """The per-sequence quality plot, looked up through `self.p`."""
        return self.p.per_sequence_quality

    @property
    def per_base_qual(self):
        """The per-base quality plot, looked up through `self.p`."""
        return self.p.per_base_quality

    def __bool__(self):
        """The object is true when the per-base quality plot exists."""
        return self.per_base_qual.exists
class FastQC:
 28class FastQC:
 29    """
 30    Takes care of running the FastQC program on a given FASTQ file.
 31    See http://www.bioinformatics.babraham.ac.uk/projects/fastqc/
 32    Expects version 0.11.9.
 33    """
 34
 35    def __repr__(self):
 36        msg = '<%s object on "%s">'
 37        return msg % (self.__class__.__name__, self.source.path)
 38
 39    def __init__(self, source, dest=None):
 40        # Source and destination #
 41        self.source = FASTQ(source)
 42        self.dest = DirectoryPath(dest)
 43        # Default case #
 44        if dest is None:
 45            self.dest = DirectoryPath(self.source.prefix_path + '.fastqc')
 46
 47    #---------------------------- Installing ---------------------------------#
 48    apt_packages = ['default-jre']
 49    zip_url = "http://www.bioinformatics.babraham.ac.uk/projects/" \
 50              "fastqc/fastqc_v0.11.9.zip"
 51
 52    @classmethod
 53    def check_installed(cls, exception=True):
 54        """
 55        Try to determine if the FastQC software is installed and
 56        accessible.
 57        """
 58        return check_cmd('fastqc', exception, cls.install.__doc__)
 59
 60    @classmethod
 61    def install(cls, prefix="~/programs/FastQC/"):
 62        """
 63        To automatically download and install the FastQC software on this
 64        computer and for the current user, type these commands in python:
 65
 66            >>> from fasta.fastqc import FastQC
 67            >>> FastQC.install()
 68
 69        If you are on macOS you can just type: "brew install fastqc"
 70        """
 71        # Start with required apt packages #
 72        get_apt_packages(cls.apt_packages, verbose=True)
 73        # Make a temporary directory #
 74        tmp_dir = new_temp_dir()
 75        # Download tarball #
 76        zip_loc = download_from_url(cls.zip_url, tmp_dir, stream=True,
 77                                    progress=True)
 78        # Uncompress #
 79        src_dir = zip_loc.unzip_to(prefix, single=False).sub_directory
 80        # Set executable permissions #
 81        bin_loc = src_dir + 'fastqc'
 82        bin_loc.permissions.make_executable()
 83        # The directory that contains the executable #
 84        bin_dir = src_dir.with_tilda[:-1].replace('~', '$HOME')
 85        # Suggest adding to the $PATH #
 86        print("\nFastQC was installed successfully. You should now "
 87              "add this line to your .bash_profile: \n\n    "
 88              "export PATH=%s:$PATH\n" % bin_dir)
 89
 90    #---------------------------- Running ------------------------------------#
 91    def __call__(self, cpus=None):
 92        # Check it is installed #
 93        self.check_installed()
 94        # Check version #
 95        assert "v0.11.9" in sh.fastqc('-version')
 96        # Number of cores #
 97        if cpus is None: cpus = min(multiprocessing.cpu_count(), 32)
 98        # Make a temporary directory #
 99        self.tmp_dir = new_temp_dir()
100        # Run it #
101        sh.fastqc(self.source, '-o', self.tmp_dir, '-t', cpus, '--extract')
102        # Get location of results #
103        components = self.source.prefix.split('.')
104        if components[-1] == 'gz':    components.pop()
105        if components[-1] == 'fastq': components.pop()
106        # Reassemble the components #
107        created_name = '.'.join(components) + '_fastqc/'
108        # This will be the name of the directory that fastqc created #
109        created_dir  = self.tmp_dir + created_name
110        # Move results #
111        if self.dest.exists: shutil.rmtree(self.dest)
112        shutil.move(created_dir, self.dest)
113        self.tmp_dir.remove()
114        # Return #
115        return self.results
116
117    #------------------------------- Results ---------------------------------#
118    def __bool__(self):
119        """
120        Return True if the FastQC software was run already and the results are
121        stored on the filesystem. Return False if it was not yet run.
122        """
123        return os.path.exists(self.dest + 'Images/per_base_quality.png')
124
125    @property_cached
126    def results(self):
127        # Check it was run #
128        if not self:
129            msg = "You can't access results from FastQC " \
130                  "before running the tool."
131            raise Exception(msg)
132        # Return the results #
133        return FastQCResults(self.dest)

Takes care of running the FastQC program on a given FASTQ file. See http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ Expects version 0.11.9.

FastQC(source, dest=None)
39    def __init__(self, source, dest=None):
40        # Source and destination #
41        self.source = FASTQ(source)
42        self.dest = DirectoryPath(dest)
43        # Default case #
44        if dest is None:
45            self.dest = DirectoryPath(self.source.prefix_path + '.fastqc')
source
dest
apt_packages = ['default-jre']
zip_url = 'http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.9.zip'
@classmethod
def check_installed(cls, exception=True):
52    @classmethod
53    def check_installed(cls, exception=True):
54        """
55        Try to determine if the FastQC software is installed and
56        accessible.
57        """
58        return check_cmd('fastqc', exception, cls.install.__doc__)

Try to determine if the FastQC software is installed and accessible.

@classmethod
def install(cls, prefix='~/programs/FastQC/'):
60    @classmethod
61    def install(cls, prefix="~/programs/FastQC/"):
62        """
63        To automatically download and install the FastQC software on this
64        computer and for the current user, type these commands in python:
65
66            >>> from fasta.fastqc import FastQC
67            >>> FastQC.install()
68
69        If you are on macOS you can just type: "brew install fastqc"
70        """
71        # Start with required apt packages #
72        get_apt_packages(cls.apt_packages, verbose=True)
73        # Make a temporary directory #
74        tmp_dir = new_temp_dir()
75        # Download tarball #
76        zip_loc = download_from_url(cls.zip_url, tmp_dir, stream=True,
77                                    progress=True)
78        # Uncompress #
79        src_dir = zip_loc.unzip_to(prefix, single=False).sub_directory
80        # Set executable permissions #
81        bin_loc = src_dir + 'fastqc'
82        bin_loc.permissions.make_executable()
83        # The directory that contains the executable #
84        bin_dir = src_dir.with_tilda[:-1].replace('~', '$HOME')
85        # Suggest adding to the $PATH #
86        print("\nFastQC was installed successfully. You should now "
87              "add this line to your .bash_profile: \n\n    "
88              "export PATH=%s:$PATH\n" % bin_dir)

To automatically download and install the FastQC software on this computer and for the current user, type these commands in python:

>>> from fasta.fastqc import FastQC
>>> FastQC.install()

If you are on macOS you can just type: "brew install fastqc"

results
class FastQCResults(autopaths.dir_path.DirectoryPath):
class FastQCResults(DirectoryPath):
    """
    The directory holding the report of one FastQC run, with shortcuts to
    two of the plots it contains.
    """

    # Relative location of the files expected inside the directory.
    # Presumably read by the parent class to serve the `p` accessor used
    # below -- confirm in autopaths.
    all_paths = """
                /Images/per_base_quality.png
                /Images/per_sequence_quality.png
                """

    @property
    def per_seq_qual(self):
        """The per-sequence quality plot, looked up through `self.p`."""
        return self.p.per_sequence_quality

    @property
    def per_base_qual(self):
        """The per-base quality plot, looked up through `self.p`."""
        return self.p.per_base_quality

    def __bool__(self):
        """The object is true when the per-base quality plot exists."""
        return self.per_base_qual.exists

A directory with the results from FastQC.

all_paths = '\n /Images/per_base_quality.png\n /Images/per_sequence_quality.png\n '
per_base_qual
per_seq_qual
Inherited Members
autopaths.base_path.BasePath
BasePath
clean_path
path
short_prefix
extension
escaped
physical_path
relative_path
rel_path_from
exists
permissions
mdate
mdate_iso
cdate
cdate_iso
unix_style
wsl_style
win_style
with_tilda
with_home
autopaths.dir_path.DirectoryPath
p
name
prefix_path
absolute_path
directory
sub_directory
empty
size
contents
files
directories
flat_contents
flat_files
flat_directories
must_exist
remove
create
create_if_not_exists
move_to
copy
glob
find
unnest
remove_empty_dirs
tar_to
targz_to
members
tar_top_dirs
zip_to
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
count
expandtabs
partition
index
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans