fasta.graphs

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

 1#!/usr/bin/env python3
 2# -*- coding: utf-8 -*-
 3
 4"""
 5Written by Lucas Sinclair.
 6MIT Licensed.
 7Contact at www.sinclair.bio
 8"""
 9
10# Internal modules #
11
12# First party modules #
13from plumbing.graphs import Graph
14from autopaths.file_path import FilePath
15
16# Third party modules #
17
18# Constants #
19__all__ = ['LengthDist', 'LengthHist']
20
21################################################################################
class LengthDist(Graph):
    """
    The length distribution of the sequences with a bar plot.

    One bar is drawn per entry of `parent.lengths_counter`: the key gives
    the position of the bar on the x axis and the value gives its height.
    """

    short_name   = 'length_dist'
    sep          = ('x', 'y')
    y_grid       = True
    width        = 10
    height       = 6
    remove_frame = True

    def __init__(self, parent):
        """
        Record the parent object and derive the output path from it.

        `parent.prefix_path` is concatenated with '_len_dist.pdf' to build
        the path; `parent.lengths_counter` is read later on by `plot`.
        """
        self.parent = parent
        self.path = FilePath(self.parent.prefix_path + '_len_dist.pdf')

    def plot(self, **kwargs):
        """
        Draw the bar plot and hand the figure over to `save_plot`.

        All keyword arguments are forwarded unchanged to `save_plot`.
        Returns `self` so that calls can be chained.
        """
        # Data #
        counts = self.parent.lengths_counter
        # `bar` is documented to take array-likes and dict views are not
        # sequences, so turn them into plain lists first.
        lengths = list(counts.keys())
        numbers = list(counts.values())
        # Plot #
        from matplotlib import pyplot
        fig = pyplot.figure()
        # Close the figure even when drawing or saving fails, otherwise
        # it stays registered in pyplot's global state.
        try:
            axes = fig.gca()
            axes.bar(lengths, numbers, width=1.0,
                     color='gray', align='center')
            # Information #
            title = 'Distribution of sequence lengths'
            axes.set_title(title)
            axes.set_xlabel('Length of sequence in nucleotides')
            axes.set_ylabel('Number of sequences with this length')
            # Save it #
            self.save_plot(fig, axes, **kwargs)
        finally:
            pyplot.close(fig)
        # For convenience #
        return self
55
56################################################################################
class LengthHist(Graph):
    """
    The length distribution of the sequences with a histogram.

    The values that are binned come from `parent.lengths`.
    """

    short_name   = 'length_hist'
    sep          = ('x', 'y')
    y_grid       = True
    width        = 10
    height       = 6
    remove_frame = True

    def __init__(self, parent):
        """
        Record the parent object and derive the output path from it.

        `parent.prefix_path` is concatenated with '_len_hist.pdf' to build
        the path; `parent.lengths` is read later on by `plot`.
        """
        self.parent = parent
        self.path = FilePath(self.parent.prefix_path + '_len_hist.pdf')

    def plot(self, bins=80, **kwargs):
        """
        Draw the histogram and hand the figure over to `save_plot`.

        `bins` is passed as is to the histogram call, unless the `x_scale`
        keyword argument contains 'log'. In that case it is the number of
        bin edges generated evenly in log10 space between 1 and the
        largest length, and one extra edge at 0 is prepended.

        All keyword arguments are forwarded unchanged to `save_plot`.
        Raises `ValueError` when `parent.lengths` is empty.
        Returns `self` so that calls can be chained.
        """
        # Import numpy #
        import numpy
        # Data #
        lengths = list(self.parent.lengths)
        # Fail early and explicitly, before any figure is created #
        if not lengths:
            raise ValueError("Cannot plot a histogram of sequence lengths:"
                             " no sequence lengths were found.")
        # An `x_scale` explicitly set to None counts as "not logarithmic" #
        x_scale = kwargs.get('x_scale') or ''
        # Linear bins in logarithmic space #
        if 'log' in x_scale:
            start, stop = numpy.log10(1), numpy.log10(max(lengths))
            bin_edges = list(numpy.logspace(start=start, stop=stop, num=bins))
            # The first edge from logspace is 1, so add a [0, 1) bin too
            bin_edges.insert(0, 0)
        else:
            bin_edges = bins
        # Plot #
        from matplotlib import pyplot
        fig = pyplot.figure()
        # Close the figure even when drawing or saving fails, otherwise
        # it stays registered in pyplot's global state.
        try:
            axes = fig.gca()
            axes.hist(lengths, bins=bin_edges, color='gray')
            # Information #
            title = 'Histogram of sequence lengths'
            axes.set_title(title)
            axes.set_xlabel('Length of sequence in nucleotides')
            axes.set_ylabel('Number of sequences with this length')
            # X lim: start at the shortest length, keep the right limit #
            axes.set_xlim(min(lengths), axes.get_xlim()[1])
            # Save it #
            self.save_plot(fig, axes, **kwargs)
        finally:
            pyplot.close(fig)
        # For convenience #
        return self
class LengthDist(plumbing.graphs.Graph):
class LengthDist(Graph):
    """
    The length distribution of the sequences with a bar plot.

    One bar is drawn per entry of `parent.lengths_counter`: the key gives
    the position of the bar on the x axis and the value gives its height.
    """

    short_name   = 'length_dist'
    sep          = ('x', 'y')
    y_grid       = True
    width        = 10
    height       = 6
    remove_frame = True

    def __init__(self, parent):
        """
        Record the parent object and derive the output path from it.

        `parent.prefix_path` is concatenated with '_len_dist.pdf' to build
        the path; `parent.lengths_counter` is read later on by `plot`.
        """
        self.parent = parent
        self.path = FilePath(self.parent.prefix_path + '_len_dist.pdf')

    def plot(self, **kwargs):
        """
        Draw the bar plot and hand the figure over to `save_plot`.

        All keyword arguments are forwarded unchanged to `save_plot`.
        Returns `self` so that calls can be chained.
        """
        # Data #
        counts = self.parent.lengths_counter
        # `bar` is documented to take array-likes and dict views are not
        # sequences, so turn them into plain lists first.
        lengths = list(counts.keys())
        numbers = list(counts.values())
        # Plot #
        from matplotlib import pyplot
        fig = pyplot.figure()
        # Close the figure even when drawing or saving fails, otherwise
        # it stays registered in pyplot's global state.
        try:
            axes = fig.gca()
            axes.bar(lengths, numbers, width=1.0,
                     color='gray', align='center')
            # Information #
            title = 'Distribution of sequence lengths'
            axes.set_title(title)
            axes.set_xlabel('Length of sequence in nucleotides')
            axes.set_ylabel('Number of sequences with this length')
            # Save it #
            self.save_plot(fig, axes, **kwargs)
        finally:
            pyplot.close(fig)
        # For convenience #
        return self

The length distribution of the sequences with a bar plot.

LengthDist(parent)
def __init__(self, parent):
    """Record the parent object and derive the output path from it."""
    # The parent has to be stored first, the path is built through it #
    self.parent = parent
    pdf_location = self.parent.prefix_path + '_len_dist.pdf'
    self.path = FilePath(pdf_location)
short_name = 'length_dist'
sep = ('x', 'y')
y_grid = True
width = 10
height = 6
remove_frame = True
parent
path
def plot(self, **kwargs):
def plot(self, **kwargs):
    """
    Draw one gray bar per entry of `parent.lengths_counter` and hand the
    figure over to `save_plot`.

    All keyword arguments are forwarded unchanged to `save_plot`.
    Returns `self` so that calls can be chained.
    """
    # Data #
    counts = self.parent.lengths_counter
    # `bar` is documented to take array-likes and dict views are not
    # sequences, so turn them into plain lists first.
    lengths = list(counts.keys())
    numbers = list(counts.values())
    # Plot #
    from matplotlib import pyplot
    fig = pyplot.figure()
    # Close the figure even when drawing or saving fails, otherwise
    # it stays registered in pyplot's global state.
    try:
        axes = fig.gca()
        axes.bar(lengths, numbers, width=1.0,
                 color='gray', align='center')
        # Information #
        title = 'Distribution of sequence lengths'
        axes.set_title(title)
        axes.set_xlabel('Length of sequence in nucleotides')
        axes.set_ylabel('Number of sequences with this length')
        # Save it #
        self.save_plot(fig, axes, **kwargs)
    finally:
        pyplot.close(fig)
    # For convenience #
    return self

An example plot function. Subclasses have to override this method.

Inherited Members
plumbing.graphs.Graph
default_params
save_plot
plot_and_save
save_anim
class LengthHist(plumbing.graphs.Graph):
class LengthHist(Graph):
    """
    The length distribution of the sequences with a histogram.

    The values that are binned come from `parent.lengths`.
    """

    short_name   = 'length_hist'
    sep          = ('x', 'y')
    y_grid       = True
    width        = 10
    height       = 6
    remove_frame = True

    def __init__(self, parent):
        """
        Record the parent object and derive the output path from it.

        `parent.prefix_path` is concatenated with '_len_hist.pdf' to build
        the path; `parent.lengths` is read later on by `plot`.
        """
        self.parent = parent
        self.path = FilePath(self.parent.prefix_path + '_len_hist.pdf')

    def plot(self, bins=80, **kwargs):
        """
        Draw the histogram and hand the figure over to `save_plot`.

        `bins` is passed as is to the histogram call, unless the `x_scale`
        keyword argument contains 'log'. In that case it is the number of
        bin edges generated evenly in log10 space between 1 and the
        largest length, and one extra edge at 0 is prepended.

        All keyword arguments are forwarded unchanged to `save_plot`.
        Raises `ValueError` when `parent.lengths` is empty.
        Returns `self` so that calls can be chained.
        """
        # Import numpy #
        import numpy
        # Data #
        lengths = list(self.parent.lengths)
        # Fail early and explicitly, before any figure is created #
        if not lengths:
            raise ValueError("Cannot plot a histogram of sequence lengths:"
                             " no sequence lengths were found.")
        # An `x_scale` explicitly set to None counts as "not logarithmic" #
        x_scale = kwargs.get('x_scale') or ''
        # Linear bins in logarithmic space #
        if 'log' in x_scale:
            start, stop = numpy.log10(1), numpy.log10(max(lengths))
            bin_edges = list(numpy.logspace(start=start, stop=stop, num=bins))
            # The first edge from logspace is 1, so add a [0, 1) bin too
            bin_edges.insert(0, 0)
        else:
            bin_edges = bins
        # Plot #
        from matplotlib import pyplot
        fig = pyplot.figure()
        # Close the figure even when drawing or saving fails, otherwise
        # it stays registered in pyplot's global state.
        try:
            axes = fig.gca()
            axes.hist(lengths, bins=bin_edges, color='gray')
            # Information #
            title = 'Histogram of sequence lengths'
            axes.set_title(title)
            axes.set_xlabel('Length of sequence in nucleotides')
            axes.set_ylabel('Number of sequences with this length')
            # X lim: start at the shortest length, keep the right limit #
            axes.set_xlim(min(lengths), axes.get_xlim()[1])
            # Save it #
            self.save_plot(fig, axes, **kwargs)
        finally:
            pyplot.close(fig)
        # For convenience #
        return self

The length distribution of the sequences with a histogram.

LengthHist(parent)
def __init__(self, parent):
    """Record the parent object and derive the output path from it."""
    # The parent has to be stored first, the path is built through it #
    self.parent = parent
    pdf_location = self.parent.prefix_path + '_len_hist.pdf'
    self.path = FilePath(pdf_location)
short_name = 'length_hist'
sep = ('x', 'y')
y_grid = True
width = 10
height = 6
remove_frame = True
parent
path
def plot(self, bins=80, **kwargs):
def plot(self, bins=80, **kwargs):
    """
    Draw a gray histogram of `parent.lengths` and hand the figure over
    to `save_plot`.

    `bins` is passed as is to the histogram call, unless the `x_scale`
    keyword argument contains 'log'. In that case it is the number of
    bin edges generated evenly in log10 space between 1 and the largest
    length, and one extra edge at 0 is prepended.

    All keyword arguments are forwarded unchanged to `save_plot`.
    Raises `ValueError` when `parent.lengths` is empty.
    Returns `self` so that calls can be chained.
    """
    # Import numpy #
    import numpy
    # Data #
    lengths = list(self.parent.lengths)
    # Fail early and explicitly, before any figure is created #
    if not lengths:
        raise ValueError("Cannot plot a histogram of sequence lengths:"
                         " no sequence lengths were found.")
    # An `x_scale` explicitly set to None counts as "not logarithmic" #
    x_scale = kwargs.get('x_scale') or ''
    # Linear bins in logarithmic space #
    if 'log' in x_scale:
        start, stop = numpy.log10(1), numpy.log10(max(lengths))
        bin_edges = list(numpy.logspace(start=start, stop=stop, num=bins))
        # The first edge from logspace is 1, so add a [0, 1) bin too
        bin_edges.insert(0, 0)
    else:
        bin_edges = bins
    # Plot #
    from matplotlib import pyplot
    fig = pyplot.figure()
    # Close the figure even when drawing or saving fails, otherwise
    # it stays registered in pyplot's global state.
    try:
        axes = fig.gca()
        axes.hist(lengths, bins=bin_edges, color='gray')
        # Information #
        title = 'Histogram of sequence lengths'
        axes.set_title(title)
        axes.set_xlabel('Length of sequence in nucleotides')
        axes.set_ylabel('Number of sequences with this length')
        # X lim: start at the shortest length, keep the right limit #
        axes.set_xlim(min(lengths), axes.get_xlim()[1])
        # Save it #
        self.save_plot(fig, axes, **kwargs)
    finally:
        pyplot.close(fig)
    # For convenience #
    return self

An example plot function. Subclasses have to override this method.

Inherited Members
plumbing.graphs.Graph
default_params
save_plot
plot_and_save
save_anim