fasta.paired

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

  1#!/usr/bin/env python3
  2# -*- coding: utf-8 -*-
  3
  4"""
  5Written by Lucas Sinclair.
  6MIT Licensed.
  7Contact at www.sinclair.bio
  8"""
  9
 10# Built-in modules #
 11from six.moves import zip as izip
 12
 13# Internal modules #
 14from fasta import FASTA, FASTQ
 15from plumbing.common import isubsample, GenWithLength
 16from plumbing.cache import property_cached
 17
 18# Third party modules #
 19from tqdm import tqdm
 20
 21###############################################################################
 22class PairedFASTA:
 23    """Read and write FASTA file pairs without using too much RAM."""
 24
 25    format = 'fasta'
 26
 27    def __len__(self):  return self.count
 28    def __iter__(self): return self.parse()
 29    def __bool__(self): return bool(self.fwd) and bool(self.rev)
 30    def __repr__(self): return '<%s object on "%s" and "%s">' % \
 31                        (self.__class__.__name__, self.fwd.path, self.rev.path)
 32
 33    def __enter__(self): return self.create()
 34    def __exit__(self, exc_type, exc_value, traceback): self.close()
 35
 36    @property
 37    def exists(self): return self.fwd.exists and self.rev.exists
 38
 39    def __init__(self, fwd, rev, parent=None):
 40        # FASTA objects #
 41        self.fwd = FASTA(fwd)
 42        self.rev = FASTA(rev)
 43        # Extra #
 44        self.gzipped = self.fwd.gzipped
 45        self.parent = parent
 46
 47    @property_cached
 48    def count(self):
 49        """Check both read counts are equal and return that number."""
 50        assert self.fwd.count == self.rev.count
 51        return self.fwd.count
 52
 53    def open(self):
 54        self.fwd.open()
 55        self.rev.open()
 56
 57    def parse(self):
 58        return izip(self.fwd.parse(), self.rev.parse())
 59
 60    def close(self):
 61        self.fwd.close()
 62        self.rev.close()
 63
 64    def create(self):
 65        self.fwd.create()
 66        self.rev.create()
 67        return self
 68
 69    def add(self, f, r):
 70        return self.add_pair((f,r))
 71
 72    def add_pair(self, pair):
 73        self.fwd.add_seq(pair[0])
 74        self.rev.add_seq(pair[1])
 75
 76    def remove(self):
 77        self.fwd.remove()
 78        self.rev.remove()
 79
 80    @property
 81    def progress(self):
 82        """Just like self.parse but display a progress bar."""
 83        return tqdm(self, total=len(self))
 84
 85    def subsample(self, down_to, dest_pair=None):
 86        # Check size #
 87        assert down_to < len(self)
 88        # Make new pair of files #
 89        if dest_pair is None:
 90            dest_fwd_path = self.fwd.path.new_name_insert("subsampled")
 91            dest_rev_path = self.rev.path.new_name_insert("subsampled")
 92            dest_pair = self.__class__(dest_fwd_path, dest_rev_path)
 93        # Do it #
 94        dest_pair.create()
 95        for pair in isubsample(self, down_to): dest_pair.add_pair(pair)
 96        dest_pair.close()
 97        # Did it work #
 98        assert len(dest_pair) == down_to
 99        # Return #
100        return dest_pair
101
102    #------------------------------- Extensions ------------------------------#
103    def parse_primers(self, *args, **kwargs):
104        fwd_gen = self.fwd.parse_primers(*args, **kwargs)
105        rev_gen = self.rev.parse_primers(*args, **kwargs)
106        generator = izip(fwd_gen, rev_gen)
107        return GenWithLength(generator, len(fwd_gen))
108
109###############################################################################
class PairedFASTQ(PairedFASTA):
    """Same as the parent class, but each file is wrapped in a `FASTQ`
    object and the pair can be validated."""

    format = 'fastq'

    def __init__(self, fwd, rev, parent=None):
        """
        :param fwd:    Passed as is to `FASTQ` to make the forward file.
        :param rev:    Passed as is to `FASTQ` to make the reverse file.
        :param parent: Optional object stored unchanged on `self.parent`.
        """
        # Extra #
        self.parent = parent
        # FASTQ objects #
        self.fwd = FASTQ(fwd)
        self.rev = FASTQ(rev)
        # Only the forward file is consulted for the gzipped flag #
        self.gzipped = self.fwd.gzipped

    def validate(self):
        """Call `validator()` on the forward file, then on the reverse file."""
        for reads in (self.fwd, self.rev):
            reads.validator()
class PairedFASTA:
    """Read and write FASTA file pairs without using too much RAM.

    A pair is made of a forward file (`self.fwd`) and a reverse file
    (`self.rev`), each wrapped in a `FASTA` object. Most methods simply
    delegate to both files, forward first.
    """

    format = 'fasta'

    # Length is the shared read count, iteration goes through `parse()`,
    # and the pair is truthy only when both files are truthy #
    def __len__(self):  return self.count
    def __iter__(self): return self.parse()
    def __bool__(self): return bool(self.fwd) and bool(self.rev)
    def __repr__(self): return '<%s object on "%s" and "%s">' % \
                        (self.__class__.__name__, self.fwd.path, self.rev.path)

    # Used as a context manager: entering calls `create()`, leaving calls
    # `close()`. Nothing is returned from `__exit__`, so exceptions propagate #
    def __enter__(self): return self.create()
    def __exit__(self, exc_type, exc_value, traceback): self.close()

    @property
    def exists(self):
        """True only when both the forward and the reverse file exist."""
        return self.fwd.exists and self.rev.exists

    def __init__(self, fwd, rev, parent=None):
        """
        :param fwd:    Passed as is to `FASTA` to make the forward file.
        :param rev:    Passed as is to `FASTA` to make the reverse file.
        :param parent: Optional object stored unchanged on `self.parent`.
        """
        # FASTA objects #
        self.fwd = FASTA(fwd)
        self.rev = FASTA(rev)
        # Extra #
        # Only the forward file is consulted for the gzipped flag #
        self.gzipped = self.fwd.gzipped
        self.parent = parent

    @property_cached
    def count(self):
        """Check both read counts are equal and return that number.

        The check is an `assert`, so it raises `AssertionError` on a
        mismatch and is skipped when Python runs with `-O`.
        """
        assert self.fwd.count == self.rev.count
        return self.fwd.count

    def open(self):
        """Open the forward file, then the reverse file."""
        self.fwd.open()
        self.rev.open()

    def parse(self):
        """Return an iterator that pairs up what each file's `parse()` yields."""
        return izip(self.fwd.parse(), self.rev.parse())

    def close(self):
        """Close both files.

        The reverse file is closed even when closing the forward file
        raises, so a failure on one side never leaks the other handle.
        The original exception still propagates.
        """
        try:
            self.fwd.close()
        finally:
            self.rev.close()

    def create(self):
        """Call `create()` on both files and return this object."""
        self.fwd.create()
        self.rev.create()
        return self

    def add(self, f, r):
        """Add one forward read `f` and one reverse read `r` as a pair."""
        return self.add_pair((f,r))

    def add_pair(self, pair):
        """Add `pair[0]` to the forward file and `pair[1]` to the reverse file."""
        self.fwd.add_seq(pair[0])
        self.rev.add_seq(pair[1])

    def remove(self):
        """Call `remove()` on both files."""
        self.fwd.remove()
        self.rev.remove()

    @property
    def progress(self):
        """Just like self.parse but display a progress bar."""
        return tqdm(self, total=len(self))

    def subsample(self, down_to, dest_pair=None):
        """Write the pairs selected by `isubsample` to another pair of files.

        :param down_to:   Number of pairs to keep. It must be strictly
                          smaller than `len(self)`.
        :param dest_pair: Optional paired object to write to. When omitted,
                          a new object of the same class is made, with both
                          paths derived via `new_name_insert("subsampled")`.
        :return: The destination pair.
        :raises AssertionError: If `down_to` is not smaller than the current
                                count, or if the destination does not end up
                                with exactly `down_to` pairs.
        """
        # Check size #
        assert down_to < len(self)
        # Make new pair of files #
        if dest_pair is None:
            dest_fwd_path = self.fwd.path.new_name_insert("subsampled")
            dest_rev_path = self.rev.path.new_name_insert("subsampled")
            dest_pair = self.__class__(dest_fwd_path, dest_rev_path)
        # Do it #
        dest_pair.create()
        # Always close the destination, even if reading or writing fails #
        try:
            for pair in isubsample(self, down_to): dest_pair.add_pair(pair)
        finally:
            dest_pair.close()
        # Did it work #
        assert len(dest_pair) == down_to
        # Return #
        return dest_pair

    #------------------------------- Extensions ------------------------------#
    def parse_primers(self, *args, **kwargs):
        """Pair up the results of `parse_primers` from both files.

        All arguments are forwarded unchanged to each file. The reported
        length is the one of the forward result only.
        """
        fwd_gen = self.fwd.parse_primers(*args, **kwargs)
        rev_gen = self.rev.parse_primers(*args, **kwargs)
        generator = izip(fwd_gen, rev_gen)
        return GenWithLength(generator, len(fwd_gen))

Read and write FASTA file pairs without using too much RAM.

PairedFASTA(fwd, rev, parent=None)
def __init__(self, fwd, rev, parent=None):
    """
    :param fwd:    Passed as is to `FASTA` to make the forward file.
    :param rev:    Passed as is to `FASTA` to make the reverse file.
    :param parent: Optional object stored unchanged on `self.parent`.
    """
    # Extra #
    self.parent = parent
    # FASTA objects #
    self.fwd = FASTA(fwd)
    self.rev = FASTA(rev)
    # Only the forward file is consulted for the gzipped flag #
    self.gzipped = self.fwd.gzipped
format = 'fasta'
exists
fwd
rev
gzipped
parent
count

Check both read counts are equal and return that number.

def open(self):
    """Open the forward file, then the reverse file."""
    for reads in (self.fwd, self.rev):
        reads.open()
def parse(self):
    """Return an iterator that pairs up what each file's `parse()` yields."""
    forward_reads = self.fwd.parse()
    reverse_reads = self.rev.parse()
    return izip(forward_reads, reverse_reads)
def close(self):
    """Close both files.

    The reverse file is closed even when closing the forward file raises,
    so a failure on one side never leaks the other handle. The original
    exception still propagates.
    """
    try:
        self.fwd.close()
    finally:
        self.rev.close()
def create(self):
    """Call `create()` on the forward then the reverse file; return self."""
    for reads in (self.fwd, self.rev):
        reads.create()
    return self
def add(self, f, r):
    """Bundle the forward read `f` and the reverse read `r` into a tuple
    and hand it to `add_pair`, returning whatever that call returns."""
    pair = (f, r)
    return self.add_pair(pair)
def add_pair(self, pair):
    """Add `pair[0]` to the forward file and `pair[1]` to the reverse file."""
    for position, reads in enumerate((self.fwd, self.rev)):
        reads.add_seq(pair[position])
def remove(self):
    """Call `remove()` on the forward file, then on the reverse file."""
    for reads in (self.fwd, self.rev):
        reads.remove()
progress

Just like self.parse but display a progress bar.

def subsample(self, down_to, dest_pair=None):
    """Write the pairs selected by `isubsample` to another pair of files.

    :param down_to:   Number of pairs to keep. It must be strictly smaller
                      than `len(self)`.
    :param dest_pair: Optional paired object to write to. When omitted, a
                      new object of the same class is made, with both paths
                      derived via `new_name_insert("subsampled")`.
    :return: The destination pair.
    :raises AssertionError: If `down_to` is not smaller than the current
                            count, or if the destination does not end up
                            with exactly `down_to` pairs.
    """
    # Check size #
    assert down_to < len(self)
    # Make new pair of files #
    if dest_pair is None:
        dest_fwd_path = self.fwd.path.new_name_insert("subsampled")
        dest_rev_path = self.rev.path.new_name_insert("subsampled")
        dest_pair = self.__class__(dest_fwd_path, dest_rev_path)
    # Do it #
    dest_pair.create()
    # Always close the destination, even if reading or writing fails #
    try:
        for pair in isubsample(self, down_to): dest_pair.add_pair(pair)
    finally:
        dest_pair.close()
    # Did it work #
    assert len(dest_pair) == down_to
    # Return #
    return dest_pair
def parse_primers(self, *args, **kwargs):
    """Pair up the results of `parse_primers` from both files.

    All arguments are forwarded unchanged to each file. The reported
    length is the one of the forward result only.
    """
    forward_hits = self.fwd.parse_primers(*args, **kwargs)
    reverse_hits = self.rev.parse_primers(*args, **kwargs)
    return GenWithLength(izip(forward_hits, reverse_hits), len(forward_hits))
class PairedFASTQ(PairedFASTA):
    """Same as the parent class, but each file is wrapped in a `FASTQ`
    object and the pair can be validated."""

    format = 'fastq'

    def __init__(self, fwd, rev, parent=None):
        """
        :param fwd:    Passed as is to `FASTQ` to make the forward file.
        :param rev:    Passed as is to `FASTQ` to make the reverse file.
        :param parent: Optional object stored unchanged on `self.parent`.
        """
        # Extra #
        self.parent = parent
        # FASTQ objects #
        self.fwd = FASTQ(fwd)
        self.rev = FASTQ(rev)
        # Only the forward file is consulted for the gzipped flag #
        self.gzipped = self.fwd.gzipped

    def validate(self):
        """Call `validator()` on the forward file, then on the reverse file."""
        for reads in (self.fwd, self.rev):
            reads.validator()

Read and write FASTQ file pairs without using too much RAM.

PairedFASTQ(fwd, rev, parent=None)
def __init__(self, fwd, rev, parent=None):
    """
    :param fwd:    Passed as is to `FASTQ` to make the forward file.
    :param rev:    Passed as is to `FASTQ` to make the reverse file.
    :param parent: Optional object stored unchanged on `self.parent`.
    """
    # Extra #
    self.parent = parent
    # FASTQ objects #
    self.fwd = FASTQ(fwd)
    self.rev = FASTQ(rev)
    # Only the forward file is consulted for the gzipped flag #
    self.gzipped = self.fwd.gzipped
format = 'fastq'
fwd
rev
gzipped
parent
def validate(self):
    """Call `validator()` on the forward file, then on the reverse file."""
    for reads in (self.fwd, self.rev):
        reads.validator()

Call fastQValidator on these files.