fasta.paired
Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Written by Lucas Sinclair.
MIT Licensed.
Contact at www.sinclair.bio
"""

# Built-in modules #
from six.moves import zip as izip

# Internal modules #
from fasta import FASTA, FASTQ
from plumbing.common import isubsample, GenWithLength
from plumbing.cache import property_cached

# Third party modules #
from tqdm import tqdm

###############################################################################
class PairedFASTA:
    """
    Read and write FASTA file pairs without using too much RAM.

    A pair is made of a forward file (`self.fwd`) and a reverse file
    (`self.rev`), each wrapped in a `FASTA` object. Iterating over the
    pair yields tuples of (forward record, reverse record).
    """

    format = 'fasta'

    def __len__(self): return self.count
    def __iter__(self): return self.parse()
    def __bool__(self): return bool(self.fwd) and bool(self.rev)
    def __repr__(self): return '<%s object on "%s" and "%s">' % \
                        (self.__class__.__name__, self.fwd.path, self.rev.path)

    # Used as a context manager, the pair is created on entry and closed
    # on exit: `with PairedFASTA(f, r) as pair: pair.add(x, y)`
    def __enter__(self): return self.create()
    def __exit__(self, exc_type, exc_value, traceback): self.close()

    @property
    def exists(self):
        """True only when both the forward and the reverse file exist."""
        return self.fwd.exists and self.rev.exists

    def __init__(self, fwd, rev, parent=None):
        """
        :param fwd:    Passed to `FASTA()` to build the forward file object.
        :param rev:    Passed to `FASTA()` to build the reverse file object.
        :param parent: Optional object stored as-is on `self.parent`.
        """
        # FASTA objects #
        self.fwd = FASTA(fwd)
        self.rev = FASTA(rev)
        # Extra #
        self.gzipped = self.fwd.gzipped
        self.parent  = parent

    @property_cached
    def count(self):
        """Check both read counts are equal and return that number."""
        assert self.fwd.count == self.rev.count
        return self.fwd.count

    def open(self):
        """Open both the forward and the reverse file."""
        self.fwd.open()
        self.rev.open()

    def parse(self):
        """Return an iterator of (forward, reverse) tuples read in lockstep."""
        return izip(self.fwd.parse(), self.rev.parse())

    def close(self):
        """Close both files. The reverse file is closed even when closing
        the forward file raises, so that no file is left open."""
        try:     self.fwd.close()
        finally: self.rev.close()

    def create(self):
        """Call `create()` on both files and return `self`."""
        self.fwd.create()
        self.rev.create()
        return self

    def add(self, f, r):
        """Append one forward record `f` and one reverse record `r`."""
        return self.add_pair((f,r))

    def add_pair(self, pair):
        """Append a `(forward, reverse)` tuple of records to the two files."""
        self.fwd.add_seq(pair[0])
        self.rev.add_seq(pair[1])

    def remove(self):
        """Call `remove()` on both files."""
        self.fwd.remove()
        self.rev.remove()

    @property
    def progress(self):
        """Just like self.parse but display a progress bar."""
        return tqdm(self, total=len(self))

    def subsample(self, down_to, dest_pair=None):
        """
        Write `down_to` pairs, as selected by `isubsample`, to a new pair
        of files and return that new pair.

        :param down_to:   Number of pairs to keep. Must be strictly smaller
                          than the number of pairs in this object.
        :param dest_pair: Optional destination pair object. By default a new
                          object of the same class is made, with "subsampled"
                          inserted in both file names.
        """
        # Check size #
        assert down_to < len(self), \
               "Cannot subsample down to %s pairs out of %s." % (down_to, len(self))
        # Make new pair of files #
        if dest_pair is None:
            dest_fwd_path = self.fwd.path.new_name_insert("subsampled")
            dest_rev_path = self.rev.path.new_name_insert("subsampled")
            dest_pair = self.__class__(dest_fwd_path, dest_rev_path)
        # Do it, and always close the destination even if writing fails #
        dest_pair.create()
        try:
            for pair in isubsample(self, down_to): dest_pair.add_pair(pair)
        finally:
            dest_pair.close()
        # Did it work #
        assert len(dest_pair) == down_to
        # Return #
        return dest_pair

    #------------------------------- Extensions ------------------------------#
    def parse_primers(self, *args, **kwargs):
        """
        Call `parse_primers` with the same arguments on both files and
        return the zipped results, wrapped in a `GenWithLength` that
        reports the length of the forward result.
        """
        fwd_gen = self.fwd.parse_primers(*args, **kwargs)
        rev_gen = self.rev.parse_primers(*args, **kwargs)
        generator = izip(fwd_gen, rev_gen)
        return GenWithLength(generator, len(fwd_gen))

###############################################################################
class PairedFASTQ(PairedFASTA):
    """
    Read and write FASTQ file pairs without using too much RAM.

    Same interface as `PairedFASTA`, but both files are wrapped in
    `FASTQ` objects.
    """

    format = 'fastq'

    def __init__(self, fwd, rev, parent=None):
        """
        :param fwd:    Passed to `FASTQ()` to build the forward file object.
        :param rev:    Passed to `FASTQ()` to build the reverse file object.
        :param parent: Optional object stored as-is on `self.parent`.
        """
        # FASTQ objects #
        self.fwd = FASTQ(fwd)
        self.rev = FASTQ(rev)
        # Extra #
        self.gzipped = self.fwd.gzipped
        self.parent  = parent

    def validate(self):
        """Call fastQValidator on these files."""
        self.fwd.validator()
        self.rev.validator()
class
PairedFASTA:
class PairedFASTA:
    """
    Read and write FASTA file pairs without using too much RAM.

    A pair is made of a forward file (`self.fwd`) and a reverse file
    (`self.rev`), each wrapped in a `FASTA` object. Iterating over the
    pair yields tuples of (forward record, reverse record).
    """

    format = 'fasta'

    def __len__(self): return self.count
    def __iter__(self): return self.parse()
    def __bool__(self): return bool(self.fwd) and bool(self.rev)
    def __repr__(self): return '<%s object on "%s" and "%s">' % \
                        (self.__class__.__name__, self.fwd.path, self.rev.path)

    # Used as a context manager, the pair is created on entry and closed
    # on exit: `with PairedFASTA(f, r) as pair: pair.add(x, y)`
    def __enter__(self): return self.create()
    def __exit__(self, exc_type, exc_value, traceback): self.close()

    @property
    def exists(self):
        """True only when both the forward and the reverse file exist."""
        return self.fwd.exists and self.rev.exists

    def __init__(self, fwd, rev, parent=None):
        """
        :param fwd:    Passed to `FASTA()` to build the forward file object.
        :param rev:    Passed to `FASTA()` to build the reverse file object.
        :param parent: Optional object stored as-is on `self.parent`.
        """
        # FASTA objects #
        self.fwd = FASTA(fwd)
        self.rev = FASTA(rev)
        # Extra #
        self.gzipped = self.fwd.gzipped
        self.parent  = parent

    @property_cached
    def count(self):
        """Check both read counts are equal and return that number."""
        assert self.fwd.count == self.rev.count
        return self.fwd.count

    def open(self):
        """Open both the forward and the reverse file."""
        self.fwd.open()
        self.rev.open()

    def parse(self):
        """Return an iterator of (forward, reverse) tuples read in lockstep."""
        return izip(self.fwd.parse(), self.rev.parse())

    def close(self):
        """Close both files. The reverse file is closed even when closing
        the forward file raises, so that no file is left open."""
        try:     self.fwd.close()
        finally: self.rev.close()

    def create(self):
        """Call `create()` on both files and return `self`."""
        self.fwd.create()
        self.rev.create()
        return self

    def add(self, f, r):
        """Append one forward record `f` and one reverse record `r`."""
        return self.add_pair((f,r))

    def add_pair(self, pair):
        """Append a `(forward, reverse)` tuple of records to the two files."""
        self.fwd.add_seq(pair[0])
        self.rev.add_seq(pair[1])

    def remove(self):
        """Call `remove()` on both files."""
        self.fwd.remove()
        self.rev.remove()

    @property
    def progress(self):
        """Just like self.parse but display a progress bar."""
        return tqdm(self, total=len(self))

    def subsample(self, down_to, dest_pair=None):
        """
        Write `down_to` pairs, as selected by `isubsample`, to a new pair
        of files and return that new pair.

        :param down_to:   Number of pairs to keep. Must be strictly smaller
                          than the number of pairs in this object.
        :param dest_pair: Optional destination pair object. By default a new
                          object of the same class is made, with "subsampled"
                          inserted in both file names.
        """
        # Check size #
        assert down_to < len(self), \
               "Cannot subsample down to %s pairs out of %s." % (down_to, len(self))
        # Make new pair of files #
        if dest_pair is None:
            dest_fwd_path = self.fwd.path.new_name_insert("subsampled")
            dest_rev_path = self.rev.path.new_name_insert("subsampled")
            dest_pair = self.__class__(dest_fwd_path, dest_rev_path)
        # Do it, and always close the destination even if writing fails #
        dest_pair.create()
        try:
            for pair in isubsample(self, down_to): dest_pair.add_pair(pair)
        finally:
            dest_pair.close()
        # Did it work #
        assert len(dest_pair) == down_to
        # Return #
        return dest_pair

    #------------------------------- Extensions ------------------------------#
    def parse_primers(self, *args, **kwargs):
        """
        Call `parse_primers` with the same arguments on both files and
        return the zipped results, wrapped in a `GenWithLength` that
        reports the length of the forward result.
        """
        fwd_gen = self.fwd.parse_primers(*args, **kwargs)
        rev_gen = self.rev.parse_primers(*args, **kwargs)
        generator = izip(fwd_gen, rev_gen)
        return GenWithLength(generator, len(fwd_gen))
Read and write FASTA file pairs without using too much RAM.
def
subsample(self, down_to, dest_pair=None):
def subsample(self, down_to, dest_pair=None):
    """
    Write `down_to` pairs, as selected by `isubsample`, to a new pair
    of files and return that new pair.

    :param down_to:   Number of pairs to keep. Must be strictly smaller
                      than the number of pairs in this object.
    :param dest_pair: Optional destination pair object. By default a new
                      object of the same class is made, with "subsampled"
                      inserted in both file names.
    """
    # Check size #
    assert down_to < len(self), \
           "Cannot subsample down to %s pairs out of %s." % (down_to, len(self))
    # Make new pair of files #
    if dest_pair is None:
        dest_fwd_path = self.fwd.path.new_name_insert("subsampled")
        dest_rev_path = self.rev.path.new_name_insert("subsampled")
        dest_pair = self.__class__(dest_fwd_path, dest_rev_path)
    # Do it, and always close the destination even if writing fails #
    dest_pair.create()
    try:
        for pair in isubsample(self, down_to): dest_pair.add_pair(pair)
    finally:
        dest_pair.close()
    # Did it work #
    assert len(dest_pair) == down_to
    # Return #
    return dest_pair
class PairedFASTQ(PairedFASTA):
    """
    Read and write FASTQ file pairs without using too much RAM.

    Both the forward and the reverse file are wrapped in `FASTQ` objects.
    """

    format = 'fastq'

    def __init__(self, fwd, rev, parent=None):
        """
        :param fwd:    Passed to `FASTQ()` to build the forward file object.
        :param rev:    Passed to `FASTQ()` to build the reverse file object.
        :param parent: Optional object stored as-is on `self.parent`.
        """
        # Wrap the forward file first, then the reverse one #
        self.fwd, self.rev = FASTQ(fwd), FASTQ(rev)
        # The pair takes its gzipped flag from the forward file #
        self.gzipped = self.fwd.gzipped
        self.parent = parent

    def validate(self):
        """Call fastQValidator on these files (forward first, then reverse)."""
        for reads in (self.fwd, self.rev):
            reads.validator()
Read and write FASTQ file pairs without using too much RAM.