fasta.primers
Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Written by Lucas Sinclair.
MIT Licensed.
Contact at www.sinclair.bio
"""

# Built-in modules #
import re

# Internal modules #
from plumbing.color import Color

# Third party modules #

# Constants #
iupac = {'A':'A', 'G':'G', 'T':'T', 'C':'C',
         'M':'AC', 'R':'AG', 'W':'AT', 'S':'CG', 'Y':'CT', 'K':'GT',
         'V':'ACG', 'H':'ACT', 'D':'AGT', 'B':'CGT',
         'X':'ACGT', 'N':'ACGT'}

# Function to create a regex pattern from a sequence #
def iupac_pattern(seq):
    """
    Create a regex pattern from a sequence.

    Every character of `seq` is looked up in the `iupac` table and becomes
    one bracketed character class; the classes are concatenated in order.
    A character that is absent from the table raises a `KeyError`.
    """
    return ''.join('[' + iupac[char] + ']' for char in seq)

###############################################################################
class TwoPrimers:
    """
    A container for the two primers of a sample.
    Has methods for generating regexes to search for these primers.
    """

    def __len__(self): return 2

    def __init__(self, fwd_str, rev_str):
        """
        Store both primers as strings, biopython objects and regex patterns,
        in their original orientation and reverse complemented.
        """
        from Bio.Seq import Seq
        # Original strings #
        self.fwd_str = fwd_str
        self.rev_str = rev_str
        # Lengths in base pairs #
        self.fwd_len = len(self.fwd_str)
        self.rev_len = len(self.rev_str)
        # Sequences as biopython objects #
        self.fwd_seq = Seq(self.fwd_str)
        self.rev_seq = Seq(self.rev_str)
        # Create search patterns in regex syntax #
        self.fwd_pat = iupac_pattern(self.fwd_seq)
        self.rev_pat = iupac_pattern(self.rev_seq)
        # Reverse complemented sequences #
        self.fwd_revcomp = self.fwd_seq.reverse_complement()
        self.rev_revcomp = self.rev_seq.reverse_complement()
        # Search patterns when reverse complemented #
        self.fwd_pat_revcomp = iupac_pattern(self.fwd_revcomp)
        self.rev_pat_revcomp = iupac_pattern(self.rev_revcomp)
        # Simple search expression (without any mismatches authorized yet) #
        self.fwd_search = re.compile(self.fwd_pat)
        self.rev_search = re.compile(self.rev_pat)

    def make_regex(self, pat, mismatches):
        """
        Complex search expression with mismatches this time.
        The pattern is wrapped in a group followed by the `{s<=N}` fuzzy
        constraint of the third party `regex` module.
        """
        import regex
        return regex.compile("(%s){s<=%i}" % (pat, mismatches))

    def make_fwd_regex(self, mismatches):
        """Fuzzy expression for the forward primer."""
        return self.make_regex(self.fwd_pat, mismatches)

    def make_rev_regex(self, mismatches):
        """Fuzzy expression for the reverse primer."""
        return self.make_regex(self.rev_pat, mismatches)

    def make_fwd_revcompl_regex(self, mismatches):
        """Fuzzy expression for the reverse complemented forward primer."""
        return self.make_regex(self.fwd_pat_revcomp, mismatches)

    def make_rev_revcompl_regex(self, mismatches):
        """Fuzzy expression for the reverse complemented reverse primer."""
        return self.make_regex(self.rev_pat_revcomp, mismatches)

###############################################################################
class PrimersRegexes:
    """
    A container for the regular expression search patterns
    that enable us to find primers inside a sequence.
    These regexes depend on the number of mismatches authorized.
    """

    def __init__(self, primers, mismatches):
        """
        We need to know the primers and the number of mismatches tolerated
        in the search.
        """
        # Base attributes #
        self.primers = primers
        self.mismatches = mismatches
        # Search patterns #
        self.fwd = primers.make_fwd_regex(mismatches)
        self.rev = primers.make_rev_regex(mismatches)
        # Search patterns reverse complemented #
        self.fwd_rc = primers.make_fwd_revcompl_regex(mismatches)
        self.rev_rc = primers.make_rev_revcompl_regex(mismatches)

###############################################################################
class ReadWithPrimers:
    """
    The outcome of searching one read for both primers in both
    orientations, along with a colored rendering for debugging.
    """

    def __init__(self, read, regexes):
        """
        Uses regex patterns to search the given read.
        Records the start and end positions of primers if they are found.
        Both the forward and reverse primers are searched for.
        Both the original sequences and their reverse complements are
        searched for, in case the read is in the opposite direction.
        """
        # The read itself #
        self.read = read
        # The sequence as a string #
        self.seq = str(read.seq)
        # Searches #
        self.fwd = regexes.fwd.search(self.seq)
        self.rev = regexes.rev.search(self.seq)
        self.fwd_rc = regexes.fwd_rc.search(self.seq)
        self.rev_rc = regexes.rev_rc.search(self.seq)
        # Positions found in standard search #
        self.fwd_srt = self.fwd.start() if self.fwd else None
        self.fwd_end = self.fwd.end() if self.fwd else None
        self.rev_srt = self.rev.start() if self.rev else None
        self.rev_end = self.rev.end() if self.rev else None
        # Positions found in reverse complement search #
        self.fwd_rc_srt = self.fwd_rc.start() if self.fwd_rc else None
        self.fwd_rc_end = self.fwd_rc.end() if self.fwd_rc else None
        self.rev_rc_srt = self.rev_rc.start() if self.rev_rc else None
        self.rev_rc_end = self.rev_rc.end() if self.rev_rc else None

    @property
    def pretty_visualization(self):
        """
        This property is useful for debugging.
        It will return a nicely formatted string showing the original read
        with all primers found highlighted with bash color codes.
        """
        # Make a copy of the read for convenience #
        seq = self.seq
        # Color opened at the start of each kind of match #
        starts = ((self.fwd_srt,    Color.b_grn),
                  (self.rev_srt,    Color.grn),
                  (self.fwd_rc_srt, Color.red),
                  (self.rev_rc_srt, Color.b_red))
        # Every match is closed with the same reset code #
        ends = (self.fwd_end, self.rev_end, self.fwd_rc_end, self.rev_rc_end)
        # Initialize output #
        pieces = []
        # Iterate over every position in the original sequence #
        for i, nuc in enumerate(seq):
            # Resets go first, otherwise a primer starting exactly where
            # another one ends would have its color cancelled right away #
            pieces.extend(Color.end for end in ends if end == i)
            pieces.extend(color for srt, color in starts if srt == i)
            pieces.append(nuc)
        # A match reaching the last nucleotide ends at len(seq), an index
        # the loop never visits, so its color has to be reset here #
        pieces.extend(Color.end for end in ends if end == len(seq))
        out = ''.join(pieces)
        # Summary of found positions #
        summary = f"""
        Forward start: {self.fwd_srt}
        Forward end: {self.fwd_end}
        Reverse start: {self.rev_srt}
        Reverse end: {self.rev_end}
        Forward revcompl start: {self.fwd_rc_srt}
        Forward revcompl end: {self.fwd_rc_end}
        Reverse revcompl start: {self.rev_rc_srt}
        Reverse revcompl end: {self.rev_rc_end}
        """
        # Return #
        return summary + out + '\n'
def iupac_pattern(seq):
    """
    Create a regex pattern from a sequence.

    Every character of `seq` is looked up in the `iupac` table and becomes
    one bracketed character class; the classes are concatenated in order.
    A character that is absent from the table raises a `KeyError`.
    """
    return ''.join('[' + iupac[char] + ']' for char in seq)
class TwoPrimers:
    """
    Holds the forward and the reverse primer of a sample.
    Offers helpers that build regexes to look for these primers.
    """

    def __len__(self):
        # A pair is always made of exactly two primers #
        return 2

    def __init__(self, fwd_str, rev_str):
        """
        Keep both primers as strings, biopython objects and regex patterns,
        as given and reverse complemented.
        """
        from Bio.Seq import Seq
        # Raw strings and their sizes in base pairs #
        self.fwd_str, self.rev_str = fwd_str, rev_str
        self.fwd_len, self.rev_len = len(fwd_str), len(rev_str)
        # Biopython objects #
        fwd, rev = Seq(fwd_str), Seq(rev_str)
        self.fwd_seq, self.rev_seq = fwd, rev
        # Regex syntax for the primers as given #
        self.fwd_pat, self.rev_pat = iupac_pattern(fwd), iupac_pattern(rev)
        # Reverse complements #
        fwd_rc, rev_rc = fwd.reverse_complement(), rev.reverse_complement()
        self.fwd_revcomp, self.rev_revcomp = fwd_rc, rev_rc
        # Regex syntax for the reverse complements #
        self.fwd_pat_revcomp = iupac_pattern(fwd_rc)
        self.rev_pat_revcomp = iupac_pattern(rev_rc)
        # Compiled expressions (no mismatches authorized here) #
        self.fwd_search = re.compile(self.fwd_pat)
        self.rev_search = re.compile(self.rev_pat)

    def make_regex(self, pat, mismatches):
        """
        Compile `pat` with mismatches authorized, by wrapping it in a group
        followed by the `{s<=N}` constraint of the `regex` module.
        """
        import regex
        fuzzy = "(%s){s<=%i}" % (pat, mismatches)
        return regex.compile(fuzzy)

    def make_fwd_regex(self, mismatches):
        """Expression with mismatches for the forward primer."""
        pattern = self.fwd_pat
        return self.make_regex(pattern, mismatches)

    def make_rev_regex(self, mismatches):
        """Expression with mismatches for the reverse primer."""
        pattern = self.rev_pat
        return self.make_regex(pattern, mismatches)

    def make_fwd_revcompl_regex(self, mismatches):
        """Expression with mismatches for the reverse complemented forward primer."""
        pattern = self.fwd_pat_revcomp
        return self.make_regex(pattern, mismatches)

    def make_rev_revcompl_regex(self, mismatches):
        """Expression with mismatches for the reverse complemented reverse primer."""
        pattern = self.rev_pat_revcomp
        return self.make_regex(pattern, mismatches)
A container for the two primers of a sample. Has methods for generating regexes to search for these primers.
def __init__(self, fwd_str, rev_str):
    """
    Keep both primers as strings, biopython objects and regex patterns,
    as given and reverse complemented.
    """
    from Bio.Seq import Seq
    # Raw strings and their sizes in base pairs #
    self.fwd_str, self.rev_str = fwd_str, rev_str
    self.fwd_len, self.rev_len = len(fwd_str), len(rev_str)
    # Biopython objects #
    fwd, rev = Seq(fwd_str), Seq(rev_str)
    self.fwd_seq, self.rev_seq = fwd, rev
    # Regex syntax for the primers as given #
    self.fwd_pat, self.rev_pat = iupac_pattern(fwd), iupac_pattern(rev)
    # Reverse complements #
    fwd_rc, rev_rc = fwd.reverse_complement(), rev.reverse_complement()
    self.fwd_revcomp, self.rev_revcomp = fwd_rc, rev_rc
    # Regex syntax for the reverse complements #
    self.fwd_pat_revcomp = iupac_pattern(fwd_rc)
    self.rev_pat_revcomp = iupac_pattern(rev_rc)
    # Compiled expressions (no mismatches authorized here) #
    self.fwd_search = re.compile(self.fwd_pat)
    self.rev_search = re.compile(self.rev_pat)
def make_regex(self, pat, mismatches):
    """
    Compile `pat` with mismatches authorized, by wrapping it in a group
    followed by the `{s<=N}` constraint of the `regex` module.
    """
    # The import happens only when such an expression is requested #
    import regex
    fuzzy = "(%s){s<=%i}" % (pat, mismatches)
    return regex.compile(fuzzy)
Complex search expression with mismatches this time.
class PrimersRegexes:
    """
    Groups the four regular expressions used to locate primers inside
    a sequence. They are built for one given number of authorized
    mismatches.
    """

    def __init__(self, primers, mismatches):
        """
        Takes the primers object and the number of mismatches tolerated,
        and asks the primers for each of the four search patterns.
        """
        allowed = mismatches
        # Remember what the patterns were built from #
        self.primers = primers
        self.mismatches = allowed
        # Primers in the orientation they were given #
        self.fwd, self.rev = (primers.make_fwd_regex(allowed),
                              primers.make_rev_regex(allowed))
        # Primers reverse complemented #
        self.fwd_rc, self.rev_rc = (primers.make_fwd_revcompl_regex(allowed),
                                    primers.make_rev_revcompl_regex(allowed))
A container for the regular expression search patterns that enable us to find primers inside a sequence. These regexes depend on the number of mismatches authorized.
86 def __init__(self, primers, mismatches): 87 """ 88 We need to know the primers and the number of mismatches tolerated 89 in the search. 90 """ 91 # Base attributes # 92 self.primers = primers 93 self.mismatches = mismatches 94 # Search patterns # 95 self.fwd = primers.make_fwd_regex(mismatches) 96 self.rev = primers.make_rev_regex(mismatches) 97 # Search patterns reverse complemented # 98 self.fwd_rc = primers.make_fwd_revcompl_regex(mismatches) 99 self.rev_rc = primers.make_rev_revcompl_regex(mismatches)
We need to know the primers and the number of mismatches tolerated in the search.
class ReadWithPrimers:
    """
    The outcome of searching one read for both primers in both
    orientations, along with a colored rendering for debugging.
    """

    def __init__(self, read, regexes):
        """
        Uses regex patterns to search the given read.
        Records the start and end positions of primers if they are found.
        Both the forward and reverse primers are searched for.
        Both the original sequences and their reverse complements are
        searched for, in case the read is in the opposite direction.
        """
        # The read itself #
        self.read = read
        # The sequence as a string #
        self.seq = str(read.seq)
        # Searches #
        self.fwd = regexes.fwd.search(self.seq)
        self.rev = regexes.rev.search(self.seq)
        self.fwd_rc = regexes.fwd_rc.search(self.seq)
        self.rev_rc = regexes.rev_rc.search(self.seq)
        # Positions found in standard search #
        self.fwd_srt = self.fwd.start() if self.fwd else None
        self.fwd_end = self.fwd.end() if self.fwd else None
        self.rev_srt = self.rev.start() if self.rev else None
        self.rev_end = self.rev.end() if self.rev else None
        # Positions found in reverse complement search #
        self.fwd_rc_srt = self.fwd_rc.start() if self.fwd_rc else None
        self.fwd_rc_end = self.fwd_rc.end() if self.fwd_rc else None
        self.rev_rc_srt = self.rev_rc.start() if self.rev_rc else None
        self.rev_rc_end = self.rev_rc.end() if self.rev_rc else None

    @property
    def pretty_visualization(self):
        """
        This property is useful for debugging.
        It will return a nicely formatted string showing the original read
        with all primers found highlighted with bash color codes.
        """
        # Make a copy of the read for convenience #
        seq = self.seq
        # Color opened at the start of each kind of match #
        starts = ((self.fwd_srt,    Color.b_grn),
                  (self.rev_srt,    Color.grn),
                  (self.fwd_rc_srt, Color.red),
                  (self.rev_rc_srt, Color.b_red))
        # Every match is closed with the same reset code #
        ends = (self.fwd_end, self.rev_end, self.fwd_rc_end, self.rev_rc_end)
        # Initialize output #
        pieces = []
        # Iterate over every position in the original sequence #
        for i, nuc in enumerate(seq):
            # Resets go first, otherwise a primer starting exactly where
            # another one ends would have its color cancelled right away #
            pieces.extend(Color.end for end in ends if end == i)
            pieces.extend(color for srt, color in starts if srt == i)
            pieces.append(nuc)
        # A match reaching the last nucleotide ends at len(seq), an index
        # the loop never visits, so its color has to be reset here #
        pieces.extend(Color.end for end in ends if end == len(seq))
        out = ''.join(pieces)
        # Summary of found positions #
        summary = f"""
        Forward start: {self.fwd_srt}
        Forward end: {self.fwd_end}
        Reverse start: {self.rev_srt}
        Reverse end: {self.rev_end}
        Forward revcompl start: {self.fwd_rc_srt}
        Forward revcompl end: {self.fwd_rc_end}
        Reverse revcompl start: {self.rev_rc_srt}
        Reverse revcompl end: {self.rev_rc_end}
        """
        # Return #
        return summary + out + '\n'
103 def __init__(self, read, regexes): 104 """ 105 Uses regex patterns to search the given read. 106 Records the start and end positions of primers if they are found. 107 Both the forward and reverse primers are searched for. 108 Both the original sequences and their reverse complements are 109 searched for, in case the read is in the opposite direction. 110 """ 111 # The read itself # 112 self.read = read 113 # The sequence as a string # 114 self.seq = str(read.seq) 115 # Searches # 116 self.fwd = regexes.fwd.search(self.seq) 117 self.rev = regexes.rev.search(self.seq) 118 self.fwd_rc = regexes.fwd_rc.search(self.seq) 119 self.rev_rc = regexes.rev_rc.search(self.seq) 120 # Positions found in standard search # 121 self.fwd_srt = self.fwd.start() if self.fwd else None 122 self.fwd_end = self.fwd.end() if self.fwd else None 123 self.rev_srt = self.rev.start() if self.rev else None 124 self.rev_end = self.rev.end() if self.rev else None 125 # Positions found in reverse complement search # 126 self.fwd_rc_srt = self.fwd_rc.start() if self.fwd_rc else None 127 self.fwd_rc_end = self.fwd_rc.end() if self.fwd_rc else None 128 self.rev_rc_srt = self.rev_rc.start() if self.rev_rc else None 129 self.rev_rc_end = self.rev_rc.end() if self.rev_rc else None
Uses regex patterns to search the given read. Records the start and end positions of primers if they are found. Both the forward and reverse primers are searched for. Both the original sequences and their reverse complements are searched for, in case the read is in the opposite direction.