fasta.primers

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

  1#!/usr/bin/env python3
  2# -*- coding: utf-8 -*-
  3
  4"""
  5Written by Lucas Sinclair.
  6MIT Licensed.
  7Contact at www.sinclair.bio
  8"""
  9
 10# Built-in modules #
 11import re
 12
 13# Internal modules #
 14from plumbing.color  import Color
 15
 16# Third party modules #
 17
 18# Constants #
 19iupac = {'A':'A',    'G':'G',   'T':'T',   'C':'C',
 20         'M':'AC',   'R':'AG',  'W':'AT',  'S':'CG',   'Y':'CT',   'K':'GT',
 21         'V':'ACG',  'H':'ACT', 'D':'AGT', 'B':'CGT',
 22         'X':'ACGT', 'N':'ACGT'}
 23
 24# Function to create a regex pattern from a sequence #
 25iupac_pattern = lambda seq: ''.join(['[' + iupac[char] + ']' for char in seq])
 26
 27###############################################################################
 28class TwoPrimers:
 29    """
 30    A container for the two primers of a sample.
 31    Has methods for generating regexes to search for these primers.
 32    """
 33
 34    def __len__(self): return 2
 35
 36    def __init__(self, fwd_str, rev_str):
 37        from Bio.Seq import Seq
 38        # Original strings #
 39        self.fwd_str = fwd_str
 40        self.rev_str = rev_str
 41        # Lengths in base pairs #
 42        self.fwd_len = len(self.fwd_str)
 43        self.rev_len = len(self.rev_str)
 44        # Sequences as biopython objects #
 45        self.fwd_seq = Seq(self.fwd_str)
 46        self.rev_seq = Seq(self.rev_str)
 47        # Create search patterns in regex syntax #
 48        self.fwd_pat = iupac_pattern(self.fwd_seq)
 49        self.rev_pat = iupac_pattern(self.rev_seq)
 50        # Reverse complemented sequences #
 51        self.fwd_revcomp = self.fwd_seq.reverse_complement()
 52        self.rev_revcomp = self.rev_seq.reverse_complement()
 53        # Search patterns when reverse complemented #
 54        self.fwd_pat_revcomp = iupac_pattern(self.fwd_revcomp)
 55        self.rev_pat_revcomp = iupac_pattern(self.rev_revcomp)
 56        # Simple search expression (without any mismatches authorized yet) #
 57        self.fwd_search = re.compile(self.fwd_pat)
 58        self.rev_search = re.compile(self.rev_pat)
 59
 60    def make_regex(self, pat, mismatches):
 61        """Complex search expression with mismatches this time."""
 62        import regex
 63        return regex.compile("(%s){s<=%i}" % (pat, mismatches))
 64
 65    def make_fwd_regex(self, mismatches):
 66        return self.make_regex(self.fwd_pat, mismatches)
 67
 68    def make_rev_regex(self, mismatches):
 69        return self.make_regex(self.rev_pat, mismatches)
 70
 71    def make_fwd_revcompl_regex(self, mismatches):
 72        return self.make_regex(self.fwd_pat_revcomp, mismatches)
 73
 74    def make_rev_revcompl_regex(self, mismatches):
 75        return self.make_regex(self.rev_pat_revcomp, mismatches)
 76
 77###############################################################################
 78class PrimersRegexes:
 79    """
 80    A container for the regular expression search patterns
 81    that enable us to find primers inside a sequence.
 82    These regexes depend on the number of mismatches authorized.
 83    """
 84
 85    def __init__(self, primers, mismatches):
 86        """
 87        We need to know the primers and the number of mismatches tolerated
 88        in the search.
 89        """
 90        # Base attributes #
 91        self.primers    = primers
 92        self.mismatches = mismatches
 93        # Search patterns #
 94        self.fwd    = primers.make_fwd_regex(mismatches)
 95        self.rev    = primers.make_rev_regex(mismatches)
 96        # Search patterns reverse complemented #
 97        self.fwd_rc = primers.make_fwd_revcompl_regex(mismatches)
 98        self.rev_rc = primers.make_rev_revcompl_regex(mismatches)
 99
100###############################################################################
class ReadWithPrimers:
    """
    The result of searching one read for both primers, in both orientations.
    """

    def __init__(self, read, regexes):
        """
        Uses regex patterns to search the given read.
        Records the start and end positions of primers if they are found.
        Both the forward and reverse primers are searched for.
        Both the original sequences and their reverse complements are
        searched for, in case the read is in the opposite direction.

        `read` must have a `seq` attribute and `regexes` must carry four
        compiled patterns named `fwd`, `rev`, `fwd_rc` and `rev_rc`.
        Every position is `None` when the corresponding search fails.
        """
        # The read itself #
        self.read = read
        # The sequence as a string #
        self.seq = str(read.seq)
        # Searches #
        self.fwd    = regexes.fwd.search(self.seq)
        self.rev    = regexes.rev.search(self.seq)
        self.fwd_rc = regexes.fwd_rc.search(self.seq)
        self.rev_rc = regexes.rev_rc.search(self.seq)
        # Positions found in standard search #
        self.fwd_srt = self.fwd.start() if self.fwd else None
        self.fwd_end = self.fwd.end()   if self.fwd else None
        self.rev_srt = self.rev.start() if self.rev else None
        self.rev_end = self.rev.end()   if self.rev else None
        # Positions found in reverse complement search #
        self.fwd_rc_srt = self.fwd_rc.start() if self.fwd_rc else None
        self.fwd_rc_end = self.fwd_rc.end()   if self.fwd_rc else None
        self.rev_rc_srt = self.rev_rc.start() if self.rev_rc else None
        self.rev_rc_end = self.rev_rc.end()   if self.rev_rc else None

    @property
    def pretty_visualization(self):
        """
        This property is useful for debugging.
        It will return a nicely formatted string showing the original read
        with all primers found highlighted with bash color codes.
        The string starts with a summary of all the positions recorded.
        """
        # Make a copy of the read for convenience #
        seq = self.seq
        # Collect the pieces in a list and join them once at the end #
        pieces = []
        # Iterate over every position in the original sequence #
        for i, nuc in enumerate(seq):
            # Close a finished highlight first, otherwise the reset would
            # cancel the color of a primer starting at this same position #
            if i == self.fwd_end:    pieces.append(Color.end)
            if i == self.rev_end:    pieces.append(Color.end)
            if i == self.fwd_rc_end: pieces.append(Color.end)
            if i == self.rev_rc_end: pieces.append(Color.end)
            # Then open the highlights that begin here #
            if i == self.fwd_srt:    pieces.append(Color.b_grn)
            if i == self.rev_srt:    pieces.append(Color.grn)
            if i == self.fwd_rc_srt: pieces.append(Color.red)
            if i == self.rev_rc_srt: pieces.append(Color.b_red)
            pieces.append(nuc)
        # End positions are exclusive, so a primer that reaches the last
        # base ends at len(seq), an index the loop above never visits #
        if len(seq) in (self.fwd_end,    self.rev_end,
                        self.fwd_rc_end, self.rev_rc_end):
            pieces.append(Color.end)
        out = ''.join(pieces)
        # Summary of found positions #
        summary = f"""
        Forward start:           {self.fwd_srt}
        Forward end:             {self.fwd_end}
        Reverse start:           {self.rev_srt}
        Reverse end:             {self.rev_end}
        Forward revcompl start:  {self.fwd_rc_srt}
        Forward revcompl end:    {self.fwd_rc_end}
        Reverse revcompl start:  {self.rev_rc_srt}
        Reverse revcompl end:    {self.rev_rc_end}
        """
        # Return #
        return summary + out + '\n'
iupac = {'A': 'A', 'G': 'G', 'T': 'T', 'C': 'C', 'M': 'AC', 'R': 'AG', 'W': 'AT', 'S': 'CG', 'Y': 'CT', 'K': 'GT', 'V': 'ACG', 'H': 'ACT', 'D': 'AGT', 'B': 'CGT', 'X': 'ACGT', 'N': 'ACGT'}
def iupac_pattern(seq):
26iupac_pattern = lambda seq: ''.join(['[' + iupac[char] + ']' for char in seq])
class TwoPrimers:
29class TwoPrimers:
30    """
31    A container for the two primers of a sample.
32    Has methods for generating regexes to search for these primers.
33    """
34
35    def __len__(self): return 2
36
37    def __init__(self, fwd_str, rev_str):
38        from Bio.Seq import Seq
39        # Original strings #
40        self.fwd_str = fwd_str
41        self.rev_str = rev_str
42        # Lengths in base pairs #
43        self.fwd_len = len(self.fwd_str)
44        self.rev_len = len(self.rev_str)
45        # Sequences as biopython objects #
46        self.fwd_seq = Seq(self.fwd_str)
47        self.rev_seq = Seq(self.rev_str)
48        # Create search patterns in regex syntax #
49        self.fwd_pat = iupac_pattern(self.fwd_seq)
50        self.rev_pat = iupac_pattern(self.rev_seq)
51        # Reverse complemented sequences #
52        self.fwd_revcomp = self.fwd_seq.reverse_complement()
53        self.rev_revcomp = self.rev_seq.reverse_complement()
54        # Search patterns when reverse complemented #
55        self.fwd_pat_revcomp = iupac_pattern(self.fwd_revcomp)
56        self.rev_pat_revcomp = iupac_pattern(self.rev_revcomp)
57        # Simple search expression (without any mismatches authorized yet) #
58        self.fwd_search = re.compile(self.fwd_pat)
59        self.rev_search = re.compile(self.rev_pat)
60
61    def make_regex(self, pat, mismatches):
62        """Complex search expression with mismatches this time."""
63        import regex
64        return regex.compile("(%s){s<=%i}" % (pat, mismatches))
65
66    def make_fwd_regex(self, mismatches):
67        return self.make_regex(self.fwd_pat, mismatches)
68
69    def make_rev_regex(self, mismatches):
70        return self.make_regex(self.rev_pat, mismatches)
71
72    def make_fwd_revcompl_regex(self, mismatches):
73        return self.make_regex(self.fwd_pat_revcomp, mismatches)
74
75    def make_rev_revcompl_regex(self, mismatches):
76        return self.make_regex(self.rev_pat_revcomp, mismatches)

A container for the two primers of a sample. Has methods for generating regexes to search for these primers.

TwoPrimers(fwd_str, rev_str)
37    def __init__(self, fwd_str, rev_str):
38        from Bio.Seq import Seq
39        # Original strings #
40        self.fwd_str = fwd_str
41        self.rev_str = rev_str
42        # Lengths in base pairs #
43        self.fwd_len = len(self.fwd_str)
44        self.rev_len = len(self.rev_str)
45        # Sequences as biopython objects #
46        self.fwd_seq = Seq(self.fwd_str)
47        self.rev_seq = Seq(self.rev_str)
48        # Create search patterns in regex syntax #
49        self.fwd_pat = iupac_pattern(self.fwd_seq)
50        self.rev_pat = iupac_pattern(self.rev_seq)
51        # Reverse complemented sequences #
52        self.fwd_revcomp = self.fwd_seq.reverse_complement()
53        self.rev_revcomp = self.rev_seq.reverse_complement()
54        # Search patterns when reverse complemented #
55        self.fwd_pat_revcomp = iupac_pattern(self.fwd_revcomp)
56        self.rev_pat_revcomp = iupac_pattern(self.rev_revcomp)
57        # Simple search expression (without any mismatches authorized yet) #
58        self.fwd_search = re.compile(self.fwd_pat)
59        self.rev_search = re.compile(self.rev_pat)
fwd_str
rev_str
fwd_len
rev_len
fwd_seq
rev_seq
fwd_pat
rev_pat
fwd_revcomp
rev_revcomp
fwd_pat_revcomp
rev_pat_revcomp
def make_regex(self, pat, mismatches):
61    def make_regex(self, pat, mismatches):
62        """Complex search expression with mismatches this time."""
63        import regex
64        return regex.compile("(%s){s<=%i}" % (pat, mismatches))

Compile a search expression for the pattern `pat` that tolerates up to `mismatches` mismatches.

def make_fwd_regex(self, mismatches):
66    def make_fwd_regex(self, mismatches):
67        return self.make_regex(self.fwd_pat, mismatches)
def make_rev_regex(self, mismatches):
69    def make_rev_regex(self, mismatches):
70        return self.make_regex(self.rev_pat, mismatches)
def make_fwd_revcompl_regex(self, mismatches):
72    def make_fwd_revcompl_regex(self, mismatches):
73        return self.make_regex(self.fwd_pat_revcomp, mismatches)
def make_rev_revcompl_regex(self, mismatches):
75    def make_rev_revcompl_regex(self, mismatches):
76        return self.make_regex(self.rev_pat_revcomp, mismatches)
class PrimersRegexes:
79class PrimersRegexes:
80    """
81    A container for the regular expression search patterns
82    that enable us to find primers inside a sequence.
83    These regexes depend on the number of mismatches authorized.
84    """
85
86    def __init__(self, primers, mismatches):
87        """
88        We need to know the primers and the number of mismatches tolerated
89        in the search.
90        """
91        # Base attributes #
92        self.primers    = primers
93        self.mismatches = mismatches
94        # Search patterns #
95        self.fwd    = primers.make_fwd_regex(mismatches)
96        self.rev    = primers.make_rev_regex(mismatches)
97        # Search patterns reverse complemented #
98        self.fwd_rc = primers.make_fwd_revcompl_regex(mismatches)
99        self.rev_rc = primers.make_rev_revcompl_regex(mismatches)

A container for the regular expression search patterns that enable us to find primers inside a sequence. These regexes depend on the number of mismatches authorized.

PrimersRegexes(primers, mismatches)
86    def __init__(self, primers, mismatches):
87        """
88        We need to know the primers and the number of mismatches tolerated
89        in the search.
90        """
91        # Base attributes #
92        self.primers    = primers
93        self.mismatches = mismatches
94        # Search patterns #
95        self.fwd    = primers.make_fwd_regex(mismatches)
96        self.rev    = primers.make_rev_regex(mismatches)
97        # Search patterns reverse complemented #
98        self.fwd_rc = primers.make_fwd_revcompl_regex(mismatches)
99        self.rev_rc = primers.make_rev_revcompl_regex(mismatches)

We need to know the primers and the number of mismatches tolerated in the search.

primers
mismatches
fwd
rev
fwd_rc
rev_rc
class ReadWithPrimers:
102class ReadWithPrimers:
103    def __init__(self, read, regexes):
104        """
105        Uses regex patterns to search the given read.
106        Records the start and end positions of primers if they are found.
107        Both the forward and reverse primers are searched for.
108        Both the original sequences and their reverse complements are
109        searched for, in case the read is in the opposite direction.
110        """
111        # The read itself #
112        self.read = read
113        # The sequence as a string #
114        self.seq = str(read.seq)
115        # Searches #
116        self.fwd    = regexes.fwd.search(self.seq)
117        self.rev    = regexes.rev.search(self.seq)
118        self.fwd_rc = regexes.fwd_rc.search(self.seq)
119        self.rev_rc = regexes.rev_rc.search(self.seq)
120        # Positions found in standard search #
121        self.fwd_srt = self.fwd.start() if self.fwd else None
122        self.fwd_end = self.fwd.end()   if self.fwd else None
123        self.rev_srt = self.rev.start() if self.rev else None
124        self.rev_end = self.rev.end()   if self.rev else None
125        # Positions found in reverse complement search #
126        self.fwd_rc_srt = self.fwd_rc.start() if self.fwd_rc else None
127        self.fwd_rc_end = self.fwd_rc.end()   if self.fwd_rc else None
128        self.rev_rc_srt = self.rev_rc.start() if self.rev_rc else None
129        self.rev_rc_end = self.rev_rc.end()   if self.rev_rc else None
130
131    @property
132    def pretty_visualization(self):
133        """
134        This property is useful for debugging.
135        It will return a nicely formatted string showing the original read
136        with all primers found highlighted with bash color codes.
137        """
138        # Make a copy of the read for convenience #
139        seq = self.seq
140        # Initialize output #
141        out = ""
142        # Iterate over every position in the original sequence #
143        for i, nuc in enumerate(seq):
144            if i == self.fwd_srt:    out += Color.b_grn
145            if i == self.rev_srt:    out += Color.grn
146            if i == self.fwd_rc_srt: out += Color.red
147            if i == self.rev_rc_srt: out += Color.b_red
148            if i == self.fwd_end:    out += Color.end
149            if i == self.rev_end:    out += Color.end
150            if i == self.fwd_rc_end: out += Color.end
151            if i == self.rev_rc_end: out += Color.end
152            out += nuc
153        # Summary of found positions #
154        summary = f"""
155        Forward start:           {self.fwd_srt}
156        Forward end:             {self.fwd_end}
157        Reverse start:           {self.rev_srt}
158        Reverse end:             {self.rev_end}
159        Forward revcompl start:  {self.fwd_rc_srt}
160        Forward revcompl end:    {self.fwd_rc_end}
161        Reverse revcompl start:  {self.rev_rc_srt}
162        Reverse revcompl end:    {self.rev_rc_end}
163        """
164        # Return #
165        return summary + out + '\n'
ReadWithPrimers(read, regexes)
103    def __init__(self, read, regexes):
104        """
105        Uses regex patterns to search the given read.
106        Records the start and end positions of primers if they are found.
107        Both the forward and reverse primers are searched for.
108        Both the original sequences and their reverse complements are
109        searched for, in case the read is in the opposite direction.
110        """
111        # The read itself #
112        self.read = read
113        # The sequence as a string #
114        self.seq = str(read.seq)
115        # Searches #
116        self.fwd    = regexes.fwd.search(self.seq)
117        self.rev    = regexes.rev.search(self.seq)
118        self.fwd_rc = regexes.fwd_rc.search(self.seq)
119        self.rev_rc = regexes.rev_rc.search(self.seq)
120        # Positions found in standard search #
121        self.fwd_srt = self.fwd.start() if self.fwd else None
122        self.fwd_end = self.fwd.end()   if self.fwd else None
123        self.rev_srt = self.rev.start() if self.rev else None
124        self.rev_end = self.rev.end()   if self.rev else None
125        # Positions found in reverse complement search #
126        self.fwd_rc_srt = self.fwd_rc.start() if self.fwd_rc else None
127        self.fwd_rc_end = self.fwd_rc.end()   if self.fwd_rc else None
128        self.rev_rc_srt = self.rev_rc.start() if self.rev_rc else None
129        self.rev_rc_end = self.rev_rc.end()   if self.rev_rc else None

Uses regex patterns to search the given read. Records the start and end positions of primers if they are found. Both the forward and reverse primers are searched for. Both the original sequences and their reverse complements are searched for, in case the read is in the opposite direction.

read
seq
fwd
rev
fwd_rc
rev_rc
fwd_srt
fwd_end
rev_srt
rev_end
fwd_rc_srt
fwd_rc_end
rev_rc_srt
rev_rc_end
pretty_visualization

This property is useful for debugging. It will return a nicely formatted string showing the original read with all primers found highlighted with bash color codes.