fasta.indexed

Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio

 1#!/usr/bin/env python3
 2# -*- coding: utf-8 -*-
 3
 4"""
 5Written by Lucas Sinclair.
 6MIT Licensed.
 7Contact at www.sinclair.bio
 8"""
 9
10# Built-in modules #
11
12# Internal modules #
13from fasta import FASTA
14from plumbing.databases import convert_to_sql
15from plumbing.databases.sqlite_database import SQLiteDatabase
16from plumbing.common import GenWithLength
17
18# Third party modules #
19from tqdm import tqdm
20
21# Constants #
22base_keys = ('id', 'description', 'seq')
23
24###############################################################################
class DatabaseFASTA(SQLiteDatabase):
    """
    An `SQLiteDatabase` whose `factory` attribute turns a result row into a
    Biopython `SeqRecord`.

    Rows are read positionally: index 0 is used as the record id, index 1 as
    the description and index 2 as the sequence string (the same order as the
    module-level `base_keys`).
    """

    def __init__(self, path=None):
        """
        Store *path* and install the row-to-record factory.

        NOTE(review): the parent initializer is not invoked here; confirm
        against `SQLiteDatabase` that this is intentional.
        """
        # Biopython is only imported once an instance is actually built #
        from Bio.Seq import Seq
        from Bio.SeqRecord import SeqRecord

        def row_to_record(cursor, row):
            """Build a `SeqRecord` from one row; *cursor* is not used."""
            sequence = Seq(row[2])
            return SeqRecord(sequence, id=row[0], description=row[1])

        self.path = path
        self.factory = row_to_record

    def parse(self):
        """Do nothing; this method is an empty placeholder."""
        return None
35
36###############################################################################
def generate_values(path, progress=False):
    """
    Yield one `(id, description, sequence)` tuple for every record parsed
    from the FASTA file at *path*. The sequence is converted to a plain `str`.

    When *progress* is true, the records are iterated through `tqdm`, wrapped
    in a `GenWithLength` that carries `len(FASTA(path))` as its length.
    """
    from Bio import SeqIO
    records = SeqIO.parse(path, 'fasta')
    # Optionally route the iteration through a progress bar #
    if progress:
        records = tqdm(GenWithLength(records, len(FASTA(path))))
    # Same tuple layout in both modes #
    for record in records:
        yield record.id, record.description, str(record.seq)
45
46###############################################################################
def fasta_to_sql(source, dest):
    """
    Feed the records of the FASTA file *source* to `convert_to_sql`, using
    `base_keys` as the keys and *dest* as the target, then return a
    `DatabaseFASTA` built on *dest*.

    The records are generated with the progress display switched on.
    """
    rows = generate_values(source, progress=True)
    convert_to_sql(dest, base_keys, rows)
    database = DatabaseFASTA(dest)
    return database
base_keys = ('id', 'description', 'seq')
class DatabaseFASTA(plumbing.databases.sqlite_database.SQLiteDatabase):
26class DatabaseFASTA(SQLiteDatabase):
27
28    def __init__(self, path=None):
29        from Bio.SeqRecord import SeqRecord
30        from Bio.Seq import Seq
31        self.path = path
32        self.factory = lambda cursor, row: SeqRecord(Seq(row[2]), id=row[0], description=row[1])
33
34    def parse(self):
35        pass

A wrapper for an SQLite3 database.

DatabaseFASTA(path, *args, **kwargs)
59    def __new__(cls, path, *args, **kwargs):
60        """A Path object is in fact a string."""
61        return str.__new__(cls, cls.clean_path(path))

A Path object is in fact a string.

path
factory
def parse(self):
34    def parse(self):
35        pass
Inherited Members
plumbing.databases.sqlite_database.SQLiteDatabase
text_fact
isolation
retrieve
known_md5
prepared
connection
own_connection
cursor
own_cursor
tables
main_table
columns
first
last
df
new_connection
prepare
check_format
create
add_table
execute
get_columns_of_table
add
detailed_error
add_by_steps
count_entries
index
get_first
get_last
get_number
get
get_entry
vacuum
close
write_df
read_df
add_column
open
uniquify
get_and_order
import_table
autopaths.file_path.FilePath
prefix_path
prefix
name
filename
directory
count_bytes
count
size
contents
contents_utf8
md5
might_be_binary
contains_binary
magic_number
lines
read
touch
add_str
write
writelines
remove
copy
replace_extension
new_name_insert
make_directory
must_exist
head
pretty_head
tail
pretty_tail
move_to
rename
gzip_to
gzip_internal
gzip_external
gzip_pigz
ungzip_to
ungzip_internal
ungzip_external
zip_to
unzip_to
untar_to
untargz_to
untargz_to_internal
untargz_to_external
append
prepend
remove_line
remove_first_line
replace_line
replace_word
sed_replace
autopaths.base_path.BasePath
clean_path
short_prefix
extension
escaped
absolute_path
physical_path
relative_path
rel_path_from
exists
permissions
mdate
mdate_iso
cdate
cdate_iso
unix_style
wsl_style
win_style
with_tilda
with_home
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
expandtabs
find
partition
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans
def generate_values(path, progress=False):
38def generate_values(path, progress=False):
39    from Bio import SeqIO
40    seqs = SeqIO.parse(path, 'fasta')
41    if not progress:
42        for seq in seqs: yield seq.id, seq.description, str(seq.seq)
43    if progress:
44        for seq in tqdm(GenWithLength(seqs, len(FASTA(path)))):
45            yield seq.id, seq.description, str(seq.seq)
def fasta_to_sql(source, dest):
48def fasta_to_sql(source, dest):
49    values = generate_values(source, progress=True)
50    convert_to_sql(dest, base_keys, values)
51    return DatabaseFASTA(dest)