fasta.indexed
Written by Lucas Sinclair. MIT Licensed. Contact at www.sinclair.bio
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Written by Lucas Sinclair.
MIT Licensed.
Contact at www.sinclair.bio
"""

# Built-in modules #

# Internal modules #
from fasta import FASTA
from plumbing.databases import convert_to_sql
from plumbing.databases.sqlite_database import SQLiteDatabase
from plumbing.common import GenWithLength

# Third party modules #
from tqdm import tqdm

# Constants #
# Column names handed to `convert_to_sql`, in the same order as the tuples
# produced by `generate_values` #
base_keys = ('id', 'description', 'seq')

###############################################################################
class DatabaseFASTA(SQLiteDatabase):
    """
    A database object whose `factory` attribute rebuilds a Biopython
    `SeqRecord` from a row, reading the row positionally as:
    id, description, sequence.
    """

    def __init__(self, path=None):
        # Biopython is only imported once an instance is created #
        from Bio.SeqRecord import SeqRecord
        from Bio.Seq import Seq

        def record_from_row(cursor, row):
            # The cursor argument is accepted but never used #
            return SeqRecord(Seq(row[2]), id=row[0], description=row[1])

        # NOTE(review): the parent initializer is not invoked here, only
        # these two attributes are set -- confirm this is intentional #
        self.path = path
        self.factory = record_from_row

    def parse(self):
        """Placeholder: currently does nothing and returns None."""
        pass

###############################################################################
def generate_values(path, progress=False):
    """
    Lazily yield one `(id, description, sequence)` tuple per record found in
    the FASTA file at `path`, the sequence being converted to a plain string.

    When `progress` is true, the records are streamed through `tqdm`, using
    `len(FASTA(path))` as the announced total.
    """
    from Bio import SeqIO
    records = SeqIO.parse(path, 'fasta')
    # Only compute the total (an extra `FASTA(path)` object) when needed #
    if progress:
        records = tqdm(GenWithLength(records, len(FASTA(path))))
    for record in records:
        yield record.id, record.description, str(record.seq)

###############################################################################
def fasta_to_sql(source, dest):
    """
    Pass the records of the FASTA file `source` to `convert_to_sql` along
    with `dest` and the `base_keys` column names, always showing progress,
    then return a `DatabaseFASTA` built on `dest`.
    """
    rows = generate_values(source, progress=True)
    convert_to_sql(dest, base_keys, rows)
    return DatabaseFASTA(dest)
base_keys = ('id', 'description', 'seq')
class DatabaseFASTA(plumbing.databases.sqlite_database.SQLiteDatabase):
class DatabaseFASTA(SQLiteDatabase):
    """
    A database object whose `factory` attribute rebuilds a Biopython
    `SeqRecord` from a row, reading the row positionally as:
    id, description, sequence.
    """

    def __init__(self, path=None):
        # Biopython is only imported once an instance is created #
        from Bio.SeqRecord import SeqRecord
        from Bio.Seq import Seq

        def record_from_row(cursor, row):
            # The cursor argument is accepted but never used #
            return SeqRecord(Seq(row[2]), id=row[0], description=row[1])

        # NOTE(review): the parent initializer is not invoked here, only
        # these two attributes are set -- confirm this is intentional #
        self.path = path
        self.factory = record_from_row

    def parse(self):
        """Placeholder: currently does nothing and returns None."""
        pass
A wrapper for an SQLite3 database.
DatabaseFASTA(path, *args, **kwargs)
def __new__(cls, path, *args, **kwargs):
    """
    Build the instance as a string whose value is `cls.clean_path(path)`.

    Extra positional and keyword arguments are accepted but not used here.
    """
    cleaned = cls.clean_path(path)
    return str.__new__(cls, cleaned)
A Path object is in fact a string.
Inherited Members
- plumbing.databases.sqlite_database.SQLiteDatabase
- text_fact
- isolation
- retrieve
- known_md5
- prepared
- connection
- own_connection
- cursor
- own_cursor
- tables
- main_table
- columns
- first
- last
- df
- new_connection
- prepare
- check_format
- create
- add_table
- execute
- get_columns_of_table
- add
- detailed_error
- add_by_steps
- count_entries
- index
- get_first
- get_last
- get_number
- get
- get_entry
- vacuum
- close
- write_df
- read_df
- add_column
- open
- uniquify
- get_and_order
- import_table
- autopaths.file_path.FilePath
- prefix_path
- prefix
- name
- filename
- directory
- count_bytes
- count
- size
- contents
- contents_utf8
- md5
- might_be_binary
- contains_binary
- magic_number
- lines
- read
- touch
- add_str
- write
- writelines
- remove
- copy
- replace_extension
- new_name_insert
- make_directory
- must_exist
- head
- pretty_head
- tail
- pretty_tail
- move_to
- rename
- gzip_to
- gzip_internal
- gzip_external
- gzip_pigz
- ungzip_to
- ungzip_internal
- ungzip_external
- zip_to
- unzip_to
- untar_to
- untargz_to
- untargz_to_internal
- untargz_to_external
- append
- prepend
- remove_line
- remove_first_line
- replace_line
- replace_word
- sed_replace
- autopaths.base_path.BasePath
- clean_path
- short_prefix
- extension
- escaped
- absolute_path
- physical_path
- relative_path
- rel_path_from
- exists
- is_symlink
- permissions
- mdate
- mdate_iso
- cdate
- cdate_iso
- unix_style
- wsl_style
- win_style
- with_tilda
- with_home
- link_from
- link_to
- symlinks_on_linux
- symlinks_on_windows
- hard_link_win_to
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- expandtabs
- find
- partition
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
def generate_values(path, progress=False):
def generate_values(path, progress=False):
    """
    Lazily yield one `(id, description, sequence)` tuple per record found in
    the FASTA file at `path`, the sequence being converted to a plain string.

    When `progress` is true, the records are streamed through `tqdm`, using
    `len(FASTA(path))` as the announced total.
    """
    from Bio import SeqIO
    records = SeqIO.parse(path, 'fasta')
    # Only compute the total (an extra `FASTA(path)` object) when needed #
    if progress:
        records = tqdm(GenWithLength(records, len(FASTA(path))))
    for record in records:
        yield record.id, record.description, str(record.seq)
def fasta_to_sql(source, dest):