Source code for spirepy.study
import os
import os.path as path
import tarfile
import tempfile
import urllib
import urllib.request

import polars as pl

from spirepy.data import genome_metadata
[docs]
class Study:
    """
    A study from SPIRE.

    This class represents a study from the SPIRE database. It builds the
    study's samples and MAG table on first request and caches them, and it can
    download the study's assemblies, MAGs, gene calls and proteins as tar
    archives.

    NOTE(review): ``get_samples`` and ``get_mags`` call ``self.get_metadata()``,
    which is not defined in the section of the file reviewed here -- confirm it
    exists on the class.

    :param name: Internal ID for the study.
    :type name: str
    """

    def __init__(self, name: str):
        """Constructor method.

        :param name: Internal ID for the study.
        :type name: str
        """
        # The ID is needed by every download method to build URLs and
        # temporary file names, so it must be kept on the instance.
        self.name = name
        # Caches, filled on first use.
        self._metadata = None
        self._samples = None
        self._mags = None

    def get_samples(self) -> list:
        """Retrieve a list of samples for the study.

        The list is built on the first call from the ``sample_id`` column of
        the study metadata and cached; later calls return the cached list.

        :return: List of :class:`spirepy.sample.Sample` that belong to the study.
        :rtype: list
        """
        if self._samples is None:
            # Function-scope import, only needed when the list is actually
            # built (presumably kept local to avoid a circular import with
            # spirepy.sample -- confirm).
            from spirepy.sample import Sample

            self._samples = [
                Sample(sample_id, self)
                for sample_id in self.get_metadata()["sample_id"].to_list()
            ]
        return self._samples

    def get_mags(self) -> pl.DataFrame:
        """Get a DataFrame with information regarding the MAGs.

        The genome metadata table is filtered down to the rows whose
        ``derived_from_sample`` value is one of this study's sample IDs. The
        result is cached after the first call.

        :return: A Dataframe with the study's MAGs.
        :rtype: :class:`polars.DataFrame`
        """
        if self._mags is None:
            genomes = genome_metadata()
            sample_ids = self.get_metadata()["sample_id"].to_list()
            self._mags = genomes.filter(
                pl.col("derived_from_sample").is_in(sample_ids)
            )
        return self._mags

    def _download_and_extract(self, url: str, kind: str, output: str) -> None:
        """Download the tar archive at *url* and unpack it into ``output/kind``.

        The archive is stored in a temporary directory that is removed once
        extraction has finished.

        :param url: Location of the tar archive.
        :type url: str
        :param kind: Name of the sub-folder of ``output`` to extract into; also
            used to name the temporary archive file.
        :type kind: str
        :param output: Output folder; created if it does not exist.
        :type output: str
        """
        destination = path.join(output, kind)
        with tempfile.TemporaryDirectory() as tmpdir:
            tarfpath = path.join(tmpdir, f"{self.name}_{kind}.tar")
            urllib.request.urlretrieve(url, tarfpath)
            os.makedirs(output, exist_ok=True)
            with tarfile.open(tarfpath) as tar:
                # The archive comes from the network: where the interpreter
                # supports extraction filters, refuse members that would be
                # written outside the destination folder.
                if hasattr(tarfile, "data_filter"):
                    tar.extractall(destination, filter="data")
                else:
                    tar.extractall(destination)

    def download_assemblies(self, output: str):
        """Download the assemblies into a specified folder.

        The archive is extracted into the ``assemblies`` sub-folder of
        ``output``.

        :param output: Output folder to download the assemblies to.
        :type output: str
        """
        self._download_and_extract(
            f"https://swifter.embl.de/~fullam/spire/compiled/{self.name}_spire_v1_assemblies.tar",
            "assemblies",
            output,
        )

    def download_mags(self, output: str):
        """Download the MAGs into a specified folder.

        The archive is extracted into the ``mags`` sub-folder of ``output``.

        :param output: Output folder to download the MAGs to.
        :type output: str
        """
        self._download_and_extract(
            f"https://swifter.embl.de/~fullam/spire/compiled/{self.name}_spire_v1_MAGs.tar",
            "mags",
            output,
        )

    def download_genecalls(self, output: str):
        """Download the genecalls into a specified folder.

        The archive is extracted into the ``genecalls`` sub-folder of
        ``output``.

        :param output: Output folder to download the genecalls to.
        :type output: str
        """
        self._download_and_extract(
            f"https://swifter.embl.de/~fullam/spire/genes_per_study/{self.name}_spire_v1_genecalls_fna.tar",
            "genecalls",
            output,
        )

    def download_proteins(self, output: str):
        """Download the proteins into a specified folder.

        The archive is extracted into the ``proteins`` sub-folder of
        ``output``.

        :param output: Output folder to download the proteins to.
        :type output: str
        """
        self._download_and_extract(
            f"https://swifter.embl.de/~fullam/spire/genes_per_study/{self.name}_spire_v1_proteins_faa.tar",
            "proteins",
            output,
        )