from __future__ import annotations
from isogroup.base.feature import Feature
from isocor.base import LabelledChemical
# from isogroup.base.misc import Misc
import pandas as pd
class Database:
    """
    Represents a database of theoretical features for a specific tracer.

    On construction, the theoretical features are generated from ``dataset``
    and summarized into ``theoretical_database_df``.
    """

    def __init__(self, dataset: pd.DataFrame, tracer: str, tracer_element: str):
        """
        :param dataset: DataFrame containing theoretical features. The columns read by this class are
            "metabolite", "formula", "charge" and "rt" (retention time).
        :param tracer: Tracer code (e.g. "13C") used to initialize the database.
        :param tracer_element: Tracer element (e.g. "C") used.
        """
        self.dataset = dataset
        self.theoretical_features = []
        self.tracer = tracer
        self._tracer_element = tracer_element
        self.clusters = []

        isodata: dict = LabelledChemical.DEFAULT_ISODATA
        tracer_masses = isodata[self._tracer_element]["mass"]
        # Mass shift per incorporated tracer atom: difference between the second and
        # the first mass listed for the tracer element.
        self._delta_mz_tracer: float = tracer_masses[1] - tracer_masses[0]
        # First mass listed for hydrogen, used to shift m/z according to the charge.
        self._delta_mz_hydrogen: float = isodata["H"]["mass"][0]

        self.theoretical_database_df = None
        self.initialize_theoretical_features()
        self.theoretical_database()

    def __len__(self) -> int:
        """
        :return: Number of rows in the input dataset (not the number of theoretical features).
        """
        return len(self.dataset)

    def initialize_theoretical_features(self) -> None:
        """
        Creates chemical labelled objects from the dataset and initializes theoretical features.

        For each row of the dataset, a LabelledChemical is built and one Feature is generated
        per isotopologue index, from 0 up to and including ``chemical.formula[tracer_element]``.

        The feature list is rebuilt from scratch on every call, so calling this method again
        after construction does not duplicate the features.
        """
        features = []
        for _, row in self.dataset.iterrows():
            chemical = LabelledChemical(
                formula=row["formula"],
                tracer=self.tracer,
                derivative_formula="",
                tracer_purity=[1.0, 0.0],
                correct_NA_tracer=False,
                data_isotopes=None,
                charge=row["charge"],
                label=row["metabolite"],
            )
            # NOTE(review): m/z is shifted by charge * hydrogen mass and is not divided
            # by the charge; presumably only singly charged species are expected - confirm.
            for isotopologue in range(chemical.formula[self._tracer_element] + 1):
                mz = (chemical.molecular_weight + isotopologue * self._delta_mz_tracer
                      + row["charge"] * self._delta_mz_hydrogen)
                features.append(
                    Feature(
                        rt=row["rt"],
                        mz=mz,
                        tracer=self.tracer,
                        intensity=None,
                        chemical=[chemical],
                        cluster_isotopologue={chemical.label: isotopologue},
                        metabolite=[chemical.label],
                        formula=row["formula"],
                    )
                )
        self.theoretical_features = features

    @staticmethod
    def _format_isotopologues(feature):
        """
        Returns the isotopologue entry of a feature for the summary table.

        :param feature: Object exposing ``metabolite`` (list of names) and
            ``cluster_isotopologue`` (mapping from name to isotopologue index).
        :return: The isotopologue index itself when the feature has exactly one metabolite,
            otherwise the indices joined with ", " in the same order as the metabolite names.
        """
        indices = [feature.cluster_isotopologue[name] for name in feature.metabolite]
        if len(indices) == 1:
            return indices[0]
        return ", ".join(map(str, indices))

    def theoretical_database(self) -> None:
        """
        Summarize theoretical features into a DataFrame stored in ``theoretical_database_df``.

        The DataFrame has one row per theoretical feature, with columns
        "mz", "rt", "metabolite", "isotopologue" and "formula". Nothing is written to disk.
        """
        features = self.theoretical_features
        # Every column is built with exactly one entry per feature so that the columns
        # always have the same length, whatever the number of metabolites per feature.
        self.theoretical_database_df = pd.DataFrame({
            "mz": [feature.mz for feature in features],
            "rt": [feature.rt for feature in features],
            "metabolite": [", ".join(feature.metabolite) for feature in features],
            "isotopologue": [self._format_isotopologues(feature) for feature in features],
            "formula": [feature.formula for feature in features],
        })
# def export_database(self, filename = None):
# """
# Summarize theoretical features into a DataFrame and optionally export it to a tsv file.
# :param filename: Name of the file to export the summary to
# :return: pd.DataFrame with the summary of the theoretical features
# """
# # Create a DataFrame to summarize the theoretical features
# feature_data = []
# for feature in self.features:
# feature_data.append({
# "mz": feature.mz,
# "rt": feature.rt,
# "metabolite": ', '.join(feature.metabolite),
# "isotopologue": ', '.join(map(str, feature.isotopologue)),
# "formula": feature.formula,
# })
# df = pd.DataFrame(feature_data)
# # Export the DataFrame to a tsv file if a filename is provided
# if filename:
# df.to_csv(filename, sep="\t", index=False)
# return df
# if __name__ == "__main__":
# from isogroup.base.io import IoHandler
# from pathlib import Path
# io= IoHandler()
# database_df= io.read_database(Path(r"..\..\data\database.csv"))
# test_db = Database(dataset=database_df, tracer="13C", tracer_element="C")
# test_db.initialize_theoretical_features()
# for feature in test_db.theoretical_features:
# print(feature.metabolite)
# print(feature.cluster_isotopologue)