Source code for isogroup.base.experiment

from isogroup.base.feature import Feature
from isogroup.base.misc import Misc
import pandas as pd
import logging

logger = logging.getLogger(f"IsoGroup")

[docs] class Experiment: """ Represents a mass spectrometry experiment with experimental features. """ def __init__(self, dataset : pd.DataFrame, tracer:str, ppm_tol:float, rt_tol:float, max_atoms:int=None, database:pd.DataFrame=None): """ :param dataset: DataFrame containing experimental data with columns for m/z, retention time (RT), feature ID, and sample intensities. :param tracer: Tracer code used in the experiment (e.g. "13C"). :param ppm_tol: m/z tolerance (in ppm). :param rt_tol: Retention time tolerance (in sec). :param max_atoms: Maximum number of tracer atoms to consider for isotopologues. If None, IsoGroup automatically estimates the maximum number of isotopologues based on the feature m/z and tracer element. :param database: DataFrame containing theoretical features with columns retention time (RT), metabolite names, and formulas. """ self.dataset = dataset self._tracer = tracer self._tracer_element, self._tracer_idx = Misc._parse_strtracer(tracer) self._ppm_tol = ppm_tol self._rt_tol = rt_tol self.max_atoms = max_atoms self.database = database self.features = {} # {sample_name: {feature_id: Feature object}} self.clusters = {} # {sample_name: {cluster_id: Cluster object}} @property def rt_tol(self) -> float: """ Returns the retention time tolerance used for feature annotation. """ return self._rt_tol @rt_tol.setter def rt_tol(self, value) -> float: """ Sets the retention time tolerance used for feature annotation. """ if not isinstance(value, (float)): raise ValueError("RT tolerance must be a number.") if self._rt_tol is None: raise ValueError("RT tolerance must be provided.") self._rt_tol = value @property def tracer(self) -> str: """ Returns the tracer used for the experiment. """ return self._tracer @property def ppm_tol(self) -> float: """ Returns the m/z tolerance (in ppm) used for feature annotation. """ return self._ppm_tol @ppm_tol.setter def ppm_tol(self, value): """ Sets the m/z tolerance (in ppm) used for feature annotation. """ if not isinstance(value, (float)): raise ValueError("mz tolerance must be a number.") if self._ppm_tol is None: raise ValueError("mz tolerance must be provided.") self._ppm_tol = value @property def tracer_element(self) -> str: """ Returns the tracer element used in the experiment. """ return self._tracer_element @property def tracer_idx(self) -> int: """ Returns the tracer index used in the experiment. """ return self._tracer_idx
[docs] def initialize_experimental_features(self): """ Initialize Feature objects from the dataset and organize them by sample. Each feature is created with its retention time, m/z, tracer, intensity, and sample name. """ if not {"mz", "rt", "id"}.issubset(self.dataset.columns): raise ValueError("Dataset must contain 'mz', 'rt', and 'id' columns.") if not any(col not in {"mz", "rt", "id"} for col in self.dataset.columns): raise ValueError("Dataset must contain at least one sample column with intensity values.") dataset = self.dataset.set_index(["mz", "rt", "id"]) for idx, _ in dataset.iterrows(): mz = idx[0] rt = idx[1] id = idx[2] for sample in dataset.columns: # Extract the intensity for each sample in the dataset intensity = dataset.loc[idx, sample] # Initialize the experimental features for each sample feature = Feature( rt=rt, mz=mz, tracer=self.tracer, feature_id=id, intensity=intensity, sample=sample, tracer_element=self.tracer_element, ) # Add the feature in the list corresponding to the sample if sample not in self.features: self.features[sample] = {} self.features[sample][id] = feature features_count = len(next(iter(self.features.values()))) logger.info(f"{features_count} features loaded per sample ({len(self.features)} sample(s)).\n")
# if __name__ == "__main__": # # from isogroup.base.io import IoHandler # from isogroup.base.targeted_experiment import TargetedExperiment # # io= IoHandler() # # data= io.read_dataset(r"..\..\data\dataset_test_XCMS.txt") # # database = io.read_database(r"..\..\data\database.csv") # test = TargetedExperiment(data, tracer="13C", mz_tol=5, rt_tol=10, database=database) # test.initialize_experimental_features() # print(test.database.theoretical_features) # io.export_theoretical_database(theoretical_db) # test.initialize_experimental_features() # print(test.database.theoretical_features) # # print(test.features["C13_WT_2"])