# SPDX-FileCopyrightText: 2021-2022 Constantine Evans <const@costi.eu>
#
# SPDX-License-Identifier: AGPL-3.0-only
from __future__ import annotations
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import Any, ClassVar, Literal, Sequence
import pandas as pd
ScopeType = Literal["all", "limited"]
[docs]class Normalizer(metaclass=ABCMeta):
[docs] @abstractmethod
def normalize_scoped(self, data: pd.DataFrame, scope: ScopeType) -> pd.DataFrame:
"""
Filter the data, and return it (possibly not a copy), *if scope is the
minimum necessary scope for this normalization type*. Otherwise, just
return the same data.
This is useful for writing scope-agnostic code, provided that you call this for
every scope before using the data.
"""
...
[docs] @abstractmethod
def normalize(self, data: pd.DataFrame) -> pd.DataFrame:
...
@property
@abstractmethod
def ylabel(self) -> str: # pragma: no cover
...
[docs]class NormRaw(Normalizer):
"""
A Normalizer that takes no arguments, and simply passes through raw
fluorescence values.
"""
[docs] def normalize_scoped(self, data: pd.DataFrame, scope: ScopeType) -> pd.DataFrame:
return data
[docs] def normalize(self, data: pd.DataFrame) -> pd.DataFrame:
return data
ylabel = "fluorescence"
[docs]@dataclass(init=False)
class NormToMeanPerWell(Normalizer):
"""
A Normalizer that divides the fluorescence reading for each (filterset, well) pair
by the mean value of that pair within a particular selection of data.
The easiest way to use this is to give a particular stage (all data in that
stage will be used), or a stage and set of cycles (those cycles in that stage
will be used). For example:
- To normalize to the mean stage 8 values, use `NormToMeanPerWell(stage=8)`.
- To normalize to the first 5 cycles of stage 2, use
NormToMeanPerWell(stage=2, cycle=slice(1, 6)).
`selection` allows arbitrary Pandas indexing (without the filter_set level
of the MultiIndex) for unusual cases.
"""
selection: Any
scope: ClassVar[ScopeType] = "limited"
def __init__(
self,
stage: int | slice | Sequence[int] | None = None,
step: int | slice | Sequence[int] | None = None,
cycle: int | slice | Sequence[int] | None = None,
*,
selection: Any | None = None,
):
if selection is not None:
if stage is not None:
raise ValueError("Selection already set, can't specify stage.")
if step is not None:
raise ValueError("Selection already set, can't specify step.")
if cycle is not None:
raise ValueError("Selection already set, can't specify cycle.")
self.selection = selection
if stage is None:
stage = slice(None)
elif isinstance(stage, int):
stage = [stage]
if step is None:
step = slice(None)
elif isinstance(step, int):
step = [step]
if cycle is None:
cycle = slice(None)
elif isinstance(cycle, int):
cycle = [cycle]
self.selection = (stage, step, cycle)
[docs] def normalize_scoped(self, data: pd.DataFrame, scope: ScopeType) -> pd.DataFrame:
if scope == self.scope:
return self.normalize(data)
else:
return data
[docs] def normalize(self, data: pd.DataFrame) -> pd.DataFrame:
normdata = data.copy()
means = (
data.loc[(slice(None), *self.selection), (slice(None), "fl")]
.groupby("filter_set")
.mean()
)
normdata.loc[:, (slice(None), "fl")] /= means
return normdata
ylabel = "norm. fluorescence"
[docs]@dataclass
class NormToMaxPerWell(Normalizer):
"""
A Normalizer that divides the fluorescence reading for each (filterset, well) pair
by the max value of that pair within a particular selection of data.
The easiest way to use this is to give a particular stage (all data in that
stage will be used), or a stage and set of cycles (those cycles in that stage
will be used). For example:
- To normalize to the mean stage 8 values, use `NormToMeanPerWell(stage=8)`.
- To normalize to the first 5 cycles of stage 2, use
NormToMeanPerWell(stage=2, cycle=slice(1, 6)).
`selection` allows arbitrary Pandas indexing (without the filter_set level
of the MultiIndex) for unusual cases.
"""
selection: Any
scope: ClassVar[ScopeType] = "limited"
def __init__(
self,
stage: int | slice | Sequence[int] | None = None,
step: int | slice | Sequence[int] | None = None,
cycle: int | slice | Sequence[int] | None = None,
*,
selection: Any | None = None,
):
if selection is not None:
if stage is not None:
raise ValueError("Selection already set, can't specify stage.")
if step is not None:
raise ValueError("Selection already set, can't specify step.")
if cycle is not None:
raise ValueError("Selection already set, can't specify cycle.")
self.selection = selection
if stage is None:
stage = slice(None)
elif isinstance(stage, int):
stage = [stage]
if step is None:
step = slice(None)
elif isinstance(step, int):
step = [step]
if cycle is None:
cycle = slice(None)
elif isinstance(cycle, int):
cycle = [cycle]
self.selection = (stage, step, cycle)
[docs] def normalize_scoped(self, data: pd.DataFrame, scope: ScopeType) -> pd.DataFrame:
if scope == self.scope:
return self.normalize(data)
else:
return data
[docs] def normalize(self, data: pd.DataFrame) -> pd.DataFrame:
normdata = data.copy()
means = (
data.loc[(slice(None), *self.selection), (slice(None), "fl")]
.groupby("filter_set")
.max()
)
normdata.loc[:, (slice(None), "fl")] /= means
return normdata
ylabel = "norm fluorescence"