Higher-order data modules to run unsupervised domain adaption experiments.

AdaptionDataset

Bases: Dataset

A torch dataset for unsupervised domain adaption. The dataset takes a labeled source and one or multiple unlabeled target datasets and combines them.

For each label/features pair from the source dataset, a random sample of features is drawn from each target dataset. The datasets are supposed to provide a sample as a tuple of tensors. The target datasets' labels are assumed to be the last element of the tuple and are omitted. The dataset's length is determined by the source dataset. This setup can be used to train with common unsupervised domain adaption methods like DAN, DANN or JAN.

Examples:

>>> import torch
>>> import rul_datasets
>>> source = torch.utils.data.TensorDataset(torch.randn(10), torch.randn(10))
>>> target = torch.utils.data.TensorDataset(torch.randn(10), torch.randn(10))
>>> dataset = rul_datasets.adaption.AdaptionDataset(source, target)
>>> source_features, source_label, target_features = dataset[0]
__init__(labeled, *unlabeled, deterministic=False)

Create a new adaption dataset from a labeled source and one or multiple unlabeled target datasets.

By default, a random sample is drawn from each target dataset when a source sample is accessed. This is the recommended setting for training. To deactivate this behavior and fix the pairing of source and target samples, set deterministic to True. This is the recommended setting for evaluation.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| labeled | Dataset | The dataset from the labeled domain. | required |
| *unlabeled | Dataset | The dataset(s) from the unlabeled domain(s). | () |
| deterministic | bool | Return the same target sample for each source sample. | False |
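To fix the source-target pairing for evaluation, the deterministic flag can be set. A minimal sketch, assuming the source and target datasets from the example above:

>>> eval_dataset = rul_datasets.adaption.AdaptionDataset(
...     source, target, deterministic=True
... )
>>> source_features, source_label, target_features = eval_dataset[0]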
DomainAdaptionDataModule

Bases: LightningDataModule

A higher-order data module used for unsupervised domain adaption of a labeled source to an unlabeled target domain. The training data of both domains is wrapped in an AdaptionDataset which provides a random sample of the target domain with each sample of the source domain. It provides the validation and test splits of both domains, and optionally a paired dataset for both.

Examples:

>>> import rul_datasets
>>> fd1 = rul_datasets.CmapssReader(fd=1, window_size=20)
>>> fd2 = rul_datasets.CmapssReader(fd=2, percent_broken=0.8)
>>> source = rul_datasets.RulDataModule(fd1, 32)
>>> target = rul_datasets.RulDataModule(fd2, 32)
>>> dm = rul_datasets.DomainAdaptionDataModule(source, target)
>>> dm.prepare_data()
>>> dm.setup()
>>> train_1_2 = dm.train_dataloader()
>>> val_1, val_2 = dm.val_dataloader()
>>> test_1, test_2 = dm.test_dataloader()
__init__(source, target, paired_val=False, inductive=False)

Create a new domain adaption data module from a source and target RulDataModule. The source domain is considered labeled and the target domain unlabeled.

The source and target data modules are checked for compatibility (see RulDataModule). These checks include that the fd differs between them, as they would come from the same domain otherwise.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| source | RulDataModule | The data module of the labeled source domain. | required |
| target | RulDataModule | The data module of the unlabeled target domain. | required |
| paired_val | bool | Whether to include paired data in validation. | False |
| inductive | bool | Whether to use the target test set for training. | False |
prepare_data(*args, **kwargs)

Download and pre-process the underlying data.

This calls the prepare_data function for source and target domain. All previously completed preparation steps are skipped. It is called automatically by pytorch_lightning and executed on the first GPU in distributed mode.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Passed down to each data module's prepare_data function. | () |
| **kwargs | Any | Passed down to each data module's prepare_data function. | {} |
setup(stage=None)

Load source and target domain into memory.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| stage | Optional[str] | Passed down to each data module's setup function. | None |
test_dataloader(*args, **kwargs)

Create a data loader of the source and target test data.

The data loaders are the return values of source.test_dataloader and target.test_dataloader.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Ignored. Only for adhering to parent class interface. | () |
| **kwargs | Any | Ignored. Only for adhering to parent class interface. | {} |

Returns:

| Type | Description |
|---|---|
| List[DataLoader] | The source and target test data loader. |
train_dataloader(*args, **kwargs)

Create a data loader of an AdaptionDataset using source and target domain.

The data loader is configured to shuffle the data. The pin_memory option is activated to achieve maximum transfer speed to the GPU.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Ignored. Only for adhering to parent class interface. | () |
| **kwargs | Any | Ignored. Only for adhering to parent class interface. | {} |

Returns:

| Type | Description |
|---|---|
| DataLoader | The training data loader. |
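Each batch produced by this loader mirrors the structure of AdaptionDataset items. A minimal sketch with a single target domain, assuming the dm from the example above has been set up:

>>> for source_features, source_labels, target_features in dm.train_dataloader():
...     pass  # source tensors are labeled, target_features is an unlabeled sample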
val_dataloader(*args, **kwargs)

Create a data loader of the source, target and paired validation data.

By default, two data loaders are returned, which correspond to the source and the target validation data loader. An optional third data loader of a PairedRulDataset, using both source and target, is returned if paired_val was set to True in the constructor.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Ignored. Only for adhering to parent class interface. | () |
| **kwargs | Any | Ignored. Only for adhering to parent class interface. | {} |

Returns:

| Type | Description |
|---|---|
| List[DataLoader] | The source, target and an optional paired validation data loader. |
LatentAlignDataModule

Bases: DomainAdaptionDataModule

A higher-order data module based on DomainAdaptionDataModule.

It is specifically made to work with the latent space alignment approach by Zhang et al. The training data of both domains is wrapped in an AdaptionDataset which splits the data into healthy and degrading parts. For each sample of degrading source data, a random sample of degrading target data and a healthy sample of either source or target data is drawn. The number of steps in degradation is supplied for each degrading sample, as well. The data module also provides the validation and test splits of both domains, and optionally a paired dataset for both.

Examples:

>>> import rul_datasets
>>> fd1 = rul_datasets.CmapssReader(fd=1, window_size=20)
>>> fd2 = rul_datasets.CmapssReader(fd=2, percent_broken=0.8)
>>> src = rul_datasets.RulDataModule(fd1, 32)
>>> trg = rul_datasets.RulDataModule(fd2, 32)
>>> dm = rul_datasets.LatentAlignDataModule(src, trg, split_by_max_rul=True)
>>> dm.prepare_data()
>>> dm.setup()
>>> train_1_2 = dm.train_dataloader()
>>> val_1, val_2 = dm.val_dataloader()
>>> test_1, test_2 = dm.test_dataloader()
__init__(source, target, paired_val=False, inductive=False, split_by_max_rul=False, split_by_steps=None)

Create a new latent align data module from a source and target RulDataModule. The source domain is considered labeled and the target domain unlabeled.

The source and target data modules are checked for compatibility (see RulDataModule). These checks include that the fd differs between them, as they would come from the same domain otherwise.

The healthy and degrading data can be split by either maximum RUL value or the number of time steps. See split_healthy for more information.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| source | RulDataModule | The data module of the labeled source domain. | required |
| target | RulDataModule | The data module of the unlabeled target domain. | required |
| paired_val | bool | Whether to include paired data in validation. | False |
| split_by_max_rul | bool | Whether to split healthy and degrading by max RUL value. | False |
| split_by_steps | Optional[int] | Split the healthy and degrading data after this number of time steps. | None |
split_healthy(features, targets, by_max_rul=False, by_steps=None)

Split the feature and target time series into healthy and degrading parts and return a dataset of each.

If by_max_rul is set to True, the time steps with the maximum RUL value in each time series are considered healthy. This option is intended for labeled data with piece-wise linear RUL functions. If by_steps is set to an integer, the first by_steps time steps of each series are considered healthy. This option is intended for unlabeled data or data with a linear RUL function.

One option has to be set and both are mutually exclusive.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| features | List[ndarray] | List of feature time series. | required |
| targets | List[ndarray] | List of target time series. | required |
| by_max_rul | bool | Whether to split healthy and degrading data by max RUL value. | False |
| by_steps | Optional[int] | Split healthy and degrading data after this number of time steps. | None |

Returns:

| Name | Type | Description |
|---|---|---|
| healthy | RulDataset | Dataset of healthy data. |
| degraded | RulDataset | Dataset of degrading data. |
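A minimal sketch of splitting unlabeled runs after a fixed number of time steps; the arrays below are random stand-ins for real features and targets:

>>> import numpy as np
>>> import rul_datasets
>>> features = [np.random.randn(100, 30, 14) for _ in range(3)]
>>> targets = [np.arange(100, 0, -1, dtype=float) for _ in range(3)]
>>> healthy, degraded = rul_datasets.adaption.split_healthy(
...     features, targets, by_steps=20
... )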
Higher-order data modules to establish a baseline for transfer learning and domain adaption experiments.

BaselineDataModule

Bases: LightningDataModule

A higher-order data module that takes a RulDataModule. It provides the training and validation splits of the sub-dataset selected in the underlying data module but provides the test splits of all available subsets of the dataset. This makes it easy to evaluate the generalization of a supervised model on all sub-datasets.

Examples:

>>> import rul_datasets
>>> cmapss = rul_datasets.reader.CmapssReader(fd=1)
>>> dm = rul_datasets.RulDataModule(cmapss, batch_size=32)
>>> baseline_dm = rul_datasets.BaselineDataModule(dm)
>>> baseline_dm.prepare_data()
>>> baseline_dm.setup()
>>> train_fd1 = baseline_dm.train_dataloader()
>>> val_fd1 = baseline_dm.val_dataloader()
>>> test_fd1, test_fd2, test_fd3, test_fd4 = baseline_dm.test_dataloader()
__init__(data_module)

Create a new baseline data module from a RulDataModule.

It will provide a data loader of the underlying data module's training and validation splits. Additionally, it provides a data loader of the test split of all sub-datasets.

The data module keeps the configuration made in the underlying data module. The same configuration is then passed on to create RulDataModules for all sub-datasets, besides percent_fail_runs and percent_broken.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| data_module | RulDataModule | The underlying RulDataModule. | required |
prepare_data(*args, **kwargs)

Download and pre-process the underlying data.

This calls the prepare_data function for all sub-datasets. All previously completed preparation steps are skipped. It is called automatically by pytorch_lightning and executed on the first GPU in distributed mode.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Passed down to each data module's prepare_data function. | () |
| **kwargs | Any | Passed down to each data module's prepare_data function. | {} |
setup(stage=None)

Load all splits as tensors into memory.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| stage | Optional[str] | Passed down to each data module's setup function. | None |
test_dataloader(*args, **kwargs)

Return data loaders for all sub-datasets.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Passed down to each data module. | () |
| **kwargs | Any | Passed down to each data module. | {} |

Returns:

| Type | Description |
|---|---|
| List[DataLoader] | The test dataloaders of all sub-datasets. |

train_dataloader(*args, **kwargs)

val_dataloader(*args, **kwargs)
Basic data modules for experiments involving only a single subset of any RUL dataset.

PairedRulDataset

Bases: IterableDataset

A dataset of sample pairs drawn from the same time series.

The dataset uses the runs exactly as loaded by the passed data modules. Options like degraded_only need to be set there.
+
+
+
+ Bases: LightningDataModule
A data module to provide windowed +time series features with RUL targets. It exposes the splits of the underlying +dataset for easy usage with PyTorch and PyTorch Lightning.
+The data module implements the hparams
property used by PyTorch Lightning to
+save hyperparameters to checkpoints. It retrieves the hyperparameters of its
+underlying reader and adds the batch size to them.
If you want to extract features from the windows, you can pass the
+feature_extractor
and window_size
arguments to the constructor. The
+feature_extractor
is a callable that takes a windowed time series as a numpy
+array with the shape [num_windows, window_size, num_features]
and returns
+another numpy array. Depending on window_size
, the expected output shapes for
+the feature_extractor
are:
window_size is None
: [num_new_windows, new_window_size, features]
window_size is not None
: [num_windows, features]
If window_size
is set, the extracted features are re-windowed.
Examples:
+Default
+>>> import rul_datasets
+>>> cmapss = rul_datasets.reader.CmapssReader(fd=1)
+>>> dm = rul_datasets.RulDataModule(cmapss, batch_size=32)
+
With Feature Extractor
+>>> import rul_datasets
+>>> import numpy as np
+>>> cmapss = rul_datasets.reader.CmapssReader(fd=1)
+>>> dm = rul_datasets.RulDataModule(
+... cmapss,
+... batch_size=32,
+... feature_extractor=lambda x: np.mean(x, axis=1),
+... window_size=10
+... )
+
Only Degraded Validation and Test Samples
+>>> import rul_datasets
+>>> cmapss = rul_datasets.reader.CmapssReader(fd=1)
+>>> dm = rul_datasets.RulDataModule(cmapss, 32, degraded_only=["val", "test"])
+
data: Dict[str, Tuple[List[np.ndarray], List[np.ndarray]]] (property)

A dictionary of the training, validation and test splits.

Each split is a tuple of feature and target tensors. The keys are dev (training split), val (validation split) and test (test split).

fds (property)

Index list of the available subsets of the underlying dataset, i.e. [1, 2, 3, 4] for CMAPSS.

reader: AbstractReader (property)

The underlying dataset reader.
__init__(reader, batch_size, feature_extractor=None, window_size=None, degraded_only=None)

Create a new RUL data module from a reader.

This data module exposes a training, validation and test data loader for the underlying dataset. First, prepare_data is called to download and pre-process the dataset. Afterward, setup_data is called to load all splits into memory.

If a feature_extractor is supplied, the data module extracts new features from each window of the time series. If window_size is None, it is assumed that the extracted features form new windows themselves. If window_size is an int, it is assumed that the extracted features are single feature vectors and should be re-windowed. The expected output shapes for the feature_extractor are:

- window_size is None: [num_new_windows, new_window_size, features]
- window_size is not None: [num_windows, features]

The expected input shape for the feature_extractor is always [num_windows, window_size, features].

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| reader | AbstractReader | The dataset reader for the desired dataset, e.g., CmapssLoader. | required |
| batch_size | int | The size of the batches built by the data loaders. | required |
| feature_extractor | Optional[Callable] | A feature extractor that extracts feature vectors from windows. | None |
| window_size | Optional[int] | The new window size to apply after the feature extractor. | None |
| degraded_only | Optional[List[Literal['dev', 'val', 'test']]] | Whether to load only degraded samples for the specified splits. | None |
check_compatibility(other)

Check if another RulDataModule is compatible to be used together with this one.

RulDataModules can be used together in higher-order data modules, e.g. AdaptionDataModule. This function checks if other is compatible with this data module to do so. It checks the underlying dataset readers, matching batch size, feature extractor and window size. If anything is incompatible, this function will raise a ValueError.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| other | RulDataModule | The RulDataModule to check compatibility with. | required |

is_mutually_exclusive(other)

Check if the other data module is mutually exclusive to this one. See AbstractReader.is_mutually_exclusive.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| other | RulDataModule | Data module to check exclusivity against. | required |

Returns:

| Type | Description |
|---|---|
| bool | Whether both data modules are mutually exclusive. |
load_split(split, alias=None, degraded_only=None)

Load a split from the underlying reader and apply the feature extractor.

By setting alias, it is possible to load a split aliased as another split, e.g., load the test split and treat it as the dev split. The data of the split is loaded, but all pre-processing steps of alias are carried out.

If degraded_only is set, only degraded samples are loaded. This is only possible if the underlying reader has a max_rul set or norm_rul is set to True. The degraded_only argument takes precedence over the degraded_only of the data module.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| split | str | The desired split to load. | required |
| alias | Optional[str] | The split as which the loaded data should be treated. | None |
| degraded_only | Optional[bool] | Whether to only load degraded samples. | None |

Returns:

| Type | Description |
|---|---|
| Tuple[List[ndarray], List[ndarray]] | The feature and target tensors of the split's runs. |
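A minimal sketch of aliasing splits, assuming a prepared data module dm built from a CmapssReader:

>>> features, targets = dm.load_split("test", alias="dev")
>>> val_features, val_targets = dm.load_split("val", degraded_only=True)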
prepare_data(*args, **kwargs)

Download and pre-process the underlying data.

This calls the prepare_data function of the underlying reader. All previously completed preparation steps are skipped. It is called automatically by pytorch_lightning and executed on the first GPU in distributed mode.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Ignored. Only for adhering to parent class interface. | () |
| **kwargs | Any | Ignored. Only for adhering to parent class interface. | {} |

setup(stage=None)

Load all splits as tensors into memory and optionally apply the feature extractor.

The splits are placed inside the data property. If a split is empty, a tuple of empty tensors with the correct number of dimensions is created as a placeholder. This ensures compatibility with higher-order data modules.

If the data module was constructed with a feature_extractor argument, the feature windows are passed to the feature extractor. The resulting, new features may be re-windowed.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| stage | Optional[str] | Ignored. Only for adhering to parent class interface. | None |
test_dataloader(*args, **kwargs)

Create a data loader for the test split.

The data loader is configured to leave the data unshuffled. The pin_memory option is activated to achieve maximum transfer speed to the GPU.

The whole split is held in memory. Therefore, num_workers is set to zero, which uses the main process for creating batches.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Ignored. Only for adhering to parent class interface. | () |
| **kwargs | Any | Ignored. Only for adhering to parent class interface. | {} |

Returns:

| Type | Description |
|---|---|
| DataLoader | The test data loader. |
to_dataset(split, alias=None)

Create a dataset of a split.

This convenience function creates a plain tensor dataset to use outside the rul_datasets library.

The data placed inside the dataset will be from the specified split. If alias is set, the loaded data will be treated as if from the alias split. For example, one could load the test data and treat it as if it were the training data. This may be useful for inductive domain adaption.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| split | str | The split to place inside the dataset. | required |
| alias | Optional[str] | The split the loaded data should be treated as. | None |

Returns:

| Type | Description |
|---|---|
| RulDataset | A dataset containing the requested split. |
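A minimal sketch of using the returned dataset with a plain PyTorch DataLoader outside of Lightning, assuming a prepared data module dm:

>>> from torch.utils.data import DataLoader
>>> dev_data = dm.to_dataset("dev")
>>> loader = DataLoader(dev_data, batch_size=64, shuffle=True)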
train_dataloader(*args, **kwargs)

Create a data loader for the training split.

The data loader is configured to shuffle the data. The pin_memory option is activated to achieve maximum transfer speed to the GPU. The data loader is also configured to drop the last batch of the data if it would only contain one sample.

The whole split is held in memory. Therefore, num_workers is set to zero, which uses the main process for creating batches.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Ignored. Only for adhering to parent class interface. | () |
| **kwargs | Any | Ignored. Only for adhering to parent class interface. | {} |

Returns:

| Type | Description |
|---|---|
| DataLoader | The training data loader. |

val_dataloader(*args, **kwargs)

Create a data loader for the validation split.

The data loader is configured to leave the data unshuffled. The pin_memory option is activated to achieve maximum transfer speed to the GPU.

The whole split is held in memory. Therefore, num_workers is set to zero, which uses the main process for creating batches.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| *args | Any | Ignored. Only for adhering to parent class interface. | () |
| **kwargs | Any | Ignored. Only for adhering to parent class interface. | {} |

Returns:

| Type | Description |
|---|---|
| DataLoader | The validation data loader. |
RulDataset

Bases: Dataset

Internal dataset to hold multiple runs.

Its length is the sum of all runs' lengths.

__init__(features, *targets, copy_tensors=False)

Create a new dataset from multiple runs.

If copy_tensors is true, the tensors are copied to avoid side effects when modifying them. Otherwise, the tensors use the same memory as the original Numpy arrays to save space.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| features | List[ndarray] | The features of each run. | required |
| *targets | List[ndarray] | The targets of each run. | () |
| copy_tensors | bool | Whether to copy the tensors or not. | False |
This module contains the base class for all readers. It is only relevant to people that want to extend this package with their own dataset.

AbstractReader

This reader is the abstract base class of all readers.

In case you want to extend this library with a dataset of your own, you should create a subclass of AbstractReader. It defines the public interface that all data modules in this library use. Just inherit from this class, implement the abstract functions, and you should be good to go.

Please consider contributing your work afterward to help the community.

Examples:

>>> import numpy as np
>>> import rul_datasets
>>> class MyReader(rul_datasets.reader.AbstractReader):
...     @property
...     def dataset_name(self):
...         return "my_dataset"
...
...     @property
...     def fds(self):
...         return [1]
...
...     def prepare_data(self):
...         pass
...
...     def default_window_size(self, fd):
...         return 30
...
...     def load_complete_split(self, split, alias):
...         features = [np.random.randn(100, 2, 30) for _ in range(10)]
...         targets = [np.arange(100, 0, -1) for _ in range(10)]
...
...         return features, targets
...
>>> my_reader = MyReader(fd=1)
>>> features, targets = my_reader.load_split("dev")
>>> features[0].shape
(100, 2, 30)
dataset_name: str (abstractmethod, property)

Name of the dataset.

fds: List[int] (abstractmethod, property)

The indices of available sub-datasets.

hparams: Dict[str, Any] (property)

All information logged by the data modules as hyperparameters in PyTorch Lightning.

__init__(fd, window_size=None, max_rul=None, percent_broken=None, percent_fail_runs=None, truncate_val=False, truncate_degraded_only=False)

Create a new reader. If your reader needs additional input arguments, create your own __init__ function and call this one from within as super().__init__(...).

For more information about using readers refer to the reader module page.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| fd | int | Index of the selected sub-dataset | required |
| window_size | Optional[int] | Size of the sliding window. Defaults to 2560. | None |
| max_rul | Optional[int] | Maximum RUL value of targets. | None |
| percent_broken | Optional[float] | The maximum relative degradation per time series. | None |
| percent_fail_runs | Optional[Union[float, List[int]]] | The percentage or index list of available time series. | None |
| truncate_val | bool | Truncate the validation data with percent_broken, too. | False |
| truncate_degraded_only | bool | Only truncate the degraded part of the data (< max RUL). | False |
check_compatibility(other)

Check if the other reader is compatible with this one.

Compatibility of two readers ensures that training with both will probably succeed and produce valid results. Two readers are considered compatible, if they:

- are both children of AbstractReader
- have the same window size
- have the same max_rul

If any of these conditions is not met, the readers are considered misconfigured and a ValueError is thrown.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| other | AbstractReader | Another reader object. | required |

default_window_size(fd) (abstractmethod)

The default window size of the data set. This may vary from sub-dataset to sub-dataset.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| fd | int | The index of a sub-dataset. | required |

Returns:

| Type | Description |
|---|---|
| int | The default window size for the sub-dataset. |
get_compatible(fd=None, percent_broken=None, percent_fail_runs=None, truncate_val=None, consolidate_window_size='override')

Create a new reader of the desired sub-dataset that is compatible to this one (see check_compatibility). Useful for domain adaption.

The values for percent_broken, percent_fail_runs and truncate_val of the new reader can be overridden.

When constructing a compatible reader for another sub-dataset, the window size of this reader will be used to override the default window size of the new reader. This behavior can be changed by setting consolidate_window_size to "min". The window size of this reader and the new one will then be set to the minimum of this reader's window size and the default window size of the new reader. Please be aware that this will change the window size of this reader, too. If the new reader should use its default window size, set consolidate_window_size to "none".

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| fd | Optional[int] | The index of the sub-dataset for the new reader. | None |
| percent_broken | Optional[float] | Override this value in the new reader. | None |
| percent_fail_runs | Union[float, List[int], None] | Override this value in the new reader. | None |
| truncate_val | Optional[bool] | Override this value in the new reader. | None |
| consolidate_window_size | Literal['override', 'min', 'none'] | How to consolidate the window size of the readers. | 'override' |

Returns:

| Type | Description |
|---|---|
| AbstractReader | A compatible reader with optional overrides. |
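A minimal sketch of deriving a compatible target-domain reader for adaption experiments:

>>> import rul_datasets
>>> source = rul_datasets.CmapssReader(fd=3, window_size=20)
>>> target = source.get_compatible(fd=1, percent_broken=0.8)
>>> target.window_size  # overridden with the source reader's window size
20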
get_complement(percent_broken=None, truncate_val=None)

Get a compatible reader that contains all development runs that are not in this reader (see check_compatibility). Useful for semi-supervised learning.

The new reader will contain the development runs that were discarded in this reader due to truncation through percent_fail_runs. If percent_fail_runs was not set or this reader contains all development runs, it returns a reader with an empty development set.

The values for percent_broken and truncate_val of the new reader can be overridden.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| percent_broken | Optional[float] | Override this value in the new reader. | None |
| truncate_val | Optional[bool] | Override this value in the new reader. | None |

Returns:

| Type | Description |
|---|---|
| AbstractReader | A compatible reader with all development runs missing in this one. |
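A minimal sketch of a semi-supervised setup where half of the development runs are treated as labeled and the complement as unlabeled data without failure information:

>>> import rul_datasets
>>> labeled = rul_datasets.CmapssReader(fd=1, percent_fail_runs=0.5)
>>> unlabeled = labeled.get_complement(percent_broken=0.8)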
is_mutually_exclusive(other)

Check if this reader is mutually exclusive to another reader.

Two readers are mutually exclusive if:

- their percent_fail_runs arguments do not overlap (float arguments overlap if they are greater than zero)

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| other | AbstractReader | The reader to check exclusivity against. | required |

Returns:

| Type | Description |
|---|---|
| bool | Whether the readers are mutually exclusive. |
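A minimal sketch, assuming two readers whose percent_fail_runs index lists do not overlap:

>>> import rul_datasets
>>> reader_a = rul_datasets.CmapssReader(fd=1, percent_fail_runs=[0, 1, 2])
>>> reader_b = rul_datasets.CmapssReader(fd=1, percent_fail_runs=[3, 4, 5])
>>> reader_a.is_mutually_exclusive(reader_b)
True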
load_complete_split(split, alias) (abstractmethod)

Load a complete split without truncation.

This function should return the features and targets of the desired split. Both should be contained in a list of numpy arrays. Each of the arrays contains one time series. The features should have a shape of [num_windows, window_size, num_channels] and the targets a shape of [num_windows]. The features should be scaled as desired. The targets should be capped by max_rul.

By setting alias, it should be possible to load a split aliased as another split, e.g. load the test split and treat it as the dev split. The data of split should be loaded but all pre-processing steps of alias should be carried out.

This function is used internally in load_split which takes care of truncation.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| split | str | The name of the split to load. | required |
| alias | str | The split as which the loaded data should be treated. | required |

Returns:

| Name | Type | Description |
|---|---|---|
| features | List[ndarray] | The complete, scaled features of the desired split. |
| targets | List[ndarray] | The capped target values corresponding to the features. |
load_split(split, alias=None)

Load a split as tensors and apply truncation to it.

This function loads the scaled features and the targets of a split into memory. Afterwards, truncation is applied if the split is set to dev. The validation set is also truncated with percent_broken if truncate_val is set to True.

By setting alias, it is possible to load a split aliased as another split, e.g. load the test split and treat it as the dev split. The data of split is loaded but all pre-processing steps of alias are carried out.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| split | str | The desired split to load. | required |
| alias | Optional[str] | The split as which the loaded data should be treated. | None |

Returns:

| Name | Type | Description |
|---|---|---|
| features | List[ndarray] | The scaled, truncated features of the desired split. |
| targets | List[ndarray] | The truncated targets of the desired split. |

prepare_data() (abstractmethod)

Prepare the data. This function should take care of things that need to be done once, before the data can be used. This may include downloading, extracting or transforming the data, as well as fitting scalers. It is best practice to check if a preparation step was completed before to avoid repeating it unnecessarily.
+ + + +CmapssReader
+
+
+
+ Bases: AbstractReader
This reader represents the NASA CMAPSS Turbofan Degradation dataset. Each of its +four sub-datasets contains a training and a test split. Upon first usage, +the training split will be further divided into a development and a validation +split. 20% of the original training split is reserved for validation.
+The features are provided as sliding windows over each time series in the +dataset. The label of a window is the label of its last time step. The RUL labels +are capped by a maximum value. The original data contains 24 channels per time +step. Following the literature, we omit the constant channels and operation +condition channels by default. Therefore, the default channel indices are 4, 5, +6, 9, 10, 11, 13, 14, 15, 16, 17, 19, 22 and 23.
+The features are min-max scaled between -1 and 1. The scaler is fitted on the +development data only.
+ + + +Examples:
+Default channels
+>>> import rul_datasets
+>>> fd1 = rul_datasets.reader.CmapssReader(fd=1, window_size=30)
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> features[0].shape
+(163, 30, 14)
+
Custom channels
+>>> import rul_datasets
+>>> fd1 = rul_datasets.reader.CmapssReader(fd=1, feature_select=[1, 2, 3])
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> features[0].shape
+(163, 30, 3)
+
fds: List[int]
+
+
+ property
+
+
+Indices of available sub-datasets.
+__init__(fd, window_size=None, max_rul=125, percent_broken=None, percent_fail_runs=None, feature_select=None, truncate_val=False, operation_condition_aware_scaling=False, truncate_degraded_only=False)
+
+Create a new CMAPSS reader for one of the sub-datasets. The maximum RUL value
+is set to 125 by default. The 14 feature channels selected by default can be
+overridden by passing a list of channel indices to feature_select
. The
+default window size is defined per sub-dataset as the minimum time series
+length in the test set.
The data can be scaled separately for each operation condition, as done by +Ragab et al. This only affects FD002 and FD004 due to them having multiple +operation conditions.
+For more information about using readers, refer to the reader module page.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
fd |
+
+ int
+ |
+
+
+
+ Index of the selected sub-dataset + |
+ + required + | +
window_size |
+
+ Optional[int]
+ |
+
+
+
+ Size of the sliding window. Default defined per sub-dataset. + |
+
+ None
+ |
+
max_rul |
+
+ Optional[int]
+ |
+
+
+
+ Maximum RUL value of targets. + |
+
+ 125
+ |
+
percent_broken |
+
+ Optional[float]
+ |
+
+
+
+ The maximum relative degradation per time series. + |
+
+ None
+ |
+
percent_fail_runs |
+
+ Optional[Union[float, List[int]]]
+ |
+
+
+
+ The percentage or index list of available time series. + |
+
+ None
+ |
+
feature_select |
+
+ Optional[List[int]]
+ |
+
+
+
+ The index list of selected feature channels. + |
+
+ None
+ |
+
truncate_val |
+
+ bool
+ |
+
+
+
+ Truncate the validation data with |
+
+ False
+ |
+
operation_condition_aware_scaling |
+
+ bool
+ |
+
+
+
+ Scale data separatly for each + operation condition. + |
+
+ False
+ |
+
truncate_degraded_only |
+
+ bool
+ |
+
+
+
+ Only truncate the degraded part of the data + (< max RUL). + |
+
+ False
+ |
+
prepare_data()
+
+Prepare the CMAPSS dataset. This function needs to be called before using the +dataset for the first time.
+The dataset is downloaded from a custom mirror and extracted into the data +root directory. The training data is then split into development and +validation set. Afterwards, a scaler is fit on the development features. +Previously completed steps are skipped.
+ +A module for working with the data root directory.
+ + + +get_data_root()
+
+Return the path to the data root directory.
+The default data root is located at ~/.rul-datasets
. You can customize the
+location by setting the environment variable RUL_DATASETS_DATA_ROOT
or by
+calling set_data_root.
A manually set data root must be an already existing directory.
+ + + +Returns:
+Type | +Description | +
---|---|
+ str
+ |
+
+
+
+ The data root path. + |
+
set_data_root(data_root)
+
+Set the data root to the specified location.
+A manually set data root must be an already existing directory.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
data_root |
+
+ str
+ |
+
+
+
+ The new data root location. + |
+ + required + | +
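A minimal sketch of pointing the library at a custom directory; the import path rul_datasets.reader.data_root is an assumption here:

>>> import os
>>> from rul_datasets.reader.data_root import get_data_root, set_data_root
>>> os.makedirs("/tmp/rul-data", exist_ok=True)  # the directory must already exist
>>> set_data_root("/tmp/rul-data")
>>> get_data_root()
'/tmp/rul-data'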
This dummy dataset is intended for testing and debugging purposes, not for benchmarking. If your approach can fit this dataset it means that it is able to learn how to estimate RUL. It does not mean it is good at it.

DummyReader

Bases: AbstractReader

This reader represents a simple, small dummy dataset that can be used to test or debug RUL estimation approaches. It contains ten runs for each split with a single feature which makes it easy to hold in memory even on low-end computers. The dataset is so simple that it can be sufficiently fit by a three-layer perceptron in less than 50 epochs.

Each run is randomly generated by sampling a run length between 90 and 110 time steps and creating a piece-wise linear RUL function y(t) with a maximum value of max_rul. The feature x(t) is then calculated as a noisy function of y(t), an offset and a noise_factor, where N(loc, scale) denotes a sample drawn from a normal distribution with a mean of loc and a standard deviation of scale. The dev, val and test splits are all generated the same way with a different fixed random seed. This makes generating the dataset deterministic.

The dummy dataset contains two sub-datasets. The first uses an offset of 0.5 and a noise_factor of 0.01. The second uses an offset of 0.75 and a noise_factor of 0.02. Both use a default window size of 10 and are min-max scaled between -1 and 1 with a scaler fitted on the dev split.

Examples:

>>> import rul_datasets
>>> fd1 = rul_datasets.reader.DummyReader(fd=1)
>>> features, labels = fd1.load_split("dev")
>>> features[0].shape
(81, 10, 1)

fds: List[int] (property)

Indices of available sub-datasets.

__init__(fd, window_size=None, max_rul=50, percent_broken=None, percent_fail_runs=None, truncate_val=False, truncate_degraded_only=False)

Create a new dummy reader for one of the two sub-datasets. The maximum RUL value is set to 50 by default. Please be aware that changing this value will lead to different features, too, as they are calculated based on the RUL values.

For more information about using readers, refer to the reader module page.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| fd | int | Index of the selected sub-dataset | required |
| window_size | Optional[int] | Size of the sliding window. Default defined per sub-dataset. | None |
| max_rul | Optional[int] | Maximum RUL value of targets. | 50 |
| percent_broken | Optional[float] | The maximum relative degradation per time series. | None |
| percent_fail_runs | Optional[Union[float, List[int]]] | The percentage or index list of available time series. | None |
| truncate_val | bool | Truncate the validation data with percent_broken, too. | False |
| truncate_degraded_only | bool | Only truncate the degraded part of the data (< max RUL). | False |

prepare_data()

This function has no effect as there is nothing to prepare.
+ + + +FemtoReader
+
+
+
+ Bases: AbstractReader
This reader represents the FEMTO (PRONOSTIA) Bearing dataset. Each of its three
+sub-datasets contains a training and a test split. By default, the reader
+constructs a validation split for sub-datasets 1 and 2 each by taking the first
+run of the test split. For sub-dataset 3, the second training run is used for
+validation because only one test run is available. The remaining training data is
+denoted as the development split. This run to split assignment can be overridden
+by setting run_split_dist
.
The features contain windows with three channels. Only the two acceleration +channels are used because the test runs are missing the temperature channel. +These features are standardized to zero mean and one standard deviation. The +scaler is fitted on the development data.
+ + + +Examples:
+Default splits:
+>>> import rul_datasets
+>>> fd1 = rul_datasets.reader.FemtoReader(fd=1)
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> features[0].shape
+(2803, 2560, 2)
+
Custom splits:
+>>> import rul_datasets
+>>> splits = {"dev": [5], "val": [4], "test": [3]}
+>>> fd1 = rul_datasets.reader.FemtoReader(fd=1, run_split_dist=splits)
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> features[0].shape
+(2463, 2560, 2)
+
Set first-time-to-predict:
+>>> import rul_datasets
+>>> fttp = [10, 20, 30, 40, 50]
+>>> fd1 = rul_datasets.reader.FemtoReader(fd=1, first_time_to_predict=fttp)
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> labels[0][:15]
+array([2793., 2793., 2793., 2793., 2793., 2793., 2793., 2793., 2793.,
+ 2793., 2793., 2792., 2791., 2790., 2789.])
+
fds: List[int]
+
+
+ property
+
+
+Indices of available sub-datasets.
+__init__(fd, window_size=None, max_rul=None, percent_broken=None, percent_fail_runs=None, truncate_val=False, run_split_dist=None, first_time_to_predict=None, norm_rul=False, truncate_degraded_only=False)
+
+Create a new FEMTO reader for one of the sub-datasets. By default, the RUL +values are not capped. The default window size is 2560.
+Use first_time_to_predict
to set an individual RUL inflection point for
+each run. It should be a list with an integer index for each run. The index
+is the time step after which RUL declines. Before the time step it stays
+constant. The norm_rul
argument can then be used to scale the RUL of each
+run between zero and one.
For more information about using readers refer to the reader module page.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
fd |
+
+ int
+ |
+
+
+
+ Index of the selected sub-dataset + |
+ + required + | +
window_size |
+
+ Optional[int]
+ |
+
+
+
+ Size of the sliding window. Defaults to 2560. + |
+
+ None
+ |
+
max_rul |
+
+ Optional[int]
+ |
+
+
+
+ Maximum RUL value of targets. + |
+
+ None
+ |
+
percent_broken |
+
+ Optional[float]
+ |
+
+
+
+ The maximum relative degradation per time series. + |
+
+ None
+ |
+
percent_fail_runs |
+
+ Optional[Union[float, List[int]]]
+ |
+
+
+
+ The percentage or index list of available time series. + |
+
+ None
+ |
+
truncate_val |
+
+ bool
+ |
+
+
+
+ Truncate the validation data with |
+
+ False
+ |
+
run_split_dist |
+
+ Optional[Dict[str, List[int]]]
+ |
+
+
+
+ Dictionary that assigns each run idx to each split. + |
+
+ None
+ |
+
first_time_to_predict |
+
+ Optional[List[int]]
+ |
+
+
+
+ The time step for each time series before which RUL + is constant. + |
+
+ None
+ |
+
norm_rul |
+
+ bool
+ |
+
+
+
+ Normalize RUL between zero and one. + |
+
+ False
+ |
+
truncate_degraded_only |
+
+ bool
+ |
+
+
+
+ Only truncate the degraded part of the data + (< max RUL). + |
+
+ False
+ |
+
prepare_data()
+
+Prepare the FEMTO dataset. This function needs to be called before using the +dataset and each custom split for the first time.
+The dataset is downloaded from a custom mirror and extracted into the data +root directory. The whole dataset is converted from CSV files to NPY files to +speed up loading it from disk. Afterwards, a scaler is fit on the development +features. Previously completed steps are skipped.
+ +A module for dataset readers. Currently supported datasets are:
+Readers are the foundation of the RUL Datasets library. They provide access to the +data on disk and convert them into a common format so that other parts of the library +can interact with it. The common format is as follows:
+Each dataset consists of multiple sub-datasets. The indices of these sub-datasets
+are called FD
, following C-MAPSS convention.
Each sub-dataset contains a development (dev
), a validation (val
) and test split
+(test
).
Each split contains one or multiple time series of features and RUL targets that +represent run-to-failure experiments.
+At each time step of a time series we have a window of features and a target RUL +value. The target is the RUL value of the window's last time step.
+A reader class, e.g. the CmapssReader +represents a dataset and can manipulate it to your liking. A reader object has access +to one sub-dataset of the dataset:
+ +The reader object can load the features and targets of each split into memory:
+>>> dev_features, dev_targets = reader.load_split("dev")
+>>> val_features, val_targets = reader.load_split("val")
+>>> test_features, test_targets = reader.load_split("test")
+
The features are a list of numpy arrays where each array has a shape of
+[num_windows, window_size, num_channels]
:
The targets are a list of numpy arrays, too, where each array has a
+shape of [num_windows]
:
Each reader defines a default window size for its data. This can be overridden by the
+window_size
argument:
>>> fd1 = CmapssReader(fd=1, window_size=15)
+>>> features, _ = fd1.load_split("dev")
+>>> features[0].shape
+(163, 15, 14)
+
Some datasets, i.e. CMAPSS, use a piece-wise linear RUL function, where a maximum RUL
+value is defined. The maximum RUL value for a reader can be set via the max_rul
+argument:
>>> fd1 = CmapssReader(fd=1, max_rul=100)
+>>> targets = fd1.load_split("dev")
+>>> max(np.max(t) for t in targets)
+100.0
+
If you want to use a sub-dataset as unlabeled data, e.g. for unsupervised domain
+adaption, it should not contain features from the point of failure. If the data
+contains these features, there would be no reason for it to be unlabeled. The
+percent_broken
argument controls how much data near failure is available. A
+percent_broken
of 0.8
for example means that only the first 80% of each time
+series are available:
>>> fd1 = CmapssReader(fd=1, percent_broken=0.8)
+>>> features, targets = fd1.load_split("dev")
+>>> features[0].shape
+(130, 30, 14])
+>>> np.min(targets[0])
+34.0
+
If you have set a max_rul
you may only want to truncate data that is considered
+degraded, i.e. with a RUL value smaller than max_rul
. You can use the
+truncate_degraded_only
option to do that. This way, the data where the RUL value is
+smaller or equal to (1 - percent_broken) * max_rul
is cut off.
>>> fd1 = CmapssReader(fd=1, percent_broken=0.8, truncate_degraded_only=True)
+>>> features, targets = fd1.load_split("dev")
+>>> features[0].shape
+(138, 30, 14])
+>>> np.min(targets[0])
+26.0
+
You may want to apply the same percent_broken
from your training data to your
+validation data. This is sensible if you do not expect that your algorithm has access
+to labeled validation data in real-life. You can achieve this, by setting
+truncate_val
to True
:
>>> fd1 = CmapssReader(fd=1, percent_broken=0.8, truncate_val=True)
+>>> features, targets = fd1.load_split("val")
+>>> np.min(targets[0])
+44.0
+
Data-driven RUL estimation algorithms are often sensitive to the overall amount of
+training data. The more data is available, the more of its variance is covered. If
+you want to investigate how an algorithm performs in a low-data setting, you can use
+percent_fail_runs
. This argument controls how many runs are used for training. A
+percent_fail_runs
of 0.8
means that 80% of the available training runs are used.
+If you need more controll over which runs are used, you can pass a list of indices to
+use only these runs. This is useful for conducting semi-supervised learning where you
+consider one part of a sub-dataset labeled and the other part unlabeled:
>>> fd1 = CmapssReader(fd=1, percent_fail_runs=0.8)
+>>> features, targets = fd1.load_split("dev")
+>>> len(features)
+64
+>>> fd1 = CmapssReader(fd=1, percent_fail_runs=[0, 5, 40])
+>>> features, targets = fd1.load_split("dev")
+>>> len(features)
+3
+
If you have constructed a reader with a certain percent_fail_runs
, you can get a
+reader containing all other runs by using the get_complement
function:
>>> fd1 = CmapssReader(fd=1, percent_fail_runs=0.8)
+>>> fd1_complement = fd1.get_complement()
+>>> features, targets = fd1_complement.load_split("dev")
+>>> len(features)
+16
+
The effects of percent_broken
and percent_fail_runs
are summarized under the term
+truncation as they effectively truncate the dataset in two dimensions.
The readers for the FEMTO and XJTU-SY datasets have two additional constructor
+arguments. The first_time_to_predict
lets you set an individual maximum RUL value
+per run in the dataset. As both are bearing datasets, the first-time-to-predict is
+defined as the time step where the degradation of the bearing is first noticeable.
+The RUL value before this time step is assumed to be constant. Setting norm_rul
+scales the RUL between [0, 1] per run, as it is best practice when using
+first-time-to-predict.
>>> fttp = [10, 20, 30, 40, 50]
+>>> fd1 = rul_datasets.reader.XjtuSyReader(
+... fd=1, first_time_to_predict=fttp, norm_rul=True
+... )
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> labels[0][:15]
+array([1. , 1. , 1. , 1. , 1. ,
+ 1. , 1. , 1. , 1. , 1. ,
+ 1. , 0.99115044, 0.98230088, 0.97345133, 0.96460177])
+
Readers can be used as is if you just want access to the dataset. If you plan to use them with PyTorch or PyTorch Lightning, it is recommended to combine them with a RulDataModule:
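A minimal sketch of this combination; the batch size of 32 is an arbitrary choice:

>>> import rul_datasets
>>> fd1 = rul_datasets.reader.CmapssReader(fd=1)
>>> dm = rul_datasets.RulDataModule(fd1, batch_size=32)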
For more information, see the core module page or the Libraries page.
+C-MAPSS. In contrast to the original dataset,
+it contains fewer engine units, but each of them is recorded in more detail and under
+more realistic operation conditions. Each unit has flight cycles recorded from the
+healthy state until failure with RUL values assigned to the whole cycle. Inside a
+flight cycle, data is recorded with a 1Hz resolution The dataset is split into seven
+sub-datasets (FD=1
to FD=7
) that differ in the number of engine units and the
+types of failures present.
An eighth sub-dataset exists but is not present here as one of its data files seems +corrupted. The dataset authors were already contacted about this issue.
+NCmapssReader
+
+
+
+ Bases: AbstractReader
This reader provides access to the New C-MAPSS Turbofan Degradation dataset. Each
+of its seven sub-datasets contains a default train/val/test split which can be
+overridden by the run_split_dist
argument.
The features are provided as a windowed time series for each unit. The windows
+represent one flight cycle and are, by default, padded to the longest cycle in
+the sub-dataset. The window size can be overridden by the window_size
argument
+which truncates each cycle at the end. Additionally, the features can be
+downsampled in time by taking the average of resolution_seconds
consecutive
+time steps. The default channels are the four operating conditions,
+the 14 physical, and 14 virtual sensors in this order.
The features are min-max scaled between zero and one. The scaler is fitted on the
+development data only. It is refit for each custom run_split_dist
when
+prepare_data
is called.
Examples:
+Default channels
+>>> reader = NCmapssReader(fd=1)
+>>> reader.prepare_data()
+>>> features, labels = reader.load_split("dev")
+>>> features[0].shape
+(100, 20294, 32)
+
Physical sensors only
+>>> reader = NCmapssReader(fd=1, feature_select=list(range(4, 18)))
+>>> reader.prepare_data()
+>>> features, labels = reader.load_split("dev")
+>>> features[0].shape
+(100, 20294, 14)
+
Custom split and window size
+>>> reader = NCmapssReader(
+... fd=1,
+... run_split_dist={"dev": [0, 1], "val": [2], "test": [3]},
+... window_size=100, # first 100 steps of each cycle
+... )
+>>> reader.prepare_data()
+>>> features, labels = reader.load_split("dev")
+>>> features[0].shape
+(100, 100, 32)
+
Downsampled features
+>>> reader = NCmapssReader(fd=1, resolution_seconds=10)
+>>> reader.prepare_data()
+>>> features, labels = reader.load_split("dev")
+>>> features[0].shape # window size is automatically adjusted
+(100, 2029, 32)
+
fds: List[int]
+
+
+ property
+
+
+Indices of the available sub-datasets.
+__init__(fd, window_size=None, max_rul=65, percent_broken=None, percent_fail_runs=None, feature_select=None, truncate_val=False, run_split_dist=None, truncate_degraded_only=False, resolution_seconds=1, padding_value=0.0, scaling_range=(0, 1))
+
+Create a new reader for the New C-MAPSS dataset. The maximum RUL value is set +to 65 by default. The default channels are the four operating conditions, +the 14 physical, and 14 virtual sensors in this order.
+The default window size is, by default, the longest flight cycle in the +sub-dataset. Shorter cycles are padded on the left. The default padding value +is zero but can be overridden, e.g., as -1 to make filtering for padding easier +later on.
+The default run_split_dist
is the same as in the original dataset, but with
+the last unit of the original train split designated for validation.
If the features are downsampled in time, the default window size is
+automatically adjusted to window_size // resolution_seconds
. Any manually
+set window_size
needs to take this into account as it is applied after
+downsampling.
For more information about using readers, refer to the reader module page.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
fd |
+
+ int
+ |
+
+
+
+ The sub-dataset to use. Must be in |
+ + required + | +
max_rul |
+
+ Optional[int]
+ |
+
+
+
+ The maximum RUL value. + |
+
+ 65
+ |
+
percent_broken |
+
+ Optional[float]
+ |
+
+
+
+ The maximum relative degradation per unit. + |
+
+ None
+ |
+
percent_fail_runs |
+
+ Optional[Union[float, List[int]]]
+ |
+
+
+
+ The percentage or index list of available units. + |
+
+ None
+ |
+
feature_select |
+
+ Optional[List[int]]
+ |
+
+
+
+ The indices of the features to use. + |
+
+ None
+ |
+
truncate_val |
+
+ bool
+ |
+
+
+
+ Truncate the validation data with |
+
+ False
+ |
+
run_split_dist |
+
+ Optional[Dict[str, List[int]]]
+ |
+
+
+
+ The assignment of units to each split. + |
+
+ None
+ |
+
truncate_degraded_only |
+
+ bool
+ |
+
+
+
+ Only truncate the degraded part of the data + (< max RUL). + |
+
+ False
+ |
+
resolution_seconds |
+
+ int
+ |
+
+
+
+ The number of consecutive seconds to average over for + downsampling. + |
+
+ 1
+ |
+
padding_value |
+
+ float
+ |
+
+
+
+ The value to use for padding the flight cycles. + |
+
+ 0.0
+ |
+
prepare_data()
+
+Prepare the N-C-MAPSS dataset. This function needs to be called before using the +dataset for the first time.
+The dataset is assumed to be present in the data root directory. The training +data is then split into development and validation set. Afterward, a scaler +is fit on the development features if it was not already done previously.
+ +A module with functions for efficient saving and loading of RUL features and +targets.
+ + + +exists(save_path)
+
+Return whether the files resulting from a save
call with save_path
exist.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
save_path |
+
+ str
+ |
+
+
+
+ the |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+ Whether the files exist + |
+
load(save_path, memmap=False)
+
+Load features and targets of a run from .npy files.
+This method is used to restore runs that were saved with the save function. If the runs are too large for the RAM,
+memmap
 can be set to True to avoid reading them completely into memory. This
+results in slower processing, though.
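+A small sketch, assuming the module is importable as rul_datasets.reader.saving and that the path points to files previously created by save:
+>>> from rul_datasets.reader import saving
+>>> features, targets = saving.load("run_0", memmap=True)  # arrays stay on disk until accessed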
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
save_path |
+
+ str
+ |
+
+
+
+ Path that was supplied to the + save function. + |
+ + required + | +
memmap |
+
+ bool
+ |
+
+
+
+ whether to use memmap to avoid loading the whole run into memory + |
+
+ False
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
features |
+ ndarray
+ |
+
+
+
+ The feature array saved in |
+
targets |
+ ndarray
+ |
+
+
+
+ The target array saved in |
+
load_multiple(save_paths, memmap=False)
+
+Load multiple runs with the load function.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
save_paths |
+
+ List[str]
+ |
+
+
+
+ The list of run files to load. + |
+ + required + | +
memmap |
+
+ bool
+ |
+
+
+
+ See load + |
+
+ False
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
features |
+ List[ndarray]
+ |
+
+
+
+ The feature arrays saved in |
+
targets |
+ List[ndarray]
+ |
+
+
+
+ The target arrays saved in |
+
save(save_path, features, targets)
+
+Save features and targets of a run to .npy files.
+The arrays are saved to separate .npy files to enable memmap mode in case RAM is
+short. The files are saved under save_path and can be restored by passing the
+same save_path to the load function. If save_path does not have the .npy file
+extension, .npy will be appended.
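+A minimal roundtrip sketch, assuming the module is importable as rul_datasets.reader.saving; the path and array shapes are illustrative:
+>>> import numpy as np
+>>> from rul_datasets.reader import saving
+>>> features = np.random.randn(10, 100, 2)
+>>> targets = np.arange(10, 0, -1, dtype=float)
+>>> saving.save("run_0", features, targets)
+>>> saving.exists("run_0")
+True
+>>> features, targets = saving.load("run_0")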
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
save_path |
+
+ str
+ |
+
+
+
+ The path including file name to save the arrays to. + |
+ + required + | +
features |
+
+ ndarray
+ |
+
+
+
+ The feature array to save. + |
+ + required + | +
targets |
+
+ ndarray
+ |
+
+
+
+ The targets array to save. + |
+ + required + | +
A module with functions for scaling RUL features.
+ + + +Scaler = Union[scalers.StandardScaler, scalers.MinMaxScaler, scalers.MaxAbsScaler, scalers.RobustScaler]
+
+
+ module-attribute
+
+
+Supported scalers:
+ +OperationConditionAwareScaler
+
+
+
+ Bases: BaseEstimator
, TransformerMixin
This scaler is an ensemble of multiple base scalers, e.g. sklearn.preprocessing.MinMaxScaler. It takes an additional operation condition array while fitting and transforming that controls which base scaler is used. The operation condition corresponding to a sample is compared against the boundaries defined during construction of the scaler. If the condition lies between the first set of boundaries, the first base scaler is used, and so forth. If any condition does not fall between any boundaries, an exception will be raised and the boundaries should be adjusted.
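+A minimal sketch with two hypothetical condition intervals and a MinMaxScaler base; the boundary and condition values are illustrative only:
+>>> import numpy as np
+>>> from sklearn.preprocessing import MinMaxScaler
+>>> from rul_datasets.reader.scaling import OperationConditionAwareScaler
+>>> scaler = OperationConditionAwareScaler(MinMaxScaler(), [(0.0, 0.5), (0.6, 1.0)])
+>>> features = np.random.rand(100, 3)
+>>> conditions = np.random.choice([0.25, 0.75], size=100)  # each value falls into one interval
+>>> scaler = scaler.partial_fit(features, conditions)
+>>> scaled = scaler.transform(features, conditions)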
+ + + + +n_features_in_
+
+
+ property
+
+
+Number of expected input features.
+__init__(base_scaler, boundaries)
+
+Create a new scaler aware of operation conditions.
+Each pair in boundaries
represents the lower and upper value of an
+inclusive interval. For each interval a copy of the base_scaler
is
+maintained. If an operation condition value falls inside an interval,
+the corresponding scaler is used. The boundaries have to be mutually exclusive.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
base_scaler |
+
+ Scaler
+ |
+
+
+
+ The scaler that should be used for each condition. + |
+ + required + | +
boundaries |
+
+ List[Tuple[float, float]]
+ |
+
+
+
+ The pairs that form the inclusive boundaries of each condition. + |
+ + required + | +
partial_fit(features, operation_conditions)
+
+Fit the base scalers partially.
+This function calls partial_fit
on each of the base scalers with the
+samples that fall into the corresponding condition boundaries. If any sample
+does not fall into one of the boundaries, an exception is raised.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
features |
+
+ ndarray
+ |
+
+
+
+ The feature array to be scaled. + |
+ + required + | +
operation_conditions |
+
+ ndarray
+ |
+
+
+
+ The condition values compared against the boundaries. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ OperationConditionAwareScaler
+ |
+
+
+
+ The partially fitted scaler. + |
+
transform(features, operation_conditions)
+
+Scale the features with the appropriate condition aware scaler.
+This function calls transform
on each of the base scalers for the
+samples that fall into the corresponding condition boundaries. If any sample
+does not fall into one of the boundaries, an exception is raised.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
features |
+
+ ndarray
+ |
+
+
+
+ The features to be scaled. + |
+ + required + | +
operation_conditions |
+
+ ndarray
+ |
+
+
+
+ The condition values compared against the boundaries. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ndarray
+ |
+
+
+
+ The scaled features. + |
+
fit_scaler(features, scaler=None, operation_conditions=None)
+
+Fit a given scaler to the RUL features. If the scaler is omitted, +a StandardScaler will be created.
+If the scaler is an OperationConditionAwareScaler and
+operation_conditions
are passed, the scaler will be fit aware of operation
+conditions.
The scaler assumes that the last axis of the features is the channel dimension. Only scalers unaware of operation conditions can be fit with windowed data.
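+A minimal sketch, assuming the functions are importable from rul_datasets.reader.scaling and using random stand-in features:
+>>> import numpy as np
+>>> from rul_datasets.reader import scaling
+>>> features = [np.random.randn(1000, 14), np.random.randn(800, 14)]
+>>> scaler = scaling.fit_scaler(features)  # defaults to a StandardScaler
+>>> scaled = scaling.scale_features(features, scaler)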
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
features |
+
+ List[ndarray]
+ |
+
+
+
+ The RUL features. + |
+ + required + | +
scaler |
+
+ Optional[Union[Scaler, OperationConditionAwareScaler]]
+ |
+
+
+
+ The scaler to be fit. Defaults to a StandardScaler. + |
+
+ None
+ |
+
operation_conditions |
+
+ Optional[List[ndarray]]
+ |
+
+
+
+ The operation conditions for condition aware scaling. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Union[Scaler, OperationConditionAwareScaler]
+ |
+
+
+
+ The fitted scaler. + |
+
load_scaler(save_path)
+
+Load a scaler from disk.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
save_path |
+
+ str
+ |
+
+
+
+ The path the scaler was saved to. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Scaler
+ |
+
+
+
+ The loaded scaler. + |
+
save_scaler(scaler, save_path)
+
+Save a scaler to disk.
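+A minimal roundtrip sketch; the file name is illustrative and scaler is assumed to come from fit_scaler:
+>>> from rul_datasets.reader import scaling
+>>> scaling.save_scaler(scaler, "scaler.pkl")
+>>> restored = scaling.load_scaler("scaler.pkl")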
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
scaler |
+
+ Scaler
+ |
+
+
+
+ The scaler to be saved. + |
+ + required + | +
save_path |
+
+ str
+ |
+
+
+
+ The path to save the scaler to. + |
+ + required + | +
scale_features(features, scaler, operation_conditions=None)
+
+Scale the RUL features with a given scaler.
+The features can have a shape of [num_time_steps, channels]
or [num_windows,
+window_size, channels]
. The scaler needs to work on the channel dimension. If it
+was not fit with the right number of channels, a ValueError
is thrown.
If the scaler is operation condition aware, the operation_conditions
argument
+needs to be passed. Windowed data cannot be fit this way.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
features |
+
+ List[ndarray]
+ |
+
+
+
+ The RUL features to be scaled. + |
+ + required + | +
scaler |
+
+ Union[Scaler, OperationConditionAwareScaler]
+ |
+
+
+
+ The already fitted scaler. + |
+ + required + | +
operation_conditions |
+
+ Optional[List[ndarray]]
+ |
+
+
+
+ The operation conditions for condition aware scaling. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ List[ndarray]
+ |
+
+
+
+ The scaled features. + |
+
A module with functions for truncating RUL data.
+ + + +truncate_runs(features, targets, percent_broken=None, included_runs=None, degraded_only=False)
+
+Truncate RUL data according to percent_broken
and included_runs
.
RUL data has two dimensions in which it can be truncated: the number of runs and +the length of the runs. Truncating the number of runs limits the inter-run +variety of the data. Truncating the length of the run limits the amount of +available data near failure.
+For more information about truncation, see the reader +module page.
+ + + +Examples:
+Truncating via percent_broken
>>> import numpy as np
+>>> from rul_datasets.reader.truncating import truncate_runs
+>>> features = [np.random.randn(i*100, 5) for i in range(1, 6)]
+>>> targets = [np.arange(i*100)[::-1] for i in range(1, 6)]
+>>> (features[0].shape, targets[0].shape)
+((100, 5), (100,))
+>>> features, targets = truncate_runs(features, targets, percent_broken=0.8)
+>>> (features[0].shape, targets[0].shape) # runs are shorter
+((80, 5), (80,))
+>>> np.min(targets[0]) # runs contain no failures
+20
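+Truncating via included_runs, a sketch that continues the example above and assumes included_runs accepts a list of run indices:
+>>> features, targets = truncate_runs(features, targets, included_runs=[0, 1])
+>>> len(features)  # only the selected runs remain
+2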
+
The XJTU-SY Bearing dataset is a collection of run-to-failure experiments on +bearings. Three different operation conditions were used, resulting in three +sub-datasets. Each sub-dataset contains five runs without an official training/test +split.
+ + + +XjtuSyReader
+
+
+
+ Bases: AbstractReader
This reader represents the XJTU-SY Bearing dataset. Each of its three
+sub-datasets contains five runs. By default, the reader assigns the first two to
+the development, the third to the validation and the remaining two to the test
+split. This run-to-split assignment can be overridden by setting run_split_dist
.
The features contain windows with two channels of acceleration data which are +standardized to zero mean and one standard deviation. The scaler is fitted on the +development data.
+ + + +Examples:
+Default splits:
+>>> import rul_datasets
+>>> fd1 = rul_datasets.reader.XjtuSyReader(fd=1)
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> features[0].shape
+(123, 32768, 2)
+
Custom splits:
+>>> import rul_datasets
+>>> splits = {"dev": [5], "val": [4], "test": [3]}
+>>> fd1 = rul_datasets.reader.XjtuSyReader(fd=1, run_split_dist=splits)
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> features[0].shape
+(52, 32768, 2)
+
Set first-time-to-predict:
+>>> import rul_datasets
+>>> fttp = [10, 20, 30, 40, 50]
+>>> fd1 = rul_datasets.reader.XjtuSyReader(fd=1, first_time_to_predict=fttp)
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> labels[0][:15]
+array([113., 113., 113., 113., 113., 113., 113., 113., 113., 113., 113.,
+ 112., 111., 110., 109.])
+
fds: List[int]
+
+
+ property
+
+
+Indices of available sub-datasets.
+__init__(fd, window_size=None, max_rul=None, percent_broken=None, percent_fail_runs=None, truncate_val=False, run_split_dist=None, first_time_to_predict=None, norm_rul=False, truncate_degraded_only=False)
+
+Create a new XJTU-SY reader for one of the sub-datasets. By default, the RUL +values are not capped. The default window size is 32768.
+Use first_time_to_predict
to set an individual RUL inflection point for
+each run. It should be a list with an integer index for each run. The index
+is the time step after which RUL declines. Before the time step it stays
+constant. The norm_rul
argument can then be used to scale the RUL of each
+run between zero and one.
For more information about using readers, refer to the reader module page.
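+A sketch combining first_time_to_predict with norm_rul; the index values are illustrative only:
+>>> import rul_datasets
+>>> fttp = [10, 20, 30, 40, 50]
+>>> fd1 = rul_datasets.reader.XjtuSyReader(
+...     fd=1, first_time_to_predict=fttp, norm_rul=True
+... )
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")  # labels are now scaled between zero and one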
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
fd |
+
+ int
+ |
+
+
+
+ Index of the selected sub-dataset + |
+ + required + | +
window_size |
+
+ Optional[int]
+ |
+
+
+
+ Size of the sliding window. Defaults to 32768. + |
+
+ None
+ |
+
max_rul |
+
+ Optional[int]
+ |
+
+
+
+ Maximum RUL value of targets. + |
+
+ None
+ |
+
percent_broken |
+
+ Optional[float]
+ |
+
+
+
+ The maximum relative degradation per time series. + |
+
+ None
+ |
+
percent_fail_runs |
+
+ Optional[Union[float, List[int]]]
+ |
+
+
+
+ The percentage or index list of available time series. + |
+
+ None
+ |
+
truncate_val |
+
+ bool
+ |
+
+
+
+ Truncate the validation data with |
+
+ False
+ |
+
run_split_dist |
+
+ Optional[Dict[str, List[int]]]
+ |
+
+
+
+ Dictionary that assigns each run idx to each split. + |
+
+ None
+ |
+
first_time_to_predict |
+
+ Optional[List[int]]
+ |
+
+
+
+ The time step for each time series before which RUL + is constant. + |
+
+ None
+ |
+
norm_rul |
+
+ bool
+ |
+
+
+
+ Normalize RUL between zero and one. + |
+
+ False
+ |
+
truncate_degraded_only |
+
+ bool
+ |
+
+
+
+ Only truncate the degraded part of the data + (< max RUL). + |
+
+ False
+ |
+
prepare_data()
+
+Prepare the XJTU-SY dataset. This function needs to be called before using the +dataset and each custom split for the first time.
+The dataset is downloaded from a custom mirror and extracted into the data root directory. The whole dataset is converted from CSV files to NPY files to speed up loading it from disk. Afterwards, a scaler is fit on the development features. Previously completed steps are skipped.
+ +A module with higher-order data modules for semi-supervised learning.
+ + + +SemiSupervisedDataModule
+
+
+
+ Bases: LightningDataModule
A higher-order data module used for +semi-supervised learning with a labeled data module and an unlabeled one. It +makes sure that both data modules come from the same sub-dataset.
+ + + +Examples:
+>>> import rul_datasets
+>>> fd1 = rul_datasets.CmapssReader(fd=1, window_size=20, percent_fail_runs=0.5)
+>>> fd1_complement = fd1.get_complement(percent_broken=0.8)
+>>> labeled = rul_datasets.RulDataModule(fd1, 32)
+>>> unlabeled = rul_datasets.RulDataModule(fd1_complement, 32)
+>>> dm = rul_datasets.SemiSupervisedDataModule(labeled, unlabeled)
+>>> dm.prepare_data()
+>>> dm.setup()
+>>> train_ssl = dm.train_dataloader()
+>>> val = dm.val_dataloader()
+>>> test = dm.test_dataloader()
+
__init__(labeled, unlabeled)
+
+Create a new semi-supervised data module from a labeled and unlabeled +RulDataModule.
+Both data modules are checked for compatibility (see RulDataModule). These
+checks include that the fd
 matches between them.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
labeled |
+
+ RulDataModule
+ |
+
+
+
+ The data module of the labeled dataset. + |
+ + required + | +
unlabeled |
+
+ RulDataModule
+ |
+
+
+
+ The data module of the unlabeled dataset. + |
+ + required + | +
prepare_data(*args, **kwargs)
+
+Download and pre-process the underlying data.
+This calls the prepare_data
 function for the labeled and unlabeled data module. All
+previously completed preparation steps are skipped. It is called
+automatically by pytorch_lightning
and executed on the first GPU in
+distributed mode.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*args |
+
+ Any
+ |
+
+
+
+ Passed down to each data module's |
+
+ ()
+ |
+
**kwargs |
+
+ Any
+ |
+
+
+
+ Passed down to each data module's |
+
+ {}
+ |
+
setup(stage=None)
+
+Load labeled and unlabeled data into memory.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
stage |
+
+ Optional[str]
+ |
+
+
+
+ Passed down to each data module's |
+
+ None
+ |
+
test_dataloader(*args, **kwargs)
+
+Create a data loader of the labeled test data.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*args |
+
+ Any
+ |
+
+
+
+ Ignored. Only for adhering to parent class interface. + |
+
+ ()
+ |
+
**kwargs |
+
+ Any
+ |
+
+
+
+ Ignored. Only for adhering to parent class interface. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataLoader
+ |
+
+
+
+ The labeled test data loader. + |
+
train_dataloader(*args, **kwargs)
+
+Create a data loader of an AdaptionDataset built from the labeled and unlabeled data.
+The data loader is configured to shuffle the data. The pin_memory
option is
+activated to achieve maximum transfer speed to the GPU.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*args |
+
+ Any
+ |
+
+
+
+ Ignored. Only for adhering to parent class interface. + |
+
+ ()
+ |
+
**kwargs |
+
+ Any
+ |
+
+
+
+ Ignored. Only for adhering to parent class interface. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataLoader
+ |
+
+
+
+ The training data loader + |
+
val_dataloader(*args, **kwargs)
+
+Create a data loader of the labeled validation data.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*args |
+
+ Any
+ |
+
+
+
+ Ignored. Only for adhering to parent class interface. + |
+
+ ()
+ |
+
**kwargs |
+
+ Any
+ |
+
+
+
+ Ignored. Only for adhering to parent class interface. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataLoader
+ |
+
+
+
+ The labeled validation data loader. + |
+
extract_windows(seq, window_size, dilation=1, mode='memory')
+
+Extract sliding windows from a sequence.
+The step size is considered to be one, which results in len(seq) - window_size +
+1
extracted windows. The resulting array has the shape [num_windows, window_size,
+num_channels].
If dilation is set to a value greater than one, the window will not contain
+consecutive time steps. Instead, the time steps are spaced by the dilation value.
+In this case, the number of extracted windows is len(seq) - (window_size - 1) *
+dilation
.
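+A minimal sketch, assuming the function is importable from rul_datasets.utils:
+>>> import numpy as np
+>>> from rul_datasets.utils import extract_windows
+>>> seq = np.random.randn(100, 2)
+>>> extract_windows(seq, window_size=10).shape
+(91, 10, 2)
+>>> extract_windows(seq, window_size=10, dilation=3).shape  # 100 - (10 - 1) * 3
+(73, 10, 2)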
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
seq |
+
+ ndarray
+ |
+
+
+
+ sequence to extract windows from + |
+ + required + | +
window_size |
+
+ int
+ |
+
+
+
+ length of the sliding window + |
+ + required + | +
dilation |
+
+ int
+ |
+
+
+
+ dilation of the sliding window + |
+
+ 1
+ |
+
mode |
+
+ Literal['memory', 'memmap']
+ |
+
+
+
+ create windows either in memory or on disk + |
+
+ 'memory'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ndarray
+ |
+
+
+
+ array of sliding windows + |
+
feature_to_tensor(features, dtype=torch.float32, copy=False)
+
+Convert a numpy array to a torch tensor of dtype
 and swap the last two dimensions.
The function assumes that the last dimension of the numpy array is the channel +dimension, and the second to last is the time dimension. All preceding dimensions +are considered to be batch dimensions.
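+A minimal sketch, assuming the function is importable from rul_datasets.utils:
+>>> import numpy as np
+>>> from rul_datasets.utils import feature_to_tensor
+>>> features = np.random.randn(8, 30, 14)  # [batch, time, channels]
+>>> feature_to_tensor(features).shape
+torch.Size([8, 14, 30])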
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
features |
+
+ ndarray
+ |
+
+
+
+ numpy array to convert + |
+ + required + | +
dtype |
+
+ dtype
+ |
+
+
+
+ dtype of the resulting tensor + |
+
+ float32
+ |
+
copy |
+
+ bool
+ |
+
+
+
+ whether to copy the array before converting it + |
+
+ False
+ |
+
get_files_in_path(path, condition=None)
+
+Return the paths of all files in a path that satisfy a condition in alphabetical
+order.
+If the condition is None, all files are returned.
Parameters:
Name | Description | Default |
---|---|---|
path | The path to look into. | required |
condition | The include-condition for files. | None |
Returns:
+Type | +Description | +
---|---|
+ List[str]
+ |
+
+
+
+ all files that satisfy the condition in alphabetical order + |
+
get_targets_from_file_paths(file_paths, timestep_from_file_path)
+
+Create the RUL targets based on the file paths of the feature files.
+For each run, the supplied conversion function extracts the time step from each feature file path. Afterwards, the RUL is calculated by subtracting each time step from the maximum time step plus 1.
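+A minimal sketch with hypothetical file names, assuming the function is importable from rul_datasets.utils:
+>>> from rul_datasets.utils import get_targets_from_file_paths
+>>> file_paths = {0: ["run0/0001.csv", "run0/0002.csv", "run0/0003.csv"]}
+>>> step_from_path = lambda p: int(p.split("/")[-1].split(".")[0])
+>>> targets = get_targets_from_file_paths(file_paths, step_from_path)  # {0: [3, 2, 1]}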
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
file_paths |
+
+ Dict[int, List[str]]
+ |
+
+
+
+ runs represented as dict of feature file paths + |
+ + required + | +
timestep_from_file_path |
+
+ Callable
+ |
+
+
+
+ Function to convert a feature file path to a time step + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Dict[int, ndarray]
+ |
+
+
+
+ A list of RUL target arrays for each run + |
+