Title: Analyse and Interpret Time Series Features
Description: Provides a suite of functions for analysing, interpreting, and visualising time-series features calculated from different feature sets from the 'theft' package. Implements statistical learning methodologies described in Henderson, T., Bryant, A., and Fulcher, B. (2023) <arXiv:2303.17809>.
Authors: Trent Henderson [cre, aut]
Maintainer: Trent Henderson <[email protected]>
License: MIT + file LICENSE
Version: 0.1.3
Built: 2025-02-05 09:24:47 UTC
Source: https://github.com/hendersontrent/theftdlc
Fit classifiers to time-series features using a resample-based approach and get a fast understanding of performance
Usage:

classify(data, classifier = NULL, train_size = 0.75, n_resamples = 30,
         by_set = TRUE, use_null = FALSE, seed = 123)

tsfeature_classifier(data, classifier = NULL, train_size = 0.75, n_resamples = 30,
                     by_set = TRUE, use_null = FALSE, seed = 123)
Arguments:

data: feature_calculations object containing the raw feature matrix, as produced by theft::calculate_features
classifier: function specifying the classification model to fit. Defaults to NULL, which uses the package's default classifier
train_size: numeric proportion of samples to allocate to the train set. Defaults to 0.75
n_resamples: integer number of train-test resamples to calculate. Defaults to 30
by_set: Boolean specifying whether to fit classifiers for each feature set separately. Defaults to TRUE
use_null: Boolean specifying whether to also fit empirical null models for later significance testing. Defaults to FALSE
seed: integer to fix R's random number generator for reproducibility. Defaults to 123
Value:

list containing a named vector of train-test set sizes and a data.frame of classification performance results

Author: Trent Henderson
Examples:

library(theft)

features <- theft::calculate_features(theft::simData, group_var = "process",
                                      feature_set = "catch22")

classifiers <- classify(features, by_set = FALSE, n_resamples = 3)
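If downstream significance testing against an empirical null is planned (see compare_features), the same call can be rerun with use_null = TRUE. This is a hedged sketch, assuming the features object from the example above is still in the workspace:

# Hedged sketch: also fit empirical null models alongside the main models
# (assumes `features` from the example above)
classifiers_null <- classify(features, by_set = FALSE, n_resamples = 3, use_null = TRUE)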
Perform cluster analysis of time series using their feature vectors
Usage:

cluster(data, norm_method = c("zScore", "Sigmoid", "RobustSigmoid", "MinMax", "MaxAbs"),
        unit_int = FALSE, clust_method = c("kmeans", "hclust", "mclust"), k = 2,
        features = NULL, na_removal = c("feature", "sample"), seed = 123, ...)
Arguments:

data: feature_calculations object containing the raw feature matrix, as produced by theft::calculate_features
norm_method: character specifying the rescaling/normalisation method to apply before clustering. Can be one of "zScore", "Sigmoid", "RobustSigmoid", "MinMax", or "MaxAbs". Defaults to "zScore"
unit_int: Boolean specifying whether to rescale values to the unit interval after applying norm_method. Defaults to FALSE
clust_method: character specifying the clustering algorithm to use. Can be one of "kmeans", "hclust", or "mclust". Defaults to "kmeans"
k: integer specifying the number of clusters to extract. Defaults to 2
features: character vector of feature names to use. Defaults to NULL, which uses all features
na_removal: character specifying how to handle missing values prior to clustering: "feature" removes features containing NAs, "sample" removes samples containing NAs. Defaults to "feature"
seed: integer to fix R's random number generator for reproducibility. Defaults to 123
...: additional arguments to be passed to the selected clustering method
Value:

object of class feature_cluster containing the clustering algorithm output and a tidy version of the clusters joined to the input dataset, ready for further analysis

Author: Trent Henderson
Examples:

library(theft)

features <- theft::calculate_features(theft::simData, group_var = "process",
                                      feature_set = "catch22")

clusts <- cluster(features, k = 6)
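To get a quick sense of what the returned feature_cluster object holds, without assuming anything about its element names, a structural inspection is enough:

# Inspect the top-level structure of the feature_cluster object
str(clusts, max.level = 1)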
Conduct statistical testing on time-series feature classification performance to identify top features or compare entire sets
Usage:

compare_features(data, metric = c("accuracy", "precision", "recall", "f1"), by_set = TRUE,
                 hypothesis = c("null", "pairwise"),
                 p_adj = c("none", "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr"))
Arguments:

data: list object containing the classification outputs produced by classify
metric: character specifying the classification performance metric to test on. Can be one of "accuracy", "precision", "recall", or "f1". Defaults to "accuracy"
by_set: Boolean specifying whether to compare entire feature sets (TRUE) or individual features (FALSE). Defaults to TRUE
hypothesis: character specifying whether to test against an empirical null ("null") or perform pairwise comparisons ("pairwise"). Defaults to "null"
p_adj: character specifying the multiple-comparison correction to apply to p-values. Can be one of "none", "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", or "fdr" (as in stats::p.adjust). Defaults to "none"
Value:

data.frame containing the results

Author: Trent Henderson

References: Henderson, T., Bryant, A. G., and Fulcher, B. D. Never a Dull Moment: Distributional Properties as a Baseline for Time-Series Classification. 27th Pacific-Asia Conference on Knowledge Discovery and Data Mining (2023).
Examples:

library(theft)

features <- theft::calculate_features(theft::simData, group_var = "process",
                                      feature_set = NULL,
                                      features = list("mean" = mean, "sd" = sd))

classifiers <- classify(features, by_set = FALSE, n_resamples = 3)

compare_features(classifiers, by_set = FALSE, hypothesis = "pairwise")
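A hedged variant testing each feature against an empirical null rather than pairwise. It assumes null classification results are available, i.e. that classify was run with use_null = TRUE (as in the classifiers_null sketch in the classify entry above):

# Hedged sketch: test each feature against an empirical null
# (assumes `classifiers_null` was produced by classify(..., use_null = TRUE))
compare_features(classifiers_null, by_set = FALSE, hypothesis = "null")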
Remove duplicate features that exist in multiple feature sets and retain a reproducible random selection of one of them
Usage:

filter_duplicates(data, preference = NULL, seed = 123)
Arguments:

data: feature_calculations object containing the raw feature matrix, as produced by theft::calculate_features
preference: deprecated. Do not use
seed: integer to fix R's random number generator for reproducibility. Defaults to 123
Value:

feature_calculations object containing the filtered feature data

Author: Trent Henderson
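No example ships with this entry, so here is a hedged sketch. It assumes two R-based feature sets ("catch22" and "feasts") are installed and available to theft, since duplicate features can only arise when more than one set is computed:

# Hedged sketch (assumes the "feasts" feature set is available alongside catch22)
library(theft)

features_two_sets <- theft::calculate_features(theft::simData, group_var = "process",
                                               feature_set = c("catch22", "feasts"))

# Keep a reproducible random selection of any feature that appears in both sets
features_filtered <- filter_duplicates(features_two_sets, seed = 123)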
Filter resampled data sets according to a list of "good" features
Usage:

filter_good_features(data, x, good_features)
Arguments:

data: list of resampled train and test data
x: resample index to operate on
good_features: character vector of "good" feature names present in both the train and test sets
Value:

list of filtered train and test data

Author: Trent Henderson
Helper function to find features that are "good" in both the train and test sets
Usage:

find_good_features(data, x)
Arguments:

data: list of resampled train and test data
x: resample index to operate on
Value:

character vector of "good" feature names

Author: Trent Henderson
Fit classification model and compute key metrics
Usage:

fit_models(data, iter_data, row_id, is_null_run = FALSE, classifier)
Arguments:

data: list containing the train and test data
iter_data: data.frame of resample iteration information
row_id: row identifier of iter_data to operate on
is_null_run: Boolean specifying whether the run fits an empirical null model. Defaults to FALSE
classifier: classification model function to fit
Value:

data.frame of classification results

Author: Trent Henderson
Calculate central tendency and spread values for all numeric columns in a dataset
Usage:

get_rescale_vals(data)
Arguments:

data: data.frame whose numeric columns are to be summarised
Value:

list of central tendency and spread values

Author: Trent Henderson
Calculate interval summaries around a measure of central tendency for classification results
Usage:

interval(data, metric = c("accuracy", "precision", "recall", "f1"), by_set = TRUE,
         type = c("sd", "qt", "quantile"), interval = NULL, model_type = c("main", "null"))

calculate_interval(data, metric = c("accuracy", "precision", "recall", "f1"), by_set = TRUE,
                   type = c("sd", "qt", "quantile"), interval = NULL, model_type = c("main", "null"))
Arguments:

data: list object containing the classification outputs produced by classify
metric: character specifying the classification performance metric to summarise. Can be one of "accuracy", "precision", "recall", or "f1". Defaults to "accuracy"
by_set: Boolean specifying whether to summarise by feature set (TRUE) or by individual feature (FALSE). Defaults to TRUE
type: character specifying the kind of interval to calculate. Can be one of "sd", "qt", or "quantile". Defaults to "sd"
interval: numeric width of the interval to calculate, e.g. the number of standard deviations when type = "sd" (as in the example below). Defaults to NULL
model_type: character specifying whether to summarise the main model results ("main") or the empirical null model results ("null"). Defaults to "main"
Value:

interval_calculations object, which is a data frame containing the results

Author: Trent Henderson
Examples:

library(theft)

features <- theft::calculate_features(theft::simData, group_var = "process",
                                      feature_set = NULL,
                                      features = list("mean" = mean, "sd" = sd))

classifiers <- classify(features, by_set = FALSE, n_resamples = 3)

interval(classifiers, by_set = FALSE, type = "sd", interval = 1)
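A hedged variant using empirical quantiles rather than standard deviations; the 0.95 value passed to interval is an assumption about how the quantile coverage is specified, not something confirmed by this entry:

# Hedged sketch: empirical-quantile interval (the 0.95 coverage value is assumed)
interval(classifiers, by_set = FALSE, type = "quantile", interval = 0.95)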
Helper function for converting to title case
Usage:

make_title(x)
Arguments:

x: character vector to convert to title case
Value:

character vector

Author: Trent Henderson
Produce a plot for a feature_calculations object
Usage:

## S3 method for class 'feature_calculations'
plot(x, type = c("quality", "matrix", "cor", "violin"),
     norm_method = c("zScore", "Sigmoid", "RobustSigmoid", "MinMax", "MaxAbs"), unit_int = FALSE,
     clust_method = c("average", "ward.D", "ward.D2", "single", "complete", "mcquitty", "median", "centroid"),
     cor_method = c("pearson", "spearman"), feature_names = NULL, ...)
Arguments:

x: feature_calculations object containing the raw feature matrix, as produced by theft::calculate_features
type: character specifying the type of plot to draw. Can be one of "quality", "matrix", "cor", or "violin". Defaults to "quality"
norm_method: character specifying the rescaling/normalisation method to apply before plotting. Can be one of "zScore", "Sigmoid", "RobustSigmoid", "MinMax", or "MaxAbs". Defaults to "zScore"
unit_int: Boolean specifying whether to rescale values to the unit interval after applying norm_method. Defaults to FALSE
clust_method: character specifying the hierarchical clustering (linkage) method to use. Can be one of "average", "ward.D", "ward.D2", "single", "complete", "mcquitty", "median", or "centroid". Defaults to "average"
cor_method: character specifying the correlation method for type = "cor". Can be one of "pearson" or "spearman". Defaults to "pearson"
feature_names: character vector of feature names to restrict the plot to. Defaults to NULL, which uses all features
...: arguments to be passed to methods
Value:

object of class ggplot that contains the graphic

Author: Trent Henderson
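This entry ships without an example, so here is a hedged sketch that reuses the catch22 features object from the classify example above and draws two of the available plot types:

# Hedged sketch (assumes `features` from the earlier catch22 example is in the workspace)
plot(features, type = "quality")
plot(features, type = "matrix", norm_method = "zScore")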
Produce a plot for a feature_projection object
Usage:

## S3 method for class 'feature_projection'
plot(x, show_covariance = TRUE, ...)
Arguments:

x: feature_projection object, as produced by project
show_covariance: Boolean specifying whether to display covariance ellipses for each group on the plot. Defaults to TRUE
...: arguments to be passed to methods
Value:

object of class ggplot that contains the graphic

Author: Trent Henderson
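No example accompanies this method, so here is a hedged sketch that plots the pca object produced in the project entry later in this manual:

# Hedged sketch (assumes `pca` from the project() example in this manual)
plot(pca, show_covariance = TRUE)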
Produce a plot for an interval_calculations object
Usage:

## S3 method for class 'interval_calculations'
plot(x, ...)
Arguments:

x: interval_calculations object, as produced by interval
...: arguments to be passed to methods
Value:

object of class ggplot that contains the graphic

Author: Trent Henderson
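No example accompanies this method either, so here is a hedged sketch building on the interval example above:

# Hedged sketch (assumes `classifiers` from the interval() example exists)
ints <- interval(classifiers, by_set = FALSE, type = "sd", interval = 1)
plot(ints)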
Project a feature matrix into a two-dimensional representation using PCA, MDS, t-SNE, or UMAP, ready for plotting
Usage:

project(data, norm_method = c("zScore", "Sigmoid", "RobustSigmoid", "MinMax", "MaxAbs"),
        unit_int = FALSE, low_dim_method = c("PCA", "tSNE", "ClassicalMDS", "KruskalMDS", "SammonMDS", "UMAP"),
        na_removal = c("feature", "sample"), seed = 123, ...)

reduce_dims(data, norm_method = c("zScore", "Sigmoid", "RobustSigmoid", "MinMax", "MaxAbs"),
            unit_int = FALSE, low_dim_method = c("PCA", "tSNE", "ClassicalMDS", "KruskalMDS", "SammonMDS", "UMAP"),
            na_removal = c("feature", "sample"), seed = 123, ...)
Arguments:

data: feature_calculations object containing the raw feature matrix, as produced by theft::calculate_features
norm_method: character specifying the rescaling/normalisation method to apply before projection. Can be one of "zScore", "Sigmoid", "RobustSigmoid", "MinMax", or "MaxAbs". Defaults to "zScore"
unit_int: Boolean specifying whether to rescale values to the unit interval after applying norm_method. Defaults to FALSE
low_dim_method: character specifying the dimension reduction method to use. Can be one of "PCA", "tSNE", "ClassicalMDS", "KruskalMDS", "SammonMDS", or "UMAP". Defaults to "PCA"
na_removal: character specifying how to handle missing values prior to projection: "feature" removes features containing NAs, "sample" removes samples containing NAs. Defaults to "feature"
seed: integer to fix R's random number generator for reproducibility. Defaults to 123
...: additional arguments to be passed to the selected dimension reduction method
Value:

object of class feature_projection, which is a named list containing the feature_calculations data supplied to the function, the wide matrix of filtered data, a tidy data.frame of the projected 2-D data, and the model fit object

Author: Trent Henderson
Examples:

library(theft)

features <- theft::calculate_features(theft::simData, group_var = "process",
                                      feature_set = "catch22")

pca <- project(features, norm_method = "zScore", low_dim_method = "PCA")
Helper function to create a resampled dataset
Usage:

resample_data(data, train_rows, test_rows, train_groups, test_groups, seed)
Arguments:

data: data.frame of data to resample into train and test sets
train_rows: number of rows in the train set
test_rows: number of rows in the test set
train_groups: class label information for the train set
test_groups: class label information for the test set
seed: integer to fix R's random number generator for reproducibility
Value:

list containing new train and test data

Author: Trent Henderson
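This helper is internal, so rather than guessing its exact data structures, here is a minimal standalone sketch of the idea it implements: drawing a reproducible, group-stratified train/test split from a data frame. All names below are illustrative, not theftdlc internals:

# Minimal standalone sketch of a seeded, group-stratified train/test resample.
# `df`, `group`, and `train_size` are illustrative names only.
resample_sketch <- function(df, group, train_size = 0.75, seed = 123) {
  set.seed(seed)
  # Sample train indices within each group so class proportions are preserved
  train_idx <- unlist(lapply(split(seq_len(nrow(df)), df[[group]]), function(idx) {
    sample(idx, size = floor(length(idx) * train_size))
  }))
  list(train = df[train_idx, , drop = FALSE],
       test  = df[-train_idx, , drop = FALSE])
}

splits <- resample_sketch(iris, group = "Species", train_size = 0.75, seed = 1)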
Calculate z-score for all columns in a dataset using train set central tendency and spread
Usage:

rescale_zscore(data, rescalers)
Arguments:

data: data.frame of data to rescale
rescalers: list of train-set central tendency and spread values, as returned by get_rescale_vals
Value:

data.frame of rescaled data

Author: Trent Henderson
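get_rescale_vals and rescale_zscore are internal, so purely as an illustration of the technique they describe (compute centre and spread on the train set, then apply the same values to rescale any other split), here is a standalone base-R sketch with made-up names:

# Standalone sketch of train-set-based z-scoring (names are illustrative only)
get_rescale_vals_sketch <- function(train) {
  num_cols <- vapply(train, is.numeric, logical(1))
  list(centre = vapply(train[num_cols], mean, numeric(1), na.rm = TRUE),
       spread = vapply(train[num_cols], sd,   numeric(1), na.rm = TRUE))
}

rescale_zscore_sketch <- function(data, rescalers) {
  # Apply the train-set centre and spread to every matching numeric column
  for (col in names(rescalers$centre)) {
    data[[col]] <- (data[[col]] - rescalers$centre[[col]]) / rescalers$spread[[col]]
  }
  data
}

vals <- get_rescale_vals_sketch(iris[1:100, ])
iris_test_z <- rescale_zscore_sketch(iris[101:150, ], vals)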
Helper function to select only the relevant columns for statistical testing
Usage:

select_stat_cols(data, by_set, metric, hypothesis)
Arguments:

data: data.frame of classification results
by_set: Boolean specifying whether the testing is at the feature-set level (TRUE) or the individual-feature level (FALSE)
metric: character specifying the classification performance metric column to retain
hypothesis: character specifying whether testing is against an empirical null ("null") or pairwise ("pairwise")
Value:

object of class data.frame

Author: Trent Henderson
Calculate p-values for feature sets or individual features, relative to an empirical null or to each other, using resampled t-tests
Usage:

stat_test(data, iter_data, row_id, by_set = FALSE, hypothesis, metric, train_test_sizes, n_resamples)
Arguments:

data: data.frame of classification results
iter_data: data.frame of resample iteration information
row_id: row identifier of iter_data to operate on
by_set: Boolean specifying whether the test is between feature sets (TRUE) or individual features (FALSE). Defaults to FALSE
hypothesis: character specifying whether to test against an empirical null ("null") or pairwise against each other ("pairwise")
metric: character specifying the classification performance metric to test on
train_test_sizes: named vector of train and test set sizes
n_resamples: integer number of resamples used in classification
Value:

object of class data.frame

Author: Trent Henderson
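This is an internal routine, so purely as an illustration of the kind of test the description names (a t-test over per-resample performance values), here is a minimal sketch with made-up numbers; theftdlc's actual implementation may differ, for example by correcting for the correlation between overlapping resamples:

# Illustrative only: paired t-test over per-resample accuracies for two features
# (made-up accuracy values, not package output)
acc_feature_a <- c(0.81, 0.79, 0.83, 0.80, 0.82)
acc_feature_b <- c(0.74, 0.76, 0.73, 0.75, 0.77)
t.test(acc_feature_a, acc_feature_b, paired = TRUE)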