| Title: | Analyse and Interpret Time Series Features |
|---|---|
| Description: | Provides a suite of functions for analysing, interpreting, and visualising time-series features calculated from different feature sets from the 'theft' package. Implements statistical learning methodologies described in Henderson, T., Bryant, A., and Fulcher, B. (2023) <doi:10.48550/arXiv.2303.17809>. |
| Authors: | Trent Henderson [cre, aut] |
| Maintainer: | Trent Henderson <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 0.2.2 |
| Built: | 2026-06-09 11:34:30 UTC |
| Source: | https://github.com/hendersontrent/theftdlc |
Fit classifiers using time-series features using a resample-based approach and get a fast understanding of performance
classify(data, tt_labels = NULL, classifier = NULL, train_size = 0.75, n_resamples = 30, by_set = TRUE, use_null = FALSE, filter_duplicates = FALSE, add_all_features = FALSE, n_workers = 1, seed = 123)
tsfeature_classifier(data, tt_labels = NULL, classifier = NULL, train_size = 0.75, n_resamples = 30, by_set = TRUE, use_null = FALSE, filter_duplicates = FALSE, add_all_features = FALSE, n_workers = 1, seed = 123)
data |
|
tt_labels |
|
classifier |
|
train_size |
|
n_resamples |
|
by_set |
|
use_null |
|
filter_duplicates |
|
add_all_features |
|
n_workers |
|
seed |
|
list containing a named vector of train-test set sizes, and a data.frame of classification performance results
Trent Henderson
library(theft)
features <- theft::calculate_features(theft::simData, feature_set = "catch22")
classifiers <- classify(features, by_set = FALSE, n_resamples = 3)
Perform cluster analysis of time series using their feature vectors
cluster(data, norm_method = c("zScore", "Sigmoid", "RobustSigmoid", "MinMax", "MaxAbs"), unit_int = FALSE, clust_method = c("kmeans", "hclust", "mclust"), k = 2, features = NULL, na_removal = c("feature", "sample"), seed = 123, ...)
data |
|
norm_method |
|
unit_int |
|
clust_method |
|
k |
|
features |
|
na_removal |
|
seed |
|
... |
arguments to be passed to stats::kmeans, stats::hclust, or mclust::Mclust, depending on the selection in clust_method |
object of class feature_cluster containing the clustering algorithm and a tidy version of clusters joined to the input dataset ready for further analysis
Trent Henderson
library(theft)
features <- theft::calculate_features(theft::simData, feature_set = "catch22")
clusts <- cluster(features, k = 6)
Conduct statistical testing on time-series feature classification performance to identify top features or compare entire sets
compare_features(data, metric = c("accuracy", "precision", "recall", "f1"), by_set = TRUE, hypothesis = c("null", "pairwise"), p_adj = c("none", "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr"), n_workers = 1)
data |
|
metric |
|
by_set |
|
hypothesis |
|
p_adj |
|
n_workers |
|
data.frame containing the results
Trent Henderson
Henderson, T., Bryant, A. G., and Fulcher, B. D. Never a Dull Moment: Distributional Properties as a Baseline for Time-Series Classification. 27th Pacific-Asia Conference on Knowledge Discovery and Data Mining, (2023).
library(theft)
features <- theft::calculate_features(theft::simData, feature_set = NULL, features = list("mean" = mean, "sd" = sd))
classifiers <- classify(features, by_set = FALSE, n_resamples = 3)
compare_features(classifiers, by_set = FALSE, hypothesis = "pairwise")
Remove duplicate features that exist in multiple feature sets and retain a reproducible random selection of one of them
filter_duplicates(data, preference = NULL, seed = 123)
data |
|
preference |
deprecated. Do not use |
seed |
|
feature_calculations object containing filtered feature data
Trent Henderson
Filter resample data sets according to good feature list
filter_good_features(data, x, good_features)
data |
|
x |
|
good_features |
|
list of filtered train and test data
Trent Henderson
Helper function to find features in both train and test set that are "good"
find_good_features(data, x)
data |
|
x |
|
character vector of "good" feature names
Trent Henderson
Fit classification model and compute key metrics
fit_models(data, iter_data, row_id, is_null_run = FALSE, classifier)
data |
|
iter_data |
|
row_id |
|
is_null_run |
|
classifier |
|
data.frame of classification results
Trent Henderson
Calculate central tendency and spread values for all numeric columns in a dataset
get_rescale_vals(data)
data |
|
list of central tendency and spread values
Trent Henderson
Calculate interval summaries with a measure of central tendency of classification results
interval(data, metric = c("accuracy", "precision", "recall", "f1"), by_set = TRUE, type = c("sd", "se", "quantile"), interval = NULL, model_type = c("main", "null"))
calculate_interval(data, metric = c("accuracy", "precision", "recall", "f1"), by_set = TRUE, type = c("sd", "se", "quantile"), interval = NULL, model_type = c("main", "null"))
data |
|
metric |
|
by_set |
|
type |
|
interval |
|
model_type |
|
interval_calculations object which is a data frame containing the results
Trent Henderson
library(theft)
features <- theft::calculate_features(theft::simData, feature_set = NULL, features = list("mean" = mean, "sd" = sd))
classifiers <- classify(features, by_set = FALSE, n_resamples = 3)
interval(classifiers, by_set = FALSE, type = "sd", interval = 1)
Helper function for converting to title case
make_title(x)
x |
|
character vector
Trent Henderson
Produce a plot for a feature_calculations object
## S3 method for class 'feature_calculations'
plot(x, type = c("matrix", "cor", "violin", "box", "quality"), norm_method = c("zScore", "Sigmoid", "RobustSigmoid", "MinMax", "MaxAbs"), unit_int = FALSE, clust_method = c("average", "ward.D", "ward.D2", "single", "complete", "mcquitty", "median", "centroid"), cor_method = c("pearson", "spearman"), feature_names = NULL, ...)
x |
|
type |
|
norm_method |
|
unit_int |
|
clust_method |
|
cor_method |
|
feature_names |
|
... |
Arguments to be passed to the underlying ggplot2 geom used for the selected plot type |
object of class ggplot that contains the graphic
Trent Henderson
Produce a plot for a feature_projection object
## S3 method for class 'feature_projection'
plot(x, show_covariance = TRUE, ...)
x |
|
show_covariance |
|
... |
Arguments to be passed to methods |
object of class ggplot that contains the graphic
Trent Henderson
Produce a plot for an interval_calculations object
## S3 method for class 'interval_calculations'
plot(x, ...)
x |
|
... |
Arguments to be passed to methods |
object of class ggplot that contains the graphic
Trent Henderson
Project a feature matrix into a two-dimensional representation using PCA, MDS, t-SNE, or UMAP ready for plotting
project(data, norm_method = c("zScore", "Sigmoid", "RobustSigmoid", "MinMax", "MaxAbs"), unit_int = FALSE, low_dim_method = c("PCA", "tSNE", "ClassicalMDS", "KruskalMDS", "SammonMDS", "UMAP"), na_removal = c("feature", "sample"), seed = 123, ...)
reduce_dims(data, norm_method = c("zScore", "Sigmoid", "RobustSigmoid", "MinMax", "MaxAbs"), unit_int = FALSE, low_dim_method = c("PCA", "tSNE", "ClassicalMDS", "KruskalMDS", "SammonMDS", "UMAP"), na_removal = c("feature", "sample"), seed = 123, ...)
data |
|
norm_method |
|
unit_int |
|
low_dim_method |
|
na_removal |
|
seed |
|
... |
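arguments to be passed to stats::prcomp, Rtsne::Rtsne, stats::cmdscale, MASS::isoMDS, MASS::sammon, or umap::umap, depending on the selection in low_dim_method |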
object of class feature_projection which is a named list containing the feature_calculations data supplied to the function, the wide matrix of filtered data, a tidy data.frame of the projected 2-D data, and the model fit object
Trent Henderson
library(theft)
features <- theft::calculate_features(theft::simData, feature_set = "catch22")
pca <- project(features, norm_method = "zScore", low_dim_method = "PCA")
Helper function to create a resampled dataset
resample_data(data, train_rows, test_rows, train_groups, test_groups, seed)
data |
|
train_rows |
|
test_rows |
|
train_groups |
|
test_groups |
|
seed |
|
list containing new train and test data
Trent Henderson
Calculate z-score for all columns in a dataset using train set central tendency and spread
rescale_zscore(data, rescalers)
data |
|
rescalers |
|
data.frame of rescaled data
Trent Henderson
Helper function to select only the relevant columns for statistical testing
select_stat_cols(data, by_set, metric, hypothesis)
data |
|
by_set |
|
metric |
|
hypothesis |
|
object of class data.frame
Trent Henderson
Use a cross validated penalized maximum likelihood generalized linear model to perform feature selection
shrink(data, threshold = c("one", "all"), plot = FALSE, ...)
data |
|
threshold |
|
plot |
|
... |
arguments to be passed to glmnet::cv.glmnet |
feature_calculations object containing a data frame of the reduced feature set
Trent Henderson
library(theft)
features <- theft::calculate_features(theft::simData, feature_set = "catch22")
best_features <- shrink(features)
Calculate p-values for feature sets or features relative to an empirical null or each other using resampled t-tests
stat_test(data, iter_data, row_id, by_set = FALSE, hypothesis, metric, train_test_sizes, n_resamples)
data |
|
iter_data |
|
row_id |
|
by_set |
|
hypothesis |
|
metric |
|
train_test_sizes |
|
n_resamples |
|
object of class data.frame
Trent Henderson