Script 4: functional_data_analysis.m
% This script is used to conduct functional data analysis (as an extension
% of the statistical analysis conducted using the
% 'statistical_analysis_segmented_clustering.m' file) on each of the patent
% indicators and patent indicator sets extracted for each technology using
% the 'extract_patent_metrics.m' script, which have subsequently been
% compiled in the 'Extracted patent indicators.xlsx' spreadsheet.
clearvars
clear all
close all
tic()
% Load the results and variables used in the already completed statistical
% analysis:
load('statistical_analysis.mat');
% Specify the Technology Life Cycle stage to consider during this
% functional data analysis process:
% 1 = all TLC stages
% 2 = emergence
% 3 = growth
% 4 = maturity
% 5 = decline
stage = 2;
% Specify patent indicator subset to use for model building purposes from
% list of top performing indicator subsets identified from 'leave-half-out'
% and/or 'leave-one-out' cross-validation analysis:
model_indicator_subset = [4,6];
% model_indicator_subset = [6,7];
% model_indicator_subset = [3,9];
% Option to use aligned or unaligned time signals (1 = aligned, 0 =
% unaligned)
signal_alignment = 0;
% Specify order of B-splines to use in basis functions:
bspline_order = 6;
% Preallocate empty cell array for storing names of dimension labels:
technology_fdnames = cell(1,3);
technology_labels = cell(1,2);
patent_indicator_labels = cell(1,2);
% Assign label names to the problem dimensions considered (see section
% 4.1.2 of 'Functional Data Analysis with R and MATLAB.pdf'):
technology_labels{1} = 'Technology';
technology_labels{2} = technology.(TLC_stages{stage});
patent_indicator_labels{1} = 'Patent indicator';
patent_indicator_labels{2} = patent_indicator_column_names;
technology_fdnames{1} = 'Time (years)';
technology_fdnames{2} = technology_labels;
technology_fdnames{3} = patent_indicator_labels;
% Set figures generated to be invisible by default:
set(0,'DefaultFigureVisible','off')
% Determine screensize so that figures are scaled to the right size for the
% current monitor (scrsz == screen size vector [left, bottom, width,
% height])
scrsz = get(0,'ScreenSize');
% Preallocate empty double and cell arrays for storing medoid alignment
% data:
model_medoid_locations_idx.(TLC_stages{stage}) = zeros(num_clusters,numel(model_indicator_subset));
model_realigned_cluster_ID.(TLC_stages{stage}) = zeros(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
model_cluster_labels.(TLC_stages{stage}) = zeros(num_clusters,numel(model_indicator_subset));
medoid_technology_mapping.(TLC_stages{stage}) = zeros(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
model_warping_paths_A_B.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
model_warping_paths_B_A.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
medoid_aligned_signals.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
unaligned_signals.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
medoid_aligned_time_normalised_signals.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
unaligned_time_normalised_signals.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
% Preallocate empty cell arrays for storing functional data constructs:
functional_basis_object.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
functional_parameter_object.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_FDO.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_df.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_gcv.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_beta.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_SSE.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_penmat.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_y2cMap.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_WFD.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_FSTR.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
time_normalised_values.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
mean_technology_values.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
mean_technology_function.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_functional_basis_object.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_functional_parameter_object.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_FDO.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_df.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_gcv.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_beta.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_SSE.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_penmat.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_y2cMap.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_WFD.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_FSTR.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
%% Build functional models of the technology profiles mapped to each patent indicator included in subset selected for model building purposes
% Build functional models of the technology profiles mapped to each patent
% indicator included in subset selected for model building purposes:
for i = 1:numel(model_indicator_subset)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Extract medoid indices, realigned cluster IDs, and group labels
% corresponding to the current component of the selected model
% indicator subset:
model_medoid_locations_idx.(TLC_stages{stage})(:,i) = medoid_locations_idx_subset.(TLC_stages{stage})(:,model_component);
model_realigned_cluster_ID.(TLC_stages{stage})(:,i) = realigned_cluster_ID_subsets.(TLC_stages{stage})(:,model_component);
model_cluster_labels.(TLC_stages{stage})(:,i) = model_realigned_cluster_ID.(TLC_stages{stage})(model_medoid_locations_idx.(TLC_stages{stage})(:,i),i);
% Map medoid technology indices across to realigned cluster IDs:
medoid_technology_mapping.(TLC_stages{stage})(:,i) = model_realigned_cluster_ID.(TLC_stages{stage})(:,i);
for j = 1:num_clusters
medoid_technology_mapping.(TLC_stages{stage})(medoid_technology_mapping.(TLC_stages{stage})(:,i) == model_cluster_labels.(TLC_stages{stage})(j,i),i) = model_medoid_locations_idx.(TLC_stages{stage})(j,i);
end
% Generate figure for plotting aligned technology time series clusters:
if signal_alignment == 1
figure_name = ['Medoid aligned technology profiles for ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
else
figure_name = ['Technology profiles for ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
end
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Use medoid technology mapping to select the Dynamic Time Warping
% paths that align each technology to its respective cluster:
for j = 1:numel(technology.(TLC_stages{stage}))
% Select the current indicator set for analysis:
current_technology_data = technology_data_filtered.(TLC_stages{stage}).(technology.(TLC_stages{stage}){j});
% Select the Dynamic Time Warping path for the current technology
% and medoid alignment:
model_warping_paths_A_B.(TLC_stages{stage}){j,i} = current_technology_warping_path.(TLC_stages{stage}){j,medoid_technology_mapping.(TLC_stages{stage})(j,i),model_component};
model_warping_paths_B_A.(TLC_stages{stage}){j,i} = comparison_technology_warping_path.(TLC_stages{stage}){j,medoid_technology_mapping.(TLC_stages{stage})(j,i),model_component};
% Compile matrices of medoid aligned and unaligned signals:
medoid_aligned_signals.(TLC_stages{stage}){j,i} = [model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component)];
unaligned_signals.(TLC_stages{stage}){j,i} = [(1:size(current_technology_data,1))',current_technology_data(:,model_component)];
% Compile matrices of medoid aligned and unaligned time normalised
% signals:
medoid_aligned_time_normalised_signals.(TLC_stages{stage}){j,i} = [model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)/max(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component)];
unaligned_time_normalised_signals.(TLC_stages{stage}){j,i} = [(1:size(current_technology_data,1))'/size(current_technology_data,1),current_technology_data(:,model_component)];
% Plot the medoid aligned technologies for the current component of
% the selected modelling indicator subset:
if model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 1
% Plot the medoid aligned technology profiles without any
% further time normalisation:
if signal_alignment == 1
subplot(2,1,1), plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'r-');
else
subplot(2,1,1), plot(unaligned_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),'r-');
end
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (medoid aligned time)']);
else
title([figure_name,' (real-time)']);
end
xlabel('Time (years)','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Plot the medoid aligned and time normalised technology
% profiles:
if signal_alignment == 1
subplot(2,1,2), plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)/max(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'r-');
else
subplot(2,1,2), plot(unaligned_time_normalised_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_time_normalised_signals.(TLC_stages{stage}){j,i}(:,2),'r-');
end
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (normalised medoid aligned time)']);
else
title([figure_name,' (normalised time)']);
end
xlabel('Normalised time','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 2
% Plot the medoid aligned technology profiles without any
% further time normalisation:
if signal_alignment == 1
subplot(2,1,1), plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'b-');
else
subplot(2,1,1), plot(unaligned_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),'b-');
end
hold on
hold off
% Calculate mean values of resampled curves for the current model
% component:
mean_technology_values.(TLC_stages{stage}){i} = sum(time_normalised_values_matrix.(TLC_stages{stage})(:,:,i),2) / numel(technology.(TLC_stages{stage}));
mean_technology_function.(TLC_stages{stage}){i} = [tvec_normalised.(TLC_stages{stage})',mean_technology_values.(TLC_stages{stage}){i}];
% Generate figure for plotting time-normalised functional data objects:
figure_name = ['Time-normalised Functional Data Objects based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Plot the medoid aligned functional data objects for each technology
% profile without any further time normalisation:
for j = 1:numel(technology.(TLC_stages{stage}))
if model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 1
plot(time_normalised_values.(TLC_stages{stage}){j,i}(:,1),time_normalised_values.(TLC_stages{stage}){j,i}(:,2),'r-');
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 2
plot(time_normalised_values.(TLC_stages{stage}){j,i}(:,1),time_normalised_values.(TLC_stages{stage}){j,i}(:,2),'b-');
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 3
plot(time_normalised_values.(TLC_stages{stage}){j,i}(:,1),time_normalised_values.(TLC_stages{stage}){j,i}(:,2),'g-');
end
hold on
end
% Plot the mean value curve for the current model component:
plot(tvec_normalised.(TLC_stages{stage})',mean_technology_values.(TLC_stages{stage}){i},'co--');
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (normalised medoid aligned time)']);
else
title([figure_name,' (normalised time)']);
end
xlabel('Normalised time','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Scale normalised time values to maximum length of warped time paths
% Scale normalised time values to maximum length of warped time paths,
% as if based on unit intervals (this is just to avoid rounding errors
% during the creation of the functional basis system in the next step
% (see section 3.3.5 of 'Functional Data Analysis with R and
% MATLAB.pdf'), but will subsequently use normalised time values to map
% against the final functional basis systems):
tvec_normalised_scaled.(TLC_stages{stage}) = 0:1:(num_resample_points.(TLC_stages{stage}) - 1);
%% Create functional basis systems for use in benchmarking performance of functional regression analysis
% Construct the corresponding 'Constant Basis' function for use in any
% functional regression comparisons (see section 3.4.1 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
conbasis.(TLC_stages{stage}) = create_constant_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))]);
% Construct the corresponding 'Monomial Basis' function for use in any
% statistics benchmarking exercises (see section 3.4.2 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
monbasis.(TLC_stages{stage}) = create_monomial_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))],4);
figure_name = ['Constant basis system for functional regression analysis - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(conbasis.(TLC_stages{stage}));
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
figure_name = ['Monomial basis system for functional regression analysis - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(monbasis.(TLC_stages{stage}));
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Create b-spline basis system for the beta coefficient functions
% Create b-spline basis system for the beta coefficient functions. The
% time intervals used (defined in the function/help file as 'rangeval',
% and specified by a lower and upper time limit) are taken from the
% scaled normalised time periods above rather than normalised times
% (i.e. between 0 and 1), to ensure that the length of each spline
% subinterval is equal to 1, avoiding any rounding errors when using
% large numbers of spline basis functions (see section 3.3.5 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
beta_functional_basis_object.(TLC_stages{stage}) = create_bspline_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))],(max(tvec_normalised_scaled.(TLC_stages{stage})) - 2 + bspline_order),bspline_order);
% Create a Functional Parameter Object (FPO) that penalises the roughness
% of growth of acceleration of each beta coefficient function by using the
% second/third/fourth/fifth/sixth/seventh derivative in the roughness
% penalty (This is equivalent to creating 'betafdPar' in section 9.4.2 of
% 'Functional Data Analysis with R and MATLAB.pdf' - see page 137). The
% smoothing parameter, lambda, was initially set to 0.01 here based on the
% growth curves considered in section 5.2.4 of 'Functional Data Analysis
% with R and MATLAB.pdf', but has since been refined based on
% 'leave-one-out' cross validation scores for different lambda values (see
% below cross-validation scores for lambda section below). This enables
% smoothness to be imposed on estimated functional parameters (see section
% 5.2.4 of 'Functional Data Analysis with R and MATLAB.pdf'). Aim is to
% smooth the data using a very light smoothing procedure (using many basis
% functions and no or a very small penalty parameter) and use these
% smoothed curves for my analysis:
if signal_alignment == 1
beta_functional_parameter_object.(TLC_stages{stage}) = fdPar(beta_functional_basis_object.(TLC_stages{stage}),2,10^4.8);
else
beta_functional_parameter_object.(TLC_stages{stage}) = fdPar(beta_functional_basis_object.(TLC_stages{stage}),2,10^4.8);
end
% Plot the B-spline basis functions:
figure_name = ['Functional basis system for the beta coefficients used in functional regression analysis - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(beta_functional_basis_object.(TLC_stages{stage}));
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Preallocate empty cell array for storing beta basis systems:
beta_basis_systems.(TLC_stages{stage}) = cell(1,(numel(model_indicator_subset) + 1));
% Create beta coefficient basis system cell array from constant basis
% system and b-spline basis systems (this is equivalent to the 'betalist'
% variable created on page 134, section 9.4.1 of 'Functional Data Analysis
% with R and MATLAB.pdf'). Here the constant basis system accounts for
% 'alpha', the multiplier of the constant intercept covariate set up
% as the first element in 'covariates' below (see page 134):
for i = 1:(numel(model_indicator_subset) + 1)
if i == 1
beta_basis_systems.(TLC_stages{stage}){i} = conbasis.(TLC_stages{stage});
else
beta_basis_systems.(TLC_stages{stage}){i} = beta_functional_parameter_object.(TLC_stages{stage});
end
end
%% Build functional data object for each model component
% Generate sequence of log lambda values to evaluate smoothness fit:
log_lambda_patent_indicators.(TLC_stages{stage}) = -5:0.5:12;
number_lambda_values_patent_indicators.(TLC_stages{stage}) = length(log_lambda_patent_indicators.(TLC_stages{stage}));
% Preallocate empty double and cell arrays for storing degrees of freedom
% and generalised cross-validation coefficients (i.e. 'dfsave' and
% 'gcvsave' from section 5.3 of 'Functional Data Analysis with R and
% MATLAB.pdf'):
indicator_fit_df.(TLC_stages{stage}) = zeros(number_lambda_values_patent_indicators.(TLC_stages{stage}),numel(model_indicator_subset));
indicator_fit_gcv.(TLC_stages{stage}) = zeros(number_lambda_values_patent_indicators.(TLC_stages{stage}),numel(model_indicator_subset));
indicator_fit_functional_parameter_object.(TLC_stages{stage}) = cell(number_lambda_values_patent_indicators.(TLC_stages{stage}),numel(model_indicator_subset));
curve_fit_gcv.(TLC_stages{stage}) = cell(number_lambda_values_patent_indicators.(TLC_stages{stage}),numel(model_indicator_subset));
% Preallocate empty cell arrays for storing values assessing the fit of the
% functional data objects created:
patent_indicator_FDO_values.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_residuals.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_variance_across_technologies.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_variance_across_time.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_standard_deviations_line.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_standard_deviations_line_fit.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
% Use resampled, time normalised, medoid aligned technology profiles to
% build functional data object for each model component:
for i = 1:numel(model_indicator_subset)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Create b-spline basis system for the current model component (i.e.
% patent indicator), using the same time normalised functions as above:
patent_indicator_functional_basis_object.(TLC_stages{stage}){i} = create_bspline_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))],(max(tvec_normalised_scaled.(TLC_stages{stage})) - 2 + bspline_order),bspline_order);
% Conduct generalised cross-validation scoring of possible values of
% the smoothing parameter, lambda, to use in patent indicator
% functional parameter object for final functional regression analysis.
% This is equivalent to the example lambda generalised cross-validation
% scoring carried out in section 5.3 of 'Functional Data Analysis with
% R and MATLAB.pdf'
% Iterate through possible smoothing parameter (lambda) values for each
% patent indicator included for model building purposes and calculate
% degrees of freedom and generalised cross-validation criterion:
for j = 1:number_lambda_values_patent_indicators.(TLC_stages{stage})
% Select current lambda value:
lambda_patent_indicators.(TLC_stages{stage}) = 10^(log_lambda_patent_indicators.(TLC_stages{stage})(j));
% Create a Functional Parameter Object (FPO) that penalises the
% roughness of growth of acceleration by using the
% second/third/fourth/fifth/sixth/seventh derivative in the
% roughness penalty with the current smoothing parameter (lambda):
indicator_fit_functional_parameter_object.(TLC_stages{stage}){j,i} = fdPar(patent_indicator_functional_basis_object.(TLC_stages{stage}){i},2,lambda_patent_indicators.(TLC_stages{stage}));
[~,indicator_fit_df.(TLC_stages{stage})(j,i),curve_fit_gcv.(TLC_stages{stage}){j,i}] = smooth_basis(tvec_normalised_scaled.(TLC_stages{stage}),time_normalised_values_matrix.(TLC_stages{stage})(:,:,i),indicator_fit_functional_parameter_object.(TLC_stages{stage}){j,i},'method','qr');
% Sum generalised cross-validation coefficients together for all
% replicants included in the current patent indicator fit:
indicator_fit_gcv.(TLC_stages{stage})(j,i) = sum(curve_fit_gcv.(TLC_stages{stage}){j,i});
end
% Plot the degrees of freedom for possible smoothing parameters to use
% in creating a functional parameter object for the current patent
% indicator:
figure_name = ['Degrees of freedom for functional parameter object smoothing parameters to fit ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(log_lambda_patent_indicators.(TLC_stages{stage}),indicator_fit_df.(TLC_stages{stage})(:,i),'ro-');
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Log 10 of smoothing parameter, lambda','FontSize',12)
ylabel('Degrees of freedom','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Plot the generalised cross-validation scores for possible smoothing
% parameters to use in creating a functional parameter object for the
% current patent indicator:
figure_name = ['Generalised cross-validation scores for ',patent_indicator_column_names{model_component},' functional parameter object smoothing parameter - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(log_lambda_patent_indicators.(TLC_stages{stage}),indicator_fit_gcv.(TLC_stages{stage})(:,i),'ro-');
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Log 10 of smoothing parameter, lambda','FontSize',12)
ylabel('Generalised cross-validation criterion value','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Create functional parameter object and functional data object for
% current model component (i.e. patent indicator), using the same time
% normalised functions as above and smoothing parameter, lambda, based
% on the generalised cross-validation criterion values plotted above
% (NB: currently need to re-run script to adjust these values - see
% screenshots for analysis of curves and selection of lambda values
% used below). This enables smoothness to be imposed on estimated
% functional parameters (see section 5.2.4 of 'Functional Data Analysis
% with R and MATLAB.pdf'). Aim is to smooth the data using a very light
% smoothing procedure (using many basis functions and no or a very
% small penalty parameter) and use these smoothed curves for functional
% regression analysis:
if signal_alignment == 1
patent_indicator_functional_parameter_object.(TLC_stages{stage}){i} = fdPar(patent_indicator_functional_basis_object.(TLC_stages{stage}){i},2,100);
else
patent_indicator_functional_parameter_object.(TLC_stages{stage}){i} = fdPar(patent_indicator_functional_basis_object.(TLC_stages{stage}){i},2,100);
end
[patent_indicator_FDO.(TLC_stages{stage}){i},patent_indicator_df.(TLC_stages{stage}){i},patent_indicator_gcv.(TLC_stages{stage}){i},patent_indicator_beta.(TLC_stages{stage}){i},patent_indicator_SSE.(TLC_stages{stage}){i},patent_indicator_penmat.(TLC_stages{stage}){i},patent_indicator_y2cMap.(TLC_stages{stage}){i}] = smooth_basis(tvec_normalised_scaled.(TLC_stages{stage}),time_normalised_values_matrix.(TLC_stages{stage})(:,:,i),patent_indicator_functional_parameter_object.(TLC_stages{stage}){i},'fdnames',{technology_fdnames{1},technology_fdnames{2}{1},technology_fdnames{3}{1}},'method','qr');
% [patent_indicator_FDO.(TLC_stages{stage}){i},patent_indicator_WFD.(TLC_stages{stage}){i},patent_indicator_FSTR.(TLC_stages{stage}){i}] = smooth_pos(tvec_normalised_scaled.(TLC_stages{stage}),time_normalised_values_matrix.(TLC_stages{stage})(:,:,i),patent_indicator_functional_parameter_object.(TLC_stages{stage}){i});
% Plot the medoid and time normalised functional data object for the
% current patent indicator:
figure_name = ['Functional Data Object for all technology profiles based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(patent_indicator_FDO.(TLC_stages{stage}){i});
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (normalised medoid aligned time)']);
else
title([figure_name,' (normalised time)']);
end
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
%
ylabel(['Canonical weight for patent indicator ',num2str(model_indicator_subset(j))],'FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
else
cca.(TLC_stages{stage}){i,j} = cca.(TLC_stages{stage}){j,i};
end
end
end
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Create cell array of covariate functions for use in functional regression analysis
% Preallocate empty cell array for storing covariate functions:
covariates.(TLC_stages{stage}) = cell(1,(numel(model_indicator_subset) + 1));
% Generate constant 'covariate' vector of value 1 (to be first element in
% the overall covariate matrix):
constant_covariate.(TLC_stages{stage}) = ones(numel(technology.(TLC_stages{stage})),1);
% Create covariates matrix combining constant covariate with medoid and
% time normalised functional data objects generated for each patent
% indicator considered in the model (this is the equivalent of the
% 'templist' cell array created on page 134, section 9.4 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
for i = 1:(numel(model_indicator_subset) + 1)
if i == 1
covariates.(TLC_stages{stage}){i} = constant_covariate.(TLC_stages{stage});
else
covariates.(TLC_stages{stage}){i} = patent_indicator_FDO.(TLC_stages{stage}){i - 1};
end
end
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Generate plot of cross-validation scores for different values of lambda (the smoothing parameter) to use in beta basis system
%
% % This cross-validation scoring of possible values of the smoothing
% % parameter, lambda, to use in beta basis system for final functional
% % regression analysis is equivalent to the example lambda cross-validation
% % scoring carried out in section 9.4.3 of 'Functional Data Analysis with R
% % and MATLAB.pdf'
%
% % Generate sequence of log lambda values to evaluate:
% % log_lambda.(TLC_stages{stage}) = -5:1:10;
% log_lambda.(TLC_stages{stage}) = 3:0.1:8;
% % log_lambda.(TLC_stages{stage}) = 4.0;
% number_lambda_values.(TLC_stages{stage}) = length(log_lambda.(TLC_stages{stage}));
%
% % Preallocate empty double arrays for storing error sum of squares
% % cross-validation scores (i.e. SSE.CV from section 9.4.3 of 'Functional
% % Data Analysis with R and MATLAB.pdf'):
% SSE_lambda_cross_validation_score.(TLC_stages{stage}) = zeros(number_lambda_values.(TLC_stages{stage}),1);
%
% % Iterate through possible beta basis systems for each patent indicator
% % included for model building purposes with different smoothing parameter
% % (lambda) values and calculate cross-validation score:
% for j = 1:number_lambda_values.(TLC_stages{stage})
% % Select current lambda value:
% lambda.(TLC_stages{stage}) = 10^(log_lambda.(TLC_stages{stage})(j))
%
% % Reset beta basis system to match default beta basis system:
% current_beta_basis_systems = beta_basis_systems.(TLC_stages{stage});
%
% for i = 1:numel(model_indicator_subset)
% % Substitute current lambda value into beta basis system:
% current_beta_basis_systems{1,i+1} = putlambda(current_beta_basis_systems{1,i+1},lambda.(TLC_stages{stage}));
% end
%
% % Compute leave-one-out cross-validated error sum of squares (SSE) for
% % functional responses (see section 10.6.2 of 'Functional Data Analysis
% % with R and MATLAB.pdf'):
% current_SSE_lambda_cross_validation_score = fRegress_CV(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),current_beta_basis_systems);
%
% % Add the current error sum of squares cross-validation score (i.e.
% % SSE.CV from section 9.4.3 of 'Functional Data Analysis with R and
% % MATLAB.pdf') to the corresponding array:
% SSE_lambda_cross_validation_score.(TLC_stages{stage})(j) = current_SSE_lambda_cross_validation_score;
% end
%
% %% Plot the cross-validation scores for log of the smoothing parameters evaluated for use in the beta basis system for the current patent indicator
% for i = 1:numel(model_indicator_subset)
% % Select the current component of the selected model indicator subset:
% model_component = model_indicator_subset(i);
%
% % Plot the cross-validation scores for log of the smoothing parameters
% % evaluated for use in the beta basis system for the current patent
% % indicator:
% figure_name = ['Cross-validation scores for the ',patent_indicator_column_names{model_component},' beta basis system smoothing parameter - ',TLC_stages{stage}];
% figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
% plot(log_lambda.(TLC_stages{stage}),SSE_lambda_cross_validation_score.(TLC_stages{stage})(:),'ro-');
% hold on
%
% % Add title, X, and Y labels to subplot:
% title(figure_name);
% xlabel('Log 10 of smoothing parameter, lambda','FontSize',12)
% ylabel('Cross-validation score','FontSize',12)
%
% % Set plots to be invisible now, but visible when opened later:
% set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
%
% % Save figure:
% saveas(gcf,figure_name,'fig')
% hold off
% end
%% Run functional regression analysis using known cluster IDs, covariate functions, and beta basis system objects
% The functional linear model built in this section and the sections below
% are based on a set of scalar response values (Y), so are based on chapter
% 9 of 'Functional Data Analysis with R and MATLAB.pdf' (as opposed to
% chapter 10 which relates to 'Linear models for functional responses')
% Run functional regression analysis to identify functional (beta)
% coefficients (see page 137, section 9.4.2 of 'Functional Data Analysis
% with R and MATLAB.pdf') using identified smoothing parameters (see
% section 9.4.3 of 'Functional Data Analysis with R and MATLAB.pdf'):
functional_regression.(TLC_stages{stage}) = fRegress(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),beta_basis_systems.(TLC_stages{stage}));
% Extract the estimated beta functional coefficients, predicted Y values,
% and residuals from the functional regression analysis:
estimated_beta_coefficients.(TLC_stages{stage}) = functional_regression.(TLC_stages{stage}).betahat;
estimated_beta_coefficients_values.(TLC_stages{stage}) = cell(numel(estimated_beta_coefficients.(TLC_stages{stage})),1);
estimated_beta_functional_data_object.(TLC_stages{stage}) = cell(numel(estimated_beta_coefficients.(TLC_stages{stage})),1);
for i = 1:numel(estimated_beta_coefficients.(TLC_stages{stage}))
estimated_beta_coefficients_values.(TLC_stages{stage}){i} = getcoef(estimated_beta_coefficients.(TLC_stages{stage}){i});
estimated_beta_functional_data_object.(TLC_stages{stage}){i} = getfd(estimated_beta_coefficients.(TLC_stages{stage}){i});
end
predicted_values.(TLC_stages{stage}) = functional_regression.(TLC_stages{stage}).yhat;
residuals.(TLC_stages{stage}) = technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - predicted_values.(TLC_stages{stage});
% Determine the variance (SigmaE_squared = SigmaE. from page 141, section
% 9.4.4 of 'Functional Data Analysis with R and MATLAB.pdf') associated
% with the functional regression fit:
SigmaE_squared.(TLC_stages{stage}) = sum(residuals.(TLC_stages{stage}).^2) / (numel(technology.(TLC_stages{stage})) - functional_regression.(TLC_stages{stage}).df);
% Create the variance matrix (i.e. SigmaE from page 141, section 9.4.4 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
SigmaE.(TLC_stages{stage}) = SigmaE_squared.(TLC_stages{stage}) * diag(ones(numel(technology.(TLC_stages{stage})),1));
% Plot the estimate of the constant regression coefficient:
figure_name = ['Estimated regression coefficient for the constant functional basis system - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(estimated_beta_functional_data_object.(TLC_stages{stage}){1});
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Beta for the constant functional basis system','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Preallocate empty cell array for storing standard error estimates for
% regression coefficient functions estimated by FREGRESS:
standard_error_estimates.(TLC_stages{stage}) = cell((numel(estimated_beta_coefficients.(TLC_stages{stage})) - 1),1);
% Plot the estimate of the regression function for the medoid and time
% normalised functional data objects generated for each patent indicator
% considered in the model (see page 134, section 9.4.1 of 'Functional Data
% Analysis with R and MATLAB.pdf'):
for i = 1:(numel(estimated_beta_coefficients.(TLC_stages{stage})) - 1)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Select the corresponding matrix mapping the raw data vector 'y' to
% the coefficient vector 'c' of the basis function expansion of 'x'
% (see page 61, section 5.1 of 'Functional Data Analysis with R and
% MATLAB.pdf' for explanation of y2cMap):
current_y2cMap = patent_indicator_y2cMap.(TLC_stages{stage}){i};
% Compute the standard error estimates for regression coefficient
% functions estimated by FREGRESS:
standard_error_estimates.(TLC_stages{stage}){i+1} = fRegress_stderr(functional_regression.(TLC_stages{stage}),current_y2cMap,SigmaE.(TLC_stages{stage}));
% Extract the function curves which are plus and minus two times the
% standard error of the regression coefficient function, to obtain the
% approximate regression function (i.e. beta) confidence bounds:
beta_standard_error_estimates.(TLC_stages{stage}) = standard_error_estimates.(TLC_stages{stage}){i+1}.betastderr;
current_beta_standard_error_estimate = beta_standard_error_estimates.(TLC_stages{stage}){i+1};
% Plot the current regression coefficient along with 95% confidence
% bounds:
figure_name = ['Estimated regression coefficient for predicting technology cluster from ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(estimated_beta_functional_data_object.(TLC_stages{stage}){i+1});
line(estimated_beta_functional_data_object.(TLC_stages{stage}){i+1}+times(current_beta_standard_error_estimate,2));
line(estimated_beta_functional_data_object.(TLC_stages{stage}){i+1}-times(current_beta_standard_error_estimate,2));
% Resize y-axis to fit the confidence bounds curves:
ylim('auto');
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel(['Beta for ',patent_indicator_column_names{model_component}],'FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
% Calculate R-Squared, adjusted R-Squared, and F-ratio statistics to assess
% the fit of the regression model (where 'error_sum_of_squares_Y' (or
% SSE_Y) is the total sum of squares for Y, and
% 'error_sum_of_squares_residuals' (or SSE_RES) is the total sum of squares
% for residuals) - see sections 9.4.1 and 9.4.2 of 'Functional Data
% Analysis with R and MATLAB.pdf':
error_sum_of_squares_Y.(TLC_stages{stage}) = sum((technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - mean(technology_data_filtered.known_cluster_id.(TLC_stages{stage}))).^2);
error_sum_of_squares_residuals.(TLC_stages{stage}) = sum((residuals.(TLC_stages{stage})).^2);
R_squared.(TLC_stages{stage}) = (error_sum_of_squares_Y.(TLC_stages{stage}) - error_sum_of_squares_residuals.(TLC_stages{stage})) / error_sum_of_squares_Y.(TLC_stages{stage});
adjusted_R_squared.(TLC_stages{stage}) = 1 - ((1 - R_squared.(TLC_stages{stage})) * ((numel(technology.(TLC_stages{stage})) - 1) / (numel(technology.(TLC_stages{stage})) - numel(model_indicator_subset) - 1)));
degrees_freedom_1 = functional_regression.(TLC_stages{stage}).df - 1;
degrees_freedom_2 = numel(technology.(TLC_stages{stage})) - functional_regression.(TLC_stages{stage}).df;
F_ratio.(TLC_stages{stage}) = ((error_sum_of_squares_Y.(TLC_stages{stage}) - error_sum_of_squares_residuals.(TLC_stages{stage})) / degrees_freedom_1) / (error_sum_of_squares_residuals.(TLC_stages{stage}) / degrees_freedom_2);
%% Benchmark results of functional regression against regression functions obtained using a low-dimensional basis system for the beta coefficients
% Reset beta basis system to match default beta basis system:
low_dimensional_beta_basis_systems.(TLC_stages{stage}) = beta_basis_systems.(TLC_stages{stage});
% Create low-dimensional b-spline basis system for beta coefficients (see
% section 9.4.1. of 'Functional Data Analysis with R and MATLAB.pdf'):
low_dimensional_beta_functional_basis_object.(TLC_stages{stage}) = create_bspline_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))],5);
% Substitute low-dimensional basis system into beta basis system:
for i = 1:numel(model_indicator_subset)
low_dimensional_beta_basis_systems.(TLC_stages{stage}){1,i+1} = low_dimensional_beta_functional_basis_object.(TLC_stages{stage});
end
% Re-run functional regression analysis to identify functional (beta)
% coefficients (see page 134, section 9.4.1 of 'Functional Data Analysis
% with R and MATLAB.pdf') using low-dimensional basis systems for the beta
% coefficients:
low_dimensional_functional_regression.(TLC_stages{stage}) = fRegress(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),low_dimensional_beta_basis_systems.(TLC_stages{stage}));
% Extract the estimated beta coefficients, predicted Y values, and
% residuals when using a low-dimensional basis system for the beta
% coefficients:
low_dimensional_estimated_beta_coefficients.(TLC_stages{stage}) = low_dimensional_functional_regression.(TLC_stages{stage}).betahat;
low_dimensional_estimated_beta_coefficients_values.(TLC_stages{stage}) = cell(numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage})),1);
low_dimensional_estimated_beta_functional_data_object.(TLC_stages{stage}) = cell(numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage})),1);
for i = 1:numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage}))
low_dimensional_estimated_beta_coefficients_values.(TLC_stages{stage}){i} = getcoef(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage}){i});
low_dimensional_estimated_beta_functional_data_object.(TLC_stages{stage}){i} = getfd(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage}){i});
end
low_dimensional_predicted_values.(TLC_stages{stage}) = low_dimensional_functional_regression.(TLC_stages{stage}).yhat;
low_dimensional_residuals.(TLC_stages{stage}) = technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - low_dimensional_predicted_values.(TLC_stages{stage});
% Determine the variance (SigmaE_squared = SigmaE. from page 141, section
% 9.4.4 of 'Functional Data Analysis with R and MATLAB.pdf') associated
% with the low-dimensional functional regression fit:
low_dimensional_SigmaE_squared.(TLC_stages{stage}) = sum(low_dimensional_residuals.(TLC_stages{stage}).^2) / (numel(technology.(TLC_stages{stage})) - low_dimensional_functional_regression.(TLC_stages{stage}).df);
% Create the variance matrix (i.e. SigmaE from page 141, section 9.4.4 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
low_dimensional_SigmaE.(TLC_stages{stage}) = low_dimensional_SigmaE_squared.(TLC_stages{stage}) * diag(ones(numel(technology.(TLC_stages{stage})),1));
% Plot the low-dimensional estimate of the constant regression coefficient:
figure_name = ['Low-dimensional estimate of the regression coefficient for the constant functional basis system - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(low_dimensional_estimated_beta_functional_data_object.(TLC_stages{stage}){1});
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Beta for the constant functional basis system','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Preallocate empty cell array for storing standard error estimates for the
% low dimensional regression coefficient functions estimated by FREGRESS:
low_dimensional_standard_error_estimates.(TLC_stages{stage}) = cell((numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage})) - 1),1);
% Plot the estimate of the low-dimensional regression function for the
% medoid and time normalised functional data objects generated for each
% patent indicator considered in the model (see page 134, section 9.4.1 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
for i = 1:(numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage})) - 1)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Select the corresponding matrix mapping the raw data vector 'y' to
% the coefficient vector 'c' of the basis function expansion of 'x'
% (see page 61, section 5.1 of 'Functional Data Analysis with R and
% MATLAB.pdf' for explanation of y2cMap):
current_y2cMap = patent_indicator_y2cMap.(TLC_stages{stage}){i};
% Compute the standard error estimates for low-dimensional regression
%
ages{stage}))+((i-1)*fPCA_num_kept_components.(TLC_stages{stage}))) = array2table(fPCA_patent_indicator.(TLC_stages{stage}){i}.harmscr);
% Assemble string describing the components included for the
% current patent indicator (for use in building factor description
% for 'fitlm' - see below):
fPCA_components.(TLC_stages{stage}){i} = strjoin(fPCA_variable_names.(TLC_stages{stage})((1:fPCA_num_kept_components.(TLC_stages{stage}))+((i-1)*fPCA_num_kept_components.(TLC_stages{stage}))),' + ');
end
% Assign headings to table variables:
fPCA_table.(TLC_stages{stage}).Properties.VariableNames = fPCA_variable_names.(TLC_stages{stage});
% Append response variable Y as the last column in the table (taken as
% default in 'fitlm' to be the response variable):
fPCA_table.(TLC_stages{stage}).Responses = technology_data_filtered.known_cluster_id.(TLC_stages{stage});
% Construct string describing the factors to include in the linear
% regression model using 'Wilkinson Notation' (see Matlab reference page
% for fitlm for explanation of this notation):
fPCA_factor_equation.(TLC_stages{stage}) = ['Responses ~ (',strjoin(fPCA_components.(TLC_stages{stage}),') + ('),')'];
% Fit linear regression model based on the principal component scores
% stored in the predictor table (equivalent of 'pcamodel' on page 143,
% section 9.4.5 of 'Functional Data Analysis with R and MATLAB.pdf'):
fPCA_model.(TLC_stages{stage}) = fitlm(fPCA_table.(TLC_stages{stage}),fPCA_factor_equation.(TLC_stages{stage}));
% Extract functional principal component analysis coefficients for building
% linear regression model (equivalent of 'pcacoefs' on page 143, section
% 9.4.5 of 'Functional Data Analysis with R and MATLAB.pdf'):
fPCA_coefficients.(TLC_stages{stage}) = fPCA_model.(TLC_stages{stage}).Coefficients;
% Determine the variance of the coefficients from the fPCA coefficients
% (equivalent of 'coefvar' on page 143, section 9.4.5 of 'Functional Data
% Analysis with R and MATLAB.pdf'):
fPCA_coefficient_variance.(TLC_stages{stage}) = fPCA_coefficients.(TLC_stages{stage}){:,2}.^2;
% Create the corresponding functional data objects for the fPCA regression
% functions:
for i = 1:numel(model_indicator_subset)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Combine the fPCA coefficients with the fPCA harmonic functions for
% the current patent indicator (equivalent of 'betafd' on page 143,
% section 9.4.5 of 'Functional Data Analysis with R and MATLAB.pdf'):
for j = 1:(fPCA_num_kept_components.(TLC_stages{stage}))
fPCA_estimated_beta_functional_data_object_harmonics.(TLC_stages{stage}){i,j} = times(fPCA_coefficients.(TLC_stages{stage}){j+1+((i-1)*fPCA_num_kept_components.(TLC_stages{stage})),1},fPCA_harmonics.(TLC_stages{stage}){i}(j));
if j == 1
fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i} = fPCA_estimated_beta_functional_data_object_harmonics.(TLC_stages{stage}){i,j};
else
fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i} = fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i} + fPCA_estimated_beta_functional_data_object_harmonics.(TLC_stages{stage}){i,j};
end
end
% Combine the fPCA coefficient variances with the square of the fPCA
% harmonic functions for the current patent indicator (equivalent of
% 'betavar' on page 143, section 9.4.5 of 'Functional Data Analysis
% with R and MATLAB.pdf'):
for j = 1:(fPCA_num_kept_components.(TLC_stages{stage}))
fPCA_beta_variance_harmonics.(TLC_stages{stage}){i,j} = times(fPCA_coefficient_variance.(TLC_stages{stage})(j+1+((i-1)*fPCA_num_kept_components.(TLC_stages{stage}))),power(fPCA_harmonics.(TLC_stages{stage}){i}(j),2));
if j == 1
fPCA_beta_variance.(TLC_stages{stage}){i} = fPCA_beta_variance_harmonics.(TLC_stages{stage}){i,j};
else
fPCA_beta_variance.(TLC_stages{stage}){i} = fPCA_beta_variance.(TLC_stages{stage}){i} + fPCA_beta_variance_harmonics.(TLC_stages{stage}){i,j};
end
end
% Plot the current fPCA regression coefficient along with 95% confidence
% bounds:
figure_name = ['Estimated fPCA regression coefficient for predicting technology cluster from ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i});
line(fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i}+times(sqrt(fPCA_beta_variance.(TLC_stages{stage}){i}),2));
line(fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i}-times(sqrt(fPCA_beta_variance.(TLC_stages{stage}){i}),2));
% Resize y-axis to fit the confidence bounds curves:
ylim('auto');
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel(['Beta for ',patent_indicator_column_names{model_component}],'FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Set figures generated to be visible again by default:
set(0,'DefaultFigureVisible','on')
%% Apply permutation testing to the regression functions estimated by the high-dimensional functional regression model in order to evaluate the F-statistic
% NB: THIS SECTION OF THE SCRIPT TAKES A LONG TIME TO RUN, SO ONLY RUN IT
% WHEN NEEDED!
% Generate figure for visual displaying the null distribution versus the
% qth quantile and observed F-statistic:
figure_name = ['Permutation F-Test and null distribution for the high-dimensional functional regression model - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Count the proportion of permutation F values that are larger than the F
% statistic for the high-dimensional functional regression model (see help
% on 'Fperm_fd' for details, along with section 9.5 of 'Functional Data
% Analysis with R and MATLAB.pdf'):
F_permutation_test.(TLC_stages{stage}) = Fperm_fd(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),beta_basis_systems.(TLC_stages{stage}),[],1000,[],0.95,1);
% Add title and Y label to plot:
title(figure_name);
ylabel('Frequency of F values','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Apply permutation testing to the regression functions estimated by the low-dimensional functional regression model in order to evaluate the F-statistic
% NB: THIS SECTION OF THE SCRIPT TAKES A LONG TIME TO RUN, SO ONLY RUN IT
% WHEN NEEDED!
% Generate figure for visual displaying the null distribution versus the
% qth quantile and observed F-statistic:
figure_name = ['Permutation F-Test and null distribution for the low-dimensional functional regression model - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Count the proportion of permutation F values that are larger than the F
% statistic for the low-dimensional functional regression model (see help
% on 'Fperm_fd' for details, along with section 9.5 of 'Functional Data
% Analysis with R and MATLAB.pdf'):
low_dimensional_F_permutation_test.(TLC_stages{stage}) = Fperm_fd(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),low_dimensional_beta_basis_systems.(TLC_stages{stage}),[],1000,[],0.95,1);
% Add title and Y label to plot:
title(figure_name);
ylabel('Frequency of F values','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Apply permutation testing to the regression functions estimated by the constant basis system functional regression model in order to evaluate the F-statistic
% NB: THIS SECTION OF THE SCRIPT TAKES A LONG TIME TO RUN, SO ONLY RUN IT
% WHEN NEEDED!
% Generate figure for visual displaying the null distribution versus the
% qth quantile and observed F-statistic:
figure_name = ['Permutation F-Test and null distribution for the constant basis system functional regression model - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Count the proportion of permutation F values that are larger than the F
% statistic for the constant basis system functional regression model (see
% help on 'Fperm_fd' for details, along with section 9.5 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
conbasis_F_permutation_test.(TLC_stages{stage}) = Fperm_fd(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),conbasis_beta_basis_systems.(TLC_stages{stage}),[],1000,[],0.95,1);
% Add title and Y label to plot:
title(figure_name);
ylabel('Frequency of F values','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Apply permutation testing to the regression functions estimated by the monomial basis system functional regression model in order to evaluate the F-statistic
% NB: THIS SECTION OF THE SCRIPT TAKES A LONG TIME TO RUN, SO ONLY RUN IT
% WHEN NEEDED!
% Generate figure for visual displaying the null distribution versus the
% qth quantile and observed F-statistic:
figure_name = ['Permutation F-Test and null distribution for the monomial basis system functional regression model - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Count the proportion of permutation F values that are larger than the F
% statistic for the monomial basis system functional regression model (see
% help on 'Fperm_fd' for details, along with section 9.5 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
monbasis_F_permutation_test.(TLC_stages{stage}) = Fperm_fd(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),monbasis_beta_basis_systems.(TLC_stages{stage}),[],1000,[],0.95,1);
% Add title and Y label to plot:
title(figure_name);
ylabel('Frequency of F values','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc