Script 4: functional_data_analysis.m
% This script is used to conduct functional data analysis (as an extension
% of the statistical analysis conducted using the
% 'statistical_analysis_segmented_clustering.m' file) on each of the patent
% indicators and patent indicator sets extracted for each technology using
% the 'extract_patent_metrics.m' script, which have subsequently been
% compiled in the 'Extracted patent indicators.xlsx' spreadsheet.
clearvars
clear all
close all
tic()
% Load the results and variables used in the already completed statistical
% analysis:
load('statistical_analysis.mat');
% Specify the Technology Life Cycle stage to consider during this
% functional data analysis process:
% 1 = all TLC stages
% 2 = emergence
% 3 = growth
% 4 = maturity
% 5 = decline
stage = 2;
% Specify patent indicator subset to use for model building purposes from
% list of top performing indicator subsets identified from 'leave-half-out'
% and/or 'leave-one-out' cross-validation analysis:
model_indicator_subset = [4,6];
% model_indicator_subset = [6,7];
% model_indicator_subset = [3,9];
% Option to use aligned or unaligned time signals (1 = aligned, 0 =
% unaligned)
signal_alignment = 0;
% Specify order of B-splines to use in basis functions:
bspline_order = 6;
% Preallocate empty cell array for storing names of dimension labels:
technology_fdnames = cell(1,3);
technology_labels = cell(1,2);
patent_indicator_labels = cell(1,2);
% Assign label names to the problem dimensions considered (see section
% 4.1.2 of 'Functional Data Analysis with R and MATLAB.pdf'):
technology_labels{1} = 'Technology';
technology_labels{2} = technology.(TLC_stages{stage});
patent_indicator_labels{1} = 'Patent indicator';
patent_indicator_labels{2} = patent_indicator_column_names;
technology_fdnames{1} = 'Time (years)';
technology_fdnames{2} = technology_labels;
technology_fdnames{3} = patent_indicator_labels;
% Set figures generated to be invisible by default:
set(0,'DefaultFigureVisible','off')
% Determine screensize so that figures are scaled to the right size for the
% current monitor (scrsz == screen size vector [left, bottom, width,
% height])
scrsz = get(0,'ScreenSize');
% Preallocate empty double and cell arrays for storing medoid alignment
% data:
model_medoid_locations_idx.(TLC_stages{stage}) = zeros(num_clusters,numel(model_indicator_subset));
model_realigned_cluster_ID.(TLC_stages{stage}) = zeros(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
model_cluster_labels.(TLC_stages{stage}) = zeros(num_clusters,numel(model_indicator_subset));
medoid_technology_mapping.(TLC_stages{stage}) = zeros(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
model_warping_paths_A_B.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
model_warping_paths_B_A.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
medoid_aligned_signals.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
unaligned_signals.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
medoid_aligned_time_normalised_signals.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
unaligned_time_normalised_signals.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
% Preallocate empty cell arrays for storing functional data constructs:
functional_basis_object.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
functional_parameter_object.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_FDO.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_df.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_gcv.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_beta.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_SSE.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_penmat.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_y2cMap.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_WFD.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
technology_FSTR.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
time_normalised_values.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
mean_technology_values.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
mean_technology_function.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_functional_basis_object.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_functional_parameter_object.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_FDO.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_df.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_gcv.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_beta.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_SSE.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_penmat.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_y2cMap.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_WFD.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
patent_indicator_FSTR.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
%% Build functional models of the technology profiles mapped to each patent indicator included in subset selected for model building purposes
% Build functional models of the technology profiles mapped to each patent
% indicator included in subset selected for model building purposes:
for i = 1:numel(model_indicator_subset)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Extract medoid indices, realigned cluster IDs, and group labels
% corresponding to the current component of the selected model
% indicator subset:
model_medoid_locations_idx.(TLC_stages{stage})(:,i) = medoid_locations_idx_subset.(TLC_stages{stage})(:,model_component);
model_realigned_cluster_ID.(TLC_stages{stage})(:,i) = realigned_cluster_ID_subsets.(TLC_stages{stage})(:,model_component);
model_cluster_labels.(TLC_stages{stage})(:,i) = model_realigned_cluster_ID.(TLC_stages{stage})(model_medoid_locations_idx.(TLC_stages{stage})(:,i),i);
% Map medoid technology indices across to realigned cluster IDs:
medoid_technology_mapping.(TLC_stages{stage})(:,i) = model_realigned_cluster_ID.(TLC_stages{stage})(:,i);
for j = 1:num_clusters
medoid_technology_mapping.(TLC_stages{stage})(medoid_technology_mapping.(TLC_stages{stage})(:,i) == model_cluster_labels.(TLC_stages{stage})(j,i),i) = model_medoid_locations_idx.(TLC_stages{stage})(j,i);
end
% Generate figure for plotting aligned technology time series clusters:
if signal_alignment == 1
figure_name = ['Medoid aligned technology profiles for ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
else
figure_name = ['Technology profiles for ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
end
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Use medoid technology mapping to select the Dynamic Time Warping
% paths that align each technology to its respective cluster:
for j = 1:numel(technology.(TLC_stages{stage}))
% Select the current indicator set for analysis:
current_technology_data = technology_data_filtered.(TLC_stages{stage}).(technology.(TLC_stages{stage}){j});
% Select the Dynamic Time Warping path for the current technology
% and medoid alignment:
model_warping_paths_A_B.(TLC_stages{stage}){j,i} = current_technology_warping_path.(TLC_stages{stage}){j,medoid_technology_mapping.(TLC_stages{stage})(j,i),model_component};
model_warping_paths_B_A.(TLC_stages{stage}){j,i} = comparison_technology_warping_path.(TLC_stages{stage}){j,medoid_technology_mapping.(TLC_stages{stage})(j,i),model_component};
% Compile matrices of medoid aligned and unaligned signals:
medoid_aligned_signals.(TLC_stages{stage}){j,i} = [model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component)];
unaligned_signals.(TLC_stages{stage}){j,i} = [(1:size(current_technology_data,1))',current_technology_data(:,model_component)];
% Compile matrices of medoid aligned and unaligned time normalised
% signals:
medoid_aligned_time_normalised_signals.(TLC_stages{stage}){j,i} = [model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)/max(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component)];
unaligned_time_normalised_signals.(TLC_stages{stage}){j,i} = [(1:size(current_technology_data,1))'/size(current_technology_data,1),current_technology_data(:,model_component)];
% Plot the medoid aligned technologies for the current component of
% the selected modelling indicator subset:
if model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 1
% Plot the medoid aligned technology profiles without any
% further time normalisation:
if signal_alignment == 1
subplot(2,1,1), plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'r-');
else
subplot(2,1,1), plot(unaligned_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),'r-');
end
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (medoid aligned time)']);
else
title([figure_name,' (real-time)']);
end
xlabel('Time (years)','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Plot the medoid aligned and time normalised technology
% profiles:
if signal_alignment == 1
subplot(2,1,2), plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)/max(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'r-');
else
subplot(2,1,2), plot(unaligned_time_normalised_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_time_normalised_signals.(TLC_stages{stage}){j,i}(:,2),'r-');
end
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (normalised medoid aligned time)']);
else
title([figure_name,' (normalised time)']);
end
xlabel('Normalised time','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 2
% Plot the medoid aligned technology profiles without any
% further time normalisation:
if signal_alignment == 1
subplot(2,1,1), plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'b-');
else
subplot(2,1,1), plot(unaligned_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),'b-');
end
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (medoid aligned time)']);
else
title([figure_name,' (real-time)']);
end
xlabel('Time (years)','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Plot the medoid aligned and time normalised technology
% profiles:
if signal_alignment == 1
subplot(2,1,2), plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)/max(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'b-');
else
subplot(2,1,2), plot(unaligned_time_normalised_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_time_normalised_signals.(TLC_stages{stage}){j,i}(:,2),'b-');
end
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (normalised medoid aligned time)']);
else
title([figure_name,' (normalised time)']);
end
xlabel('Normalised time','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 3
% Plot the medoid aligned technology profiles without any
% further time normalisation:
if signal_alignment == 1
subplot(2,1,1), plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'g-');
else
subplot(2,1,1), plot(unaligned_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),'g-');
end
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (medoid aligned time)']);
else
title([figure_name,' (real-time)']);
end
xlabel('Time (years)','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Plot the medoid aligned and time normalised technology
% profiles:
if signal_alignment == 1
subplot(2,1,2), plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)/max(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'g-');
else
subplot(2,1,2), plot(unaligned_time_normalised_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_time_normalised_signals.(TLC_stages{stage}){j,i}(:,2),'g-');
end
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (normalised medoid aligned time)']);
else
title([figure_name,' (normalised time)']);
end
xlabel('Normalised time','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
end
end
% Plot the medoid time series for the current patent indicator group:
for j = 1:num_clusters
if signal_alignment == 1
subplot(2,1,1), plot(model_warping_paths_B_A.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i),i}(:),...
technology_data_filtered.(TLC_stages{stage}).(technology.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i)})(model_warping_paths_A_B.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i),i}(:),model_component),'co','MarkerSize',7,'LineWidth',1.5);
subplot(2,1,2), plot((1:numel(model_warping_paths_B_A.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i),i}(:)))/max(model_warping_paths_A_B.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i),i}(:)),...
technology_data_filtered.(TLC_stages{stage}).(technology.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i)})(model_warping_paths_A_B.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i),i}(:),model_component),'co','MarkerSize',7,'LineWidth',1.5);
else
subplot(2,1,1), plot(unaligned_signals.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i),i}(:,1),...
unaligned_signals.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i),i}(:,2),'co','MarkerSize',7,'LineWidth',1.5);
subplot(2,1,2), plot(unaligned_time_normalised_signals.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i),i}(:,1),...
unaligned_time_normalised_signals.(TLC_stages{stage}){model_medoid_locations_idx.(TLC_stages{stage})(j,i),i}(:,2),'co','MarkerSize',7,'LineWidth',1.5);
end
end
% Add legend to the technology time series clusters:
legend(strrep(technology.(TLC_stages{stage}),'_',' '),'FontSize',12,'Location','northwest');
% Set figures to be visible when opened later:
set(gcf,'CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Determine the max number of sampling points from the different
% technology profiles for the current model component:
if signal_alignment == 1
num_resample_points.(TLC_stages{stage}) = max(max(cellfun(@numel,model_warping_paths_A_B.(TLC_stages{stage}))));
else
num_resample_points.(TLC_stages{stage}) = max(max(cellfun(@numel,unaligned_signals.(TLC_stages{stage}))/2));
end
for j = 1:numel(technology.(TLC_stages{stage}))
% Select the current indicator set for analysis:
current_technology_data = technology_data_filtered.(TLC_stages{stage}).(technology.(TLC_stages{stage}){j});
% Create b-spline basis system of functional datasets. The time
% intervals used (defined in the function/help file as 'rangeval',
% and specified by a lower and upper time limit) are taken from
% real-world time periods rather than normalised times (i.e.
% between 0 and 1), to ensure that the length of each spline
% subinterval is equal to 1, avoiding any rounding errors when
% using large numbers of spline basis functions (see section 3.3.5
% of 'Functional Data Analysis with R and MATLAB.pdf'). As such,
% the upper time limit is taken as the total duration (in years) of
% the technology dataset being considered, which is potentially
% equal to the number of annual observations for the technology
% concerned (i.e. size(current_technology_data,1)) since patent
% records are provided on an annual basis anyway. This means that
% each spline subinterval will be equal to one year:
if signal_alignment == 1
tvec.(TLC_stages{stage}) = medoid_aligned_signals.(TLC_stages{stage}){j,i}(:,1);
else
tvec.(TLC_stages{stage}) = unaligned_signals.(TLC_stages{stage}){j,i}(:,1);
end
% functional_basis_object.(TLC_stages{stage}){j,i} = create_bspline_basis([1,max(tvec.(TLC_stages{stage}))],(max(tvec.(TLC_stages{stage})) - 2 + bspline_order),bspline_order,tvec.(TLC_stages{stage}));
% functional_basis_object.(TLC_stages{stage}){j,i} = create_bspline_basis([min(tvec.(TLC_stages{stage})),max(tvec.(TLC_stages{stage}))],(numel(tvec.(TLC_stages{stage})) - 2 + bspline_order),bspline_order,tvec.(TLC_stages{stage}));
functional_basis_object.(TLC_stages{stage}){j,i} = create_bspline_basis([min(tvec.(TLC_stages{stage})),max(tvec.(TLC_stages{stage}))],(max(tvec.(TLC_stages{stage})) - 2 + bspline_order),bspline_order);
% Create a Functional Parameter Object (FPO) that penalises the
% roughness of growth of acceleration by using the fourth
% derivative in the roughness penalty. The smoothing parameter,
% lambda, is set to 0.01 here based on the growth curves considered
% in section 5.2.4 of 'Functional Data Analysis with R and
% MATLAB.pdf'. This enables smoothness to be imposed on estimated
% functional parameters (see section 5.2.4 of 'Functional Data
% Analysis with R and MATLAB.pdf'). Aim is to smooth the data using
% a very light smoothing procedure (using many basis functions and
% no or a very small penalty parameter) and use these smoothed
% curves for my analysis:
functional_parameter_object.(TLC_stages{stage}){j,i} = fdPar(functional_basis_object.(TLC_stages{stage}){j,i},4,0.01);
% Plot the B-spline basis functions:
figure_name = ['Functional basis system for ',technology.(TLC_stages{stage}){j},' based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(functional_basis_object.(TLC_stages{stage}){j,i});
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% figure_name = ['Functional parameter object for ',technology.(TLC_stages{stage}){i},' based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
% figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
% hold on
% plot(functional_parameter_object.(TLC_stages{stage}){j,i});
%
% % Set plots to be invisible now, but visible when opened later:
% set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
%
% % Save figure:
% saveas(gcf,figure_name,'fig')
% hold off
% Preallocate empty cell arrays for storing matrices of basis
% function values, derivatives basis function values, and
% functional data objects:
basismatrix.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
Dbasismatrix.(TLC_stages{stage}) = cell(numel(technology.(TLC_stages{stage})),numel(model_indicator_subset));
% Set up functional data objects based on current technology. First
% set up matrices of basis function values and derivative basis
% function values for later use in plotting and as an input into
% regression analysis (see section 3.5 of 'Functional Data Analysis
% with R and MATLAB.pdf'):
basismatrix.(TLC_stages{stage}){j,i} = eval_basis(tvec.(TLC_stages{stage}),functional_basis_object.(TLC_stages{stage}){j,i});
Dbasismatrix.(TLC_stages{stage}){j,i} = eval_basis(tvec.(TLC_stages{stage}),functional_basis_object.(TLC_stages{stage}){j,i},1);
% % Plot the basis function values for the current technology:
% figure_name = ['Basis function values for ',technology.(TLC_stages{stage}){j},' based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
% figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
% hold on
% plot(basismatrix.(TLC_stages{stage}){j,i});
%
% % Add title, X, and Y labels to figure:
% title(figure_name);
% xlabel('Time (years)','FontSize',12)
% ylabel('Basis function value','FontSize',12)
%
% % Set plots to be invisible now, but visible when opened later:
% set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
%
% % Save figure:
% saveas(gcf,figure_name,'fig')
% hold off
%
% % Plot the derivative basis function values for the current
% % technology:
% figure_name = ['Derivative basis function values for ',technology.(TLC_stages{stage}){j},' based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
% figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
% hold on
% plot(Dbasismatrix.(TLC_stages{stage}){j,i});
%
% % Add title, X, and Y labels to figure:
% title(figure_name);
% xlabel('Time (years)','FontSize',12)
% ylabel('Derivative basis function value','FontSize',12)
%
% % Set plots to be invisible now, but visible when opened later:
% set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
%
% % Save figure:
% saveas(gcf,figure_name,'fig')
% hold off
if signal_alignment == 1
[technology_FDO.(TLC_stages{stage}){j,i},technology_df.(TLC_stages{stage}){j,i},technology_gcv.(TLC_stages{stage}){j,i},technology_beta.(TLC_stages{stage}){j,i},technology_SSE.(TLC_stages{stage}){j,i},technology_penmat.(TLC_stages{stage}){j,i},technology_y2cMap.(TLC_stages{stage}){j,i}] = smooth_basis(tvec.(TLC_stages{stage}),medoid_aligned_signals.(TLC_stages{stage}){j,i}(:,2),functional_parameter_object.(TLC_stages{stage}){j,i},'fdnames',{technology_fdnames{1},technology_fdnames{2}{1},technology_fdnames{3}{1}},'method','qr');
% [technology_FDO.(TLC_stages{stage}){j,i},technology_WFD.(TLC_stages{stage}){j,i},technology_FSTR.(TLC_stages{stage}){j,i}] = smooth_pos(tvec.(TLC_stages{stage}),medoid_aligned_signals.(TLC_stages{stage}){j,i}(:,2),functional_parameter_object.(TLC_stages{stage}){j,i});
else
[technology_FDO.(TLC_stages{stage}){j,i},technology_df.(TLC_stages{stage}){j,i},technology_gcv.(TLC_stages{stage}){j,i},technology_beta.(TLC_stages{stage}){j,i},technology_SSE.(TLC_stages{stage}){j,i},technology_penmat.(TLC_stages{stage}){j,i},technology_y2cMap.(TLC_stages{stage}){j,i}] = smooth_basis(tvec.(TLC_stages{stage}),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),functional_parameter_object.(TLC_stages{stage}){j,i},'fdnames',{technology_fdnames{1},technology_fdnames{2}{1},technology_fdnames{3}{1}},'method','qr');
% [technology_FDO.(TLC_stages{stage}){j,i},technology_WFD.(TLC_stages{stage}){j,i},technology_FSTR.(TLC_stages{stage}){j,i}] = smooth_pos(tvec.(TLC_stages{stage}),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),functional_parameter_object.(TLC_stages{stage}){j,i});
end
% Resample the current functional data object based on the maximum
% number of resampling points identified previously:
if signal_alignment == 1
tvec_resampled.(TLC_stages{stage}) = 1:(max(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:)) - 1)/(num_resample_points.(TLC_stages{stage}) - 1):max(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:));
else
tvec_resampled.(TLC_stages{stage}) = 1:(max(unaligned_signals.(TLC_stages{stage}){j,i}(:,1)) - 1)/(num_resample_points.(TLC_stages{stage}) - 1):max(unaligned_signals.(TLC_stages{stage}){j,i}(:,1));
end
% Extract values from the current functional data object based on
% the maximum number of sampling points recorded for any of the
% technologies considered to give normalised time values:
tvec_normalised.(TLC_stages{stage}) = 0:(1/(num_resample_points.(TLC_stages{stage}) - 1)):1;
time_normalised_values.(TLC_stages{stage}){j,i} = [tvec_normalised.(TLC_stages{stage})', eval_fd(tvec_resampled.(TLC_stages{stage}),technology_FDO.(TLC_stages{stage}){j,i})];
time_normalised_values_matrix.(TLC_stages{stage})(:,j,i) = time_normalised_values.(TLC_stages{stage}){j,i}(:,2);
% Generate figure for plotting current functional data object:
figure_name = ['Functional Data Object for ',technology.(TLC_stages{stage}){j},' based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Plot the current functional data object along with the original
% medoid aligned technology profile without any further time
% normalisation:
if signal_alignment == 1
if model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 1
plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'r-');
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 2
plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'b-');
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 3
plot(model_warping_paths_B_A.(TLC_stages{stage}){j,i}(:),current_technology_data(model_warping_paths_A_B.(TLC_stages{stage}){j,i}(:),model_component),'g-');
end
else
if model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 1
plot(unaligned_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),'r-');
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 2
plot(unaligned_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),'b-');
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 3
plot(unaligned_signals.(TLC_stages{stage}){j,i}(:,1),unaligned_signals.(TLC_stages{stage}){j,i}(:,2),'g-');
end
end
plot(technology_FDO.(TLC_stages{stage}){j,i})
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (medoid aligned time)']);
else
title([figure_name,' (real-time)']);
end
xlabel('Time (years)','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
% Generate figure for plotting functional data objects:
figure_name = ['Functional Data Objects based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Plot the medoid aligned functional data objects for each technology
% profile without any further time normalisation:
for j = 1:numel(technology.(TLC_stages{stage}))
plot(technology_FDO.(TLC_stages{stage}){j,i});
hold on
end
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (medoid aligned time)']);
else
title([figure_name,' (real-time)']);
end
xlabel('Time (years)','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Set the x-axis limits to match the largest aligned time-scale used:
if signal_alignment == 1
max_aligned_time_value.(TLC_stages{stage}) = max(cellfun(@max,model_warping_paths_B_A.(TLC_stages{stage})(:,i)));
else
max_aligned_time_value.(TLC_stages{stage}) = max(max(cellfun(@numel,unaligned_signals.(TLC_stages{stage})(:,i))/2));
end
xlim([0 max_aligned_time_value.(TLC_stages{stage})]);
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Calculate mean values of resampled curves for the current model
% component:
mean_technology_values.(TLC_stages{stage}){i} = sum(time_normalised_values_matrix.(TLC_stages{stage})(:,:,i),2) / numel(technology.(TLC_stages{stage}));
mean_technology_function.(TLC_stages{stage}){i} = [tvec_normalised.(TLC_stages{stage})',mean_technology_values.(TLC_stages{stage}){i}];
% Generate figure for plotting time-normalised functional data objects:
figure_name = ['Time-normalised Functional Data Objects based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Plot the medoid aligned functional data objects for each technology
% profile without any further time normalisation:
for j = 1:numel(technology.(TLC_stages{stage}))
if model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 1
plot(time_normalised_values.(TLC_stages{stage}){j,i}(:,1),time_normalised_values.(TLC_stages{stage}){j,i}(:,2),'r-');
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 2
plot(time_normalised_values.(TLC_stages{stage}){j,i}(:,1),time_normalised_values.(TLC_stages{stage}){j,i}(:,2),'b-');
elseif model_realigned_cluster_ID.(TLC_stages{stage})(j,i) == 3
plot(time_normalised_values.(TLC_stages{stage}){j,i}(:,1),time_normalised_values.(TLC_stages{stage}){j,i}(:,2),'g-');
end
hold on
end
% Plot the mean value curve for the current model component:
plot(tvec_normalised.(TLC_stages{stage})',mean_technology_values.(TLC_stages{stage}){i},'co--');
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (normalised medoid aligned time)']);
else
title([figure_name,' (normalised time)']);
end
xlabel('Normalised time','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Scale normalised time values to maximum length of warped time paths
% Scale normalised time values to maximum length of warped time paths,
% as if based on unit intervals (this is just to avoid rounding errors
% during the creation of the functional basis system in the next step
% (see section 3.3.5 of 'Functional Data Analysis with R and
% MATLAB.pdf'), but will subsequently use normalised time values to map
% against the final functional basis systems):
tvec_normalised_scaled.(TLC_stages{stage}) = 0:1:(num_resample_points.(TLC_stages{stage}) - 1);
%% Create functional basis systems for use in benchmarking performance of functional regression analysis
% Construct the corresponding 'Constant Basis' function for use in any
% functional regression comparisons (see section 3.4.1 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
conbasis.(TLC_stages{stage}) = create_constant_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))]);
% Construct the corresponding 'Monomial Basis' function for use in any
% statistics benchmarking exercises (see section 3.4.2 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
monbasis.(TLC_stages{stage}) = create_monomial_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))],4);
figure_name = ['Constant basis system for functional regression analysis - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(conbasis.(TLC_stages{stage}));
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
figure_name = ['Monomial basis system for functional regression analysis - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(monbasis.(TLC_stages{stage}));
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Create b-spline basis system for the beta coefficient functions
% Create b-spline basis system for the beta coefficient functions. The
% time intervals used (defined in the function/help file as 'rangeval',
% and specified by a lower and upper time limit) are taken from the
% scaled normalised time periods above rather than normalised times
% (i.e. between 0 and 1), to ensure that the length of each spline
% subinterval is equal to 1, avoiding any rounding errors when using
% large numbers of spline basis functions (see section 3.3.5 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
beta_functional_basis_object.(TLC_stages{stage}) = create_bspline_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))],(max(tvec_normalised_scaled.(TLC_stages{stage})) - 2 + bspline_order),bspline_order);
% Create a Functional Parameter Object (FPO) that penalises the roughness
% of growth of acceleration of each beta coefficient function by using the
% second/third/fourth/fifth/sixth/seventh derivative in the roughness
% penalty (This is equivalent to creating 'betafdPar' in section 9.4.2 of
% 'Functional Data Analysis with R and MATLAB.pdf' - see page 137). The
% smoothing parameter, lambda, was initially set to 0.01 here based on the
% growth curves considered in section 5.2.4 of 'Functional Data Analysis
% with R and MATLAB.pdf', but has since been refined based on
% 'leave-one-out' cross validation scores for different lambda values (see
% below cross-validation scores for lambda section below). This enables
% smoothness to be imposed on estimated functional parameters (see section
% 5.2.4 of 'Functional Data Analysis with R and MATLAB.pdf'). Aim is to
% smooth the data using a very light smoothing procedure (using many basis
% functions and no or a very small penalty parameter) and use these
% smoothed curves for my analysis:
if signal_alignment == 1
beta_functional_parameter_object.(TLC_stages{stage}) = fdPar(beta_functional_basis_object.(TLC_stages{stage}),2,10^4.8);
else
beta_functional_parameter_object.(TLC_stages{stage}) = fdPar(beta_functional_basis_object.(TLC_stages{stage}),2,10^4.8);
end
% Plot the B-spline basis functions:
figure_name = ['Functional basis system for the beta coefficients used in functional regression analysis - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(beta_functional_basis_object.(TLC_stages{stage}));
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Preallocate empty cell array for storing beta basis systems:
beta_basis_systems.(TLC_stages{stage}) = cell(1,(numel(model_indicator_subset) + 1));
% Create beta coefficient basis system cell array from constant basis
% system and b-spline basis systems (this is equivalent to the 'betalist'
% variable created on page 134, section 9.4.1 of 'Functional Data Analysis
% with R and MATLAB.pdf'). Here the constant basis system accounts for
% 'alpha', the multiplier of the constant intercept covariate set up
% as the first element in 'covariates' below (see page 134):
for i = 1:(numel(model_indicator_subset) + 1)
if i == 1
beta_basis_systems.(TLC_stages{stage}){i} = conbasis.(TLC_stages{stage});
else
beta_basis_systems.(TLC_stages{stage}){i} = beta_functional_parameter_object.(TLC_stages{stage});
end
end
%% Build functional data object for each model component
% Generate sequence of log lambda values to evaluate smoothness fit:
log_lambda_patent_indicators.(TLC_stages{stage}) = -5:0.5:12;
number_lambda_values_patent_indicators.(TLC_stages{stage}) = length(log_lambda_patent_indicators.(TLC_stages{stage}));
% Preallocate empty double and cell arrays for storing degrees of freedom
% and generalised cross-validation coefficients (i.e. 'dfsave' and
% 'gcvsave' from section 5.3 of 'Functional Data Analysis with R and
% MATLAB.pdf'):
indicator_fit_df.(TLC_stages{stage}) = zeros(number_lambda_values_patent_indicators.(TLC_stages{stage}),numel(model_indicator_subset));
indicator_fit_gcv.(TLC_stages{stage}) = zeros(number_lambda_values_patent_indicators.(TLC_stages{stage}),numel(model_indicator_subset));
indicator_fit_functional_parameter_object.(TLC_stages{stage}) = cell(number_lambda_values_patent_indicators.(TLC_stages{stage}),numel(model_indicator_subset));
curve_fit_gcv.(TLC_stages{stage}) = cell(number_lambda_values_patent_indicators.(TLC_stages{stage}),numel(model_indicator_subset));
% Preallocate empty cell arrays for storing values assessing the fit of the
% functional data objects created:
patent_indicator_FDO_values.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_residuals.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_variance_across_technologies.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_variance_across_time.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_standard_deviations_line.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_standard_deviations_line_fit.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
% Use resampled, time normalised, medoid aligned technology profiles to
% build functional data object for each model component:
for i = 1:numel(model_indicator_subset)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Create b-spline basis system for the current model component (i.e.
% patent indicator), using the same time normalised functions as above:
patent_indicator_functional_basis_object.(TLC_stages{stage}){i} = create_bspline_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))],(max(tvec_normalised_scaled.(TLC_stages{stage})) - 2 + bspline_order),bspline_order);
% Conduct generalised cross-validation scoring of possible values of
% the smoothing parameter, lambda, to use in patent indicator
% functional parameter object for final functional regression analysis.
% This is equivalent to the example lambda generalised cross-validation
% scoring carried out in section 5.3 of 'Functional Data Analysis with
% R and MATLAB.pdf'
% Iterate through possible smoothing parameter (lambda) values for each
% patent indicator included for model building purposes and calculate
% degrees of freedom and generalised cross-validation criterion:
for j = 1:number_lambda_values_patent_indicators.(TLC_stages{stage})
% Select current lambda value:
lambda_patent_indicators.(TLC_stages{stage}) = 10^(log_lambda_patent_indicators.(TLC_stages{stage})(j));
% Create a Functional Parameter Object (FPO) that penalises the
% roughness of growth of acceleration by using the
% second/third/fourth/fifth/sixth/seventh derivative in the
% roughness penalty with the current smoothing parameter (lambda):
indicator_fit_functional_parameter_object.(TLC_stages{stage}){j,i} = fdPar(patent_indicator_functional_basis_object.(TLC_stages{stage}){i},2,lambda_patent_indicators.(TLC_stages{stage}));
[~,indicator_fit_df.(TLC_stages{stage})(j,i),curve_fit_gcv.(TLC_stages{stage}){j,i}] = smooth_basis(tvec_normalised_scaled.(TLC_stages{stage}),time_normalised_values_matrix.(TLC_stages{stage})(:,:,i),indicator_fit_functional_parameter_object.(TLC_stages{stage}){j,i},'method','qr');
% Sum generalised cross-validation coefficients together for all
% replicants included in the current patent indicator fit:
indicator_fit_gcv.(TLC_stages{stage})(j,i) = sum(curve_fit_gcv.(TLC_stages{stage}){j,i});
end
% Plot the degrees of freedom for possible smoothing parameters to use
% in creating a functional parameter object for the current patent
% indicator:
figure_name = ['Degrees of freedom for functional parameter object smoothing parameters to fit ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(log_lambda_patent_indicators.(TLC_stages{stage}),indicator_fit_df.(TLC_stages{stage})(:,i),'ro-');
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Log 10 of smoothing parameter, lambda','FontSize',12)
ylabel('Degrees of freedom','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Plot the generalised cross-validation scores for possible smoothing
% parameters to use in creating a functional parameter object for the
% current patent indicator:
figure_name = ['Generalised cross-validation scores for ',patent_indicator_column_names{model_component},' functional parameter object smoothing parameter - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(log_lambda_patent_indicators.(TLC_stages{stage}),indicator_fit_gcv.(TLC_stages{stage})(:,i),'ro-');
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Log 10 of smoothing parameter, lambda','FontSize',12)
ylabel('Generalised cross-validation criterion value','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Create functional parameter object and functional data object for
% current model component (i.e. patent indicator), using the same time
% normalised functions as above and smoothing parameter, lambda, based
% on the generalised cross-validation criterion values plotted above
% (NB: currently need to re-run script to adjust these values - see
% screenshots for analysis of curves and selection of lambda values
% used below). This enables smoothness to be imposed on estimated
% functional parameters (see section 5.2.4 of 'Functional Data Analysis
% with R and MATLAB.pdf'). Aim is to smooth the data using a very light
% smoothing procedure (using many basis functions and no or a very
% small penalty parameter) and use these smoothed curves for functional
% regression analysis:
if signal_alignment == 1
patent_indicator_functional_parameter_object.(TLC_stages{stage}){i} = fdPar(patent_indicator_functional_basis_object.(TLC_stages{stage}){i},2,100);
else
patent_indicator_functional_parameter_object.(TLC_stages{stage}){i} = fdPar(patent_indicator_functional_basis_object.(TLC_stages{stage}){i},2,100);
end
[patent_indicator_FDO.(TLC_stages{stage}){i},patent_indicator_df.(TLC_stages{stage}){i},patent_indicator_gcv.(TLC_stages{stage}){i},patent_indicator_beta.(TLC_stages{stage}){i},patent_indicator_SSE.(TLC_stages{stage}){i},patent_indicator_penmat.(TLC_stages{stage}){i},patent_indicator_y2cMap.(TLC_stages{stage}){i}] = smooth_basis(tvec_normalised_scaled.(TLC_stages{stage}),time_normalised_values_matrix.(TLC_stages{stage})(:,:,i),patent_indicator_functional_parameter_object.(TLC_stages{stage}){i},'fdnames',{technology_fdnames{1},technology_fdnames{2}{1},technology_fdnames{3}{1}},'method','qr');
% [patent_indicator_FDO.(TLC_stages{stage}){i},patent_indicator_WFD.(TLC_stages{stage}){i},patent_indicator_FSTR.(TLC_stages{stage}){i}] = smooth_pos(tvec_normalised_scaled.(TLC_stages{stage}),time_normalised_values_matrix.(TLC_stages{stage})(:,:,i),patent_indicator_functional_parameter_object.(TLC_stages{stage}){i});
% Plot the medoid and time normalised functional data object for the
% current patent indicator:
figure_name = ['Functional Data Object for all technology profiles based on ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(patent_indicator_FDO.(TLC_stages{stage}){i});
hold on
% Add title, X, and Y labels to subplot:
if signal_alignment == 1
title([figure_name,' (normalised medoid aligned time)']);
else
title([figure_name,' (normalised time)']);
end
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Normalised IHS indicator count','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Assess the fit of the functional data object to the current patent
% indicator technology profile set (see section 5.5 of 'Functional Data
% Analysis with R and MATLAB.pdf'):
patent_indicator_FDO_values.(TLC_stages{stage}){i} = eval_fd(tvec_normalised_scaled.(TLC_stages{stage}),patent_indicator_FDO.(TLC_stages{stage}){i});
patent_indicator_FDO_residuals.(TLC_stages{stage}){i} = time_normalised_values_matrix.(TLC_stages{stage})(:,:,i) - patent_indicator_FDO_values.(TLC_stages{stage}){i};
% Create variance vectors for the current patent indicator functional
% data object to determine variance across the technologies considered,
% and across the normalised time period considered (see section 5.5 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
patent_indicator_FDO_variance_across_technologies.(TLC_stages{stage}){i} = sum((patent_indicator_FDO_residuals.(TLC_stages{stage}){i}).^2,2) / numel(technology.(TLC_stages{stage}));
patent_indicator_FDO_variance_across_time.(TLC_stages{stage}){i} = sum((patent_indicator_FDO_residuals.(TLC_stages{stage}){i}).^2,1) / (size(patent_indicator_FDO_residuals.(TLC_stages{stage}){i},1) - patent_indicator_df.(TLC_stages{stage}){i});
% Plot the variation in residuals within the technologies considered
% within the current patent indicator functional data object. The
% standard deviation is used for this as in section 5.5 of 'Functional
% Data Analysis with R and MATLAB.pdf':
figure_name = ['Standard deviations of the residuals within technologies from the functional data object for ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(1:numel(technology.(TLC_stages{stage})),sqrt(patent_indicator_FDO_variance_across_time.(TLC_stages{stage}){i}),'bo');
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Technology','FontSize',12)
ylabel('Standard deviation across normalised and scaled time','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Create functional data object to represent the smoothed log of the
% standard deviations for plotting the variation in residuals within
% the normalised and scaled time period (see section 5.5 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
patent_indicator_FDO_standard_deviations_line.(TLC_stages{stage}){i} = smooth_basis(tvec_normalised_scaled.(TLC_stages{stage}),log(patent_indicator_FDO_variance_across_technologies.(TLC_stages{stage}){i})/2,patent_indicator_functional_parameter_object.(TLC_stages{stage}){i},'method','qr');
% Exponentiating the standard deviation functional data object for
% plotting purposes (see section 5.5 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
patent_indicator_FDO_standard_deviations_line_fit.(TLC_stages{stage}){i} = exp(eval_fd(tvec_normalised_scaled.(TLC_stages{stage}),patent_indicator_FDO_standard_deviations_line.(TLC_stages{stage}){i}));
% Plot the variation in residuals within the normalised and scaled time
% considered within the current patent indicator functional data
% object. The standard deviation is used for this as in section 5.5 of
% 'Functional Data Analysis with R and MATLAB.pdf':
figure_name = ['Standard deviations of the residuals within time from the functional data object for ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(tvec_normalised_scaled.(TLC_stages{stage}),sqrt(patent_indicator_FDO_variance_across_technologies.(TLC_stages{stage}){i}),'bo');
line(tvec_normalised_scaled.(TLC_stages{stage}),patent_indicator_FDO_standard_deviations_line_fit.(TLC_stages{stage}){i},'Color','r', 'LineStyle','-');
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Standard deviation across technologies','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Calculate functional descriptive statistics
% Preallocate empty cell arrays for storing functional descriptive
% statistics data:
patent_indicator_FDO_mean.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_FDO_standard_deviation.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_bivariate_functional_data_object.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
patent_indicator_bivariate_function_values.(TLC_stages{stage}) = cell(1,numel(model_indicator_subset));
cross_covariance_bivariate_functional_data_object.(TLC_stages{stage}) = cell(numel(model_indicator_subset),numel(model_indicator_subset));
cross_covariance_bivariate_function_values.(TLC_stages{stage}) = cell(numel(model_indicator_subset),numel(model_indicator_subset));
cross_correlation_matrix.(TLC_stages{stage}) = cell(numel(model_indicator_subset),numel(model_indicator_subset));
% Cycle through eacah patent indicator included in the model:
for i = 1:numel(model_indicator_subset)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Calculate the functional mean and standard deviations of the current
% patent indicator technology profile set:
patent_indicator_FDO_mean.(TLC_stages{stage}){i} = mean(patent_indicator_FDO.(TLC_stages{stage}){i});
patent_indicator_FDO_standard_deviation.(TLC_stages{stage}){i} = std_fd(patent_indicator_FDO.(TLC_stages{stage}){i});
% Calculate the bivariate covariance function for curve values of the
% current patent indicator functional data object in order to determine
% the covariance between curves:
patent_indicator_bivariate_functional_data_object.(TLC_stages{stage}){i} = var_fd(patent_indicator_FDO.(TLC_stages{stage}){i});
patent_indicator_bivariate_function_values.(TLC_stages{stage}){i} = eval_bifd(tvec_normalised_scaled.(TLC_stages{stage}),tvec_normalised_scaled.(TLC_stages{stage}),patent_indicator_bivariate_functional_data_object.(TLC_stages{stage}){i});
% Generate bivariate variance-covariance surface and contour plots for
% the current patent indicator functional data object (see section
% 6.1.1 of 'Functional Data Analysis with R and MATLAB.pdf'):
figure_name = ['Estimated bivariate variance-covariance surface and contours for ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
subplot(1,2,1), surf(tvec_normalised_scaled.(TLC_stages{stage}),tvec_normalised_scaled.(TLC_stages{stage}),patent_indicator_bivariate_function_values.(TLC_stages{stage}){i});
hold on
% Add title, X, Y, and Z labels to subplot:
title(['Estimated bivariate variance-covariance surface for ',patent_indicator_column_names{model_component}]);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Normalised time (scaled)','FontSize',12)
zlabel('Variance','FontSize',12)
subplot(1,2,2), contour(tvec_normalised_scaled.(TLC_stages{stage}),tvec_normalised_scaled.(TLC_stages{stage}),patent_indicator_bivariate_function_values.(TLC_stages{stage}){i});
hold on
% Add title, X, and Y labels to subplot:
title(['Estimated bivariate variance-covariance contours for ',patent_indicator_column_names{model_component}]);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Normalised time (scaled)','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
for j = 1:numel(model_indicator_subset)
% Calculate the bivariate cross-covariance and cross-correlation
% functions for curve values between the current patent indicator
% functional data object and all other patent indicator functional
% data objects in order to determine the cross-covariance between
% indicators:
if i <= j
cross_covariance_bivariate_functional_data_object.(TLC_stages{stage}){i,j} = var_fd(patent_indicator_FDO.(TLC_stages{stage}){i},patent_indicator_FDO.(TLC_stages{stage}){j});
cross_covariance_bivariate_function_values.(TLC_stages{stage}){i,j} = eval_bifd(tvec_normalised_scaled.(TLC_stages{stage}),tvec_normalised_scaled.(TLC_stages{stage}),cross_covariance_bivariate_functional_data_object.(TLC_stages{stage}){i,j});
cross_correlation_matrix.(TLC_stages{stage}){i,j} = cor_fd(tvec_normalised_scaled.(TLC_stages{stage}),patent_indicator_FDO.(TLC_stages{stage}){i},tvec_normalised_scaled.(TLC_stages{stage}),patent_indicator_FDO.(TLC_stages{stage}){j});
% Generate bivariate cross-covariance surface and contour plots
% for the current pair of patent indicator functional data
% objects (see section 6.1.1 of 'Functional Data Analysis with
% R and MATLAB.pdf'):
figure_name = ['Estimated bivariate cross-covariance surface and contours for patent indicators ',num2str(model_indicator_subset(i)),' and ',num2str(model_indicator_subset(j)),' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
subplot(1,2,1), surf(tvec_normalised_scaled.(TLC_stages{stage}),tvec_normalised_scaled.(TLC_stages{stage}),cross_covariance_bivariate_function_values.(TLC_stages{stage}){i,j});
hold on
% Add title, X, Y, and Z labels to subplot:
title(['Estimated bivariate cross-covariance surface for patent indicators ',num2str(model_indicator_subset(i)),' and ',num2str(model_indicator_subset(j)),' - ',TLC_stages{stage}]);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Normalised time (scaled)','FontSize',12)
zlabel('Variance','FontSize',12)
subplot(1,2,2), contour(tvec_normalised_scaled.(TLC_stages{stage}),tvec_normalised_scaled.(TLC_stages{stage}),cross_covariance_bivariate_function_values.(TLC_stages{stage}){i,j});
hold on
% Add title, X, and Y labels to subplot:
title(['Estimated bivariate cross-covariance contours for patent indicators ',num2str(model_indicator_subset(i)),' and ',num2str(model_indicator_subset(j)),' - ',TLC_stages{stage}]);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Normalised time (scaled)','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Generate bivariate cross-correlation surface and contour
% plots for the current pair of patent indicator functional
% data objects (see section 6.1.1 of 'Functional Data Analysis
% with R and MATLAB.pdf'):
figure_name = ['Estimated bivariate cross-correlation surface and contours for patent indicators ',num2str(model_indicator_subset(i)),' and ',num2str(model_indicator_subset(j)),' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
subplot(1,2,1), surf(tvec_normalised_scaled.(TLC_stages{stage}),tvec_normalised_scaled.(TLC_stages{stage}),cross_correlation_matrix.(TLC_stages{stage}){i,j});
hold on
% Add title, X, Y, and Z labels to subplot:
title(['Estimated bivariate cross-correlation surface for patent indicators ',num2str(model_indicator_subset(i)),' and ',num2str(model_indicator_subset(j)),' - ',TLC_stages{stage}]);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Normalised time (scaled)','FontSize',12)
zlabel('Cross-correlation score','FontSize',12)
subplot(1,2,2), contour(tvec_normalised_scaled.(TLC_stages{stage}),tvec_normalised_scaled.(TLC_stages{stage}),cross_correlation_matrix.(TLC_stages{stage}){i,j});
hold on
% Add title, X, and Y labels to subplot:
title(['Estimated bivariate cross-correlation contours for patent indicators ',num2str(model_indicator_subset(i)),' and ',num2str(model_indicator_subset(j)),' - ',TLC_stages{stage}]);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Normalised time (scaled)','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
else
cross_covariance_bivariate_functional_data_object.(TLC_stages{stage}){i,j} = cross_covariance_bivariate_functional_data_object.(TLC_stages{stage}){j,i};
cross_covariance_bivariate_function_values.(TLC_stages{stage}){i,j} = cross_covariance_bivariate_function_values.(TLC_stages{stage}){j,i};
cross_correlation_matrix.(TLC_stages{stage}){i,j} = cross_correlation_matrix.(TLC_stages{stage}){j,i};
end
end
end
% Generate plot of the functional means of the current patent indicator
% technology profile set:
figure_name = ['Mean functional data object values for chosen patent indicator subset',' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
for i = 1:numel(patent_indicator_FDO_mean.(TLC_stages{stage}))
plot(patent_indicator_FDO_mean.(TLC_stages{stage}){i});
end
% Add title, X, Y, and Z labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Mean normalised patent indicator count','FontSize',12)
% Add legend to the technology time series clusters:
legend(strrep(patent_indicator_labels{1, 2}(model_indicator_subset),'_',' '),'FontSize',12,'Location','northwest');
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Generate plot of the standard deviations of the current patent indicator
% technology profile set:
figure_name = ['Standard deviation of functional data objects created for chosen patent indicator subset',' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
for i = 1:numel(patent_indicator_FDO_standard_deviation.(TLC_stages{stage}))
plot(patent_indicator_FDO_standard_deviation.(TLC_stages{stage}){i});
end
% Add title, X, Y, and Z labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Standard deviation of patent indicator FDO','FontSize',12)
% Add legend to the technology time series clusters:
legend(strrep(patent_indicator_labels{1, 2}(model_indicator_subset),'_',' '),'FontSize',12,'Location','northwest');
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Examine functional covariance by applying Canonical Correlation Analysis
% Canonical Correlation Analysis (CCA) is used to examine how much
% variation is shared between the different model components (i.e. patent
% indicator technology profile sets) - see section 7.5 of 'Functional
% Data Analysis with R and MATLAB.pdf' for details.
% Create a canonical correlation analysis functional parameter object that
% has heavy penalties applied to the roughness defined by the second
% derivative of the parameter object to avoid the CCA 'greediness pitfall'
% that can make it difficult to see anything of interest in the results if
% not avoided (see page 111, section 7.5 of 'Functional Data Analysis with
% R and MATLAB.pdf'):
cca_functional_parameter_object.(TLC_stages{stage}) = fdPar(patent_indicator_functional_basis_object.(TLC_stages{stage}){i},2,5e6);
% Set the number of canonical weight/variable pairs that should be included
% in the analysis - "the length k of the sequence is the smallest of the
% sample size N, the number of basis functions for either functional
% variable, or the number of basis functions used for either of the probe
% weight functions" (see page 111, section 7.5 of 'Functional Data Analysis
% with R and MATLAB.pdf'). Therefore assume that the number of canonical
% weight/variable pairs is equivalent to the number of technologies:
num_canonical_pairs.(TLC_stages{stage}) = numel(technology.(TLC_stages{stage}));
% Preallocate empty cell array for storing results of canonical correlation
% analysis:
cca.(TLC_stages{stage}) = cell(numel(model_indicator_subset),numel(model_indicator_subset));
% Run canonical correlation analysis for each pair of patent indicators
% included in the model:
for i = 1:numel(model_indicator_subset)
for j = 1:numel(model_indicator_subset)
if i <= j
cca.(TLC_stages{stage}){i,j} = cca_fd(patent_indicator_FDO.(TLC_stages{stage}){i},patent_indicator_FDO.(TLC_stages{stage}){j},num_canonical_pairs.(TLC_stages{stage}),cca_functional_parameter_object.(TLC_stages{stage}),cca_functional_parameter_object.(TLC_stages{stage}));
% Plot the first type of variation associated with the first
% canonical correlation for the current pair of patent
% indicator functional data objects (see section 7.5 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
figure_name = ['The first pair of canonical weight functions for patent indicators ',num2str(model_indicator_subset(i)),' and ',num2str(model_indicator_subset(j)),' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(cca.(TLC_stages{stage}){i,j}.wtfdx(1));
plot(cca.(TLC_stages{stage}){i,j}.wtfdy(1));
hold on
% Add title and X label to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
hold on
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Plot the scores for first pair of canonical variables (see
% section 7.5 of 'Functional Data Analysis with R and
% MATLAB.pdf'):
figure_name = ['The scores for the first pair of canonical variables plotted against each other for patent indicators ',num2str(model_indicator_subset(i)),' and ',num2str(model_indicator_subset(j)),' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(cca.(TLC_stages{stage}){i,j}.varx(:,1),cca.(TLC_stages{stage}){i,j}.vary(:,1),'bo');
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel(['Canonical weight for patent indicator ',num2str(model_indicator_subset(i))],'FontSize',12)
ylabel(['Canonical weight for patent indicator ',num2str(model_indicator_subset(j))],'FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
else
cca.(TLC_stages{stage}){i,j} = cca.(TLC_stages{stage}){j,i};
end
end
end
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Create cell array of covariate functions for use in functional regression analysis
% Preallocate empty cell array for storing covariate functions:
covariates.(TLC_stages{stage}) = cell(1,(numel(model_indicator_subset) + 1));
% Generate constant 'covariate' vector of value 1 (to be first element in
% the overall covariate matrix):
constant_covariate.(TLC_stages{stage}) = ones(numel(technology.(TLC_stages{stage})),1);
% Create covariates matrix combining constant covariate with medoid and
% time normalised functional data objects generated for each patent
% indicator considered in the model (this is the equivalent of the
% 'templist' cell array created on page 134, section 9.4 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
for i = 1:(numel(model_indicator_subset) + 1)
if i == 1
covariates.(TLC_stages{stage}){i} = constant_covariate.(TLC_stages{stage});
else
covariates.(TLC_stages{stage}){i} = patent_indicator_FDO.(TLC_stages{stage}){i - 1};
end
end
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Generate plot of cross-validation scores for different values of lambda (the smoothing parameter) to use in beta basis system
%
% % This cross-validation scoring of possible values of the smoothing
% % parameter, lambda, to use in beta basis system for final functional
% % regression analysis is equivalent to the example lambda cross-validation
% % scoring carried out in section 9.4.3 of 'Functional Data Analysis with R
% % and MATLAB.pdf'
%
% % Generate sequence of log lambda values to evaluate:
% % log_lambda.(TLC_stages{stage}) = -5:1:10;
% log_lambda.(TLC_stages{stage}) = 3:0.1:8;
% % log_lambda.(TLC_stages{stage}) = 4.0;
% number_lambda_values.(TLC_stages{stage}) = length(log_lambda.(TLC_stages{stage}));
%
% % Preallocate empty double arrays for storing error sum of squares
% % cross-validation scores (i.e. SSE.CV from section 9.4.3 of 'Functional
% % Data Analysis with R and MATLAB.pdf'):
% SSE_lambda_cross_validation_score.(TLC_stages{stage}) = zeros(number_lambda_values.(TLC_stages{stage}),1);
%
% % Iterate through possible beta basis systems for each patent indicator
% % included for model building purposes with different smoothing parameter
% % (lambda) values and calculate cross-validation score:
% for j = 1:number_lambda_values.(TLC_stages{stage})
% % Select current lambda value:
% lambda.(TLC_stages{stage}) = 10^(log_lambda.(TLC_stages{stage})(j))
%
% % Reset beta basis system to match default beta basis system:
% current_beta_basis_systems = beta_basis_systems.(TLC_stages{stage});
%
% for i = 1:numel(model_indicator_subset)
% % Substitute current lambda value into beta basis system:
% current_beta_basis_systems{1,i+1} = putlambda(current_beta_basis_systems{1,i+1},lambda.(TLC_stages{stage}));
% end
%
% % Compute leave-one-out cross-validated error sum of squares (SSE) for
% % functional responses (see section 10.6.2 of 'Functional Data Analysis
% % with R and MATLAB.pdf'):
% current_SSE_lambda_cross_validation_score = fRegress_CV(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),current_beta_basis_systems);
%
% % Add the current error sum of squares cross-validation score (i.e.
% % SSE.CV from section 9.4.3 of 'Functional Data Analysis with R and
% % MATLAB.pdf') to the corresponding array:
% SSE_lambda_cross_validation_score.(TLC_stages{stage})(j) = current_SSE_lambda_cross_validation_score;
% end
%
% %% Plot the cross-validation scores for log of the smoothing parameters evaluated for use in the beta basis system for the current patent indicator
% for i = 1:numel(model_indicator_subset)
% % Select the current component of the selected model indicator subset:
% model_component = model_indicator_subset(i);
%
% % Plot the cross-validation scores for log of the smoothing parameters
% % evaluated for use in the beta basis system for the current patent
% % indicator:
% figure_name = ['Cross-validation scores for the ',patent_indicator_column_names{model_component},' beta basis system smoothing parameter - ',TLC_stages{stage}];
% figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
% plot(log_lambda.(TLC_stages{stage}),SSE_lambda_cross_validation_score.(TLC_stages{stage})(:),'ro-');
% hold on
%
% % Add title, X, and Y labels to subplot:
% title(figure_name);
% xlabel('Log 10 of smoothing parameter, lambda','FontSize',12)
% ylabel('Cross-validation score','FontSize',12)
%
% % Set plots to be invisible now, but visible when opened later:
% set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
%
% % Save figure:
% saveas(gcf,figure_name,'fig')
% hold off
% end
%% Run functional regression analysis using known cluster IDs, covariate functions, and beta basis system objects
% The functional linear model built in this section and the sections below
% are based on a set of scalar response values (Y), so are based on chapter
% 9 of 'Functional Data Analysis with R and MATLAB.pdf' (as opposed to
% chapter 10 which relates to 'Linear models for functional responses')
% Run functional regression analysis to identify functional (beta)
% coefficients (see page 137, section 9.4.2 of 'Functional Data Analysis
% with R and MATLAB.pdf') using identified smoothing parameters (see
% section 9.4.3 of 'Functional Data Analysis with R and MATLAB.pdf'):
functional_regression.(TLC_stages{stage}) = fRegress(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),beta_basis_systems.(TLC_stages{stage}));
% Extract the estimated beta functional coefficients, predicted Y values,
% and residuals from the functional regression analysis:
estimated_beta_coefficients.(TLC_stages{stage}) = functional_regression.(TLC_stages{stage}).betahat;
estimated_beta_coefficients_values.(TLC_stages{stage}) = cell(numel(estimated_beta_coefficients.(TLC_stages{stage})),1);
estimated_beta_functional_data_object.(TLC_stages{stage}) = cell(numel(estimated_beta_coefficients.(TLC_stages{stage})),1);
for i = 1:numel(estimated_beta_coefficients.(TLC_stages{stage}))
estimated_beta_coefficients_values.(TLC_stages{stage}){i} = getcoef(estimated_beta_coefficients.(TLC_stages{stage}){i});
estimated_beta_functional_data_object.(TLC_stages{stage}){i} = getfd(estimated_beta_coefficients.(TLC_stages{stage}){i});
end
predicted_values.(TLC_stages{stage}) = functional_regression.(TLC_stages{stage}).yhat;
residuals.(TLC_stages{stage}) = technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - predicted_values.(TLC_stages{stage});
% Determine the variance (SigmaE_squared = SigmaE. from page 141, section
% 9.4.4 of 'Functional Data Analysis with R and MATLAB.pdf') associated
% with the functional regression fit:
SigmaE_squared.(TLC_stages{stage}) = sum(residuals.(TLC_stages{stage}).^2) / (numel(technology.(TLC_stages{stage})) - functional_regression.(TLC_stages{stage}).df);
% Create the variance matrix (i.e. SigmaE from page 141, section 9.4.4 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
SigmaE.(TLC_stages{stage}) = SigmaE_squared.(TLC_stages{stage}) * diag(ones(numel(technology.(TLC_stages{stage})),1));
% Plot the estimate of the constant regression coefficient:
figure_name = ['Estimated regression coefficient for the constant functional basis system - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(estimated_beta_functional_data_object.(TLC_stages{stage}){1});
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Beta for the constant functional basis system','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Preallocate empty cell array for storing standard error estimates for
% regression coefficient functions estimated by FREGRESS:
standard_error_estimates.(TLC_stages{stage}) = cell((numel(estimated_beta_coefficients.(TLC_stages{stage})) - 1),1);
% Plot the estimate of the regression function for the medoid and time
% normalised functional data objects generated for each patent indicator
% considered in the model (see page 134, section 9.4.1 of 'Functional Data
% Analysis with R and MATLAB.pdf'):
for i = 1:(numel(estimated_beta_coefficients.(TLC_stages{stage})) - 1)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Select the corresponding matrix mapping the raw data vector 'y' to
% the coefficient vector 'c' of the basis function expansion of 'x'
% (see page 61, section 5.1 of 'Functional Data Analysis with R and
% MATLAB.pdf' for explanation of y2cMap):
current_y2cMap = patent_indicator_y2cMap.(TLC_stages{stage}){i};
% Compute the standard error estimates for regression coefficient
% functions estimated by FREGRESS:
standard_error_estimates.(TLC_stages{stage}){i+1} = fRegress_stderr(functional_regression.(TLC_stages{stage}),current_y2cMap,SigmaE.(TLC_stages{stage}));
% Extract the function curves which are plus and minus two times the
% standard error of the regression coefficient function, to obtain the
% approximate regression function (i.e. beta) confidence bounds:
beta_standard_error_estimates.(TLC_stages{stage}) = standard_error_estimates.(TLC_stages{stage}){i+1}.betastderr;
current_beta_standard_error_estimate = beta_standard_error_estimates.(TLC_stages{stage}){i+1};
% Plot the current regression coefficient along with 95% confidence
% bounds:
figure_name = ['Estimated regression coefficient for predicting technology cluster from ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(estimated_beta_functional_data_object.(TLC_stages{stage}){i+1});
line(estimated_beta_functional_data_object.(TLC_stages{stage}){i+1}+times(current_beta_standard_error_estimate,2));
line(estimated_beta_functional_data_object.(TLC_stages{stage}){i+1}-times(current_beta_standard_error_estimate,2));
% Resize y-axis to fit the confidence bounds curves:
ylim('auto');
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel(['Beta for ',patent_indicator_column_names{model_component}],'FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
% Calculate R-Squared, adjusted R-Squared, and F-ratio statistics to assess
% the fit of the regression model (where 'error_sum_of_squares_Y' (or
% SSE_Y) is the total sum of squares for Y, and
% 'error_sum_of_squares_residuals' (or SSE_RES) is the total sum of squares
% for residuals) - see sections 9.4.1 and 9.4.2 of 'Functional Data
% Analysis with R and MATLAB.pdf':
error_sum_of_squares_Y.(TLC_stages{stage}) = sum((technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - mean(technology_data_filtered.known_cluster_id.(TLC_stages{stage}))).^2);
error_sum_of_squares_residuals.(TLC_stages{stage}) = sum((residuals.(TLC_stages{stage})).^2);
R_squared.(TLC_stages{stage}) = (error_sum_of_squares_Y.(TLC_stages{stage}) - error_sum_of_squares_residuals.(TLC_stages{stage})) / error_sum_of_squares_Y.(TLC_stages{stage});
adjusted_R_squared.(TLC_stages{stage}) = 1 - ((1 - R_squared.(TLC_stages{stage})) * ((numel(technology.(TLC_stages{stage})) - 1) / (numel(technology.(TLC_stages{stage})) - numel(model_indicator_subset) - 1)));
degrees_freedom_1 = functional_regression.(TLC_stages{stage}).df - 1;
degrees_freedom_2 = numel(technology.(TLC_stages{stage})) - functional_regression.(TLC_stages{stage}).df;
F_ratio.(TLC_stages{stage}) = ((error_sum_of_squares_Y.(TLC_stages{stage}) - error_sum_of_squares_residuals.(TLC_stages{stage})) / degrees_freedom_1) / (error_sum_of_squares_residuals.(TLC_stages{stage}) / degrees_freedom_2);
%% Benchmark results of functional regression against regression functions obtained using a low-dimensional basis system for the beta coefficients
% Reset beta basis system to match default beta basis system:
low_dimensional_beta_basis_systems.(TLC_stages{stage}) = beta_basis_systems.(TLC_stages{stage});
% Create low-dimensional b-spline basis system for beta coefficients (see
% section 9.4.1. of 'Functional Data Analysis with R and MATLAB.pdf'):
low_dimensional_beta_functional_basis_object.(TLC_stages{stage}) = create_bspline_basis([min(tvec_normalised_scaled.(TLC_stages{stage})),max(tvec_normalised_scaled.(TLC_stages{stage}))],5);
% Substitute low-dimensional basis system into beta basis system:
for i = 1:numel(model_indicator_subset)
low_dimensional_beta_basis_systems.(TLC_stages{stage}){1,i+1} = low_dimensional_beta_functional_basis_object.(TLC_stages{stage});
end
% Re-run functional regression analysis to identify functional (beta)
% coefficients (see page 134, section 9.4.1 of 'Functional Data Analysis
% with R and MATLAB.pdf') using low-dimensional basis systems for the beta
% coefficients:
low_dimensional_functional_regression.(TLC_stages{stage}) = fRegress(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),low_dimensional_beta_basis_systems.(TLC_stages{stage}));
% Extract the estimated beta coefficients, predicted Y values, and
% residuals when using a low-dimensional basis system for the beta
% coefficients:
low_dimensional_estimated_beta_coefficients.(TLC_stages{stage}) = low_dimensional_functional_regression.(TLC_stages{stage}).betahat;
low_dimensional_estimated_beta_coefficients_values.(TLC_stages{stage}) = cell(numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage})),1);
low_dimensional_estimated_beta_functional_data_object.(TLC_stages{stage}) = cell(numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage})),1);
for i = 1:numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage}))
low_dimensional_estimated_beta_coefficients_values.(TLC_stages{stage}){i} = getcoef(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage}){i});
low_dimensional_estimated_beta_functional_data_object.(TLC_stages{stage}){i} = getfd(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage}){i});
end
low_dimensional_predicted_values.(TLC_stages{stage}) = low_dimensional_functional_regression.(TLC_stages{stage}).yhat;
low_dimensional_residuals.(TLC_stages{stage}) = technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - low_dimensional_predicted_values.(TLC_stages{stage});
% Determine the variance (SigmaE_squared = SigmaE. from page 141, section
% 9.4.4 of 'Functional Data Analysis with R and MATLAB.pdf') associated
% with the low-dimensional functional regression fit:
low_dimensional_SigmaE_squared.(TLC_stages{stage}) = sum(low_dimensional_residuals.(TLC_stages{stage}).^2) / (numel(technology.(TLC_stages{stage})) - low_dimensional_functional_regression.(TLC_stages{stage}).df);
% Create the variance matrix (i.e. SigmaE from page 141, section 9.4.4 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
low_dimensional_SigmaE.(TLC_stages{stage}) = low_dimensional_SigmaE_squared.(TLC_stages{stage}) * diag(ones(numel(technology.(TLC_stages{stage})),1));
% Plot the low-dimensional estimate of the constant regression coefficient:
figure_name = ['Low-dimensional estimate of the regression coefficient for the constant functional basis system - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(low_dimensional_estimated_beta_functional_data_object.(TLC_stages{stage}){1});
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Beta for the constant functional basis system','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Preallocate empty cell array for storing standard error estimates for the
% low dimensional regression coefficient functions estimated by FREGRESS:
low_dimensional_standard_error_estimates.(TLC_stages{stage}) = cell((numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage})) - 1),1);
% Plot the estimate of the low-dimensional regression function for the
% medoid and time normalised functional data objects generated for each
% patent indicator considered in the model (see page 134, section 9.4.1 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
for i = 1:(numel(low_dimensional_estimated_beta_coefficients.(TLC_stages{stage})) - 1)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Select the corresponding matrix mapping the raw data vector 'y' to
% the coefficient vector 'c' of the basis function expansion of 'x'
% (see page 61, section 5.1 of 'Functional Data Analysis with R and
% MATLAB.pdf' for explanation of y2cMap):
current_y2cMap = patent_indicator_y2cMap.(TLC_stages{stage}){i};
% Compute the standard error estimates for low-dimensional regression
% coefficient functions estimated by FREGRESS:
low_dimensional_standard_error_estimates.(TLC_stages{stage}){i+1} = fRegress_stderr(low_dimensional_functional_regression.(TLC_stages{stage}),current_y2cMap,low_dimensional_SigmaE.(TLC_stages{stage}));
% Extract the function curves which are plus and minus two times the
% standard error of the low-dimensional regression coefficient
% function, to obtain the approximate regression function (i.e. beta)
% confidence bounds:
low_dimensional_beta_standard_error_estimates.(TLC_stages{stage}) = low_dimensional_standard_error_estimates.(TLC_stages{stage}){i+1}.betastderr;
current_beta_standard_error_estimate = low_dimensional_beta_standard_error_estimates.(TLC_stages{stage}){i+1};
% Plot the current low-dimensional regression coefficient along with
% 95% confidence bounds:
figure_name = ['Low-dimensional estimate of the regression coefficient for predicting group from ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(low_dimensional_estimated_beta_functional_data_object.(TLC_stages{stage}){i+1});
line(low_dimensional_estimated_beta_functional_data_object.(TLC_stages{stage}){i+1}+times(current_beta_standard_error_estimate,2));
line(low_dimensional_estimated_beta_functional_data_object.(TLC_stages{stage}){i+1}-times(current_beta_standard_error_estimate,2));
% Resize y-axis to fit the confidence bounds curves:
ylim('auto');
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel(['Beta for ',patent_indicator_column_names{model_component}],'FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
% Calculate R-Squared, adjusted R-Squared, and F-ratio statistics to assess
% the fit of the low-dimensional basis regression model (where
% 'error_sum_of_squares_Y' (or SSE_Y) is the total sum of squares for Y,
% and 'low_dimensional_error_sum_of_squares_residuals' (or SSE_RES) is the
% total sum of squares for residuals using the low-dimensional basis system
% for beta coefficients):
low_dimensional_error_sum_of_squares_residuals.(TLC_stages{stage}) = sum((technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - low_dimensional_predicted_values.(TLC_stages{stage})).^2);
low_dimensional_R_squared.(TLC_stages{stage}) = (error_sum_of_squares_Y.(TLC_stages{stage}) - low_dimensional_error_sum_of_squares_residuals.(TLC_stages{stage})) / error_sum_of_squares_Y.(TLC_stages{stage});
low_dimensional_adjusted_R_squared.(TLC_stages{stage}) = 1 - ((1 - low_dimensional_R_squared.(TLC_stages{stage})) * ((numel(technology.(TLC_stages{stage})) - 1) / (numel(technology.(TLC_stages{stage})) - numel(model_indicator_subset) - 1)));
low_dimensional_degrees_freedom_1 = low_dimensional_functional_regression.(TLC_stages{stage}).df - 1;
low_dimensional_degrees_freedom_2 = numel(technology.(TLC_stages{stage})) - low_dimensional_functional_regression.(TLC_stages{stage}).df;
low_dimensional_F_ratio.(TLC_stages{stage}) = ((error_sum_of_squares_Y.(TLC_stages{stage}) - low_dimensional_error_sum_of_squares_residuals.(TLC_stages{stage})) / low_dimensional_degrees_freedom_1) / (low_dimensional_error_sum_of_squares_residuals.(TLC_stages{stage}) / low_dimensional_degrees_freedom_2);
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Benchmark results of functional regression against regression functions obtained using a constant basis system for beta coefficients
% Reset beta basis system to match default beta basis system:
conbasis_beta_basis_systems.(TLC_stages{stage}) = beta_basis_systems.(TLC_stages{stage});
% Substitute constant basis system into beta basis system:
for i = 1:numel(model_indicator_subset)
% Substitute current lambda value into beta basis system:
conbasis_beta_basis_systems.(TLC_stages{stage}){1,i+1} = fdPar(conbasis.(TLC_stages{stage}));
end
% Re-run functional regression analysis to identify functional (beta)
% coefficients (see page 137, section 9.4.2 of 'Functional Data Analysis
% with R and MATLAB.pdf') using constant basis systems for the beta
% coefficients:
conbasis_functional_regression.(TLC_stages{stage}) = fRegress(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),conbasis_beta_basis_systems.(TLC_stages{stage}));
% Extract the estimated beta coefficients and predicted Y values when using
% a constant basis system for the beta coefficients:
conbasis_estimated_beta_coefficients.(TLC_stages{stage}) = conbasis_functional_regression.(TLC_stages{stage}).betahat;
conbasis_estimated_beta_coefficients_values.(TLC_stages{stage}) = cell(numel(conbasis_estimated_beta_coefficients.(TLC_stages{stage})),1);
conbasis_estimated_beta_functional_data_object.(TLC_stages{stage}) = cell(numel(conbasis_estimated_beta_coefficients.(TLC_stages{stage})),1);
for i = 1:numel(conbasis_estimated_beta_coefficients.(TLC_stages{stage}))
conbasis_estimated_beta_coefficients_values.(TLC_stages{stage}){i} = getcoef(conbasis_estimated_beta_coefficients.(TLC_stages{stage}){i});
conbasis_estimated_beta_functional_data_object.(TLC_stages{stage}){i} = getfd(conbasis_estimated_beta_coefficients.(TLC_stages{stage}){i});
end
conbasis_predicted_values.(TLC_stages{stage}) = conbasis_functional_regression.(TLC_stages{stage}).yhat;
% Calculate R-Squared, adjusted R-Squared, and F-ratio statistics to assess
% the fit of the constant basis regression model (where
% 'error_sum_of_squares_Y' (or SSE_Y) is the total sum of squares for Y,
% and 'conbasis_error_sum_of_squares_residuals' (or SSE_RES) is the total
% sum of squares for residuals using the constant basis system for beta
% coefficients):
conbasis_error_sum_of_squares_residuals.(TLC_stages{stage}) = sum((technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - conbasis_predicted_values.(TLC_stages{stage})).^2);
conbasis_R_squared.(TLC_stages{stage}) = (error_sum_of_squares_Y.(TLC_stages{stage}) - conbasis_error_sum_of_squares_residuals.(TLC_stages{stage})) / error_sum_of_squares_Y.(TLC_stages{stage});
conbasis_adjusted_R_squared.(TLC_stages{stage}) = 1 - ((1 - conbasis_R_squared.(TLC_stages{stage})) * ((numel(technology.(TLC_stages{stage})) - 1) / (numel(technology.(TLC_stages{stage})) - numel(model_indicator_subset) - 1)));
conbasis_degrees_freedom_1 = conbasis_functional_regression.(TLC_stages{stage}).df - 1;
conbasis_degrees_freedom_2 = numel(technology.(TLC_stages{stage})) - conbasis_functional_regression.(TLC_stages{stage}).df;
conbasis_F_ratio.(TLC_stages{stage}) = ((error_sum_of_squares_Y.(TLC_stages{stage}) - conbasis_error_sum_of_squares_residuals.(TLC_stages{stage})) / conbasis_degrees_freedom_1) / (conbasis_error_sum_of_squares_residuals.(TLC_stages{stage}) / conbasis_degrees_freedom_2);
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Benchmark results of functional regression against regression functions obtained using a monomial basis system for beta coefficients
% Reset beta basis system to match default beta basis system:
monbasis_beta_basis_systems.(TLC_stages{stage}) = beta_basis_systems.(TLC_stages{stage});
% Substitute monomial basis system into beta basis system:
for i = 1:numel(model_indicator_subset)
% Substitute current lambda value into beta basis system:
monbasis_beta_basis_systems.(TLC_stages{stage}){1,i+1} = fdPar(monbasis.(TLC_stages{stage}));
end
% Re-run functional regression analysis to identify functional (beta)
% coefficients (see page 137, section 9.4.2 of 'Functional Data Analysis
% with R and MATLAB.pdf') using monomial basis systems for the beta
% coefficients:
monbasis_functional_regression.(TLC_stages{stage}) = fRegress(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),monbasis_beta_basis_systems.(TLC_stages{stage}));
% Extract the estimated beta coefficients, predicted Y values, and
% residuals when using a monomial basis system for the beta coefficients:
monbasis_estimated_beta_coefficients.(TLC_stages{stage}) = monbasis_functional_regression.(TLC_stages{stage}).betahat;
monbasis_estimated_beta_coefficients_values.(TLC_stages{stage}) = cell(numel(monbasis_estimated_beta_coefficients.(TLC_stages{stage})),1);
monbasis_estimated_beta_functional_data_object.(TLC_stages{stage}) = cell(numel(monbasis_estimated_beta_coefficients.(TLC_stages{stage})),1);
for i = 1:numel(monbasis_estimated_beta_coefficients.(TLC_stages{stage}))
monbasis_estimated_beta_coefficients_values.(TLC_stages{stage}){i} = getcoef(monbasis_estimated_beta_coefficients.(TLC_stages{stage}){i});
monbasis_estimated_beta_functional_data_object.(TLC_stages{stage}){i} = getfd(monbasis_estimated_beta_coefficients.(TLC_stages{stage}){i});
end
monbasis_predicted_values.(TLC_stages{stage}) = monbasis_functional_regression.(TLC_stages{stage}).yhat;
monbasis_residuals.(TLC_stages{stage}) = technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - monbasis_predicted_values.(TLC_stages{stage});
% Determine the variance (SigmaE_squared = SigmaE. from page 141, section
% 9.4.4 of 'Functional Data Analysis with R and MATLAB.pdf') associated
% with the monomial functional regression fit:
monbasis_SigmaE_squared.(TLC_stages{stage}) = sum(monbasis_residuals.(TLC_stages{stage}).^2) / (numel(technology.(TLC_stages{stage})) - monbasis_functional_regression.(TLC_stages{stage}).df);
% Create the variance matrix (i.e. SigmaE from page 141, section 9.4.4 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
monbasis_SigmaE.(TLC_stages{stage}) = monbasis_SigmaE_squared.(TLC_stages{stage}) * diag(ones(numel(technology.(TLC_stages{stage})),1));
% Plot the monomial estimate of the constant regression coefficient:
figure_name = ['Monomial estimate of the regression coefficient for the constant functional basis system - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(monbasis_estimated_beta_functional_data_object.(TLC_stages{stage}){1});
hold on
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel('Beta for the constant functional basis system','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Preallocate empty cell array for storing standard error estimates for the
% monomial regression coefficient functions estimated by FREGRESS:
monbasis_standard_error_estimates.(TLC_stages{stage}) = cell((numel(monbasis_estimated_beta_coefficients.(TLC_stages{stage})) - 1),1);
% Plot the estimate of the monomial regression function for the medoid and
% time normalised functional data objects generated for each patent
% indicator considered in the model (see page 134, section 9.4.1 of
% 'Functional Data Analysis with R and MATLAB.pdf'):
for i = 1:(numel(monbasis_estimated_beta_coefficients.(TLC_stages{stage})) - 1)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Select the corresponding matrix mapping the raw data vector 'y' to
% the coefficient vector 'c' of the basis function expansion of 'x'
% (see page 61, section 5.1 of 'Functional Data Analysis with R and
% MATLAB.pdf' for explanation of y2cMap):
current_y2cMap = patent_indicator_y2cMap.(TLC_stages{stage}){i};
% Compute the standard error estimates for monomial regression
% coefficient functions estimated by FREGRESS:
monbasis_standard_error_estimates.(TLC_stages{stage}){i+1} = fRegress_stderr(monbasis_functional_regression.(TLC_stages{stage}),current_y2cMap,monbasis_SigmaE.(TLC_stages{stage}));
% Extract the function curves which are plus and minus two times the
% standard error of the monomial regression coefficient function, to
% obtain the approximate regression function (i.e. beta) confidence
% bounds:
monbasis_beta_standard_error_estimates.(TLC_stages{stage}) = monbasis_standard_error_estimates.(TLC_stages{stage}){i+1}.betastderr;
current_beta_standard_error_estimate = monbasis_beta_standard_error_estimates.(TLC_stages{stage}){i+1};
% Plot the current monomial regression coefficient along with 95%
% confidence bounds:
figure_name = ['Monomial estimate of the regression coefficient for predicting group from ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(monbasis_estimated_beta_functional_data_object.(TLC_stages{stage}){i+1});
% line(monbasis_estimated_beta_functional_data_object.(TLC_stages{stage}){i+1}+times(current_beta_standard_error_estimate,2));
% line(monbasis_estimated_beta_functional_data_object.(TLC_stages{stage}){i+1}-times(current_beta_standard_error_estimate,2));
% Resize y-axis to fit the confidence bounds curves:
ylim('auto');
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel(['Beta for ',patent_indicator_column_names{model_component}],'FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
% Calculate R-Squared, adjusted R-Squared, and F-ratio statistics to assess
% the fit of the monomial basis regression model (where
% 'error_sum_of_squares_Y' (or SSE_Y) is the total sum of squares for Y,
% and 'conbasis_error_sum_of_squares_residuals' (or SSE_RES) is the total
% sum of squares for residuals using the constant basis system for beta
% coefficients):
monbasis_error_sum_of_squares_residuals.(TLC_stages{stage}) = sum((technology_data_filtered.known_cluster_id.(TLC_stages{stage}) - monbasis_predicted_values.(TLC_stages{stage})).^2);
monbasis_R_squared.(TLC_stages{stage}) = (error_sum_of_squares_Y.(TLC_stages{stage}) - monbasis_error_sum_of_squares_residuals.(TLC_stages{stage})) / error_sum_of_squares_Y.(TLC_stages{stage});
monbasis_adjusted_R_squared.(TLC_stages{stage}) = 1 - ((1 - monbasis_R_squared.(TLC_stages{stage})) * ((numel(technology.(TLC_stages{stage})) - 1) / (numel(technology.(TLC_stages{stage})) - numel(model_indicator_subset) - 1)));
monbasis_degrees_freedom_1 = monbasis_functional_regression.(TLC_stages{stage}).df - 1;
monbasis_degrees_freedom_2 = numel(technology.(TLC_stages{stage})) - monbasis_functional_regression.(TLC_stages{stage}).df;
monbasis_F_ratio.(TLC_stages{stage}) = ((error_sum_of_squares_Y.(TLC_stages{stage}) - monbasis_error_sum_of_squares_residuals.(TLC_stages{stage})) / monbasis_degrees_freedom_1) / (monbasis_error_sum_of_squares_residuals.(TLC_stages{stage}) / monbasis_degrees_freedom_2);
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Apply Functional Principal Components analysis (fPCA) to the functional covariates to build alternative functional model
% This section adapts the functional principal components analysis (fPCA)
% applied in section 9.4.5 of 'Functional Data Analysis with R and
% MATLAB.pdf' to evaluate an alternative patent indicator model
% formulation.
% Delete any pre-existing 'fPCA_table' to prevent error if only running
% this section of the script:
clear fPCA_table.(TLC_stages{stage});
% Set smoothing parameter, lambda, to apply to principal components
% analysis functional parameter objects:
if signal_alignment == 1
fPCA_lambda.(TLC_stages{stage}) = 10^0;
else
fPCA_lambda.(TLC_stages{stage}) = 10^0;
end
% Set number of principal components to keep for model building:
fPCA_num_kept_components.(TLC_stages{stage}) = 4;
% Preallocate empty cell arrays and table for storing principal components
% analysis functional parameter objects and results:
fPCA_patent_indicator_functional_parameter_object.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
fPCA_patent_indicator.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
fPCA_harmonics.(TLC_stages{stage}) = cell(numel(model_indicator_subset),1);
fPCA_variable_names.(TLC_stages{stage}) = cell(numel(model_indicator_subset)*fPCA_num_kept_components.(TLC_stages{stage}),1);
% Perform Functional Principal Components Analysis on each patent
% indicator included in the model:
for i = 1:numel(model_indicator_subset)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Build additional functional parameter object to use in the principal
% components analysis alongside the functional data objects previously
% created for the patent indicators included in the model:
if signal_alignment == 1
fPCA_patent_indicator_functional_parameter_object.(TLC_stages{stage}){i} = fdPar(patent_indicator_functional_basis_object.(TLC_stages{stage}){i},2,fPCA_lambda.(TLC_stages{stage}));
else
fPCA_patent_indicator_functional_parameter_object.(TLC_stages{stage}){i} = fdPar(patent_indicator_functional_basis_object.(TLC_stages{stage}){i},2,fPCA_lambda.(TLC_stages{stage}));
end
% Run functional principal components analysis with regularisation on
% current patent indicator functional data object (see help on 'pca_fd'
% for more details), keeping 4 of the principal components. For notes
% on the impact of the number of retained components (or harmonics) on
% the ability to capture important model features see section 7.4 of
% 'Functional Data Analysis with R and MATLAB.pdf':
fPCA_patent_indicator.(TLC_stages{stage}){i} = pca_fd(patent_indicator_FDO.(TLC_stages{stage}){i},fPCA_num_kept_components.(TLC_stages{stage}),fPCA_patent_indicator_functional_parameter_object.(TLC_stages{stage}){i});
fPCA_harmonics.(TLC_stages{stage}){i} = fPCA_patent_indicator.(TLC_stages{stage}){i}.harmfd;
% % Plot the principal components of the current patent indicator:
% figure_name = ['Principal components for ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
% figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
% plot_pca_fd(fPCA_patent_indicator.(TLC_stages{stage}){i});
% hold on
%
% % Add title and X label to subplot:
% title(figure_name);
% xlabel('Normalised time (scaled)','FontSize',12)
%
% % Set plots to be invisible now, but visible when opened later:
% set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
%
% % Save figure:
% saveas(gcf,figure_name,'fig')
% hold off
% Plot the harmonics for the current patent indicator:
figure_name = ['Harmonics for ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
plot(fPCA_harmonics.(TLC_stages{stage}){i});
hold on
% Add title and X label to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
% Generate variable names for table of predictor variables:
fPCA_component_labels.(TLC_stages{stage}) = {'A','B','C','D','E','F','G','H','I','J'};
for j = 1:fPCA_num_kept_components.(TLC_stages{stage})
fPCA_variable_names.(TLC_stages{stage}){j+((i-1)*fPCA_num_kept_components.(TLC_stages{stage}))} = [fPCA_component_labels.(TLC_stages{stage}){i},num2str(j)];
end
% Add principal components analysis scores for the 4(??) kept
% components as values in a predictor variable table:
fPCA_table.(TLC_stages{stage})(1:numel(technology.(TLC_stages{stage})),(1:fPCA_num_kept_components.(TLC_stages{stage}))+((i-1)*fPCA_num_kept_components.(TLC_stages{stage}))) = array2table(fPCA_patent_indicator.(TLC_stages{stage}){i}.harmscr);
% Assemble string describing the components included for the
% current patent indicator (for use in building factor description
% for 'fitlm' - see below):
fPCA_components.(TLC_stages{stage}){i} = strjoin(fPCA_variable_names.(TLC_stages{stage})((1:fPCA_num_kept_components.(TLC_stages{stage}))+((i-1)*fPCA_num_kept_components.(TLC_stages{stage}))),' + ');
end
% Assign headings to table variables:
fPCA_table.(TLC_stages{stage}).Properties.VariableNames = fPCA_variable_names.(TLC_stages{stage});
% Append response variable Y as the last column in the table (taken as
% default in 'fitlm' to be the response variable):
fPCA_table.(TLC_stages{stage}).Responses = technology_data_filtered.known_cluster_id.(TLC_stages{stage});
% Construct string describing the factors to include in the linear
% regression model using 'Wilkinson Notation' (see Matlab reference page
% for fitlm for explanation of this notation):
fPCA_factor_equation.(TLC_stages{stage}) = ['Responses ~ (',strjoin(fPCA_components.(TLC_stages{stage}),') + ('),')'];
% Fit linear regression model based on the principal component scores
% stored in the predictor table (equivalent of 'pcamodel' on page 143,
% section 9.4.5 of 'Functional Data Analysis with R and MATLAB.pdf'):
fPCA_model.(TLC_stages{stage}) = fitlm(fPCA_table.(TLC_stages{stage}),fPCA_factor_equation.(TLC_stages{stage}));
% Extract functional principal component analysis coefficients for building
% linear regression model (equivalent of 'pcacoefs' on page 143, section
% 9.4.5 of 'Functional Data Analysis with R and MATLAB.pdf'):
fPCA_coefficients.(TLC_stages{stage}) = fPCA_model.(TLC_stages{stage}).Coefficients;
% Determine the variance of the coefficients from the fPCA coefficients
% (equivalent of 'coefvar' on page 143, section 9.4.5 of 'Functional Data
% Analysis with R and MATLAB.pdf'):
fPCA_coefficient_variance.(TLC_stages{stage}) = fPCA_coefficients.(TLC_stages{stage}){:,2}.^2;
% Create the corresponding functional data objects for the fPCA regression
% functions:
for i = 1:numel(model_indicator_subset)
% Select the current component of the selected model indicator subset:
model_component = model_indicator_subset(i);
% Combine the fPCA coefficients with the fPCA harmonic functions for
% the current patent indicator (equivalent of 'betafd' on page 143,
% section 9.4.5 of 'Functional Data Analysis with R and MATLAB.pdf'):
for j = 1:(fPCA_num_kept_components.(TLC_stages{stage}))
fPCA_estimated_beta_functional_data_object_harmonics.(TLC_stages{stage}){i,j} = times(fPCA_coefficients.(TLC_stages{stage}){j+1+((i-1)*fPCA_num_kept_components.(TLC_stages{stage})),1},fPCA_harmonics.(TLC_stages{stage}){i}(j));
if j == 1
fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i} = fPCA_estimated_beta_functional_data_object_harmonics.(TLC_stages{stage}){i,j};
else
fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i} = fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i} + fPCA_estimated_beta_functional_data_object_harmonics.(TLC_stages{stage}){i,j};
end
end
% Combine the fPCA coefficient variances with the square of the fPCA
% harmonic functions for the current patent indicator (equivalent of
% 'betavar' on page 143, section 9.4.5 of 'Functional Data Analysis
% with R and MATLAB.pdf'):
for j = 1:(fPCA_num_kept_components.(TLC_stages{stage}))
fPCA_beta_variance_harmonics.(TLC_stages{stage}){i,j} = times(fPCA_coefficient_variance.(TLC_stages{stage})(j+1+((i-1)*fPCA_num_kept_components.(TLC_stages{stage}))),power(fPCA_harmonics.(TLC_stages{stage}){i}(j),2));
if j == 1
fPCA_beta_variance.(TLC_stages{stage}){i} = fPCA_beta_variance_harmonics.(TLC_stages{stage}){i,j};
else
fPCA_beta_variance.(TLC_stages{stage}){i} = fPCA_beta_variance.(TLC_stages{stage}){i} + fPCA_beta_variance_harmonics.(TLC_stages{stage}){i,j};
end
end
% Plot the current fPCA regression coefficient along with 95% confidence
% bounds:
figure_name = ['Estimated fPCA regression coefficient for predicting technology cluster from ',patent_indicator_column_names{model_component},' - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
plot(fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i});
line(fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i}+times(sqrt(fPCA_beta_variance.(TLC_stages{stage}){i}),2));
line(fPCA_estimated_beta_functional_data_object.(TLC_stages{stage}){i}-times(sqrt(fPCA_beta_variance.(TLC_stages{stage}){i}),2));
% Resize y-axis to fit the confidence bounds curves:
ylim('auto');
% Add title, X, and Y labels to subplot:
title(figure_name);
xlabel('Normalised time (scaled)','FontSize',12)
ylabel(['Beta for ',patent_indicator_column_names{model_component}],'FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
end
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Set figures generated to be visible again by default:
set(0,'DefaultFigureVisible','on')
%% Apply permutation testing to the regression functions estimated by the high-dimensional functional regression model in order to evaluate the F-statistic
% NB: THIS SECTION OF THE SCRIPT TAKES A LONG TIME TO RUN, SO ONLY RUN IT
% WHEN NEEDED!
% Generate figure for visual displaying the null distribution versus the
% qth quantile and observed F-statistic:
figure_name = ['Permutation F-Test and null distribution for the high-dimensional functional regression model - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Count the proportion of permutation F values that are larger than the F
% statistic for the high-dimensional functional regression model (see help
% on 'Fperm_fd' for details, along with section 9.5 of 'Functional Data
% Analysis with R and MATLAB.pdf'):
F_permutation_test.(TLC_stages{stage}) = Fperm_fd(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),beta_basis_systems.(TLC_stages{stage}),[],1000,[],0.95,1);
% Add title and Y label to plot:
title(figure_name);
ylabel('Frequency of F values','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Apply permutation testing to the regression functions estimated by the low-dimensional functional regression model in order to evaluate the F-statistic
% NB: THIS SECTION OF THE SCRIPT TAKES A LONG TIME TO RUN, SO ONLY RUN IT
% WHEN NEEDED!
% Generate figure for visual displaying the null distribution versus the
% qth quantile and observed F-statistic:
figure_name = ['Permutation F-Test and null distribution for the low-dimensional functional regression model - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Count the proportion of permutation F values that are larger than the F
% statistic for the low-dimensional functional regression model (see help
% on 'Fperm_fd' for details, along with section 9.5 of 'Functional Data
% Analysis with R and MATLAB.pdf'):
low_dimensional_F_permutation_test.(TLC_stages{stage}) = Fperm_fd(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),low_dimensional_beta_basis_systems.(TLC_stages{stage}),[],1000,[],0.95,1);
% Add title and Y label to plot:
title(figure_name);
ylabel('Frequency of F values','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Apply permutation testing to the regression functions estimated by the constant basis system functional regression model in order to evaluate the F-statistic
% NB: THIS SECTION OF THE SCRIPT TAKES A LONG TIME TO RUN, SO ONLY RUN IT
% WHEN NEEDED!
% Generate figure for visual displaying the null distribution versus the
% qth quantile and observed F-statistic:
figure_name = ['Permutation F-Test and null distribution for the constant basis system functional regression model - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Count the proportion of permutation F values that are larger than the F
% statistic for the constant basis system functional regression model (see
% help on 'Fperm_fd' for details, along with section 9.5 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
conbasis_F_permutation_test.(TLC_stages{stage}) = Fperm_fd(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),conbasis_beta_basis_systems.(TLC_stages{stage}),[],1000,[],0.95,1);
% Add title and Y label to plot:
title(figure_name);
ylabel('Frequency of F values','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()
%% Apply permutation testing to the regression functions estimated by the monomial basis system functional regression model in order to evaluate the F-statistic
% NB: THIS SECTION OF THE SCRIPT TAKES A LONG TIME TO RUN, SO ONLY RUN IT
% WHEN NEEDED!
% Generate figure for visual displaying the null distribution versus the
% qth quantile and observed F-statistic:
figure_name = ['Permutation F-Test and null distribution for the monomial basis system functional regression model - ',TLC_stages{stage}];
figure('Name',figure_name,'NumberTitle','off','OuterPosition', [1, 1, scrsz(3), scrsz(4)]);
hold on
% Count the proportion of permutation F values that are larger than the F
% statistic for the monomial basis system functional regression model (see
% help on 'Fperm_fd' for details, along with section 9.5 of 'Functional
% Data Analysis with R and MATLAB.pdf'):
monbasis_F_permutation_test.(TLC_stages{stage}) = Fperm_fd(technology_data_filtered.known_cluster_id.(TLC_stages{stage}),covariates.(TLC_stages{stage}),monbasis_beta_basis_systems.(TLC_stages{stage}),[],1000,[],0.95,1);
% Add title and Y label to plot:
title(figure_name);
ylabel('Frequency of F values','FontSize',12)
% Set plots to be invisible now, but visible when opened later:
set(gcf,'Visible','off','CreateFcn','set(gcf,''Visible'',''on'')')
% Save figure:
saveas(gcf,figure_name,'fig')
hold off
%% Save all variables to a MAT file:
save('functional_data_analysis.mat');
toc()