% Batch-convert Orbit patent data files (saved as html tables) into .MAT
% files by calling 'htmlTableToCell.m' once per matching .html file found in
% the working directory.
%
% Expected input file name (where <folder> is the name of the working
% directory and <first>/<last> are the record numbers in the file):
%   '<folder> patents (Orbit search results <first> - <last>).html'
% Output file name, written to the same directory:
%   '<folder> patent data <first> - <last> <date>.mat'
% Each .MAT file holds a single variable named 'cell_array'.
%
% .html files whose names do not follow the expected pattern are skipped
% with a warning instead of aborting the whole batch.

% Start from a clean workspace. 'clearvars' is sufficient here; 'clear all'
% would additionally flush compiled functions and debugger breakpoints.
clearvars
close all
tic()

% Working directory: either ask the user (uncomment the 'uigetdir' line) or
% just take the current directory.
% NOTE(review): 'uigetdir' returns 0 when the dialog is cancelled; that case
% must be handled before enabling the line below.
% filepath = uigetdir;
filepath = pwd;

% Name of the deepest folder of the working directory; it is the prefix of
% both the input and the output file names.
[upperPath,deepestFolder] = fileparts(filepath);

% List all html files in the working directory (files that do not match the
% expected naming scheme are filtered out inside the loop):
files = dir(fullfile(filepath,'*.html'));

% Use the first column header to identify the start of the table. The
% variable is deliberately not called 'table', which would shadow MATLAB's
% built-in table type.
tableSpec.idTableBy.plaintextInFirstTD = 'Family Accession Nbr';

% File name pattern. The folder name is escaped so that characters such as
% '(', '+' or '.' in it are matched literally, and '(\d+)' captures the
% first and last record numbers (at least one digit each).
namePattern = [regexptranslate('escape',deepestFolder), ...
    ' patents \(Orbit search results (\d+) - (\d+)\)\.html'];

% Iterate through the input .html files (the index is not called 'i' so
% that the imaginary unit is not shadowed):
for k = 1:numel(files)
    % Reset the cell array from the previous iteration:
    clearvars cell_array

    % Select the next html file to convert:
    name = files(k).name;

    % Identify the record numbers included in this html file. With 'once'
    % the result is a 1x2 cell of tokens, or empty if there is no match.
    batch_limits = regexp(name,namePattern,'tokens','once');
    if isempty(batch_limits)
        warning('htmlBatch:unexpectedFileName', ...
            'Skipping "%s": file name does not match the expected pattern.', ...
            name);
        continue
    end
    firstRecord = batch_limits{1};
    lastRecord = batch_limits{2};

    % Descriptive variable name for this batch.
    % NOTE(review): this name is computed but not used below; the data is
    % always saved under the variable name 'cell_array'.
    cell_array_name = [strrep(lower(deepestFolder),' ','_'),'_patents_', ...
        firstRecord,'_',lastRecord];

    % Call 'htmlTableToCell.m' for the current html file:
    cell_array = htmlTableToCell(fullfile(filepath,name),tableSpec);

    % Save the current cell array to a .MAT file. The tokens are held in
    % plain variables because whitespace inside [...] separates elements,
    % so an index expression written with a space before '{' is not read
    % as an index.
    % savefile = [deepestFolder,' patent data ',firstRecord,' - ',lastRecord,' test','.mat'];
    savefile = [deepestFolder,' patent data ',firstRecord,' - ',lastRecord, ...
        ' ',date,'.mat'];
    save(fullfile(filepath,savefile),'cell_array');

    % Report the cumulative elapsed time after each file:
    toc()
end

% Report the total elapsed time:
toc()