To Exceed The Importxml Limit On Google Spreadsheet
Solution 1:
I created a custom import function that overcomes all limits of IMPORTXML I have a sheet using this in about 800 cells and it works great.
It makes use of Google Sheet’s custom scripts (Tools > Script editor…) and searches through content using regex instead of xpath.
functionimportRegex(url, regexInput) {
var output = '';
var fetchedUrl = UrlFetchApp.fetch(url, {muteHttpExceptions: true});
if (fetchedUrl) {
var html = fetchedUrl.getContentText();
if (html.length && regexInput.length) {
output = html.match(newRegExp(regexInput, 'i'))[1];
}
}
// Grace period to not overloadUtilities.sleep(1000);
return output;
}
You can then use this function like any function.
=importRegex("https://example.com", "<title>(.*)<\/title>")
Of course, you can also reference cells.
=importRegex(A2, "<title>(.*)<\/title>")
If you don’t want to see HTML entities in the output, you can use this function.
var htmlEntities = {
nbsp: ' ',
cent: '¢',
pound: '£',
yen: '¥',
euro: '€',
copy: '©',
reg: '®',
lt: '<',
gt: '>',
mdash: '–',
ndash: '-',
quot: '"',
amp: '&',
apos: '\''
};
functionunescapeHTML(str) {
return str.replace(/\&([^;]+);/g, function (entity, entityCode) {
var match;
if (entityCode in htmlEntities) {
return htmlEntities[entityCode];
} elseif (match = entityCode.match(/^#x([\da-fA-F]+)$/)) {
returnString.fromCharCode(parseInt(match[1], 16));
} elseif (match = entityCode.match(/^#(\d+)$/)) {
returnString.fromCharCode(~~match[1]);
} else {
return entity;
}
});
};
All together…
functionimportRegex(url, regexInput) {
var output = '';
var fetchedUrl = UrlFetchApp.fetch(url, {muteHttpExceptions: true});
if (fetchedUrl) {
var html = fetchedUrl.getContentText();
if (html.length && regexInput.length) {
output = html.match(newRegExp(regexInput, 'i'))[1];
}
}
// Grace period to not overloadUtilities.sleep(1000);
return unescapeHTML(output);
}
var htmlEntities = {
nbsp: ' ',
cent: '¢',
pound: '£',
yen: '¥',
euro: '€',
copy: '©',
reg: '®',
lt: '<',
gt: '>',
mdash: '–',
ndash: '-',
quot: '"',
amp: '&',
apos: '\''
};
functionunescapeHTML(str) {
return str.replace(/\&([^;]+);/g, function (entity, entityCode) {
var match;
if (entityCode in htmlEntities) {
return htmlEntities[entityCode];
} elseif (match = entityCode.match(/^#x([\da-fA-F]+)$/)) {
returnString.fromCharCode(parseInt(match[1], 16));
} elseif (match = entityCode.match(/^#(\d+)$/)) {
returnString.fromCharCode(~~match[1]);
} else {
return entity;
}
});
};
Solution 2:
There is no such script to exceed the limits. Since the code is run on a Google machine (server) you can not cheat. Some limits are bind to your spreadsheet, so you could try to use multiple spreadsheets, if that helps.
Post a Comment for "To Exceed The Importxml Limit On Google Spreadsheet"