I am using axios to send a GET request to Google Scholar, and cheerio to extract the data from the response. When a title is too long, I get a truncated value such as: title: 'Comparison of live-birth defects after luteal-phase ovarian stimulation vs. conventional ovarian stimulation for in vitro fertilization and vitrified embryo transfer�…'.
This is the code:
const free_proxy_url1 = "https://free-proxy-list.net";

// Scrape the proxy list page, pick one HTTPS-capable proxy at random, and
// query Google Scholar through it. Each search hit is appended to
// `scholar_results` as { title, link, year }.
request(free_proxy_url1, (err, response, body) => {
  // Without the proxy list page there is nothing to parse.
  if (err || !body) {
    console.log(err || new Error("Empty response from proxy list"));
    return;
  }

  const $ = cheerio.load(body);
  const ipResults = [];
  $(".table-responsive > div > table > tbody > tr").each((i, el) => {
    // Column 7 of each row is compared against "yes" and stored as `https`.
    const httpsFlag = $(el).find("td:nth-child(7)").text();
    if (httpsFlag === "yes") {
      ipResults.push({
        ip: $(el).find("td:nth-child(1)").text(),
        port: Number($(el).find("td:nth-child(2)").text()),
        https: httpsFlag,
      });
    }
  });

  // Indexing an empty list would throw a TypeError below, so bail out early.
  if (ipResults.length === 0) {
    console.log(new Error("No HTTPS-capable proxy found"));
    return;
  }

  const rand = Math.floor(Math.random() * ipResults.length);
  const searchTerm = "AI";
  const proxy = {
    host: ipResults[rand].ip,
    port: ipResults[rand].port,
  };

  axios
    .get(
      // Encode the term so spaces and reserved characters survive in the query.
      `https://scholar.google.com/scholar?q=${encodeURIComponent(searchTerm)}`,
      // axios reads proxy settings from the `proxy` key of the request config;
      // a bare { host, port } config object is not treated as a proxy.
      { proxy }
    )
    .then(result => {
      const $scholar = cheerio.load(result.data);
      $scholar("div.gs_ri").each((i, el) => {
        const entry = $scholar(el);
        // The first 4-digit run in the "gs_a" line is taken as the year.
        const yearMatch = entry.find("div.gs_a").text().match(/\d{4}/);
        // NOTE(review): the ellipsis in long titles appears to be part of the
        // fetched text itself, so the full title is presumably not available
        // in this result list — confirm against the page markup.
        scholar_results.push({
          title: entry.find("h3.gs_rt a").text().trim(),
          link: entry.find(".gs_rt a").attr("href"),
          year: yearMatch ? Number.parseInt(yearMatch[0], 10) : null,
        });
      });
    })
    .catch(err => {
      console.log(err);
    });
});
Is there any way of getting the full title and not the truncated one?
Via Active questions tagged javascript - Stack Overflow https://ift.tt/UjF2Rak
Comments
Post a Comment