Skip to content

Commit

Permalink
chore: migrate e2e tests to new site (#247)
Browse files Browse the repository at this point in the history
* chore: migrate e2e tests to new site

* chore: bane of my existence: >=
  • Loading branch information
vladfrangu authored Oct 31, 2023
1 parent 26b10da commit dddf6db
Show file tree
Hide file tree
Showing 9 changed files with 237 additions and 159 deletions.
2 changes: 1 addition & 1 deletion test/e2e/cheerio-ignoreSslErrors-disabled/test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ await expect(stats.requestsFinished > 5 && stats.requestsFinished < 10, 'All req
await expect(stats.requestsFailed > 20 && stats.requestsFailed < 30, 'Number of failed requests');

const datasetItems = await getDatasetItems(testDir);
await expect(datasetItems.length > 5 && datasetItems.length < 10, 'Number of dataset items');
await expect(datasetItems.length >= 5 && datasetItems.length < 10, 'Number of dataset items');
await expect(validateDataset(datasetItems, ['url', 'title']), 'Dataset items validation');

process.exit(0);
42 changes: 26 additions & 16 deletions test/e2e/cheerio-page-info/test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ process.exit = () => {};

await run(testDir, 'cheerio-scraper', {
startUrls: [{
url: 'https://apify.com/apify',
url: 'https://warehouse-theme-metal.myshopify.com/collections/all-tvs',
method: 'GET',
userData: { label: 'START' },
}],
keepUrlFragments: false,
pseudoUrls: [{
purl: 'https://apify.com/apify/web-scraper',
purl: 'https://warehouse-theme-metal.myshopify.com/products/sony-xbr-65x950g-65-class-64-5-diag-bravia-4k-hdr-ultra-hd-tv',
method: 'GET',
userData: { label: 'DETAIL' },
}],
Expand All @@ -31,19 +31,30 @@ await run(testDir, 'cheerio-scraper', {
log.info(`Scraping ${url}`);
await skipLinks();

const uniqueIdentifier = url.split('/').slice(-2).join('/');
const title = $('header h1').text();
const description = $('div.Section-body > div > p').text();
const modifiedDate = $('div:nth-of-type(2) > ul > li:nth-of-type(3)').text();
const runCount = $('div:nth-of-type(2) > ul > li:nth-of-type(2)').text();
const urlPart = url.split('/').slice(-1); // ['sennheiser-mke-440-professional-stereo-shotgun-microphone-mke-440']
const manufacturer = urlPart[0].split('-')[0]; // 'sennheiser'
const title = $('.product-meta h1').text();
const sku = $('span.product-meta__sku-number').text();

const rawPrice = $('span.price')
.filter((_, el) => $(el).text().includes('$'))
.first()
.text()
.split('$')[1];
const price = Number(rawPrice.replaceAll(',', ''));

const inStock = $('span.product-form__inventory')
.first()
.filter((_, el) => $(el).text().includes('In stock'))
.length !== 0;

return {
url,
uniqueIdentifier,
manufacturer,
title,
description,
modifiedDate,
runCount,
sku,
currentPrice: price,
availableInStock: inStock,
};
}
},
Expand All @@ -66,12 +77,11 @@ await expect(
datasetItems,
[
'url',
'manufacturer',
'title',
'uniqueIdentifier',
'description',
// Skip modifiedAt and runCount since they changed
// 'modifiedDate',
// 'runCount',
'sku',
'currentPrice',
'availableInStock',
],
),
'Dataset items validation',
Expand Down
35 changes: 19 additions & 16 deletions test/e2e/jsdom-page-info/test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ process.exit = () => {};

await run(testDir, 'jsdom-scraper', {
startUrls: [{
url: 'https://apify.com/apify',
url: 'https://warehouse-theme-metal.myshopify.com/collections/all-tvs',
method: 'GET',
userData: { label: 'START' },
}],
keepUrlFragments: false,
pseudoUrls: [{
purl: 'https://apify.com/apify/web-scraper',
purl: 'https://warehouse-theme-metal.myshopify.com/products/sony-xbr-65x950g-65-class-64-5-diag-bravia-4k-hdr-ultra-hd-tv',
method: 'GET',
userData: { label: 'DETAIL' },
}],
Expand All @@ -32,19 +32,23 @@ await run(testDir, 'jsdom-scraper', {
log.info(`Scraping ${url}`);
await skipLinks();

const uniqueIdentifier = url.split('/').slice(-2).join('/');
const title = document.querySelector('header h1').textContent;
const description = document.querySelector('div.Section-body > div > p').textContent;
const modifiedDate = document.querySelector('div:nth-of-type(2) > ul > li:nth-of-type(3)').textContent;
const runCount = document.querySelector('div:nth-of-type(2) > ul > li:nth-of-type(2)').textContent;
const urlPart = url.split('/').slice(-1); // ['sennheiser-mke-440-professional-stereo-shotgun-microphone-mke-440']
const manufacturer = urlPart[0].split('-')[0]; // 'sennheiser'
const title = document.querySelector('.product-meta h1').textContent;
const sku = document.querySelector('span.product-meta__sku-number').textContent;

const rawPrice = document.querySelector('span.price').textContent.split('$')[1];
const price = Number(rawPrice.replaceAll(',', ''));

const inStock = document.querySelector('span.product-form__inventory').textContent.includes('In stock');

return {
url,
uniqueIdentifier,
manufacturer,
title,
description,
modifiedDate,
runCount,
sku,
currentPrice: price,
availableInStock: inStock,
};
}
},
Expand All @@ -68,12 +72,11 @@ await expect(
datasetItems,
[
'url',
'manufacturer',
'title',
'uniqueIdentifier',
'description',
// Skip modifiedAt and runCount since they changed
// 'modifiedDate',
// 'runCount',
'sku',
'currentPrice',
'availableInStock',
],
),
'Dataset items validation',
Expand Down
60 changes: 33 additions & 27 deletions test/e2e/puppeteer-page-info/test.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import { getTestDir, getStats, getDatasetItems, run, expect, validateDataset, skipTest } from '../tools.mjs';

skipTest('Unstable test in CI, locally it works if your internet and machine is fast enough.');
import { getTestDir, getStats, getDatasetItems, run, expect, validateDataset } from '../tools.mjs';

const testDir = getTestDir(import.meta.url);

Expand All @@ -9,12 +7,12 @@ process.exit = () => {};

await run(testDir, 'puppeteer-scraper', {
startUrls: [{
url: 'https://apify.com/store',
url: 'https://warehouse-theme-metal.myshopify.com/collections/all-tvs',
method: 'GET',
userData: { label: 'START' },
}],
pseudoUrls: [{
purl: 'https://apify.com/apify/web-scraper',
purl: 'https://warehouse-theme-metal.myshopify.com/products/sony-xbr-65x950g-65-class-64-5-diag-bravia-4k-hdr-ultra-hd-tv',
method: 'GET',
userData: { label: 'DETAIL' },
}],
Expand All @@ -32,26 +30,35 @@ await run(testDir, 'puppeteer-scraper', {
log.info(`Scraping ${url}`);
await skipLinks();

const uniqueIdentifier = url.split('/').slice(-2).join('/');
const urlPart = url.split('/').slice(-1); // ['sennheiser-mke-440-professional-stereo-shotgun-microphone-mke-440']
const manufacturer = urlPart[0].split('-')[0]; // 'sennheiser'

const titleP = page.$eval('header h1', ((el) => el.textContent));
const descriptionP = page.$eval('div.Section-body > div > p', ((el) => el.textContent));
const modifiedTimestampP = page.$eval('div:nth-of-type(2) > ul > li:nth-of-type(3)', (el) => el.textContent);
const runCountTextP = page.$eval('div:nth-of-type(2) > ul > li:nth-of-type(2)', ((el) => el.textContent));
const title = await page.locator('.product-meta h1').map((el) => el.textContent).wait();
const sku = await page.locator('span.product-meta__sku-number').map((el) => el.textContent).wait();

const [
title,
description,
modifiedTimestamp,
runCountText,
] = await Promise.all([
titleP,
descriptionP,
modifiedTimestampP,
runCountTextP,
]);
const rawPriceString = await page
.locator('span.price')
.filter((el) => el.textContent.includes('$'))
.map((el) => el.textContent)
.wait();

const rawPrice = rawPriceString.split('$')[1];
const price = Number(rawPrice.replaceAll(',', ''));

return { url, uniqueIdentifier, title, description, modifiedDate: modifiedTimestamp, runCount: runCountText };
const inStock = await page
.locator('span.product-form__inventory')
.filter((el) => el.textContent.includes('In stock'))
.map((el) => (!!el))
.wait();

return {
url,
manufacturer,
title,
sku,
currentPrice: price,
availableInStock: inStock,
};
}
},
proxyConfiguration: { useApifyProxy: false },
Expand All @@ -78,12 +85,11 @@ await expect(
datasetItems,
[
'url',
'manufacturer',
'title',
'uniqueIdentifier',
'description',
// Skip modifiedAt and runCount since they changed
// 'modifiedDate',
// 'runCount',
'sku',
'currentPrice',
'availableInStock',
],
),
'Dataset items validation',
Expand Down
71 changes: 47 additions & 24 deletions test/e2e/puppeteer-store-pagination-jquery/test.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import { getTestDir, getStats, getDatasetItems, run, expect, validateDataset, skipTest } from '../tools.mjs';

skipTest('Unstable test in CI, locally it works if your internet and machine is fast enough.');
import { getTestDir, getStats, getDatasetItems, run, expect, validateDataset } from '../tools.mjs';

const testDir = getTestDir(import.meta.url);

Expand All @@ -9,7 +7,7 @@ process.exit = () => {};

await run(testDir, 'puppeteer-scraper', {
startUrls: [{
url: 'https://apify.com/store',
url: 'https://warehouse-theme-metal.myshopify.com/collections/all-tvs',
method: 'GET',
userData: { label: 'START' },
}],
Expand All @@ -23,16 +21,16 @@ await run(testDir, 'puppeteer-scraper', {

async function handleStart({ log, page, enqueueLinks }) {
log.info('Store opened!');
const nextButtonSelector = '[data-test="pagination-button-next"]:not([disabled])';
// enqueue actor details from the first three pages of the store
for (let pageNo = 1; pageNo <= 3; pageNo++) {
const nextButtonSelector = '.pagination__next';
// enqueue product details from the first two pages of the store (pageNo runs from 1 to 2)
for (let pageNo = 1; pageNo < 3; pageNo++) {
// Wait for network events to finish
await page.waitForNetworkIdle();
// Enqueue all loaded links
await enqueueLinks({
selector: 'div.ActorStore-main div > a',
selector: 'a.product-item__image-wrapper',
label: 'DETAIL',
globs: [{ glob: 'https://apify.com/*/*' }],
globs: ['https://warehouse-theme-metal.myshopify.com/*/*'],
});
log.info(`Enqueued actors for page ${pageNo}`);
log.info('Loading the next page');
Expand All @@ -43,18 +41,44 @@ await run(testDir, 'puppeteer-scraper', {
async function handleDetail({ request: { url }, log, page, injectJQuery }) {
log.info(`Scraping ${url}`);
await injectJQuery();
const uniqueIdentifier = url.split('/').slice(-2).join('/');
const results = await page.evaluate(() => ({
title: $('header h1').text(),
description: $('div.Section-body > div > p').text(),
modifiedDate: $('div:nth-of-type(2) > ul > li:nth-of-type(3)').text(),
runCount: $('div:nth-of-type(2) > ul > li:nth-of-type(2)').text(),
}));

return { url, uniqueIdentifier, ...results };

const urlPart = url.split('/').slice(-1); // ['sennheiser-mke-440-professional-stereo-shotgun-microphone-mke-440']
const manufacturer = urlPart[0].split('-')[0]; // 'sennheiser'

/* eslint-disable no-undef */
const results = await page.evaluate(() => {
const rawPrice = $('span.price')
.filter((_, el) => $(el).text().includes('$'))
.first()
.text()
.split('$')[1];

const price = Number(rawPrice.replaceAll(',', ''));

const inStock = $('span.product-form__inventory')
.first()
.filter((_, el) => $(el).text().includes('In stock'))
.length !== 0;

return {
title: $('.product-meta h1').text(),
sku: $('span.product-meta__sku-number').text(),
currentPrice: price,
availableInStock: inStock,
};
});

return { url, manufacturer, ...results };
}
},
preNavigationHooks: "[\n ({ session, request }, goToOptions) => {\n session?.setCookies([{ name: 'OptanonAlertBoxClosed', value: new Date().toISOString() }], request.url);\n goToOptions.waitUntil = ['networkidle2'];\n }\n]",
preNavigationHooks: `[
async ({ page }, goToOptions) => {
await page.evaluateOnNewDocument(() => {
localStorage.setItem('themeExitPopup', 'true');
});
goToOptions.waitUntil = ['networkidle2'];
}
]`,
proxyConfiguration: { useApifyProxy: false },
proxyRotation: 'RECOMMENDED',
useChrome: false,
Expand All @@ -80,12 +104,11 @@ await expect(
datasetItems,
[
'url',
'manufacturer',
'title',
'uniqueIdentifier',
'description',
// Skip modifiedAt and runCount since they changed
// 'modifiedDate',
// 'runCount',
'sku',
'currentPrice',
'availableInStock',
],
),
'Dataset items validation',
Expand Down
Loading

0 comments on commit dddf6db

Please sign in to comment.