Skip to content

Commit

Permalink
Create general scroller
Browse files Browse the repository at this point in the history
  • Loading branch information
nigelnindodev committed Oct 18, 2023
1 parent 3e9bead commit 1baf93e
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 22 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"cSpell.words": [
"BETIKA",
"Millis",
"SPORTPESA",
"typeorm"
]
Expand Down
12 changes: 10 additions & 2 deletions src/core/scrapping/orbit/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { OrbitProvider } from "../../../bet_providers/orbit";
import { RedisSingleton } from "../../../datastores/redis";
import { PuppeteerPageLoadPolicy } from "../../../utils/types/common";
import { Result } from "../../../utils/types/result_type";
import { getHtmlForPage } from "../scrolling_scrapper";
import { getHtmlForScrollingPage } from "../scrolling_scrapper";

const {logger} = getConfig();

Expand Down Expand Up @@ -47,7 +47,15 @@ export class OrbitScrapper extends BaseScrapper {

logger.info("New request to fetch game events: ", metadata);

const getHtmlResult = await getHtmlForPage(browserInstance, game.url, PuppeteerPageLoadPolicy.LOAD);
const getHtmlResult = await getHtmlForScrollingPage(
browserInstance,
game.url,
PuppeteerPageLoadPolicy.LOAD,
".biab_body.contentWrap", // scrollingElementSelector
2000, // delayBeforeNextScrollAttemptMillis
30, // numScrollAttempts
150 // scrollDelta
);

if (getHtmlResult.result === "success") {
logger.info("Successfully fetched html for url. ", metadata);
Expand Down
41 changes: 21 additions & 20 deletions src/core/scrapping/scrolling_scrapper/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@ import { getConfig } from '../../..';

const {logger} = getConfig();

export async function getHtmlForPage(
export async function getHtmlForScrollingPage(
browser: puppeteer.Browser,
url: string,
waitUntilPolicy: PuppeteerPageLoadPolicy
waitUntilPolicy: PuppeteerPageLoadPolicy,
scrollingElementSelector: string,
delayBeforeNextScrollAttemptMillis: number,
numScrollAttempts: number,
scrollDelta: number
): Promise<Result<SimpleWebPage, Error>> {
try {
const page1 = await browser.newPage();
await page1.setViewport({width: 1280, height: 720});
await page1.goto(url, {waitUntil: waitUntilPolicy});
await setTimeout(15000);
await getScrollContent(page1);
await getScrollContent(page1, scrollingElementSelector, delayBeforeNextScrollAttemptMillis, numScrollAttempts, scrollDelta);
const html = await page1.content();
return {result: "success", value: {html, forUrl: url}};
} catch (e: any) {
Expand All @@ -32,33 +36,30 @@ export async function getHtmlForPage(
* TODO: Move selector code to individual provider, but keep scroll behavior same across the board.
* @param page
*/
async function getScrollContent(page: puppeteer.Page): Promise<boolean> {
async function getScrollContent(
page: puppeteer.Page,
scrollingElementSelector: string,
delayBeforeNextScrollAttemptMillis: number,
numScrollAttempts: number,
scrollDelta: number): Promise<boolean> {
logger.trace("Running scroll down function");
const section = await page.$('.biab_body.contentWrap'); // find containing body of the content. In this case it's a <div class="biab_body contentWrap">
const section = await page.$(scrollingElementSelector); // find containing body of the content. In this case it's a <div class="biab_body contentWrap">
if (section !== null) {
logger.trace("Found section");

/**
* Using a set number of scrolls to fetch new content.
* Chose this method for simplicity, but a more advanced method
* would check for no changes in the dimensions of the bounding
* box to determine that no new content is available.
*/
const numScrolls = 30;
logger.info("Found scroll section");
let counter = 1;
const delayBetweenScrollsMills = 2000; // give time for the page to make AJAX call for new content.

for await (const value of setInterval(delayBetweenScrollsMills, numScrolls)) {
for await (const value of setInterval(delayBeforeNextScrollAttemptMillis, numScrollAttempts)) {
if (counter > value) {
break; // stop scrolling for new data
} else {
const boundingBox = await getBoundingBox(section);
scrollDown(page, boundingBox);
scrollDown(page, boundingBox, scrollDelta);
counter = counter + 1;
}
}
return true;
} else {
logger.trace("Failed to find section.");
logger.error("Failed to find scroll section.");
return false;
}
}
Expand All @@ -78,13 +79,13 @@ async function getBoundingBox(elementHandle: puppeteer.ElementHandle): Promise<p
}
}

async function scrollDown(page: puppeteer.Page, boundingBox: puppeteer.BoundingBox): Promise<void> {
async function scrollDown(page: puppeteer.Page, boundingBox: puppeteer.BoundingBox, scrollDelta: number): Promise<void> {
// move mouse to the center of the element to be scrolled
page.mouse.move(
boundingBox.x + boundingBox.width / 2,
boundingBox.y + boundingBox.height / 2
);

// use the mouse scroll wheel to scroll. Change scroll down delta according to your needs.
await page.mouse.wheel({deltaY: 300});
await page.mouse.wheel({deltaY: scrollDelta});
}
71 changes: 71 additions & 0 deletions src/testbed/build_scrapers/orbit/three_way/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
//@ts-ignore
import * as cheerio from "cheerio";

import { getConfig } from "../../../..";
import { readFileAsync } from "../../../../utils/file_system";

const {logger} = getConfig();

/**
 * Test bed for developing the Orbit three way (win/draw/loss) parser.
 *
 * Reads a saved html page from disk, walks the market rows with cheerio and
 * traces what was extracted for each game. Nothing is persisted or returned.
 */
class OrbitThreeWayTestBed {
    /**
     * Loads "data/test_html/orbit/football.html" and traces the team names,
     * the text of the `span.cursor-help` element (logged as numBets) and the
     * odds found in every market row.
     *
     * @throws Error when the html file could not be read.
     */
    public async run(): Promise<void> {
        const htmlDataResult = await readFileAsync("data/test_html/orbit/football.html");
        if (htmlDataResult.result !== "success") {
            const message = "Could not get html data";
            throw new Error(message);
        }

        logger.info("Success fetching html");
        const $ = cheerio.load(htmlDataResult.value);

        /**
         * Bypassing .biab_group-markets-table div and going directly to div.rowsContainer
         */
        $("div.rowsContainer").each((_, element) => {
            const data = $(element).find("div.biab_group-markets-table-row");

            data.each((_, element_1) => {
                const row = $(element_1);

                const teamNames = row.find("div > div.biab_market-title-team-names");
                const clubA = teamNames.find("p:nth-child(1)").text().trim();
                const clubB = teamNames.find("p:nth-child(2)").text().trim();

                const numBets = row.find("div > span.cursor-help").text().trim();

                const oddsWrapper = row.find("div.styles_betContent__wrapper__25jEo");

                //const oddFinder = "div.styles_contents__Kf8LQ > button > span > div > span.styles_betOdds__bxapE";

                const odds = oddsWrapper.find("div.styles_contents__Kf8LQ > button > span > div > span.styles_betOdds__bxapE");

                // expecting 3 pairs of odds for W,D,L. So 6 in total
                const oddsArray: string[] = [];
                odds.each((_, element_2) => {
                    // read the text once and reuse it for both the array and the trace
                    const odd = $(element_2).text().trim();
                    oddsArray.push(odd);
                    logger.trace(odd);
                });

                logger.trace(`${clubA} vs ${clubB}`);
                logger.trace("numBets: ", numBets);
                //logger.trace("odd1: ", $(oddsWrapper).find(`${oddFinder}:nth-child(1)`));
            });
        });
    }

    /**
     * Tells whether a game has complete odds data, so that games with
     * missing odds can be removed.
     *
     * @param oddsArray odds text collected for one game
     * @returns false when at least one entry is the empty string, true
     * otherwise (including for an empty array)
     */
    isValid(oddsArray: string[]): boolean {
        return oddsArray.every(odd => odd !== "");
    }
}

// Entry point: run the test bed as soon as this module is loaded.
const testBed = new OrbitThreeWayTestBed();
// run() rejects when the html file cannot be read; handle the rejection
// explicitly instead of leaving a floating promise, and keep a failing exit code.
testBed.run().catch((e: unknown) => {
    logger.error("Orbit three way test bed failed: ", e);
    process.exitCode = 1;
});

0 comments on commit 1baf93e

Please sign in to comment.