Commit: Add scrollable content
nigelnindodev committed Oct 17, 2023
1 parent ca1d9e7 commit 3e9bead
Showing 10 changed files with 204 additions and 10 deletions.
14 changes: 14 additions & 0 deletions src/bet_providers/orbit.ts
@@ -0,0 +1,14 @@
import { BetProvider } from ".";
import { BetProviders, Games } from "../utils/types/common";

export class OrbitProvider extends BetProvider {
constructor() {
super(BetProviders.ORBIT, "src/config/orbit.json");
}

override getSupportedGames(): Games[] {
return [
Games.FOOTBALL
]
}
}
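For orientation, here is a minimal sketch of what the `BetProvider` base class extended above might look like, inferred from how it is used in this commit (the constructor arguments, `getConfig()`, and `getSupportedGames()`). The actual class lives elsewhere in the repository and may differ:

import { readFile } from "fs/promises";

import { BetProviders, Games } from "../utils/types/common";
import { Result } from "../utils/types/result_type";

// Assumed minimal config shape; see the interface sketch after the orbit.json diff below.
interface ProviderConfig {
    version: string;
    games: { name: string; betType: string; url: string }[];
}

// Hypothetical sketch of the base class; anything beyond the members used in
// this diff (name, getConfig, getSupportedGames) is an assumption.
export abstract class BetProvider {
    constructor(
        public readonly name: BetProviders,
        private readonly configPath: string
    ) {}

    abstract getSupportedGames(): Games[];

    // Reads and parses the provider's JSON config from disk.
    async getConfig(): Promise<Result<ProviderConfig, Error>> {
        try {
            const raw = await readFile(this.configPath, "utf-8");
            return {result: "success", value: JSON.parse(raw) as ProviderConfig};
        } catch (e: any) {
            return {result: "error", value: new Error(e.message)};
        }
    }
}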
5 changes: 5 additions & 0 deletions src/config/betika.json
@@ -10,6 +10,11 @@
"name": "Basketball",
"betType": "Three Way",
"url": "https://www.betika.com/lite/en-ke/?sport_id=30&tag_id=&tab_id=-2"
},
{
"name": "Football",
"betType": "Three Way",
"url": "https://www.betika.com/lite/en-ke/?tab_id=-2&page=9&sub_type_id=1,186&sport_id=14"
}
]
}
10 changes: 10 additions & 0 deletions src/config/orbit.json
@@ -0,0 +1,10 @@
{
"version": "1.0.0",
"games": [
{
"name": "Football",
"betType": "Three Way",
"url": "https://www.orbitxch.com/customer/sport/1"
}
]
}
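Both provider configs follow the same shape. A hedged TypeScript interface describing it — the field names mirror the JSON above, but the interface names and where they would live are assumptions:

// Assumed shape of the src/config/*.json provider configs.
export interface BetProviderGameConfig {
    name: string;    // e.g. "Football"
    betType: string; // e.g. "Three Way"
    url: string;     // entry page to scrape for this game
}

export interface BetProviderConfig {
    version: string;
    games: BetProviderGameConfig[];
}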
7 changes: 6 additions & 1 deletion src/core/scrapping/betika/index.ts
@@ -23,6 +23,11 @@ export class BetikaScrapper extends BaseScrapper {
this.scrapeIntervalDuration = 10000;
}

    /**
     * Fetches data from Betika. Data is fetched per sport from the lite version of the website,
     * where results are stored as numbered pages, so fetching includes a mechanism for paging
     * through to new pages (see the sketch after this file's diff).
     * @returns
     */
public async fetchData(): Promise<Result<boolean, Error>> {
const getBetProviderConfigResult = await this.betProvider.getConfig();

@@ -55,7 +60,7 @@
const getHtmlResult = await getHtmlForPage(browserInstance, completedUrl, PuppeteerPageLoadPolicy.DOM_CONTENT_LOADED);

if (getHtmlResult.result === "success") {
logger.info("Successfully fetched html for url", metadata);
logger.info("Successfully fetched html for url. ", metadata);
if (this.pageHasNoGameEvents(getHtmlResult.value.html)) {
logger.info("No game events found. Stopping HTML fetch for current game.", metadata);
break;
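The hunk above references a `completedUrl` whose construction is outside this diff. As a rough illustration of the paging mechanism the doc comment describes, a URL for a given page of Betika's lite site could be derived from the configured base URL like this — the helper name and approach are assumptions, not the committed code:

// Hypothetical helper: Betika's lite site exposes results as numbered pages
// via a `page` query parameter, so a paged URL can be derived from the base URL.
function buildPagedUrl(baseUrl: string, page: number): string {
    const url = new URL(baseUrl);
    url.searchParams.set("page", page.toString());
    return url.toString();
}

// Usage sketch: walk pages until the provider reports no more game events.
// const completedUrl = buildPagedUrl(game.url, currentPage);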
73 changes: 73 additions & 0 deletions src/core/scrapping/orbit/index.ts
@@ -0,0 +1,73 @@
import { BaseScrapper } from "..";
import { getConfig } from "../../..";
import { BetProvider } from "../../../bet_providers";
import { OrbitProvider } from "../../../bet_providers/orbit";
import { RedisSingleton } from "../../../datastores/redis";
import { PuppeteerPageLoadPolicy } from "../../../utils/types/common";
import { Result } from "../../../utils/types/result_type";
import { getHtmlForPage } from "../scrolling_scrapper";

const {logger} = getConfig();

export class OrbitScrapper extends BaseScrapper {
public override betProvider: BetProvider;
public override scrapeIntervalDuration: number;

constructor() {
super();
this.betProvider = new OrbitProvider();
this.scrapeIntervalDuration = 10000;
}

    /**
     * Fetches data from Orbit, which mirrors Betfair exchange data. Data is fetched per sport,
     * with each sport's events presented as an infinitely scrolling list.
     * @returns
     */
public async fetchData(): Promise<Result<boolean, Error>> {
const getBetProviderConfigResult = await this.betProvider.getConfig();

if (getBetProviderConfigResult.result === "error") {
logger.error("Failed to get config for provider: ", this.betProvider);
return getBetProviderConfigResult;
}

const getRedisPublisherResult = await RedisSingleton.getPublisher();

if (getRedisPublisherResult.result === "success") {
const betProviderConfig = getBetProviderConfigResult.value;
const browserInstance = await this.initializeBrowserInstance();

const result = betProviderConfig.games.map(async game => {
const metadata = {
betProviderName: this.betProvider.name,
game: game.name,
url: game.url
};

logger.info("New request to fetch game events: ", metadata);

const getHtmlResult = await getHtmlForPage(browserInstance, game.url, PuppeteerPageLoadPolicy.LOAD);

if (getHtmlResult.result === "success") {
logger.info("Successfully fetched html for url. ", metadata);
logger.info(getHtmlResult.value.html);
} else {
logger.error("An error occurred while fetching html for page", metadata);
}

return undefined;
});

await Promise.all(result);
await browserInstance.close();

return {
result: "success",
value: true
};
} else {
return getRedisPublisherResult;
}
}
}
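The scrappers lean on the `Result` type from `src/utils/types/result_type`, whose definition is not part of this commit. Judging from the call sites above (`result === "success"` / `"error"`, each carrying a `value`), it is presumably a discriminated union along these lines:

// Assumed definition, inferred from the call sites in this diff.
export type Result<T, E> =
    | { result: "success"; value: T }
    | { result: "error"; value: E };

This shape forces callers to branch on `result` before touching `value`, which is why each `fetchData` checks success/error explicitly rather than relying on thrown exceptions.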
90 changes: 90 additions & 0 deletions src/core/scrapping/scrolling_scrapper/index.ts
@@ -0,0 +1,90 @@
import { setTimeout, setInterval } from "timers/promises";

import * as puppeteer from 'puppeteer';

import { PuppeteerPageLoadPolicy, SimpleWebPage } from '../../../utils/types/common';
import { Result } from '../../../utils/types/result_type';
import { getConfig } from '../../..';

const {logger} = getConfig();

export async function getHtmlForPage(
browser: puppeteer.Browser,
url: string,
waitUntilPolicy: PuppeteerPageLoadPolicy
): Promise<Result<SimpleWebPage, Error>> {
try {
const page1 = await browser.newPage();
await page1.setViewport({width: 1280, height: 720});
await page1.goto(url, {waitUntil: waitUntilPolicy});
        await setTimeout(15000); // wait for the initial page content to load
        await getScrollContent(page1);
        const html = await page1.content();
        await page1.close(); // release the tab, matching the simple scraper's behaviour
        return {result: "success", value: {html, forUrl: url}};
} catch (e: any) {
const message = `An exception occurred while fetching data from scrolling page for url | ${url}`;
logger.error(message, e.message);
return {result: "error", value: new Error(e.message)};
}
}

/**
 * TODO: Move selector code to the individual providers, but keep the scroll behavior the same across the board.
* @param page
*/
async function getScrollContent(page: puppeteer.Page): Promise<boolean> {
logger.trace("Running scroll down function");
const section = await page.$('.biab_body.contentWrap'); // find containing body of the content. In this case it's a <div class="biab_body contentWrap">
if (section !== null) {
logger.trace("Found section");

        /**
         * Using a set number of scrolls to fetch new content.
         * This method was chosen for simplicity; a more advanced method
         * would check for no changes in the dimensions of the bounding
         * box to determine that no new content is available (a sketch of
         * that variant follows this file's diff).
         */
        const numScrolls = 30;
        let counter = 1;
        const delayBetweenScrollsMillis = 2000; // give the page time to make AJAX calls for new content.

        for await (const maxScrolls of setInterval(delayBetweenScrollsMillis, numScrolls)) {
            if (counter > maxScrolls) {
                break; // reached the scroll limit; stop scrolling for new data
            } else {
                const boundingBox = await getBoundingBox(section);
                await scrollDown(page, boundingBox);
                counter++; // count this scroll towards the limit
            }
        }
return true;
} else {
logger.trace("Failed to find section.");
return false;
}
}

/**
* Get the bounding box for the element to be scrolled.
* @param elementHandle
* @returns
*/
async function getBoundingBox(elementHandle: puppeteer.ElementHandle): Promise<puppeteer.BoundingBox> {
const boundingBox = await elementHandle.boundingBox();
if (boundingBox !== null) {
logger.trace(boundingBox);
return boundingBox;
} else {
throw new Error("Failed to find bounding box for provided element");
}
}

async function scrollDown(page: puppeteer.Page, boundingBox: puppeteer.BoundingBox): Promise<void> {
    // move the mouse to the center of the element to be scrolled
    await page.mouse.move(
        boundingBox.x + boundingBox.width / 2,
        boundingBox.y + boundingBox.height / 2
    );

    // use the mouse scroll wheel to scroll. Change the scroll-down delta according to your needs.
    await page.mouse.wheel({deltaY: 300});
}
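The comment in `getScrollContent` notes that a more robust stopping condition would watch the section's bounding box instead of scrolling a fixed number of times. A minimal, untested sketch of that variant, assuming the same puppeteer handles as above (the height-comparison heuristic is an assumption):

import { setInterval } from "timers/promises";

import * as puppeteer from "puppeteer";

// Scroll until the section's bounding box stops growing, i.e. until no new
// content appears to be loading.
async function scrollUntilStable(
    page: puppeteer.Page,
    section: puppeteer.ElementHandle,
    delayBetweenScrollsMillis: number = 2000
): Promise<void> {
    let previousHeight = -1;

    for await (const _ of setInterval(delayBetweenScrollsMillis)) {
        const boundingBox = await section.boundingBox();
        if (boundingBox === null) {
            break; // element detached from the DOM; nothing left to scroll
        }
        if (boundingBox.height === previousHeight) {
            break; // height unchanged since the last tick: assume no new content
        }
        previousHeight = boundingBox.height;

        // same wheel-based scroll as scrollDown above
        await page.mouse.move(
            boundingBox.x + boundingBox.width / 2,
            boundingBox.y + boundingBox.height / 2
        );
        await page.mouse.wheel({deltaY: 300});
    }
}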
4 changes: 2 additions & 2 deletions src/core/scrapping/simple_scraper/index.ts
@@ -25,12 +25,12 @@ export async function getHtmlForPage(
try {
const page1 = await browser.newPage();
await page1.goto(url, {waitUntil: waitUntilPolicy});
-        await setTimeout(3000); // wait for some time before fetching content
+        await setTimeout(15000); // wait for some time before fetching content
const html = await page1.content();
await page1.close();
return {result: "success", value: {html, forUrl: url}};
} catch (e: any) {
-        const message = `An exception occurred while fetching simple web page for url | ${url}`
+        const message = `An exception occurred while fetching simple web page for url | ${url}`;
logger.error(message, e.message);
return {result: "error", value: new Error(e.message)};
}
Empty file.
8 changes: 2 additions & 6 deletions src/testbed/testbed.ts
@@ -1,8 +1,4 @@
-import { BetikaProvider } from "../bet_providers/betika";
-import { BetikaScrapper } from "../core/scrapping/betika";
+import { OrbitScrapper } from "../core/scrapping/orbit";

-const betikaProvider = new BetikaProvider();
-betikaProvider.getConfig();
-
-const betikaScrapper = new BetikaScrapper();
+const betikaScrapper = new OrbitScrapper();
betikaScrapper.fetchData();
3 changes: 2 additions & 1 deletion src/utils/types/common/index.ts
@@ -1,6 +1,7 @@
export enum BetProviders {
BETIKA = "BETIKA",
SPORTPESA = "SPORTPESA"
SPORTPESA = "SPORTPESA",
ORBIT = "ORBIT"
}

export enum Games {
