Skip to content

Commit

Permalink
MWPW-144478 - Bulk Preview & Extract .md files for the documents unde…
Browse files Browse the repository at this point in the history
…r a Graybox experience (#5)

- Initial PR for Bulk Preview and extracting .md files
  • Loading branch information
arshadparwaiz authored Mar 14, 2024
2 parents d87f121 + 554a1ff commit 8dbb839
Show file tree
Hide file tree
Showing 5 changed files with 245 additions and 4 deletions.
3 changes: 3 additions & 0 deletions actions/appConfig.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ class AppConfig {
this.configMap.certPassword = params.certPassword;
this.configMap.certKey = params.certKey;
this.configMap.certThumbprint = params.certThumbprint;
this.configMap.enablePreview = this.getJsonFromStr(params.enablePreview, []);
this.configMap.helixAdminApiKeys = this.getJsonFromStr(params.helixAdminApiKeys);
this.configMap.bulkPreviewCheckInterval = parseInt(params.bulkPreviewCheckInterval || '30', 10);
this.configMap.maxBulkPreviewChecks = parseInt(params.maxBulkPreviewChecks || '30', 10);
this.configMap.groupCheckUrl = params.groupCheckUrl || 'https://graph.microsoft.com/v1.0/groups/{groupOid}/members?$count=true';
this.configMap.grayboxUserGroups = this.getJsonFromStr(params.grayboxUserGroups, []);
this.configMap.ignoreUserCheck = (params.ignoreUserCheck || '').trim().toLowerCase() === 'true';
Expand Down
33 changes: 31 additions & 2 deletions actions/graybox/promote-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,18 @@
* from Adobe.
************************************************************************* */

const { getAioLogger, isFilePatternMatched, toUTCStr } = require('../utils');
const { getAioLogger, handleExtension, logMemUsage, delay, isFilePatternMatched, toUTCStr } = require('../utils');
const appConfig = require('../appConfig');
const { getConfig } = require('../config');
const { getAuthorizedRequestOption, fetchWithRetry, updateExcelTable } = require('../sharepoint');
const { getAuthorizedRequestOption, fetchWithRetry, updateExcelTable, bulkCreateFolders } = require('../sharepoint');
const helixUtils = require('../helixUtils');
const sharepointAuth = require('../sharepointAuth');

const logger = getAioLogger();
const MAX_CHILDREN = 1000;
const IS_GRAYBOX = true;
const BATCH_REQUEST_PREVIEW = 200;
const DELAY_TIME_COPY = 3000;

async function main(params) {
logger.info('Graybox Promote Worker invoked');
Expand All @@ -47,6 +51,31 @@ async function main(params) {
logger.info(`Files in graybox folder in ${experienceName}`);
logger.info(JSON.stringify(gbFiles));


// create batches to process the data
const batchArray = [];
for (let i = 0; i < gbFiles.length; i += BATCH_REQUEST_PREVIEW) {
const arrayChunk = gbFiles.slice(i, i + BATCH_REQUEST_PREVIEW);
batchArray.push(arrayChunk);
}

// process data in batches
const previewStatuses = [];

if (helixUtils.canBulkPreview()) {
const paths = [];

batchArray.forEach((batch) => {
batch.forEach((gbFile) => paths.push(handleExtension(gbFile.filePath)));
});

previewStatuses.push(await helixUtils.bulkPreview(paths, helixUtils.getOperations().PREVIEW, experienceName));

logger.info(`Preview Statuses >> ${JSON.stringify(previewStatuses)}`);

const failedPreviews = previewStatuses.filter((status) => !status.success).map((status) => status.path);
}

// Update project excel file with status (sample)
logger.info('Updating project excel file with status');
const curreDateTime = new Date();
Expand Down
166 changes: 166 additions & 0 deletions actions/helixUtils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/* ************************************************************************
* ADOBE CONFIDENTIAL
* ___________________
*
* Copyright 2024 Adobe
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe and its suppliers, if any. The intellectual
* and technical concepts contained herein are proprietary to Adobe
* and its suppliers and are protected by all applicable intellectual
* property laws, including trade secret and copyright laws.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe.
************************************************************************* */

const fetch = require('node-fetch');
const appConfig = require('./appConfig');
const { getAioLogger, delay } = require('./utils');

// bulkPreview re-issues a failed (non-auth) bulk request while retryAttempt <= MAX_RETRIES.
const MAX_RETRIES = 5;
// Wait between bulkPreview retries, in seconds (multiplied by 1000 before being passed to delay).
const RETRY_DELAY = 5;
// Per-resource status codes that bulkJobStatus reports as a successful preview/publish.
const JOB_STATUS_CODES = [200, 304];
// Response status codes for which bulkPreview does not retry the bulk request.
const AUTH_ERRORS = [401, 403];
// Operation names used to build the admin.hlx.page URLs.
const PREVIEW = 'preview';
// bulkJobStatus substitutes PUBLISH for LIVE when building the job status URL.
const PUBLISH = 'publish';
const LIVE = 'live';

const logger = getAioLogger();

/**
 * Helpers around the Helix (Franklin) admin API at admin.hlx.page: triggering bulk
 * preview/publish jobs and polling their status. Repo, owner and branch come from
 * appConfig.getUrlInfo(); API keys and polling settings come from appConfig.getConfig().
 */
class HelixUtils {
    /**
     * @returns {{PREVIEW: string, LIVE: string}} Operation names accepted by bulkPreview
     */
    getOperations() {
        return { PREVIEW, LIVE };
    }

    /**
     * @returns {string} Repo name taken from the configured URL info
     */
    getRepo() {
        const urlInfo = appConfig.getUrlInfo();
        return urlInfo.getRepo();
    }

    /**
     * Looks up the admin API key configured for the current repo.
     * @returns {string|undefined} The key, or undefined when none is configured
     */
    getAdminApiKey() {
        const repo = this.getRepo();
        const { helixAdminApiKeys } = appConfig.getConfig();
        // Optional chaining: the parsed config value may be missing or null.
        return helixAdminApiKeys?.[repo];
    }

    /**
     * Checks if the preview is enabled for the main or graybox site.
     * Each entry of the `enablePreview` config is treated as a regular expression that
     * must match the whole repo name.
     * @returns {boolean} true if preview is enabled for the current repo, false otherwise
     */
    canBulkPreview() {
        const repo = this.getRepo();
        const { enablePreview } = appConfig.getConfig();
        const repoPatterns = Array.isArray(enablePreview) ? enablePreview : [];
        // `some` yields a strict boolean (the previous `find` leaked a RegExp/undefined).
        return repoPatterns.some((ps) => new RegExp(`^${ps}$`).test(repo));
    }

    /**
     * Trigger a preview of the files using the franklin bulk api. Franklin bulk api returns a job id/name which is used to
     * check back the completion of the preview.
     * @param {string[]} paths Paths of the files that need to be previewed. Empty entries are ignored.
     * @param {string} operation Operation name, e.g. preview
     * @param {string} grayboxExperienceName Graybox Experience Name
     * @param {number} retryAttempt Iteration number of the retry attempt (Default = 1)
     * @returns List of paths with preview/publish status e.g. [{path:'/draft/file1', success: true, resourcePath: ''}..]
     */
    async bulkPreview(paths, operation, grayboxExperienceName, retryAttempt = 1) {
        const validPaths = paths.filter((p) => p);
        let prevPubStatuses = validPaths.map((path) => ({ success: false, path, resourcePath: '' }));
        if (!prevPubStatuses.length) {
            return prevPubStatuses;
        }
        try {
            const repo = this.getRepo();
            const urlInfo = appConfig.getUrlInfo();
            const experienceSegment = grayboxExperienceName ? `${grayboxExperienceName}/` : '';

            const bulkUrl = `https://admin.hlx.page/${operation}/${urlInfo.getOwner()}/${repo}/${urlInfo.getBranch()}/${experienceSegment}*`;
            const options = {
                method: 'POST',
                // Only send the paths that are tracked in prevPubStatuses.
                body: JSON.stringify({ forceUpdate: true, paths: validPaths }),
                headers: new fetch.Headers([['Accept', 'application/json'], ['Content-Type', 'application/json']])
            };

            const helixAdminApiKey = this.getAdminApiKey();
            if (helixAdminApiKey) {
                options.headers.append('Authorization', `token ${helixAdminApiKey}`);
            }

            const response = await fetch(bulkUrl, options);
            logger.info(`${operation} call response ${response.status} for ${bulkUrl}`);
            if (!response.ok && !AUTH_ERRORS.includes(response.status) && retryAttempt <= MAX_RETRIES) {
                // Non-auth failure: wait and re-issue the whole bulk request.
                await delay(RETRY_DELAY * 1000);
                prevPubStatuses = await this.bulkPreview(validPaths, operation, grayboxExperienceName, retryAttempt + 1);
            } else if (response.ok) {
                // Get job details
                const jobResp = await response.json();
                const jobName = jobResp.job?.name;
                if (jobName) {
                    logger.info(`check again jobName : ${jobName} operation : ${operation} repo : ${repo}`);
                    const jobStatus = await this.bulkJobStatus(jobName, operation, repo);
                    logger.info(`jobStatus : ${JSON.stringify(jobStatus)}`);
                    // Logged once per job rather than once per path.
                    logger.info(`Job details : ${jobName} / ${jobResp.messageId} / ${jobResp.job?.state}`);
                    prevPubStatuses.forEach((e) => {
                        const fileStatus = jobStatus[e.path];
                        if (fileStatus?.success) {
                            e.success = true;
                            e.resourcePath = fileStatus.resourcePath;
                        }
                    });
                }
            }
        } catch (error) {
            logger.info(`Error in bulk ${operation} status: ${error.message}`);
            // Any unexpected failure marks every path as failed.
            prevPubStatuses.forEach((e) => {
                e.success = false;
            });
        }
        return prevPubStatuses;
    }

    /**
     * Checks the preview/publish job status and returns the file statuses.
     * Polls recursively until the job state is 'stopped', the job is cancelled, or the
     * configured `maxBulkPreviewChecks` is exceeded, waiting `bulkPreviewCheckInterval`
     * seconds between checks.
     * @param {string} jobName Bulk job to be checked
     * @param {string} operation Job Type (preview/publish)
     * @param {string} repo Repo for which the job was triggered
     * @param {object} bulkPreviewStatus Accumulated status of the files (default is empty); mutated in place
     * @param {number} retryAttempt Iteration number of the retry attempt (Default = 1)
     * @returns Map of path to preview/publish status e.g. {'/draft/file1': {success: true, resourcePath: '...'}..}
     */
    async bulkJobStatus(jobName, operation, repo, bulkPreviewStatus = {}, retryAttempt = 1) {
        logger.info(`Checking job status of ${jobName} for ${operation}`);
        try {
            const { helixAdminApiKeys, maxBulkPreviewChecks, bulkPreviewCheckInterval } = appConfig.getConfig();
            const options = {};
            if (helixAdminApiKeys && helixAdminApiKeys[repo]) {
                options.headers = new fetch.Headers();
                options.headers.append('Authorization', `token ${helixAdminApiKeys[repo]}`);
            }
            // The job endpoint uses 'publish' for the 'live' operation.
            const bulkOperation = operation === LIVE ? PUBLISH : operation;
            const urlInfo = appConfig.getUrlInfo();
            const statusUrl = `https://admin.hlx.page/job/${urlInfo.getOwner()}/${repo}/${urlInfo.getBranch()}/${bulkOperation}/${jobName}/details`;
            const response = await fetch(statusUrl, options);
            const canCheckAgain = retryAttempt <= maxBulkPreviewChecks;
            if (!response.ok && canCheckAgain) {
                await delay(bulkPreviewCheckInterval * 1000);
                await this.bulkJobStatus(jobName, operation, repo, bulkPreviewStatus, retryAttempt + 1);
            } else if (response.ok) {
                const jobStatusJson = await response.json();
                logger.info(`${operation} progress ${JSON.stringify(jobStatusJson.progress)}`);
                jobStatusJson.data?.resources?.forEach((rs) => {
                    bulkPreviewStatus[rs.path] = { success: JOB_STATUS_CODES.includes(rs.status), resourcePath: rs?.resourcePath };
                });
                // Job still running: wait and poll again; results accumulate in bulkPreviewStatus.
                if (jobStatusJson.state !== 'stopped' && !jobStatusJson.cancelled && canCheckAgain) {
                    await delay(bulkPreviewCheckInterval * 1000);
                    await this.bulkJobStatus(jobName, operation, repo, bulkPreviewStatus, retryAttempt + 1);
                }
            }
        } catch (error) {
            logger.info(`Error in checking status: ${error.message}`);
        }
        return bulkPreviewStatus;
    }
}

module.exports = new HelixUtils();
41 changes: 40 additions & 1 deletion actions/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,48 @@ function isFilePatternMatched(filePath, patterns) {
return isFilePathWithWildcard(filePath, patterns);
}

/**
 * Writes the current process memory usage (the JSON form of process.memoryUsage())
 * to the application logger at info level.
 */
function logMemUsage() {
    const aioLogger = getAioLogger();
    aioLogger.info(`Memory Usage : ${JSON.stringify(process.memoryUsage())}`);
}

/**
 * Returns a promise that resolves (with no value) after the given time.
 * @param {number} milliseconds Time to wait, in milliseconds (default 100)
 */
async function delay(milliseconds = 100) {
    return new Promise((wake) => {
        setTimeout(wake, milliseconds);
    });
}

/**
 * Converts a document path into the path form used for preview requests.
 * Only the file name (the part after the last '/') is changed; the folder part is
 * returned untouched.
 *  - 'index.docx' (any letter case) becomes an empty file name
 *  - a trailing '.xlsx' is replaced by '.json'
 *  - a trailing '.docx' is removed
 *  - the file name is then lower-cased, stripped of diacritics, every run of characters
 *    other than a-z, 0-9 and '.' is replaced by '-', and a leading/trailing '-' is removed
 * @param {string} path File path, e.g. '/drafts/My File.docx'
 * @returns {string} Converted path, e.g. '/drafts/my-file'
 */
function handleExtension(path) {
    const lastSlash = path.lastIndexOf('/');
    const folder = path.substring(0, lastSlash + 1);
    let fileName = path.substring(lastSlash + 1);

    if (fileName.toLowerCase() === 'index.docx') {
        fileName = '';
    } else if (fileName.endsWith('.xlsx')) {
        // Swap only the trailing extension; String.replace would hit the first
        // '.xlsx' occurrence, which is wrong for names like 'a.xlsx.xlsx'.
        fileName = `${fileName.slice(0, -'.xlsx'.length)}.json`;
    } else if (fileName.endsWith('.docx')) {
        fileName = fileName.slice(0, -'.docx'.length);
    }

    const sanitized = fileName
        .toLowerCase()
        .normalize('NFD')
        .replace(/[\u0300-\u036f]/g, '')
        .replace(/[^a-z0-9.]+/g, '-')
        .replace(/^-|-$/g, '');

    return `${folder}${sanitized}`;
}

module.exports = {
getAioLogger,
strToArray,
isFilePatternMatched,
toUTCStr
toUTCStr,
logMemUsage,
delay,
handleExtension
};
6 changes: 5 additions & 1 deletion app.config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@ application:
certPassword: $CERT_PASSWORD
certKey: $CERT_KEY
certThumbprint: $CERT_THUMB_PRINT
enablePreview: $ENABLE_PREVIEW
groupCheckUrl: $GROUP_CHECK_URL
grayboxUserGroups: $GRAYBOX_USER_GROUPS
grayboxUserGroups: $GRAYBOX_USER_GROUPS
helixAdminApiKeys: $HELIX_ADMIN_API_KEYS
bulkPreviewCheckInterval: $BULK_PREVIEW_CHECK_INTERVAL
maxBulkPreviewChecks: $MAX_BULK_PREVIEW_CHECKS
actions:
promote:
function: actions/graybox/promote.js
Expand Down

0 comments on commit 8dbb839

Please sign in to comment.