Skip to content

Commit

Permalink
fix(msar)!: download is back to status quo
Browse files Browse the repository at this point in the history
- AuthenticatedFetch now extends PuppeteerSession.Authenticated, and each download is a baseFork()
- Reorganized parameters to clean up redundancies and pass CLI arguments more consistently throughout
- Updated AuthenticatedFetch, HTTPFetch debugging output for greater consistency
- Followed `defaults` model in Download.args modules, as in other *.args modules

BREAKING: --haltOnError replaced by standard --ignoreErrors (or --no-ignoreErrors)
  • Loading branch information
battis committed Jan 3, 2025
1 parent 80a09b7 commit 3c782c4
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 121 deletions.
47 changes: 25 additions & 22 deletions packages/msar/src/bin/commands/download.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ import * as Snapshot from '../../workflows/Snapshot.js';
} = cli.init({
args: {
requirePositionals: 1,
options: Download.args.options,
flags: Download.args.flags,
...Download.args,
man: [
{
text: 'Download the supporting files for an existing snapshot JSON file.. This command expects either 1 or 2 arguments: at least a path to an existing snapshot file, and optionally also the desired path to the output folder of supporting files.'
Expand All @@ -23,32 +22,26 @@ import * as Snapshot from '../../workflows/Snapshot.js';
}
});

const {
downloadOptions,
puppeteerOptions,
credentials,
outputOptions: { pretty, outputPath: _outputPath },
quit
} = Download.args.parse(values);
const { outputOptions, ...options } = Download.args.parse(values);
const { quit } = options;
const { pretty } = outputOptions;
let { outputPath } = outputOptions;

const spinner = cli.spinner();
spinner.start('Reading snaphot file');

const snapshotPath = path.resolve(process.cwd(), snapshotPathArg!);

let outputPath: string;
if (!_outputPath) {
if (!outputPath) {
outputPath = path.join(
path.dirname(snapshotPath!),
path.basename(snapshotPath!, '.json')
);
} else {
if (fs.existsSync(_outputPath)) {
if (fs.existsSync(outputPath)) {
outputPath = await common.output.avoidOverwrite(
path.join(_outputPath, path.basename(snapshotPath!, '.json'))
path.join(outputPath, path.basename(snapshotPath!, '.json'))
);
} else {
outputPath = _outputPath;
}
}

Expand All @@ -63,10 +56,22 @@ import * as Snapshot from '../../workflows/Snapshot.js';
`Read ${snapshots.length} snapshots from ${cli.colors.url(snapshotPath)}`
);

// Determine the single host shared by every snapshot in the file. The Spider
// below is constructed against one host, so a file that mixes hosts (or has
// none at all) is rejected up front.
const host = snapshots
  .map((snapshot) => snapshot.Metadata.Host)
  .reduce((host: string | undefined, other: string) => {
    if (!host) {
      // First host seen so far: adopt it as the candidate.
      return other;
    } else if (host !== other) {
      throw new Error('Multiple hosts present in snapshot file.');
    }
    // Same host as the candidate: keep carrying it forward. Without this
    // explicit return the accumulator fell back to `undefined` on every
    // matching entry, so an even number of same-host snapshots was reported
    // as "No host present", and some mixed-host files escaped detection.
    return host;
  }, undefined);
if (!host) {
  throw new Error('No host present in snapshot file.');
}
const spider = new Download.Spider({
outputPath,
credentials,
host: snapshots[0].Metadata.Host
host,
outputOptions: { ...outputOptions, outputPath },
...options
});
const indices: (string | undefined)[] = [];

Expand All @@ -78,10 +83,8 @@ import * as Snapshot from '../../workflows/Snapshot.js';
);
indices.push(
await spider.download(snapshot, {
...downloadOptions,
outputPath,
...puppeteerOptions,
pretty
...options,
outputOptions: { ...outputOptions, outputPath }
})
);
bar.increment();
Expand Down
33 changes: 22 additions & 11 deletions packages/msar/src/workflows/Download/Downloader.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,33 @@
import * as common from '../../common.js';
import * as Cache from './Cache.js';
import {
AuthenticatedFetch,
Options as AuthOptions
} from './Downloader/AuthenticatedFetch.js';
import { HTTPFetch, Options as HTTPOptions } from './Downloader/HTTPFetch.js';
import * as AuthenticatedFetch from './Downloader/AuthenticatedFetch.js';
import * as HTTPFetch from './Downloader/HTTPFetch.js';
import { Strategy } from './Downloader/Strategy.js';

export type Options = AuthOptions & HTTPOptions;
export type Options = {
host: string;
} & common.output.args.Parsed &
common.PuppeteerSession.args.Parsed &
common.workflow.args.Parsed;

// TODO Downloader needs to honor --concurrentThreads
export class Downloader implements Strategy {
private auth: AuthenticatedFetch;
private http: HTTPFetch;
private auth: AuthenticatedFetch.Downloader;
private http: HTTPFetch.Downloader;
private host: string;

public constructor({ outputPath, host, ...options }: Options) {
public constructor({ host, outputOptions, ...options }: Options) {
const { outputPath } = outputOptions;
if (!outputPath) {
throw new common.output.OutputError('Downloader requires outputPath');
}
this.host = host;
this.auth = new AuthenticatedFetch({ outputPath, host, ...options });
this.http = new HTTPFetch({ outputPath });
this.auth = new AuthenticatedFetch.Downloader({
host,
outputOptions,
...options
});
this.http = new HTTPFetch.Downloader({ outputPath, ...options });
}

public async download(original: string, filename?: string) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import cli from '@battis/qui-cli';
import { Mutex } from 'async-mutex';
import { PuppeteerSession } from 'datadirect-puppeteer';
import { EventEmitter } from 'node:events';
import fs from 'node:fs';
Expand All @@ -21,48 +20,38 @@ type FilepathVariantsOptions = {
};

export type Options = {
outputPath: string;
host: string;
} & PuppeteerSession.Options;
host: URL | string;
} & common.output.args.Parsed &
PuppeteerSession.Options;

const TEMP = path.join('/tmp/msar/download', crypto.randomUUID());
const DOWNLOADS = path.join(os.homedir(), 'Downloads');

export class AuthenticatedFetch
export class Downloader
extends PuppeteerSession.Authenticated
implements Strategy
{
private outputPath: string;
private preparing = new Mutex();
private emitter = new EventEmitter();

public constructor({ outputPath, host, ...options }: Options) {
super(host, options);
public constructor({
host,
outputOptions: { outputPath },
...options
}: Options) {
super(`https://${host}`, options);
if (!outputPath) {
throw new common.output.OutputError(
'AuthenticatedFetch requires outputPath'
);
}
this.outputPath = outputPath;
}

public async download(url: string, filename?: string) {
/*
* FIXME refactoring broke `msar download`
* ```sh
* - Connecting to /path/to/myschoolapp-reporting/var/download.log
* ✔ Logging level all to /path/to/myschoolapp-reporting/var/download.log
* - Reading snaphot file
* ✔ Read 1 snapshots from /path/to/myschoolapp-reporting/var/2024 - 2025 - Horace Bixby - Sandbox (Y) - 97551579.json
* Group 97551579: Downloading supporting files
* Task Terminated with exit code 1
* node:internal/url:806
* const href = bindingUrl.parse(input, base, raiseException);
* ^
* TypeError: Invalid URL
* at new URL (node:internal/url:806:29)
* at AuthenticatedFetch.openURL (file:///path/to/myschoolapp-reporting/packages/datadirect-puppeteer/dist/PuppeteerSession/Base.js:45:25) {
* code: 'ERR_INVALID_URL',
* input: 'example.myschoolapp.com'
* }
* ```
*/
const session = await this.fork('about:blank');
cli.log.debug(`AuthenticatedFetch: ${url}`);
await this.ready();
const session = await this.baseFork('about:blank');
const client = await session.page.createCDPSession();

await client.send('Fetch.enable', {
Expand Down Expand Up @@ -125,7 +114,7 @@ export class AuthenticatedFetch
if (fs.existsSync(possiblePaths[key])) {
fs.renameSync(possiblePaths[key], destFilepath);
cli.log.debug(
`Moved ${key} file to ${cli.colors.url(localPath)}`
`Moved ${key} file ${cli.colors.url(possiblePaths[key])} to ${cli.colors.url(localPath)}`
);
this.emitter.emit(url, { localPath, filename });
return;
Expand Down
24 changes: 20 additions & 4 deletions packages/msar/src/workflows/Download/Downloader/HTTPFetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,34 @@ import {
} from '../filenameFromDisposition.js';
import { Strategy } from './Strategy.js';

export type Options = { outputPath: string };
export type Options = { outputPath: string } & common.workflow.args.Parsed;

export class HTTPFetch implements Strategy {
export class Downloader implements Strategy {
private outputPath: string;
private logRequests: boolean;

public constructor({ outputPath }: Options) {
public constructor({ outputPath, logRequests }: Options) {
this.outputPath = outputPath;
this.logRequests = logRequests;
}

public async download(url: string, filename?: string) {
cli.log.debug(`Directly fetching ${cli.colors.url(url)}`);
cli.log.debug(`HTTPFetch: ${cli.colors.url(url)}`);
const response = await fetch(url);
if (this.logRequests) {
cli.log.debug({
url,
response: {
url: response.url,
redirected: response.redirected,
type: response.type,
ok: response.ok,
status: response.status,
statusText: response.statusText,
headers: response.headers
}
});
}
if (response.ok && response.body) {
return {
localPath: await common.output.writeFetchedFile({
Expand Down
48 changes: 20 additions & 28 deletions packages/msar/src/workflows/Download/Spider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,40 @@ import path from 'node:path';
import * as common from '../../common.js';
import * as Snapshot from '../Snapshot.js';
import * as Cache from './Cache.js';
import { Downloader, Options as DownloaderOptions } from './Downloader.js';
import * as Downloader from './Downloader.js';

export type BaseOptions = {
export type Options = {
include?: RegExp[];
exclude?: RegExp[];
haltOnError?: boolean;
};
} & common.args.Parsed;

type TraverseOptions = BaseOptions & {
type TraverseOptions = Options & {
host: string;
pathToComponent: string;
};

type DownloadOptions = BaseOptions & {
pretty?: boolean;
outputPath: string;
};

export type Options = DownloaderOptions;

export class Spider {
private downloader: Downloader;
private downloader: Downloader.Downloader;

public constructor(options: Options) {
this.downloader = new Downloader(options);
public constructor(options: Downloader.Options) {
this.downloader = new Downloader.Downloader(options);
}

public async download(
snapshot: Snapshot.Data,
{ pretty = false, outputPath, ...options }: DownloadOptions
{ outputOptions, ...options }: Options
) {
const { outputPath, pretty } = outputOptions;
if (!outputPath) {
throw new common.output.OutputError('Spider requires outputPath');
}
if (snapshot) {
cli.log.debug(
`Group ${snapshot.SectionInfo?.Id || cli.colors.error('unknown')}: Downloading supporting files`
);
await this.traverse(snapshot, {
host: snapshot.Metadata.Host,
outputOptions,
...options,
pathToComponent: path.basename(outputPath)
});
Expand All @@ -63,17 +60,15 @@ export class Spider {

private async traverse(
snapshotComponent: object,
{ host, pathToComponent, include, exclude, haltOnError }: TraverseOptions
{ pathToComponent, ...options }: TraverseOptions
) {
const { include, exclude, ignoreErrors } = options;
if (Array.isArray(snapshotComponent)) {
await Promise.allSettled(
snapshotComponent.map(async (elt, i) => {
await this.traverse(elt, {
host,
pathToComponent: `${pathToComponent}[${i}]`,
include,
exclude,
haltOnError
...options
});
})
);
Expand All @@ -87,11 +82,8 @@ export class Spider {
return;
} else if (typeof snapshotComponent[key] === 'object') {
await this.traverse(snapshotComponent[key], {
host,
pathToComponent: `${pathToComponent}.${key}`,
include,
exclude,
haltOnError
...options
});
/*
* FIXME FileName files in topics are at /ftpimages/:SchoolId/topics/:FileName
Expand Down Expand Up @@ -130,9 +122,7 @@ export class Spider {
`${pathToComponent}[${key}]: ${item.localPath || item.error}`
);
} catch (error) {
if (haltOnError) {
throw error;
} else {
if (ignoreErrors) {
const message = `Download ${cli.colors.value(key)} ${cli.colors.url(
snapshotComponent[key]
)} failed: ${error}`;
Expand All @@ -142,6 +132,8 @@ export class Spider {
accessed: new Date(),
error: message
};
} else {
throw error;
}
}
}
Expand Down
9 changes: 7 additions & 2 deletions packages/msar/src/workflows/Download/args.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
export * from './args/flags.js';
export * from './args/options.js';
import { defaults as flagDefaults } from './args/flags.js';
import { defaults as optionsDefaults } from './args/options.js';

export { flags } from './args/flags.js';
export { options } from './args/options.js';
export * from './args/parse.js';

export const defaults = { ...flagDefaults, ...optionsDefaults };
10 changes: 3 additions & 7 deletions packages/msar/src/workflows/Download/args/flags.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
import cli from '@battis/qui-cli';
import * as common from '../../../common.js';

export const flags = {
...common.args.flags,
haltOnError: {
description: `Halt on an error downloading a supporting file (default: ${cli.colors.value('false')}`
}
};
export const defaults = {};

export const flags = common.args.flags;
Loading

0 comments on commit 3c782c4

Please sign in to comment.