diff --git a/index.js b/index.js index 51cf825..a0b19f4 100644 --- a/index.js +++ b/index.js @@ -1,9 +1,10 @@ 'use strict'; -var Command = require('./lib/Command.js'), - Queue = require('./lib/Queue.js'), - request = require('./lib/Request.js'), - libxml = require('libxmljs-dom'), +var Command = require('./lib/Command.js'), + Queue = require('./lib/Queue.js'), + request = require('./lib/Request.js'), + libxml = require('libxmljs-dom'), + RateLimiter = require('limiter').RateLimiter, instanceId = 0, memoryUsage = 0, cachedSelectors = {}, @@ -58,9 +59,10 @@ function Osmosis(url, params) { return Osmosis.get(url, params); } - this.queue = new Queue(this); - this.command = new Command(this); - this.id = ++instanceId; + this.queue = new Queue(this); + this.command = new Command(this); + this.id = ++instanceId; + this.throttle = new RateLimiter(999, 1, true); } @@ -147,7 +149,7 @@ Osmosis.prototype.config = function (option, value) { /** * Run (or re-run) an Osmosis instance. - *g + * * If you frequently use the same Osmosis instance * (such as in an Express server), it's much more efficient to * initialize the instance once and repeatedly use `run` as needed. 
@@ -184,6 +186,7 @@ Osmosis.prototype.request = function (url, opts, callback, tries) { opts.user_agent = opts.user_agent(); } + this.throttle.removeTokens(1, function(err, remainingRequests) { request(url.method, url, url.params, @@ -229,6 +232,7 @@ Osmosis.prototype.request = function (url, opts, callback, tries) { href + ' -> ' + new_url); } }); + }); }; /** @@ -325,6 +329,8 @@ Osmosis.prototype.resources = function () { 'requests: ' + this.requests + ' (' + this.queue.requests + ' queued), ' + + 'tokens: ' + parseInt(this.throttle.getTokensRemaining()) + ', ' + + 'RAM: ' + toMB(mem.rss) + ' (' + memDiff + '), ' + 'libxml: ' + ((libxml_mem / mem.rss) * 100).toFixed(1) + diff --git a/lib/commands/throttle.js b/lib/commands/throttle.js new file mode 100644 index 0000000..efeddce --- /dev/null +++ b/lib/commands/throttle.js @@ -0,0 +1,22 @@ +/** + * Set a throttle. Short for `.config({ throttle: ... })` + * + * @function throttle + * @memberof Command + * @param {Number} tokensPerInterval Maximum number of tokens that can be + * removed at any given moment and over the course of one interval. + * @param {String|Number} interval The interval length in milliseconds, or as + * one of the following strings: 'second', 'minute', 'hour', 'day'. + * @see Osmosis.config + */ + +var RateLimiter = require('limiter').RateLimiter; + +module.exports = function (tokensPerInterval, interval) { + this.instance.throttle = new RateLimiter( + tokensPerInterval || 1000, + interval || 1 + ); + + return this; +}; diff --git a/package.json b/package.json index 5105313..29fdd96 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "osmosis", - "version": "1.1.8", + "version": "1.1.6", "description": "Web scraper for NodeJS", "keywords": [ "web", @@ -21,6 +21,7 @@ }, "dependencies": { "libxmljs-dom": "~0.0.11", + "limiter": "^1.1.0", "needle": "^1.6.0" }, "devDependencies": {