diff --git a/lib/amperize.js b/lib/amperize.js index a77c85b..37dcb6a 100644 --- a/lib/amperize.js +++ b/lib/amperize.js @@ -7,7 +7,8 @@ var EventEmitter = require('events').EventEmitter, uuid = require('uuid'), async = require('async'), url = require('url'), - got = require('got'), + request = require('request-promise'), + probeImageSize = require('probe-image-size'), _ = require('lodash'), sizeOf = require('image-size'), validator = require('validator'), @@ -105,6 +106,16 @@ Amperize.prototype.amperizer = function amperizer(id, error, dom) { */ Amperize.prototype.traverse = function traverse(data, html, done) { var self = this; + var imageSizeCache = {}; + var timeout = 3000; + var requestOptions = { + // We need the user-agent, otherwise some https requests may fail (e.g. Cloudflare) + headers: { + 'User-Agent': 'Mozilla/5.0 Safari/537.36' + }, + timeout: timeout, + encoding: null + }; async.reduce(data, html, function reduce(html, element, step) { var children; @@ -158,62 +169,86 @@ Amperize.prototype.traverse = function traverse(data, html, done) { return enter(); } + // probe will fetch the minimal amount of data needed to determine + // the image dimensions so it's more performant than a full fetch + function _probeImageSize(url) { + return probeImageSize( + url, + requestOptions + ).then(function (result) { + imageSizeCache[url] = result; + return result; + }); + } + + // fetch the full image before reading dimensions using image-size, + // it's slower but has better format support + function _fetchImageSize(url) { + return request( + url, + Object.assign({}, requestOptions, { + encoding: null + }) + ).then(function (response) { + var result = sizeOf(response); + imageSizeCache[url] = result; + return result; + }); + } + + // select appropriate method to get image size + function _getImageSize(url) { + var [, extension] = url.match(/(?:\.)([a-zA-Z]{3,4})$/) || []; + + // use cached image size if we've already seen this url + if (imageSizeCache[url]) { + 
return Promise.resolve(imageSizeCache[url]); + } + + // fetch the full image for formats we can't probe + if (['cur', 'icns', 'ico', 'dds'].includes(extension)) { + return _fetchImageSize(url); + } + + // probe the partial image for everything else + return _probeImageSize(url); + } + /** - * Get the image sizes (width and heigth plus type of image) + * Get the image sizes (width and height plus type of image) * - * https://github.com/image-size/image-size + * https://github.com/nodeca/probe-image-size * * @param {Object} element * @return {Object} element incl. width and height */ function getImageSize(element) { - var imageObj = url.parse(element.attribs.src), - requestOptions, - timeout = 3000; + var imageObj = url.parse(element.attribs.src); if (!validator.isURL(imageObj.href)) { // revert this element, do not show element.name = 'img'; - return enter(); } - // We need the user-agent, otherwise some https request may fail (e. g. cloudfare) - requestOptions = { - headers: { - 'User-Agent': 'Mozilla/5.0 Safari/537.36' - }, - timeout: timeout, - retry: 0, - encoding: null - }; - - return got( - imageObj.href, - requestOptions - ).then(function (response) { - try { - // Using the Buffer rather than an URL requires to use sizeOf synchronously. - // See https://github.com/image-size/image-size#asynchronous - var dimensions = sizeOf(response.body); - - // CASE: `.ico` files might have multiple images and therefore multiple sizes. 
- // We return the largest size found (image-size default is the first size found) - if (dimensions.images) { - dimensions.width = _.maxBy(dimensions.images, function (w) {return w.width;}).width; - dimensions.height = _.maxBy(dimensions.images, function (h) {return h.height;}).height; - } - - element.attribs.width = dimensions.width; - element.attribs.height = dimensions.height; - - return getLayoutAttribute(element); - } catch (err) { - // revert this element, do not show + return _getImageSize(imageObj.href).then(function (result) { + if ((!result.width || !result.height) && !result.images) { element.name = 'img'; return enter(); } - }).catch(function () { + + // CASE: `.ico` files might have multiple images and therefore multiple sizes. + // We return the largest size found (image-size default is the first size found) + if (result.images) { + result.width = _.maxBy(result.images, function (w) {return w.width;}).width; + result.height = _.maxBy(result.images, function (h) {return h.height;}).height; + } + + element.attribs.width = result.width; + element.attribs.height = result.height; + + return getLayoutAttribute(element); + }).catch(function (err) { // revert this element, do not show element.name = 'img'; return enter(); diff --git a/package.json b/package.json index 642a4e6..5806418 100644 --- a/package.json +++ b/package.json @@ -30,10 +30,12 @@ "dependencies": { "async": "^2.1.4", "emits": "^3.0.0", - "got": "^9.6.0", "htmlparser2": "^3.9.2", - "image-size": "0.6.1", + "image-size": "^0.7.4", "lodash": "^4.17.4", + "probe-image-size": "^4.0.0", + "request": "^2.83.0", + "request-promise": "^4.2.4", "uuid": "^3.0.0", "validator": "^9.1.1" }, @@ -42,11 +44,11 @@ "cz-conventional-changelog": "2.1.0", "istanbul": "^0.4.5", "mocha": "^4.0.1", + "nock": "^9.0.2", + "rewire": "^2.5.2", "semantic-release": "9.1.0", "sinon": "1.17.7", - "sinon-chai": "^2.8.0", - "nock": "^9.0.2", - "rewire": "^2.5.2" + "sinon-chai": "^2.8.0" }, "config": { "commitizen": { diff 
--git a/test/amperize.test.js b/test/amperize.test.js index b10a6fe..7421664 100644 --- a/test/amperize.test.js +++ b/test/amperize.test.js @@ -4,6 +4,7 @@ var chai = require('chai'), sinonChai = require('sinon-chai'), nock = require('nock'), rewire = require('rewire'), + path = require('path'), Amperize = rewire('../lib/amperize'), amperize; @@ -69,12 +70,17 @@ describe('Amperize', function () { }); describe('#parse', function () { - var sizeOfMock, - sizeOfStub; + var resetProbeImageSize, + imageSizeMock, + probeImageSizeStub; beforeEach(function () { - // stubbing the `image-size` lib, so we don't to a request everytime - sizeOfStub = sinon.stub(); + // reset rewire so tests are independent + if (resetProbeImageSize) { + resetProbeImageSize(); + } + // stubbing the `probe-image-size` lib, so we don't make a request every time + probeImageSizeStub = sinon.stub(); }); afterEach(function () { @@ -90,14 +96,14 @@ describe('Amperize', function () { }); it('transforms small into with full image dimensions and fixed layout', function (done) { - sizeOfMock = nock('http://static.wixstatic.com') + imageSizeMock = nock('http://static.wixstatic.com') .get('/media/355241_d31358572a2542c5a44738ddcb59e7ea.jpg_256') .reply(200, { body: '' }); - sizeOfStub.returns({width: 50, height: 50, type: 'jpg'}); - Amperize.__set__('sizeOf', sizeOfStub); + probeImageSizeStub.returns(Promise.resolve({width: 50, height: 50, type: 'jpg'})); + resetProbeImageSize = Amperize.__set__('probeImageSize', probeImageSizeStub); amperize.parse('', function (error, result) { expect(result).to.exist; @@ -112,14 +118,14 @@ describe('Amperize', function () { }); it('transforms big into with full image dimensions and responsive layout', function (done) { - sizeOfMock = nock('http://static.wixstatic.com') + imageSizeMock = nock('http://static.wixstatic.com') .get('/media/355241_d31358572a2542c5a44738ddcb59e7ea.jpg_256') .reply(200, { body: '' }); - sizeOfStub.returns({width: 350, height: 200, type: 
'jpg'}); - Amperize.__set__('sizeOf', sizeOfStub); + probeImageSizeStub.returns(Promise.resolve({width: 350, height: 200, type: 'jpg'})); + resetProbeImageSize = Amperize.__set__('probeImageSize', probeImageSizeStub); amperize.parse('', function (error, result) { expect(result).to.exist; @@ -134,14 +140,14 @@ describe('Amperize', function () { }); it('transforms into when width and height is set and overwrites it', function (done) { - sizeOfMock = nock('http://somestockwebsite.com') + imageSizeMock = nock('http://somestockwebsite.com') .get('/image.jpg') .reply(200, { body: '' }); - sizeOfStub.returns({width: 350, height: 200, type: 'jpg'}); - Amperize.__set__('sizeOf', sizeOfStub); + probeImageSizeStub.returns(Promise.resolve({width: 350, height: 200, type: 'jpg'})); + resetProbeImageSize = Amperize.__set__('probeImageSize', probeImageSizeStub); amperize.parse('', function (error, result) { expect(result).to.exist; @@ -156,14 +162,14 @@ describe('Amperize', function () { }); it('transforms into does not overwrite layout attribute', function (done) { - sizeOfMock = nock('http://somestockwebsite.com') + imageSizeMock = nock('http://somestockwebsite.com') .get('/image.jpg') .reply(200, { body: '' }); - sizeOfStub.returns({width: 350, height: 200, type: 'jpg'}); - Amperize.__set__('sizeOf', sizeOfStub); + probeImageSizeStub.returns(Promise.resolve({width: 350, height: 200, type: 'jpg'})); + resetProbeImageSize = Amperize.__set__('probeImageSize', probeImageSizeStub); amperize.parse('', function (error, result) { expect(result).to.exist; @@ -180,15 +186,15 @@ describe('Amperize', function () { it('transforms into when no file extension is given', function (done) { // This test is mocked, but works with this specific example. // You can comment out the mocks and the test should still pass. 
- sizeOfMock = nock('https://www.zomato.com') + imageSizeMock = nock('https://www.zomato.com') .matchHeader('User-Agent', /Mozilla\/.*Safari\/.*/) .get('/logo/18163505/minilogo') .reply(200, { body: '' }); - sizeOfStub.returns({width: 104, height: 15, type: 'png'}); - Amperize.__set__('sizeOf', sizeOfStub); + probeImageSizeStub.returns(Promise.resolve({width: 104, height: 15, type: 'png'})); + resetProbeImageSize = Amperize.__set__('probeImageSize', probeImageSizeStub); amperize.parse('', function (error, result) { expect(result).to.exist; @@ -202,14 +208,29 @@ describe('Amperize', function () { }); }); + it('falls back to image-size for unprobable images', function (done) { + imageSizeMock = nock('https://somewebsite.com') + .get('/favicon.ico') + .replyWithFile(200, path.join(__dirname, 'fixtures/multi-size.ico')); + + amperize.parse('', function (error, result) { + expect(result).to.exist; + expect(result).to.contain(''); + done(); + }); + }); + it('returns largest image value for .ico files', function (done) { - sizeOfMock = nock('https://somewebsite.com') + imageSizeMock = nock('https://somewebsite.com') .get('/favicon.ico') - .reply(200, { - body: '' - }); + .replyWithFile(200, path.join(__dirname, 'fixtures/multi-size.ico')); - sizeOfStub.returns({ + probeImageSizeStub.returns(Promise.resolve({ width: 32, height: 32, type: 'ico', @@ -218,8 +239,8 @@ describe('Amperize', function () { {width: 32, height: 32}, {width: 16, height: 16} ] - }); - Amperize.__set__('sizeOf', sizeOfStub); + })); + resetProbeImageSize = Amperize.__set__('sizeOf', probeImageSizeStub); amperize.parse('', function (error, result) { expect(result).to.exist; @@ -233,15 +254,36 @@ describe('Amperize', function () { }); }); + it('uses cached size rather than extra requests for duplicated images in html', function (done) { + var GIF1x1 = Buffer.from('R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==', 'base64'); + var secondImageSizeMock; + + imageSizeMock = nock('http://example.com') + 
.get('/image.jpg') + .reply(200, GIF1x1); + + secondImageSizeMock = nock('http://example.com') + .get('/image.jpg') + .reply(200, GIF1x1); + + amperize.parse('', function (error, result) { + expect(imageSizeMock.isDone()).to.equal(true); + expect(secondImageSizeMock.isDone()).to.equal(false); + expect(result).to.exist; + expect(result).to.match(/ with only height property into with full dimensions by overriding them', function (done) { - sizeOfMock = nock('https://media.giphy.com') + imageSizeMock = nock('https://media.giphy.com') .get('/media/l46CtzgjhTm29Cbjq/giphy.gif') .reply(200, { body: '' }); - sizeOfStub.returns({width: 800, height: 600, type: 'gif'}); - Amperize.__set__('sizeOf', sizeOfStub); + probeImageSizeStub.returns(Promise.resolve({width: 800, height: 600, type: 'gif'})); + resetProbeImageSize = Amperize.__set__('probeImageSize', probeImageSizeStub); amperize.parse('', function (error, result) { expect(result).to.exist; @@ -397,37 +439,33 @@ describe('Amperize', function () { }); it('can handle redirects', function (done) { - var secondSizeOfMock; + var secondImageSizeMock; + var GIF1x1 = Buffer.from('R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==', 'base64'); - sizeOfMock = nock('http://noimagehere.com') + imageSizeMock = nock('http://noimagehere.com') .get('/files/f/feedough/x/11/1540353_20925115.jpg') .reply(301, { - body: '' + body: '' }, { location: 'http://someredirectedurl.com/files/f/feedough/x/11/1540353_20925115.jpg' }); - secondSizeOfMock = nock('http://someredirectedurl.com') + secondImageSizeMock = nock('http://someredirectedurl.com') .get('/files/f/feedough/x/11/1540353_20925115.jpg') - .reply(200, { - body: '' - }); - - sizeOfStub.returns({width: 100, height: 100, type: 'jpg'}); - Amperize.__set__('sizeOf', sizeOfStub); + .reply(200, GIF1x1); amperize.parse('', function (error, result) { - expect(sizeOfMock.isDone()).to.be.equal(true); - expect(secondSizeOfMock.isDone()).to.be.equal(true); + 
expect(imageSizeMock.isDone()).to.be.equal(true, 'imageSizeMock isn\'t done'); + expect(secondImageSizeMock.isDone()).to.be.equal(true, 'secondImageSizeMock isn\'t done'); expect(error).to.be.null; - expect(result).to.contain(''); + expect(result).to.contain(''); done(); }); }); it('can handle request errors', function (done) { - sizeOfMock = nock('http://example.com') + imageSizeMock = nock('http://example.com') .get('/images/IMG_xyz.jpg') .reply(404, {message: 'something awful happened', code: 'AWFUL_ERROR'}); @@ -438,14 +476,11 @@ describe('Amperize', function () { }); }); - it('can handle errors of image-size module', function (done) { - sizeOfMock = nock('http://example.com') + it('can handle errors of probe-image-size module', function (done) { + // will throw ProbeError: unrecognized file format + imageSizeMock = nock('http://example.com') .get('/images/IMG_xyz.jpg') - .reply(200, { - body: '' - }); - sizeOfStub.throws({error: 'image-size could not find dimensions'}); - Amperize.__set__('sizeOf', sizeOfStub); + .reply(200, 'not an image'); amperize.parse('', function (error, result) { expect(error).to.be.null; @@ -456,13 +491,12 @@ describe('Amperize', function () { it('can handle timeout errors', function (done) { this.timeout(3500); + var GIF1x1 = Buffer.from('R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==', 'base64'); - sizeOfMock = nock('http://example.com') + imageSizeMock = nock('http://example.com') .get('/images/IMG_xyz.jpg') .delay(3500) - .reply(200, { - body: '' - }); + .reply(200, GIF1x1); amperize.parse('', function (error, result) { expect(error).to.be.null; diff --git a/test/fixtures/multi-size.ico b/test/fixtures/multi-size.ico new file mode 100644 index 0000000..10b20d6 Binary files /dev/null and b/test/fixtures/multi-size.ico differ