From 737a32417f296dc0f870fb528ad0fc7921b4ede9 Mon Sep 17 00:00:00 2001 From: Amjad Masad Date: Wed, 24 Jun 2015 17:40:29 -0700 Subject: [PATCH] [react-packager] Injectible file crawlers (2x crawl speedup) --- .../__tests__/DependencyGraph-test.js | 7 +- .../DependencyGraph/index.js | 35 +++--- .../src/DependencyResolver/crawlers/index.js | 36 ++++++ .../src/DependencyResolver/crawlers/node.js | 61 ++++++++++ .../DependencyResolver/crawlers/watchman.js | 70 ++++++++++++ .../src/DependencyResolver/fastfs.js | 84 +++++--------- .../FileWatcher/__tests__/FileWatcher-test.js | 4 +- react-packager/src/FileWatcher/index.js | 108 ++++++++++-------- 8 files changed, 285 insertions(+), 120 deletions(-) create mode 100644 react-packager/src/DependencyResolver/crawlers/index.js create mode 100644 react-packager/src/DependencyResolver/crawlers/node.js create mode 100644 react-packager/src/DependencyResolver/crawlers/watchman.js diff --git a/react-packager/src/DependencyResolver/DependencyGraph/__tests__/DependencyGraph-test.js b/react-packager/src/DependencyResolver/DependencyGraph/__tests__/DependencyGraph-test.js index 09861b52..673d9c58 100644 --- a/react-packager/src/DependencyResolver/DependencyGraph/__tests__/DependencyGraph-test.js +++ b/react-packager/src/DependencyResolver/DependencyGraph/__tests__/DependencyGraph-test.js @@ -13,6 +13,8 @@ jest .dontMock('crypto') .dontMock('absolute-path') .dontMock('../docblock') + .dontMock('../../crawlers') + .dontMock('../../crawlers/node') .dontMock('../../replacePatterns') .dontMock('../../../lib/getAssetDataFromName') .dontMock('../../fastfs') @@ -22,6 +24,8 @@ jest .dontMock('../../Package') .dontMock('../../ModuleCache'); +const Promise = require('promise'); + jest.mock('fs'); describe('DependencyGraph', function() { @@ -36,7 +40,8 @@ describe('DependencyGraph', function() { fileWatcher = { on: function() { return this; - } + }, + isWatchman: () => Promise.resolve(false) }; }); diff --git a/react-packager/src/DependencyResolver/DependencyGraph/index.js b/react-packager/src/DependencyResolver/DependencyGraph/index.js index 9390a10e..8145bfa0 100644 --- a/react-packager/src/DependencyResolver/DependencyGraph/index.js +++ b/react-packager/src/DependencyResolver/DependencyGraph/index.js @@ -8,17 +8,19 @@ */ 'use strict'; -const path = require('path'); +const Activity = require('../../Activity'); +const AssetModule_DEPRECATED = require('../AssetModule_DEPRECATED'); const Fastfs = require('../fastfs'); const ModuleCache = require('../ModuleCache'); -const AssetModule_DEPRECATED = require('../AssetModule_DEPRECATED'); -const declareOpts = require('../../lib/declareOpts'); -const isAbsolutePath = require('absolute-path'); -const debug = require('debug')('DependencyGraph'); -const getAssetDataFromName = require('../../lib/getAssetDataFromName'); -const util = require('util'); const Promise = require('promise'); const _ = require('underscore'); +const crawl = require('../crawlers'); +const debug = require('debug')('DependencyGraph'); +const declareOpts = require('../../lib/declareOpts'); +const getAssetDataFromName = require('../../lib/getAssetDataFromName'); +const isAbsolutePath = require('absolute-path'); +const path = require('path'); +const util = require('util'); const validateOpts = declareOpts({ roots: { @@ -68,13 +70,18 @@ class DependencyGraph { return this._loading; } - const modulePattern = new RegExp( - '\.(' + ['js', 'json'].concat(this._assetExts).join('|') + ')$' - ); + const crawlActivity = Activity.startEvent('fs crawl'); + const allRoots = this._opts.roots.concat(this._opts.assetRoots_DEPRECATED); + this._crawling = crawl(allRoots, { + ignore: this._opts.ignoreFilePath, + exts: ['js', 'json'].concat(this._opts.assetExts), + fileWatcher: this._opts.fileWatcher, + }); + this._crawling.then((files) => Activity.endEvent(crawlActivity)); this._fastfs = new Fastfs(this._opts.roots,this._opts.fileWatcher, { - pattern: modulePattern, ignore: this._opts.ignoreFilePath, + crawling: this._crawling, }); this._fastfs.on('change', this._processFileChange.bind(this)); @@ -454,14 +461,10 @@ class DependencyGraph { this._assetMap_DEPRECATED = Object.create(null); - const pattern = new RegExp( - '\.(' + this._opts.assetExts.join('|') + ')$' - ); - const fastfs = new Fastfs( this._opts.assetRoots_DEPRECATED, this._opts.fileWatcher, - { pattern, ignore: this._opts.ignoreFilePath } + { ignore: this._opts.ignoreFilePath, crawling: this._crawling } ); fastfs.on('change', this._processAssetChange_DEPRECATED.bind(this)); diff --git a/react-packager/src/DependencyResolver/crawlers/index.js b/react-packager/src/DependencyResolver/crawlers/index.js new file mode 100644 index 00000000..71290af4 --- /dev/null +++ b/react-packager/src/DependencyResolver/crawlers/index.js @@ -0,0 +1,36 @@ +'use strict'; + +const nodeCrawl = require('./node'); +//const watchmanCrawl = require('./watchman'); + +function crawl(roots, options) { + return nodeCrawl(roots, options); + + // Although, in theory, watchman should be much faster; + // there is currently a bottleneck somewhere in the + // encoding/decoding that is causing it to be slower + // than node crawling. However, this should be fixed soon. + // https://github.com/facebook/watchman/issues/113 + /* + const {fileWatcher} = options; + return fileWatcher.isWatchman().then(isWatchman => { + + console.log(isWatchman); + if (!isWatchman) { + return false; + } + + // Make sure we're dealing with a version of watchman + // that's using `watch-project` + // TODO(amasad): properly expose (and document) used sane internals. + return fileWatcher.getWatchers().then(([watcher]) => !!watcher.watchProjectInfo.root); + }).then(isWatchman => { + if (isWatchman) { + return watchmanCrawl(roots, options); + } + + return nodeCrawl(roots, options); + });*/ +} + +module.exports = crawl; diff --git a/react-packager/src/DependencyResolver/crawlers/node.js b/react-packager/src/DependencyResolver/crawlers/node.js new file mode 100644 index 00000000..72030905 --- /dev/null +++ b/react-packager/src/DependencyResolver/crawlers/node.js @@ -0,0 +1,61 @@ +'use strict'; + +const Promise = require('promise'); +const debug = require('debug')('DependencyGraph'); +const fs = require('fs'); +const path = require('path'); + +const readDir = Promise.denodeify(fs.readdir); +const stat = Promise.denodeify(fs.stat); + +function nodeRecReadDir(roots, {ignore, exts}) { + const queue = roots.slice(); + const retFiles = []; + const extPattern = new RegExp( + '\.(' + exts.join('|') + ')$' + ); + + function search() { + const currDir = queue.shift(); + if (!currDir) { + return Promise.resolve(); + } + + return readDir(currDir) + .then(files => files.map(f => path.join(currDir, f))) + .then(files => Promise.all( + files.map(f => stat(f).catch(handleBrokenLink)) + ).then(stats => [ + // Remove broken links. + files.filter((file, i) => !!stats[i]), + stats.filter(Boolean), + ])) + .then(([files, stats]) => { + files.forEach((filePath, i) => { + if (ignore(filePath)) { + return; + } + + if (stats[i].isDirectory()) { + queue.push(filePath); + return; + } + + if (filePath.match(extPattern)) { + retFiles.push(filePath); + } + }); + + return search(); + }); + } + + return search().then(() => retFiles); +} + +function handleBrokenLink(e) { + debug('WARNING: error stating, possibly broken symlink', e.message); + return Promise.resolve(); +} + +module.exports = nodeRecReadDir; diff --git a/react-packager/src/DependencyResolver/crawlers/watchman.js b/react-packager/src/DependencyResolver/crawlers/watchman.js new file mode 100644 index 00000000..d6479a51 --- /dev/null +++ b/react-packager/src/DependencyResolver/crawlers/watchman.js @@ -0,0 +1,70 @@ +'use strict'; + +const Promise = require('promise'); +const path = require('path'); + +function watchmanRecReadDir(roots, {ignore, fileWatcher, exts}) { + const files = []; + return Promise.all( + roots.map( + root => fileWatcher.getWatcherForRoot(root) + ) + ).then( + watchers => { + // All watchman roots for all watches we have. + const watchmanRoots = watchers.map( + watcher => watcher.watchProjectInfo.root + ); + + // Actual unique watchers (because we use watch-project we may end up with + // duplicate "real" watches, and that's by design). + // TODO(amasad): push this functionality into the `FileWatcher`. + const uniqueWatchers = watchers.filter( + (watcher, i) => watchmanRoots.indexOf(watcher.watchProjectInfo.root) === i + ); + + return Promise.all( + uniqueWatchers.map(watcher => { + const watchedRoot = watcher.watchProjectInfo.root; + + // Build up an expression to filter the output by the relevant roots. + const dirExpr = ['anyof']; + for (let i = 0; i < roots.length; i++) { + const root = roots[i]; + if (isDescendant(watchedRoot, root)) { + dirExpr.push(['dirname', path.relative(watchedRoot, root)]); + } + } + + const cmd = Promise.promisify(watcher.client.command.bind(watcher.client)); + return cmd(['query', watchedRoot, { + 'suffix': exts, + 'expression': ['allof', ['type', 'f'], 'exists', dirExpr], + 'fields': ['name'], + }]).then(resp => { + if ('warning' in resp) { + console.warn('watchman warning: ', resp.warning); + } + + resp.files.forEach(filePath => { + filePath = path.join( + watchedRoot, + filePath + ); + + if (!ignore(filePath)) { + files.push(filePath); + } + return false; + }); + }); + }) + ); + }).then(() => files); +} + +function isDescendant(root, child) { + return path.relative(root, child).indexOf('..') !== 0; +} + +module.exports = watchmanRecReadDir; diff --git a/react-packager/src/DependencyResolver/fastfs.js b/react-packager/src/DependencyResolver/fastfs.js index 0053b14e..67f7076e 100644 --- a/react-packager/src/DependencyResolver/fastfs.js +++ b/react-packager/src/DependencyResolver/fastfs.js @@ -4,28 +4,46 @@ const Promise = require('promise'); const {EventEmitter} = require('events'); const _ = require('underscore'); -const debug = require('debug')('DependencyGraph'); const fs = require('fs'); const path = require('path'); -const readDir = Promise.denodeify(fs.readdir); const readFile = Promise.denodeify(fs.readFile); const stat = Promise.denodeify(fs.stat); + const hasOwn = Object.prototype.hasOwnProperty; class Fastfs extends EventEmitter { - constructor(roots, fileWatcher, {ignore, pattern}) { + constructor(roots, fileWatcher, {ignore, crawling}) { super(); this._fileWatcher = fileWatcher; this._ignore = ignore; - this._pattern = pattern; this._roots = roots.map(root => new File(root, { isDir: true })); this._fastPaths = Object.create(null); + this._crawling = crawling; } build() { - const queue = this._roots.slice(); - return this._search(queue).then(() => { + const rootsPattern = new RegExp( + '^(' + this._roots.map(root => escapeRegExp(root.path)).join('|') + ')' + ); + + return this._crawling.then(files => { + files.forEach(filePath => { + if (filePath.match(rootsPattern)) { + const newFile = new File(filePath, { isDir: false }); + const parent = this._fastPaths[path.dirname(filePath)]; + if (parent) { + parent.addChild(newFile); + } else { + this._add(newFile); + for (let file = newFile; file; file = file.parent) { + if (!this._fastPaths[file.path]) { + this._fastPaths[file.path] = file; + } + } + } + } + }); this._fileWatcher.on('all', this._processFileChange.bind(this)); }); } @@ -134,32 +152,6 @@ class Fastfs extends EventEmitter { this._getAndAssertRoot(file.path).addChild(file); } - _search(queue) { - const dir = queue.shift(); - if (!dir) { - return Promise.resolve(); - } - - return readAndStatDir(dir.path).then(([filePaths, stats]) => { - filePaths.forEach((filePath, i) => { - if (this._ignore(filePath)) { - return; - } - - if (stats[i].isDirectory()) { - queue.push( - new File(filePath, { isDir: true, fstat: stats[i] }) - ); - return; - } - - if (filePath.match(this._pattern)) { - this._add(new File(filePath, { fstat: stats[i] })); - } - }); - return this._search(queue); - }); - } _processFileChange(type, filePath, root, fstat) { const absPath = path.join(root, filePath); @@ -182,10 +174,7 @@ class Fastfs extends EventEmitter { delete this._fastPaths[path.normalize(absPath)]; if (type !== 'delete') { - this._add(new File(absPath, { - isDir: false, - fstat - })); + this._add(new File(absPath, { isDir: false })); } this.emit('change', type, filePath, root, fstat); @@ -193,16 +182,12 @@ class Fastfs extends EventEmitter { } class File { - constructor(filePath, {isDir, fstat}) { + constructor(filePath, { isDir }) { this.path = filePath; this.isDir = Boolean(isDir); if (this.isDir) { this.children = Object.create(null); } - - if (fstat) { - this._stat = Promise.resolve(fstat); - } } read() { @@ -290,21 +275,8 @@ function isDescendant(root, child) { return path.relative(root, child).indexOf('..') !== 0; } -function readAndStatDir(dir) { - return readDir(dir) - .then(files => Promise.all(files.map(f => path.join(dir, f)))) - .then(files => Promise.all( - files.map(f => stat(f).catch(handleBrokenLink)) - ).then(stats => [ - // Remove broken links. - files.filter((file, i ) => !!stats[i]), - stats.filter(Boolean), - ])); -} - -function handleBrokenLink(e) { - debug('WARNING: error stating, possibly broken symlink', e.message); - return Promise.resolve(); +function escapeRegExp(str) { + return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&'); } module.exports = Fastfs; diff --git a/react-packager/src/FileWatcher/__tests__/FileWatcher-test.js b/react-packager/src/FileWatcher/__tests__/FileWatcher-test.js index fc45205a..a5eeda3d 100644 --- a/react-packager/src/FileWatcher/__tests__/FileWatcher-test.js +++ b/react-packager/src/FileWatcher/__tests__/FileWatcher-test.js @@ -33,7 +33,7 @@ describe('FileWatcher', function() { pit('it should get the watcher instance when ready', function() { var fileWatcher = new FileWatcher(['rootDir']); - return fileWatcher._loading.then(function(watchers) { + return fileWatcher.getWatchers().then(function(watchers) { watchers.forEach(function(watcher) { expect(watcher instanceof Watcher).toBe(true); }); @@ -48,7 +48,7 @@ describe('FileWatcher', function() { var fileWatcher = new FileWatcher(['rootDir']); var handler = jest.genMockFn(); fileWatcher.on('all', handler); - return fileWatcher._loading.then(function(){ + return fileWatcher.getWatchers().then(function(){ cb(1, 2, 3, 4); jest.runAllTimers(); expect(handler.mock.calls[0]).toEqual([1, 2, 3, 4]); diff --git a/react-packager/src/FileWatcher/index.js b/react-packager/src/FileWatcher/index.js index aac211ad..46b4667e 100644 --- a/react-packager/src/FileWatcher/index.js +++ b/react-packager/src/FileWatcher/index.js @@ -8,13 +8,14 @@ */ 'use strict'; -var EventEmitter = require('events').EventEmitter; -var sane = require('sane'); -var Promise = require('promise'); -var util = require('util'); -var exec = require('child_process').exec; +const EventEmitter = require('events').EventEmitter; +const sane = require('sane'); +const Promise = require('promise'); +const exec = require('child_process').exec; -var detectingWatcherClass = new Promise(function(resolve) { +const MAX_WAIT_TIME = 25000; + +const detectingWatcherClass = new Promise(function(resolve) { exec('which watchman', function(err, out) { if (err || out.length === 0) { resolve(sane.NodeWatcher); @@ -24,53 +25,76 @@ var detectingWatcherClass = new Promise(function(resolve) { }); }); -module.exports = FileWatcher; +let inited = false; -var MAX_WAIT_TIME = 25000; +class FileWatcher extends EventEmitter { -// Singleton -var fileWatcher = null; + constructor(rootConfigs) { + if (inited) { + throw new Error('FileWatcher can only be instantiated once'); + } + inited = true; -function FileWatcher(rootConfigs) { - if (fileWatcher) { - // This allows us to optimize watching in the future by merging roots etc. - throw new Error('FileWatcher can only be instantiated once'); + super(); + this._watcherByRoot = Object.create(null); + + this._loading = Promise.all( + rootConfigs.map(createWatcher) + ).then(watchers => { + watchers.forEach((watcher, i) => { + this._watcherByRoot[rootConfigs[i].dir] = watcher; + watcher.on( + 'all', + // args = (type, filePath, root, stat) + (...args) => this.emit('all', ...args) + ); + }); + return watchers; + }); + + this._loading.done(); } - fileWatcher = this; + getWatchers() { + return this._loading; + } - this._loading = Promise.all( - rootConfigs.map(createWatcher) - ).then(function(watchers) { - watchers.forEach(function(watcher) { - watcher.on('all', function(type, filepath, root, stat) { - fileWatcher.emit('all', type, filepath, root, stat); - }); - }); - return watchers; - }); - this._loading.done(); + getWatcherForRoot(root) { + return this._loading.then(() => this._watcherByRoot[root]); + } + + isWatchman() { + return detectingWatcherClass.then( + Watcher => Watcher === sane.WatchmanWatcher + ); + } + + end() { + return this._loading.then( + (watchers) => watchers.map( + watcher => Promise.denodeify(watcher.close).call(watcher) + ) + ); + } + + static createDummyWatcher() { + const ev = new EventEmitter(); + ev.end = function() { + return Promise.resolve(); + }; + return ev; + } } -util.inherits(FileWatcher, EventEmitter); - -FileWatcher.prototype.end = function() { - return this._loading.then(function(watchers) { - watchers.forEach(function(watcher) { - return Promise.denodeify(watcher.close).call(watcher); - }); - }); -}; - function createWatcher(rootConfig) { return detectingWatcherClass.then(function(Watcher) { - var watcher = new Watcher(rootConfig.dir, { + const watcher = new Watcher(rootConfig.dir, { glob: rootConfig.globs, dot: false, }); return new Promise(function(resolve, reject) { - var rejectTimeout = setTimeout(function() { + const rejectTimeout = setTimeout(function() { reject(new Error([ 'Watcher took too long to load', 'Try running `watchman version` from your terminal', @@ -86,10 +110,4 @@ function createWatcher(rootConfig) { }); } -FileWatcher.createDummyWatcher = function() { - var ev = new EventEmitter(); - ev.end = function() { - return Promise.resolve(); - }; - return ev; -}; +module.exports = FileWatcher;