packager: preprocess outdated dependencies

Summary:
Note: if this changeset causes some breakage, consider disabling rather than reverting. To disable, the call to `_preprocessPotentialDependencies` in `ResolutionRequest` can be removed.

It's a bit of an experiment. I couldn't see any particular regression caused by this, but I could see net improvement of the global cache performance, as it unlock much, much stronger batching: indeed, instead of discovering dependencies progressively, we synchronously figure out the list of all potential modules of a bundle, and kick-off cache fetching and/or transformations. So when it comes to fetching from the global cache, it'll do less requests, and each requests will ask for considerably more keys at a time.

Potential problem caused by this changeset: if a module's dependencies completely changed, then the first time we try to build the bundle it'll start transforming modules that we probably don't care at all anymore, spending precious CPU time for nothing. I've been thinking about it and I cannot see such a case happening much often. Even if it happens, it should not cause any bug or corruption, it would just take additional time.

Other potential problem: that this new code doesn't handle some types of edge cases. It's quite hard to figure out what could possibly break in the `ResolutionRequest` code (and I think it would benefit from a larger refactor). We do have a good test coverage for `DependencyGraph` and it seems to work smoothly, so I'm relatively confident we're not breaking edge cases.

Reviewed By: davidaurelio

Differential Revision: D4875467

fbshipit-source-id: 2dfcc755bec638d3d1c47862ec1de5220953e812
This commit is contained in:
Jean Lauliac 2017-04-13 10:28:46 -07:00 committed by Facebook Github Bot
parent fea8f7079d
commit 5a87cc6e4b
9 changed files with 159 additions and 45 deletions

View File

@ -11,6 +11,7 @@
'use strict';
import type {CachedReadResult, ReadResult} from '../../node-haste/Module';
import type {TransformedFile} from '../types.flow';
import type {ModuleCache} from './node-haste.flow';
@ -33,6 +34,14 @@ module.exports = class Module {
this.type = 'Module';
}
readCached(): CachedReadResult {
throw new Error('not implemented');
}
readFresh(): Promise<ReadResult> {
return Promise.reject(new Error('not implemented'));
}
getName() {
return this.name;
}

View File

@ -63,6 +63,8 @@ const nullModule = {
hash() {
throw new Error('not implemented');
},
readCached() { throw new Error('not implemented'); },
readFresh() { return Promise.reject(new Error('not implemented')); },
};
exports.createResolveFn = function(options: ResolveOptions): ResolveFn {

View File

@ -95,7 +95,7 @@ class KeyURIFetcher {
this._batchProcessor = new BatchProcessor({
maximumDelayMs: 10,
maximumItems: 500,
concurrency: 25,
concurrency: 2,
}, this._processKeys.bind(this));
}

View File

@ -95,6 +95,11 @@ export type CachedResult = {
map?: ?SourceMap,
};
export type TransformCacheResult = {|
+result: ?CachedResult,
+outdatedDependencies: $ReadOnlyArray<string>,
|};
/**
* We want to unlink all cache files before writing, so that it is as much
* atomic as possible.
@ -299,6 +304,8 @@ export type ReadTransformProps = {
cacheOptions: CacheOptions,
};
const EMPTY_ARRAY = [];
/**
* We verify the source hash matches to ensure we always favor rebuilding when
* source change (rather than just using fs.mtime(), a bit less robust).
@ -312,41 +319,44 @@ export type ReadTransformProps = {
* Meanwhile we store transforms with different options in different files so
* that it is fast to switch between ex. minified, or not.
*/
function readSync(props: ReadTransformProps): ?CachedResult {
function readSync(props: ReadTransformProps): TransformCacheResult {
GARBAGE_COLLECTOR.collectIfNecessarySync(props.cacheOptions);
const cacheFilePaths = getCacheFilePaths(props);
let metadata, transformedCode;
try {
metadata = readMetadataFileSync(cacheFilePaths.metadata);
if (metadata == null) {
return null;
return {result: null, outdatedDependencies: EMPTY_ARRAY};
}
const sourceHash = hashSourceCode(props);
if (sourceHash !== metadata.cachedSourceHash) {
return null;
return {result: null, outdatedDependencies: metadata.dependencies};
}
transformedCode = fs.readFileSync(cacheFilePaths.transformedCode, 'utf8');
const codeHash = crypto.createHash('sha1').update(transformedCode).digest('hex');
if (metadata.cachedResultHash !== codeHash) {
return null;
return {result: null, outdatedDependencies: metadata.dependencies};
}
} catch (error) {
if (error.code === 'ENOENT') {
return null;
return {result: null, outdatedDependencies: EMPTY_ARRAY};
}
throw error;
}
return {
code: transformedCode,
dependencies: metadata.dependencies,
dependencyOffsets: metadata.dependencyOffsets,
map: metadata.sourceMap,
result: {
code: transformedCode,
dependencies: metadata.dependencies,
dependencyOffsets: metadata.dependencyOffsets,
map: metadata.sourceMap,
},
outdatedDependencies: EMPTY_ARRAY,
};
}
module.exports = {
writeSync,
readSync(props: ReadTransformProps): ?CachedResult {
readSync(props: ReadTransformProps): TransformCacheResult {
const result = readSync(props);
const msg = result ? 'Cache hit: ' : 'Cache miss: ';
debugRead(msg + props.filePath);

View File

@ -34,7 +34,7 @@ function writeSync(props) {
}
function readSync(props) {
return transformCache.get(transformCacheKeyOf(props));
return {result: transformCache.get(transformCacheKeyOf(props)), outdatedDependencies: []};
}
module.exports = {

View File

@ -92,7 +92,7 @@ describe('TransformCache', () => {
...args,
cacheOptions: {resetCache: false},
});
expect(cachedResult).toEqual(result);
expect(cachedResult.result).toEqual(result);
});
});
@ -107,7 +107,7 @@ describe('TransformCache', () => {
transformOptionsKey: 'boo!',
result: {
code: `/* result for ${key} */`,
dependencies: ['foo', `dep of ${key}`],
dependencies: ['foo', 'bar'],
dependencyOffsets: [12, imurmurhash('dep' + key).result()],
map: {desc: `source map for ${key}`},
},
@ -125,7 +125,7 @@ describe('TransformCache', () => {
...args,
cacheOptions: {resetCache: false},
});
expect(cachedResult).toEqual(result);
expect(cachedResult.result).toEqual(result);
});
allCases.pop();
allCases.forEach(entry => {
@ -133,7 +133,8 @@ describe('TransformCache', () => {
...argsFor(entry),
cacheOptions: {resetCache: false},
});
expect(cachedResult).toBeNull();
expect(cachedResult.result).toBeNull();
expect(cachedResult.outdatedDependencies).toEqual(['foo', 'bar']);
});
});

View File

@ -15,7 +15,7 @@ const Module = require('./Module');
const getAssetDataFromName = require('./lib/getAssetDataFromName');
import type {ConstructorArgs, ReadResult} from './Module';
import type {CachedReadResult, ConstructorArgs, ReadResult} from './Module';
class AssetModule extends Module {
@ -37,14 +37,15 @@ class AssetModule extends Module {
return Promise.resolve(false);
}
readCached(): ReadResult {
readCached(): CachedReadResult {
/** $FlowFixMe: improper OOP design. AssetModule, being different from a
* normal Module, shouldn't inherit it in the first place. */
return {dependencies: this._dependencies};
return {result: {dependencies: this._dependencies}, outdatedDependencies: []};
}
/** $FlowFixMe: improper OOP design. */
readFresh(): Promise<ReadResult> {
return Promise.resolve(this.readCached());
return Promise.resolve({dependencies: this._dependencies});
}
getName() {

View File

@ -24,6 +24,8 @@ const getAssetDataFromName = require('../lib/getAssetDataFromName');
import type {HasteFS} from '../types';
import type DependencyGraphHelpers from './DependencyGraphHelpers';
import type ResolutionResponse from './ResolutionResponse';
import type {Options as TransformWorkerOptions} from '../../JSTransformer/worker/worker';
import type {ReadResult, CachedReadResult} from '../Module';
type DirExistsFn = (filePath: string) => boolean;
@ -45,6 +47,8 @@ type Moduleish = {
+path: string,
getPackage(): ?Packageish,
hash(): string,
readCached(transformOptions: TransformWorkerOptions): CachedReadResult,
readFresh(transformOptions: TransformWorkerOptions): Promise<ReadResult>,
};
type ModuleishCache<TModule, TPackage> = {
@ -163,8 +167,8 @@ class ResolutionRequest<TModule: Moduleish, TPackage: Packageish> {
resolveModuleDependencies(
module: TModule,
dependencyNames: Array<string>,
): [Array<string>, Array<TModule>] {
dependencyNames: $ReadOnlyArray<string>,
): [$ReadOnlyArray<string>, $ReadOnlyArray<TModule>] {
const dependencies = dependencyNames.map(name => this.resolveDependency(module, name));
return [dependencyNames, dependencies];
}
@ -176,7 +180,7 @@ class ResolutionRequest<TModule: Moduleish, TPackage: Packageish> {
recursive = true,
}: {
response: ResolutionResponse<TModule>,
transformOptions: Object,
transformOptions: TransformWorkerOptions,
onProgress?: ?(finishedModules: number, totalModules: number) => mixed,
recursive: boolean,
}) {
@ -186,10 +190,23 @@ class ResolutionRequest<TModule: Moduleish, TPackage: Packageish> {
let totalModules = 1;
let finishedModules = 0;
const resolveDependencies = module => Promise.resolve().then(() => {
const result = module.readCached(transformOptions);
if (result != null) {
return this.resolveModuleDependencies(module, result.dependencies);
let preprocessedModuleCount = 1;
if (recursive) {
this._preprocessPotentialDependencies(transformOptions, entry, count => {
if (count + 1 <= preprocessedModuleCount) {
return;
}
preprocessedModuleCount = count + 1;
if (onProgress != null) {
onProgress(finishedModules, preprocessedModuleCount);
}
});
}
const resolveDependencies = (module: TModule) => Promise.resolve().then(() => {
const cached = module.readCached(transformOptions);
if (cached.result != null) {
return this.resolveModuleDependencies(module, cached.result.dependencies);
}
return module.readFresh(transformOptions)
.then(({dependencies}) => this.resolveModuleDependencies(module, dependencies));
@ -221,7 +238,7 @@ class ResolutionRequest<TModule: Moduleish, TPackage: Packageish> {
if (onProgress) {
finishedModules += 1;
totalModules += newDependencies.length;
onProgress(finishedModules, totalModules);
onProgress(finishedModules, Math.max(totalModules, preprocessedModuleCount));
}
if (recursive) {
@ -273,6 +290,71 @@ class ResolutionRequest<TModule: Moduleish, TPackage: Packageish> {
});
}
/**
* This synchronously look at all the specified modules and recursively kicks off global cache
* fetching or transforming (via `readFresh`). This is a hack that workaround the current
* structure, because we could do better. First off, the algorithm that resolves dependencies
* recursively should be synchronous itself until it cannot progress anymore (and needs to
* call `readFresh`), so that this algo would be integrated into it.
*/
_preprocessPotentialDependencies(
transformOptions: TransformWorkerOptions,
module: TModule,
onProgress: (moduleCount: number) => mixed,
): void {
const visitedModulePaths = new Set();
const pendingBatches = [this.preprocessModule(transformOptions, module, visitedModulePaths)];
onProgress(visitedModulePaths.size);
while (pendingBatches.length > 0) {
const dependencyModules = pendingBatches.pop();
while (dependencyModules.length > 0) {
const dependencyModule = dependencyModules.pop();
const deps = this.preprocessModule(transformOptions, dependencyModule, visitedModulePaths);
pendingBatches.push(deps);
onProgress(visitedModulePaths.size);
}
}
}
preprocessModule(
transformOptions: TransformWorkerOptions,
module: TModule,
visitedModulePaths: Set<string>,
): Array<TModule> {
const cached = module.readCached(transformOptions);
if (cached.result == null) {
module.readFresh(transformOptions).catch(error => {
/* ignore errors, they'll be handled later if the dependency is actually
* not obsolete, and required from somewhere */
});
}
const dependencies = cached.result != null
? cached.result.dependencies : cached.outdatedDependencies;
return this.tryResolveModuleDependencies(module, dependencies, visitedModulePaths);
}
tryResolveModuleDependencies(
module: TModule,
dependencyNames: $ReadOnlyArray<string>,
visitedModulePaths: Set<string>,
): Array<TModule> {
const result = [];
for (let i = 0; i < dependencyNames.length; ++i) {
try {
const depModule = this.resolveDependency(module, dependencyNames[i]);
if (!visitedModulePaths.has(depModule.path)) {
visitedModulePaths.add(depModule.path);
result.push(depModule);
}
} catch (error) {
if (!(error instanceof UnableToResolveError)) {
throw error;
}
}
}
return result;
}
_resolveHasteDependency(fromModule: TModule, toModuleName: string): TModule {
toModuleName = normalizePath(toModuleName);

View File

@ -33,13 +33,18 @@ import type DependencyGraphHelpers from './DependencyGraph/DependencyGraphHelper
import type ModuleCache from './ModuleCache';
export type ReadResult = {
code: string,
dependencies?: ?Array<string>,
dependencyOffsets?: ?Array<number>,
map?: ?SourceMap,
source: string,
+code: string,
+dependencies: Array<string>,
+dependencyOffsets?: ?Array<number>,
+map?: ?SourceMap,
+source: string,
};
export type CachedReadResult = {|
+result: ?ReadResult,
+outdatedDependencies: $ReadOnlyArray<string>,
|};
export type TransformCode = (
module: Module,
sourceCode: string,
@ -95,7 +100,7 @@ class Module {
_sourceCode: ?string;
_readPromises: Map<string, Promise<ReadResult>>;
_readResultsByOptionsKey: Map<string, ?ReadResult>;
_readResultsByOptionsKey: Map<string, CachedReadResult>;
constructor({
cache,
@ -346,8 +351,8 @@ class Module {
read(transformOptions: TransformOptions): Promise<ReadResult> {
return Promise.resolve().then(() => {
const cached = this.readCached(transformOptions);
if (cached != null) {
return cached;
if (cached.result != null) {
return cached.result;
}
return this.readFresh(transformOptions);
});
@ -358,12 +363,13 @@ class Module {
* the file from source. This has the benefit of being synchronous. As a
* result it is possible to read many cached Module in a row, synchronously.
*/
readCached(transformOptions: TransformOptions): ?ReadResult {
readCached(transformOptions: TransformOptions): CachedReadResult {
const key = stableObjectHash(transformOptions || {});
if (this._readResultsByOptionsKey.has(key)) {
return this._readResultsByOptionsKey.get(key);
let result = this._readResultsByOptionsKey.get(key);
if (result != null) {
return result;
}
const result = this._readFromTransformCache(transformOptions, key);
result = this._readFromTransformCache(transformOptions, key);
this._readResultsByOptionsKey.set(key, result);
return result;
}
@ -375,13 +381,16 @@ class Module {
_readFromTransformCache(
transformOptions: TransformOptions,
transformOptionsKey: string,
): ?ReadResult {
): CachedReadResult {
const cacheProps = this._getCacheProps(transformOptions, transformOptionsKey);
const cachedResult = TransformCache.readSync(cacheProps);
if (cachedResult) {
return this._finalizeReadResult(cacheProps.sourceCode, cachedResult);
if (cachedResult.result == null) {
return {result: null, outdatedDependencies: cachedResult.outdatedDependencies};
}
return null;
return {
result: this._finalizeReadResult(cacheProps.sourceCode, cachedResult.result),
outdatedDependencies: [],
};
}
/**
@ -411,7 +420,7 @@ class Module {
},
);
}).then(result => {
this._readResultsByOptionsKey.set(key, result);
this._readResultsByOptionsKey.set(key, {result, outdatedDependencies: []});
return result;
});
});