packager: make TransformCache a stateful class

Summary: This removes the single-use "garbage collection" class and makes a single `TransformCache` instead. The reason for doing this is that I'd like to experiment with using a local dir based on the root path instead of the `tmpdir()` folder, because on some platforms using the temp dir is subject to permission problems, sometimes it is empty, and on all platforms it is vulnerable to concurrency issues.

Reviewed By: davidaurelio

Differential Revision: D5027591

fbshipit-source-id: e1176e0e88111116256f7b2b173a0b36837a887d
This commit is contained in:
Jean Lauliac 2017-05-10 04:44:10 -07:00 committed by Facebook Github Bot
parent da7f06e115
commit ddf7bebad3
5 changed files with 267 additions and 249 deletions

View File

@ -7,6 +7,7 @@
* of patent rights can be found in the PATENTS file in the same directory. * of patent rights can be found in the PATENTS file in the same directory.
* *
* @flow * @flow
* @format
*/ */
'use strict'; 'use strict';
@ -30,62 +31,6 @@ export type GetTransformCacheKey = (options: {}) => string;
const CACHE_NAME = 'react-native-packager-cache'; const CACHE_NAME = 'react-native-packager-cache';
const CACHE_SUB_DIR = 'cache'; const CACHE_SUB_DIR = 'cache';
/**
* If packager is running for two different directories, we don't want the
* caches to conflict with each other. `__dirname` carries that because packager
* will be, for example, installed in a different `node_modules/` folder for
* different projects.
*/
const getCacheDirPath = (function() {
let dirPath;
return function() {
if (dirPath != null) {
return dirPath;
}
const hash = crypto.createHash('sha1').update(__dirname);
if (process.getuid != null) {
hash.update(process.getuid().toString());
}
dirPath = path.join(require('os').tmpdir(), CACHE_NAME + '-' + hash.digest('hex'));
require('debug')('RNP:TransformCache:Dir')(
`transform cache directory: ${dirPath}`
);
return dirPath;
};
})();
function hashSourceCode(props: {
filePath: string,
sourceCode: string,
getTransformCacheKey: GetTransformCacheKey,
transformOptions: WorkerOptions,
transformOptionsKey: string,
}): string {
return crypto.createHash('sha1')
.update(props.getTransformCacheKey(props.transformOptions))
.update(props.sourceCode)
.digest('hex');
}
/**
* The path, built as a hash, does not take the source code itself into account
* because it would generate lots of file during development. (The source hash
* is stored in the metadata instead).
*/
function getCacheFilePaths(props: {
filePath: string,
transformOptionsKey: string,
}): CacheFilePaths {
const hasher = crypto.createHash('sha1')
.update(props.filePath)
.update(props.transformOptionsKey);
const hash = hasher.digest('hex');
const prefix = hash.substr(0, 2);
const fileName = `${hash.substr(2)}`;
const base = path.join(getCacheDirPath(), CACHE_SUB_DIR, prefix, fileName);
return {transformedCode: base, metadata: base + '.meta'};
}
export type CachedResult = { export type CachedResult = {
code: string, code: string,
dependencies: Array<string>, dependencies: Array<string>,
@ -98,27 +43,40 @@ export type TransformCacheResult = {|
+outdatedDependencies: $ReadOnlyArray<string>, +outdatedDependencies: $ReadOnlyArray<string>,
|}; |};
/** export type CacheOptions = {
* We want to unlink all cache files before writing, so that it is as much reporter: Reporter,
* atomic as possible. resetCache?: boolean,
*/ };
function unlinkIfExistsSync(filePath: string) {
try { export type ReadTransformProps = {
fs.unlinkSync(filePath); filePath: string,
} catch (error) { sourceCode: string,
if (error.code === 'ENOENT') { transformOptions: WorkerOptions,
return; transformOptionsKey: string,
} getTransformCacheKey: GetTransformCacheKey,
throw error; cacheOptions: CacheOptions,
} };
const EMPTY_ARRAY = [];
/* 1 day */
const GARBAGE_COLLECTION_PERIOD = 24 * 60 * 60 * 1000;
/* 4 days */
const CACHE_FILE_MAX_LAST_ACCESS_TIME = GARBAGE_COLLECTION_PERIOD * 4;
class TransformCache {
_cacheWasReset: boolean;
_dirPath: string;
_lastCollected: ?number;
constructor() {
this._cacheWasReset = false;
} }
/** /**
* In the workers we are fine doing sync work because a worker is meant to
* process a single source file at a time.
*
* We store the transformed JS because it is likely to be much bigger than the * We store the transformed JS because it is likely to be much bigger than the
* rest of the data JSON. Probably the map should be stored separately as well. * rest of the data JSON. Probably the map should be stored separately as
* well.
* *
* We make the write operation as much atomic as possible: indeed, if another * We make the write operation as much atomic as possible: indeed, if another
* process is reading the cache at the same time, there would be a risk it * process is reading the cache at the same time, there would be a risk it
@ -130,7 +88,7 @@ function unlinkIfExistsSync(filePath: string) {
* close to each others, one of the workers is going to loose its results no * close to each others, one of the workers is going to loose its results no
* matter what. * matter what.
*/ */
function writeSync(props: { writeSync(props: {
filePath: string, filePath: string,
sourceCode: string, sourceCode: string,
getTransformCacheKey: GetTransformCacheKey, getTransformCacheKey: GetTransformCacheKey,
@ -138,72 +96,88 @@ function writeSync(props: {
transformOptionsKey: string, transformOptionsKey: string,
result: CachedResult, result: CachedResult,
}): void { }): void {
const cacheFilePath = getCacheFilePaths(props); const cacheFilePath = this._getCacheFilePaths(props);
mkdirp.sync(path.dirname(cacheFilePath.transformedCode)); mkdirp.sync(path.dirname(cacheFilePath.transformedCode));
const {result} = props; const {result} = props;
unlinkIfExistsSync(cacheFilePath.transformedCode); unlinkIfExistsSync(cacheFilePath.transformedCode);
unlinkIfExistsSync(cacheFilePath.metadata); unlinkIfExistsSync(cacheFilePath.metadata);
writeFileAtomicSync(cacheFilePath.transformedCode, result.code); writeFileAtomicSync(cacheFilePath.transformedCode, result.code);
writeFileAtomicSync(cacheFilePath.metadata, JSON.stringify([ writeFileAtomicSync(
cacheFilePath.metadata,
JSON.stringify([
crypto.createHash('sha1').update(result.code).digest('hex'), crypto.createHash('sha1').update(result.code).digest('hex'),
hashSourceCode(props), hashSourceCode(props),
result.dependencies, result.dependencies,
result.dependencyOffsets, result.dependencyOffsets,
result.map, result.map,
])); ]),
}
export type CacheOptions = {
reporter: Reporter,
resetCache?: boolean,
};
/* 1 day */
const GARBAGE_COLLECTION_PERIOD = 24 * 60 * 60 * 1000;
/* 4 days */
const CACHE_FILE_MAX_LAST_ACCESS_TIME = GARBAGE_COLLECTION_PERIOD * 4;
/**
* Temporary folder is cleaned up only on boot, ex. on OS X, as far as I'm
* concerned. Since generally people reboot only very rarely, we need to clean
* up old stuff from time to time.
*
* This code should be safe even if two different React Native projects are
* running at the same time.
*/
const GARBAGE_COLLECTOR = new (class GarbageCollector {
_lastCollected: ?number;
_cacheWasReset: boolean;
constructor() {
this._cacheWasReset = false;
}
/**
* We want to avoid preventing tool use if the cleanup fails for some reason,
* but still provide some chance for people to report/fix things.
*/
_collectSyncNoThrow() {
try {
collectCacheIfOldSync();
} catch (error) {
terminal.log(error.stack);
terminal.log(
'Error: Cleaning up the cache folder failed. Continuing anyway.',
); );
terminal.log('The cache folder is: %s', getCacheDirPath());
}
this._lastCollected = Date.now();
} }
_resetCache(reporter: Reporter) { readSync(props: ReadTransformProps): TransformCacheResult {
rimraf.sync(getCacheDirPath()); const result = this._readSync(props);
reporter.update({type: 'transform_cache_reset'}); const msg = result ? 'Cache hit: ' : 'Cache miss: ';
this._cacheWasReset = true; debugRead(msg + props.filePath);
this._lastCollected = Date.now(); return result;
} }
collectIfNecessarySync(options: CacheOptions) { /**
* We verify the source hash matches to ensure we always favor rebuilding when
* source change (rather than just using fs.mtime(), a bit less robust).
*
* That means when the source changes, we override the old transformed code
* with the new one. This is, I believe, preferable, so as to avoid bloating
* the cache during development cycles, where people changes files all the
* time. If we implement a global cache ability at some point, we'll be able
* to store old artifacts as well.
*
* Meanwhile we store transforms with different options in different files so
* that it is fast to switch between ex. minified, or not.
*/
_readSync(props: ReadTransformProps): TransformCacheResult {
this._collectIfNecessarySync(props.cacheOptions);
const cacheFilePaths = this._getCacheFilePaths(props);
let metadata, transformedCode;
try {
metadata = readMetadataFileSync(cacheFilePaths.metadata);
if (metadata == null) {
return {result: null, outdatedDependencies: EMPTY_ARRAY};
}
const sourceHash = hashSourceCode(props);
if (sourceHash !== metadata.cachedSourceHash) {
return {result: null, outdatedDependencies: metadata.dependencies};
}
transformedCode = fs.readFileSync(cacheFilePaths.transformedCode, 'utf8');
const codeHash = crypto
.createHash('sha1')
.update(transformedCode)
.digest('hex');
if (metadata.cachedResultHash !== codeHash) {
return {result: null, outdatedDependencies: metadata.dependencies};
}
} catch (error) {
if (error.code === 'ENOENT') {
return {result: null, outdatedDependencies: EMPTY_ARRAY};
}
throw error;
}
return {
result: {
code: transformedCode,
dependencies: metadata.dependencies,
dependencyOffsets: metadata.dependencyOffsets,
map: metadata.sourceMap,
},
outdatedDependencies: EMPTY_ARRAY,
};
}
/**
* Temporary folder is never cleaned up automatically, we need to clean up old
* stuff ourselves. This code should be safe even if two different React
* Native projects are running at the same time.
*/
_collectIfNecessarySync(options: CacheOptions) {
if (options.resetCache && !this._cacheWasReset) { if (options.resetCache && !this._cacheWasReset) {
this._resetCache(options.reporter); this._resetCache(options.reporter);
return; return;
@ -217,18 +191,47 @@ const GARBAGE_COLLECTOR = new (class GarbageCollector {
} }
} }
})(); _resetCache(reporter: Reporter) {
rimraf.sync(this._getCacheDirPath());
reporter.update({type: 'transform_cache_reset'});
this._cacheWasReset = true;
this._lastCollected = Date.now();
}
/** /**
* When restarting packager we want to avoid running the collection over again, so we store * We want to avoid preventing tool use if the cleanup fails for some reason,
* the last collection time in a file and we check that first. * but still provide some chance for people to report/fix things.
*/ */
function collectCacheIfOldSync() { _collectSyncNoThrow() {
const cacheDirPath = getCacheDirPath(); try {
this._collectCacheIfOldSync();
} catch (error) {
terminal.log(error.stack);
terminal.log(
'Error: Cleaning up the cache folder failed. Continuing anyway.',
);
terminal.log('The cache folder is: %s', this._getCacheDirPath());
}
this._lastCollected = Date.now();
}
/**
* When restarting packager we want to avoid running the collection over
* again, so we store the last collection time in a file and we check that
* first.
*/
_collectCacheIfOldSync() {
const cacheDirPath = this._getCacheDirPath();
mkdirp.sync(cacheDirPath); mkdirp.sync(cacheDirPath);
const cacheCollectionFilePath = path.join(cacheDirPath, 'last_collected'); const cacheCollectionFilePath = path.join(cacheDirPath, 'last_collected');
const lastCollected = Number.parseInt(tryReadFileSync(cacheCollectionFilePath, 'utf8'), 10); const lastCollected = Number.parseInt(
if (Number.isInteger(lastCollected) && Date.now() - lastCollected > GARBAGE_COLLECTION_PERIOD) { tryReadFileSync(cacheCollectionFilePath, 'utf8'),
10,
);
if (
Number.isInteger(lastCollected) &&
Date.now() - lastCollected > GARBAGE_COLLECTION_PERIOD
) {
return; return;
} }
const effectiveCacheDirPath = path.join(cacheDirPath, CACHE_SUB_DIR); const effectiveCacheDirPath = path.join(cacheDirPath, CACHE_SUB_DIR);
@ -238,7 +241,58 @@ function collectCacheIfOldSync() {
} }
/** /**
* Remove all the cache files from the specified folder that are older than a certain duration. * The path, built as a hash, does not take the source code itself into
* account because it would generate lots of file during development. (The
* source hash is stored in the metadata instead).
*/
_getCacheFilePaths(props: {
filePath: string,
transformOptionsKey: string,
}): CacheFilePaths {
const hasher = crypto
.createHash('sha1')
.update(props.filePath)
.update(props.transformOptionsKey);
const hash = hasher.digest('hex');
const prefix = hash.substr(0, 2);
const fileName = `${hash.substr(2)}`;
const base = path.join(
this._getCacheDirPath(),
CACHE_SUB_DIR,
prefix,
fileName,
);
return {transformedCode: base, metadata: base + '.meta'};
}
/**
* If packager is running for two different directories, we don't want the
* caches to conflict with each other. `__dirname` carries that because
* packager will be, for example, installed in a different `node_modules/`
* folder for different projects.
*/
_getCacheDirPath() {
if (this._dirPath != null) {
return this._dirPath;
}
const hash = crypto.createHash('sha1').update(__dirname);
if (process.getuid != null) {
hash.update(process.getuid().toString());
}
this._dirPath = path.join(
require('os').tmpdir(),
CACHE_NAME + '-' + hash.digest('hex'),
);
require('debug')('RNP:TransformCache:Dir')(
`transform cache directory: ${this._dirPath}`,
);
return this._dirPath;
}
}
/**
* Remove all the cache files from the specified folder that are older than a
* certain duration.
*/ */
function collectCacheSync(dirPath: string) { function collectCacheSync(dirPath: string) {
const prefixDirs = fs.readdirSync(dirPath); const prefixDirs = fs.readdirSync(dirPath);
@ -302,14 +356,10 @@ function readMetadataFileSync(
if ( if (
typeof cachedResultHash !== 'string' || typeof cachedResultHash !== 'string' ||
typeof cachedSourceHash !== 'string' || typeof cachedSourceHash !== 'string' ||
!( !(Array.isArray(dependencies) &&
Array.isArray(dependencies) && dependencies.every(dep => typeof dep === 'string')) ||
dependencies.every(dep => typeof dep === 'string') !(Array.isArray(dependencyOffsets) &&
) || dependencyOffsets.every(offset => typeof offset === 'number')) ||
!(
Array.isArray(dependencyOffsets) &&
dependencyOffsets.every(offset => typeof offset === 'number')
) ||
!(sourceMap == null || typeof sourceMap === 'object') !(sourceMap == null || typeof sourceMap === 'object')
) { ) {
return null; return null;
@ -323,71 +373,33 @@ function readMetadataFileSync(
}; };
} }
export type ReadTransformProps = { function hashSourceCode(props: {
filePath: string, filePath: string,
sourceCode: string, sourceCode: string,
getTransformCacheKey: GetTransformCacheKey,
transformOptions: WorkerOptions, transformOptions: WorkerOptions,
transformOptionsKey: string, transformOptionsKey: string,
getTransformCacheKey: GetTransformCacheKey, }): string {
cacheOptions: CacheOptions, return crypto
}; .createHash('sha1')
.update(props.getTransformCacheKey(props.transformOptions))
const EMPTY_ARRAY = []; .update(props.sourceCode)
.digest('hex');
}
/** /**
* We verify the source hash matches to ensure we always favor rebuilding when * We want to unlink all cache files before writing, so that it is as much
* source change (rather than just using fs.mtime(), a bit less robust). * atomic as possible.
*
* That means when the source changes, we override the old transformed code with
* the new one. This is, I believe, preferable, so as to avoid bloating the
* cache during development cycles, where people changes files all the time.
* If we implement a global cache ability at some point, we'll be able to store
* old artifacts as well.
*
* Meanwhile we store transforms with different options in different files so
* that it is fast to switch between ex. minified, or not.
*/ */
function readSync(props: ReadTransformProps): TransformCacheResult { function unlinkIfExistsSync(filePath: string) {
GARBAGE_COLLECTOR.collectIfNecessarySync(props.cacheOptions);
const cacheFilePaths = getCacheFilePaths(props);
let metadata, transformedCode;
try { try {
metadata = readMetadataFileSync(cacheFilePaths.metadata); fs.unlinkSync(filePath);
if (metadata == null) {
return {result: null, outdatedDependencies: EMPTY_ARRAY};
}
const sourceHash = hashSourceCode(props);
if (sourceHash !== metadata.cachedSourceHash) {
return {result: null, outdatedDependencies: metadata.dependencies};
}
transformedCode = fs.readFileSync(cacheFilePaths.transformedCode, 'utf8');
const codeHash = crypto.createHash('sha1').update(transformedCode).digest('hex');
if (metadata.cachedResultHash !== codeHash) {
return {result: null, outdatedDependencies: metadata.dependencies};
}
} catch (error) { } catch (error) {
if (error.code === 'ENOENT') { if (error.code === 'ENOENT') {
return {result: null, outdatedDependencies: EMPTY_ARRAY}; return;
} }
throw error; throw error;
} }
return {
result: {
code: transformedCode,
dependencies: metadata.dependencies,
dependencyOffsets: metadata.dependencyOffsets,
map: metadata.sourceMap,
},
outdatedDependencies: EMPTY_ARRAY,
};
} }
module.exports = { module.exports = TransformCache;
writeSync,
readSync(props: ReadTransformProps): TransformCacheResult {
const result = readSync(props);
const msg = result ? 'Cache hit: ' : 'Cache miss: ';
debugRead(msg + props.filePath);
return result;
},
};

View File

@ -14,14 +14,6 @@ const jsonStableStringify = require('json-stable-stringify');
const transformCache = new Map(); const transformCache = new Map();
const mock = {
lastWrite: null,
reset() {
transformCache.clear();
mock.lastWrite = null;
},
};
const transformCacheKeyOf = props => const transformCacheKeyOf = props =>
props.filePath + '-' + crypto.createHash('md5') props.filePath + '-' + crypto.createHash('md5')
.update(props.sourceCode) .update(props.sourceCode)
@ -29,17 +21,27 @@ const transformCacheKeyOf = props =>
.update(jsonStableStringify(props.transformOptions || {})) .update(jsonStableStringify(props.transformOptions || {}))
.digest('hex'); .digest('hex');
function writeSync(props) { class TransformCacheMock {
transformCache.set(transformCacheKeyOf(props), props.result);
mock.lastWrite = props; constructor() {
this.mock = {
lastWrite: null,
reset: () => {
transformCache.clear();
this.mock.lastWrite = null;
},
};
} }
function readSync(props) { writeSync(props) {
transformCache.set(transformCacheKeyOf(props), props.result);
this.mock.lastWrite = props;
}
readSync(props) {
return {result: transformCache.get(transformCacheKeyOf(props)), outdatedDependencies: []}; return {result: transformCache.get(transformCacheKeyOf(props)), outdatedDependencies: []};
} }
module.exports = { }
writeSync,
readSync, module.exports = TransformCacheMock;
mock,
};

View File

@ -50,12 +50,12 @@ function cartesianProductOf(a1, a2) {
describe('TransformCache', () => { describe('TransformCache', () => {
let TransformCache; let transformCache;
beforeEach(() => { beforeEach(() => {
jest.resetModules(); jest.resetModules();
mockFS.clear(); mockFS.clear();
TransformCache = require('../TransformCache'); transformCache = new (require('../TransformCache'))();
}); });
it('is caching different files and options separately', () => { it('is caching different files and options separately', () => {
@ -81,12 +81,12 @@ describe('TransformCache', () => {
[{foo: 1}, {foo: 2}], [{foo: 1}, {foo: 2}],
); );
allCases.forEach( allCases.forEach(
entry => TransformCache.writeSync(argsFor(entry)), entry => transformCache.writeSync(argsFor(entry)),
); );
allCases.forEach(entry => { allCases.forEach(entry => {
const args = argsFor(entry); const args = argsFor(entry);
const {result} = args; const {result} = args;
const cachedResult = TransformCache.readSync({ const cachedResult = transformCache.readSync({
...args, ...args,
cacheOptions: {resetCache: false}, cacheOptions: {resetCache: false},
}); });
@ -116,10 +116,10 @@ describe('TransformCache', () => {
['abcd', 'efgh'], ['abcd', 'efgh'],
); );
allCases.forEach(entry => { allCases.forEach(entry => {
TransformCache.writeSync(argsFor(entry)); transformCache.writeSync(argsFor(entry));
const args = argsFor(entry); const args = argsFor(entry);
const {result} = args; const {result} = args;
const cachedResult = TransformCache.readSync({ const cachedResult = transformCache.readSync({
...args, ...args,
cacheOptions: {resetCache: false}, cacheOptions: {resetCache: false},
}); });
@ -127,7 +127,7 @@ describe('TransformCache', () => {
}); });
allCases.pop(); allCases.pop();
allCases.forEach(entry => { allCases.forEach(entry => {
const cachedResult = TransformCache.readSync({ const cachedResult = transformCache.readSync({
...argsFor(entry), ...argsFor(entry),
cacheOptions: {resetCache: false}, cacheOptions: {resetCache: false},
}); });

View File

@ -78,6 +78,8 @@ export type ConstructorArgs = {
type DocBlock = {+[key: string]: string}; type DocBlock = {+[key: string]: string};
const TRANSFORM_CACHE = new TransformCache();
class Module { class Module {
path: string; path: string;
@ -315,7 +317,7 @@ class Module {
return; return;
} }
invariant(result != null, 'missing result'); invariant(result != null, 'missing result');
TransformCache.writeSync({...cacheProps, result}); TRANSFORM_CACHE.writeSync({...cacheProps, result});
callback(undefined, result); callback(undefined, result);
}); });
} }
@ -359,7 +361,7 @@ class Module {
transformOptionsKey: string, transformOptionsKey: string,
): CachedReadResult { ): CachedReadResult {
const cacheProps = this._getCacheProps(transformOptions, transformOptionsKey); const cacheProps = this._getCacheProps(transformOptions, transformOptionsKey);
const cachedResult = TransformCache.readSync(cacheProps); const cachedResult = TRANSFORM_CACHE.readSync(cacheProps);
if (cachedResult.result == null) { if (cachedResult.result == null) {
return {result: null, outdatedDependencies: cachedResult.outdatedDependencies}; return {result: null, outdatedDependencies: cachedResult.outdatedDependencies};
} }

View File

@ -32,6 +32,8 @@ const packageJson =
description: "A require('foo') story", description: "A require('foo') story",
}); });
const TRANSFORM_CACHE = new TransformCache();
function mockFS(rootChildren) { function mockFS(rootChildren) {
fs.__setMockFilesystem({root: rootChildren}); fs.__setMockFilesystem({root: rootChildren});
} }
@ -79,7 +81,7 @@ describe('Module', () => {
process.platform = 'linux'; process.platform = 'linux';
cache = createCache(); cache = createCache();
transformCacheKey = 'abcdef'; transformCacheKey = 'abcdef';
TransformCache.mock.reset(); TRANSFORM_CACHE.mock.reset();
}); });
describe('Module ID', () => { describe('Module ID', () => {
@ -182,7 +184,7 @@ describe('Module', () => {
transformResult = {code: ''}; transformResult = {code: ''};
transformCode = jest.genMockFn() transformCode = jest.genMockFn()
.mockImplementation((module, sourceCode, options) => { .mockImplementation((module, sourceCode, options) => {
TransformCache.writeSync({ TRANSFORM_CACHE.writeSync({
filePath: module.path, filePath: module.path,
sourceCode, sourceCode,
transformOptions: options, transformOptions: options,