packager: make TransformCache a stateful class

Summary: This removes the single-use "garbage collection" class and makes a single `TransformCache` class instead. The reason for doing this is that I'd like to experiment with using a local directory based on the root path instead of the `tmpdir()` folder: on some platforms, using the temp dir is subject to permission problems, sometimes it is empty, and on all platforms it is vulnerable to concurrency issues.
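For illustration only, a minimal sketch of what a root-path-based cache directory could look like; the function name `getLocalCacheDirPath` and the `.packager-cache` folder name are hypothetical and not part of this commit:

const path = require('path');

// Hypothetical: keep the cache under the project root instead of
// os.tmpdir(), sidestepping temp-dir permission problems and
// cross-project concurrency issues.
function getLocalCacheDirPath(rootPath) {
  return path.join(rootPath, '.packager-cache', 'cache');
}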

Reviewed By: davidaurelio

Differential Revision: D5027591

fbshipit-source-id: e1176e0e88111116256f7b2b173a0b36837a887d
Jean Lauliac 2017-05-10 04:44:10 -07:00 committed by Facebook Github Bot
parent da7f06e115
commit ddf7bebad3
5 changed files with 267 additions and 249 deletions

View File

@@ -7,6 +7,7 @@
* of patent rights can be found in the PATENTS file in the same directory.
*
* @flow
* @format
*/
'use strict';
@@ -30,62 +31,6 @@ export type GetTransformCacheKey = (options: {}) => string;
const CACHE_NAME = 'react-native-packager-cache';
const CACHE_SUB_DIR = 'cache';
/**
* If packager is running for two different directories, we don't want the
* caches to conflict with each other. `__dirname` carries that because packager
* will be, for example, installed in a different `node_modules/` folder for
* different projects.
*/
const getCacheDirPath = (function() {
let dirPath;
return function() {
if (dirPath != null) {
return dirPath;
}
const hash = crypto.createHash('sha1').update(__dirname);
if (process.getuid != null) {
hash.update(process.getuid().toString());
}
dirPath = path.join(require('os').tmpdir(), CACHE_NAME + '-' + hash.digest('hex'));
require('debug')('RNP:TransformCache:Dir')(
`transform cache directory: ${dirPath}`
);
return dirPath;
};
})();
function hashSourceCode(props: {
filePath: string,
sourceCode: string,
getTransformCacheKey: GetTransformCacheKey,
transformOptions: WorkerOptions,
transformOptionsKey: string,
}): string {
return crypto.createHash('sha1')
.update(props.getTransformCacheKey(props.transformOptions))
.update(props.sourceCode)
.digest('hex');
}
/**
* The path, built as a hash, does not take the source code itself into account
* because it would generate lots of files during development. (The source hash
* is stored in the metadata instead).
*/
function getCacheFilePaths(props: {
filePath: string,
transformOptionsKey: string,
}): CacheFilePaths {
const hasher = crypto.createHash('sha1')
.update(props.filePath)
.update(props.transformOptionsKey);
const hash = hasher.digest('hex');
const prefix = hash.substr(0, 2);
const fileName = `${hash.substr(2)}`;
const base = path.join(getCacheDirPath(), CACHE_SUB_DIR, prefix, fileName);
return {transformedCode: base, metadata: base + '.meta'};
}
export type CachedResult = {
code: string,
dependencies: Array<string>,
@@ -98,112 +43,141 @@ export type TransformCacheResult = {|
+outdatedDependencies: $ReadOnlyArray<string>,
|};
/**
* We want to unlink all cache files before writing, so that the operation is
* as atomic as possible.
*/
function unlinkIfExistsSync(filePath: string) {
try {
fs.unlinkSync(filePath);
} catch (error) {
if (error.code === 'ENOENT') {
return;
}
throw error;
}
}
/**
* In the workers we are fine doing sync work because a worker is meant to
* process a single source file at a time.
*
* We store the transformed JS because it is likely to be much bigger than the
* rest of the data JSON. Probably the map should be stored separately as well.
*
* We make the write operation as atomic as possible: indeed, if another
* process is reading the cache at the same time, there would be a risk it
* reads new transformed code, but old metadata. This is avoided by removing
* the files first.
*
* There is still a risk of conflicting writes, which is mitigated by hashing
* the result code and verifying the hash at the end. If writes happen close
* to each other, one of the workers will lose its results no matter what.
*/
function writeSync(props: {
filePath: string,
sourceCode: string,
getTransformCacheKey: GetTransformCacheKey,
transformOptions: WorkerOptions,
transformOptionsKey: string,
result: CachedResult,
}): void {
const cacheFilePath = getCacheFilePaths(props);
mkdirp.sync(path.dirname(cacheFilePath.transformedCode));
const {result} = props;
unlinkIfExistsSync(cacheFilePath.transformedCode);
unlinkIfExistsSync(cacheFilePath.metadata);
writeFileAtomicSync(cacheFilePath.transformedCode, result.code);
writeFileAtomicSync(cacheFilePath.metadata, JSON.stringify([
crypto.createHash('sha1').update(result.code).digest('hex'),
hashSourceCode(props),
result.dependencies,
result.dependencyOffsets,
result.map,
]));
}
export type CacheOptions = {
reporter: Reporter,
resetCache?: boolean,
};
export type ReadTransformProps = {
filePath: string,
sourceCode: string,
transformOptions: WorkerOptions,
transformOptionsKey: string,
getTransformCacheKey: GetTransformCacheKey,
cacheOptions: CacheOptions,
};
const EMPTY_ARRAY = [];
/* 1 day */
const GARBAGE_COLLECTION_PERIOD = 24 * 60 * 60 * 1000;
/* 4 days */
const CACHE_FILE_MAX_LAST_ACCESS_TIME = GARBAGE_COLLECTION_PERIOD * 4;
/**
* The temporary folder is cleaned up only on boot, e.g. on OS X, as far as I
* know. Since people generally reboot only rarely, we need to clean
* up old stuff from time to time.
*
* This code should be safe even if two different React Native projects are
* running at the same time.
*/
const GARBAGE_COLLECTOR = new (class GarbageCollector {
_lastCollected: ?number;
class TransformCache {
_cacheWasReset: boolean;
_dirPath: string;
_lastCollected: ?number;
constructor() {
this._cacheWasReset = false;
}
/**
* We want to avoid preventing tool use if the cleanup fails for some reason,
* but still provide some chance for people to report/fix things.
* We store the transformed JS because it is likely to be much bigger than the
* rest of the data JSON. Probably the map should be stored separately as
* well.
*
* We make the write operation as atomic as possible: indeed, if another
* process is reading the cache at the same time, there would be a risk it
* reads new transformed code, but old metadata. This is avoided by removing
* the files first.
*
* There is still a risk of conflicting writes, which is mitigated by hashing
* the result code and verifying the hash at the end. If writes happen close
* to each other, one of the workers will lose its results no matter what.
*/
_collectSyncNoThrow() {
writeSync(props: {
filePath: string,
sourceCode: string,
getTransformCacheKey: GetTransformCacheKey,
transformOptions: WorkerOptions,
transformOptionsKey: string,
result: CachedResult,
}): void {
const cacheFilePath = this._getCacheFilePaths(props);
mkdirp.sync(path.dirname(cacheFilePath.transformedCode));
const {result} = props;
unlinkIfExistsSync(cacheFilePath.transformedCode);
unlinkIfExistsSync(cacheFilePath.metadata);
writeFileAtomicSync(cacheFilePath.transformedCode, result.code);
writeFileAtomicSync(
cacheFilePath.metadata,
JSON.stringify([
crypto.createHash('sha1').update(result.code).digest('hex'),
hashSourceCode(props),
result.dependencies,
result.dependencyOffsets,
result.map,
]),
);
}
readSync(props: ReadTransformProps): TransformCacheResult {
const result = this._readSync(props);
const msg = result ? 'Cache hit: ' : 'Cache miss: ';
debugRead(msg + props.filePath);
return result;
}
/**
* We verify the source hash matches to ensure we always favor rebuilding when
* the source changes (rather than just using fs.mtime(), which is a bit less robust).
*
* That means when the source changes, we override the old transformed code
* with the new one. This is, I believe, preferable, so as to avoid bloating
* the cache during development cycles, where people change files all the
* time. If we implement a global cache ability at some point, we'll be able
* to store old artifacts as well.
*
* Meanwhile we store transforms with different options in different files so
* that it is fast to switch between, e.g., minified and non-minified code.
*/
_readSync(props: ReadTransformProps): TransformCacheResult {
this._collectIfNecessarySync(props.cacheOptions);
const cacheFilePaths = this._getCacheFilePaths(props);
let metadata, transformedCode;
try {
collectCacheIfOldSync();
metadata = readMetadataFileSync(cacheFilePaths.metadata);
if (metadata == null) {
return {result: null, outdatedDependencies: EMPTY_ARRAY};
}
const sourceHash = hashSourceCode(props);
if (sourceHash !== metadata.cachedSourceHash) {
return {result: null, outdatedDependencies: metadata.dependencies};
}
transformedCode = fs.readFileSync(cacheFilePaths.transformedCode, 'utf8');
const codeHash = crypto
.createHash('sha1')
.update(transformedCode)
.digest('hex');
if (metadata.cachedResultHash !== codeHash) {
return {result: null, outdatedDependencies: metadata.dependencies};
}
} catch (error) {
terminal.log(error.stack);
terminal.log(
'Error: Cleaning up the cache folder failed. Continuing anyway.',
);
terminal.log('The cache folder is: %s', getCacheDirPath());
if (error.code === 'ENOENT') {
return {result: null, outdatedDependencies: EMPTY_ARRAY};
}
throw error;
}
this._lastCollected = Date.now();
return {
result: {
code: transformedCode,
dependencies: metadata.dependencies,
dependencyOffsets: metadata.dependencyOffsets,
map: metadata.sourceMap,
},
outdatedDependencies: EMPTY_ARRAY,
};
}
_resetCache(reporter: Reporter) {
rimraf.sync(getCacheDirPath());
reporter.update({type: 'transform_cache_reset'});
this._cacheWasReset = true;
this._lastCollected = Date.now();
}
collectIfNecessarySync(options: CacheOptions) {
/**
* The temporary folder is never cleaned up automatically, so we need to clean
* up old entries ourselves. This code should be safe even if two different React
* Native projects are running at the same time.
*/
_collectIfNecessarySync(options: CacheOptions) {
if (options.resetCache && !this._cacheWasReset) {
this._resetCache(options.reporter);
return;
@@ -217,28 +191,108 @@ const GARBAGE_COLLECTOR = new (class GarbageCollector {
}
}
})();
/**
* When restarting packager we want to avoid running the collection over again, so we store
* the last collection time in a file and we check that first.
*/
function collectCacheIfOldSync() {
const cacheDirPath = getCacheDirPath();
mkdirp.sync(cacheDirPath);
const cacheCollectionFilePath = path.join(cacheDirPath, 'last_collected');
const lastCollected = Number.parseInt(tryReadFileSync(cacheCollectionFilePath, 'utf8'), 10);
if (Number.isInteger(lastCollected) && Date.now() - lastCollected > GARBAGE_COLLECTION_PERIOD) {
return;
_resetCache(reporter: Reporter) {
rimraf.sync(this._getCacheDirPath());
reporter.update({type: 'transform_cache_reset'});
this._cacheWasReset = true;
this._lastCollected = Date.now();
}
/**
* We want to avoid preventing tool use if the cleanup fails for some reason,
* but still provide some chance for people to report/fix things.
*/
_collectSyncNoThrow() {
try {
this._collectCacheIfOldSync();
} catch (error) {
terminal.log(error.stack);
terminal.log(
'Error: Cleaning up the cache folder failed. Continuing anyway.',
);
terminal.log('The cache folder is: %s', this._getCacheDirPath());
}
this._lastCollected = Date.now();
}
/**
* When restarting packager we want to avoid running the collection over
* again, so we store the last collection time in a file and we check that
* first.
*/
_collectCacheIfOldSync() {
const cacheDirPath = this._getCacheDirPath();
mkdirp.sync(cacheDirPath);
const cacheCollectionFilePath = path.join(cacheDirPath, 'last_collected');
const lastCollected = Number.parseInt(
tryReadFileSync(cacheCollectionFilePath, 'utf8'),
10,
);
if (
Number.isInteger(lastCollected) &&
Date.now() - lastCollected > GARBAGE_COLLECTION_PERIOD
) {
return;
}
const effectiveCacheDirPath = path.join(cacheDirPath, CACHE_SUB_DIR);
mkdirp.sync(effectiveCacheDirPath);
collectCacheSync(effectiveCacheDirPath);
fs.writeFileSync(cacheCollectionFilePath, Date.now().toString());
}
/**
* The path, built as a hash, does not take the source code itself into
* account because it would generate lots of files during development. (The
* source hash is stored in the metadata instead).
*/
_getCacheFilePaths(props: {
filePath: string,
transformOptionsKey: string,
}): CacheFilePaths {
const hasher = crypto
.createHash('sha1')
.update(props.filePath)
.update(props.transformOptionsKey);
const hash = hasher.digest('hex');
const prefix = hash.substr(0, 2);
const fileName = `${hash.substr(2)}`;
const base = path.join(
this._getCacheDirPath(),
CACHE_SUB_DIR,
prefix,
fileName,
);
return {transformedCode: base, metadata: base + '.meta'};
}
/**
* If packager is running for two different directories, we don't want the
* caches to conflict with each other. `__dirname` carries that because
* packager will be, for example, installed in a different `node_modules/`
* folder for different projects.
*/
_getCacheDirPath() {
if (this._dirPath != null) {
return this._dirPath;
}
const hash = crypto.createHash('sha1').update(__dirname);
if (process.getuid != null) {
hash.update(process.getuid().toString());
}
this._dirPath = path.join(
require('os').tmpdir(),
CACHE_NAME + '-' + hash.digest('hex'),
);
require('debug')('RNP:TransformCache:Dir')(
`transform cache directory: ${this._dirPath}`,
);
return this._dirPath;
}
const effectiveCacheDirPath = path.join(cacheDirPath, CACHE_SUB_DIR);
mkdirp.sync(effectiveCacheDirPath);
collectCacheSync(effectiveCacheDirPath);
fs.writeFileSync(cacheCollectionFilePath, Date.now().toString());
}
/**
* Remove all the cache files from the specified folder that are older than a certain duration.
* Remove all the cache files from the specified folder that are older than a
* certain duration.
*/
function collectCacheSync(dirPath: string) {
const prefixDirs = fs.readdirSync(dirPath);
@@ -302,14 +356,10 @@ function readMetadataFileSync(
if (
typeof cachedResultHash !== 'string' ||
typeof cachedSourceHash !== 'string' ||
!(
Array.isArray(dependencies) &&
dependencies.every(dep => typeof dep === 'string')
) ||
!(
Array.isArray(dependencyOffsets) &&
dependencyOffsets.every(offset => typeof offset === 'number')
) ||
!(Array.isArray(dependencies) &&
dependencies.every(dep => typeof dep === 'string')) ||
!(Array.isArray(dependencyOffsets) &&
dependencyOffsets.every(offset => typeof offset === 'number')) ||
!(sourceMap == null || typeof sourceMap === 'object')
) {
return null;
@@ -323,71 +373,33 @@
};
}
export type ReadTransformProps = {
function hashSourceCode(props: {
filePath: string,
sourceCode: string,
getTransformCacheKey: GetTransformCacheKey,
transformOptions: WorkerOptions,
transformOptionsKey: string,
getTransformCacheKey: GetTransformCacheKey,
cacheOptions: CacheOptions,
};
const EMPTY_ARRAY = [];
}): string {
return crypto
.createHash('sha1')
.update(props.getTransformCacheKey(props.transformOptions))
.update(props.sourceCode)
.digest('hex');
}
/**
* We verify the source hash matches to ensure we always favor rebuilding when
* the source changes (rather than just using fs.mtime(), which is a bit less robust).
*
* That means when the source changes, we override the old transformed code with
* the new one. This is, I believe, preferable, so as to avoid bloating the
* cache during development cycles, where people change files all the time.
* If we implement a global cache ability at some point, we'll be able to store
* old artifacts as well.
*
* Meanwhile we store transforms with different options in different files so
* that it is fast to switch between, e.g., minified and non-minified code.
* We want to unlink all cache files before writing, so that the operation is
* as atomic as possible.
*/
function readSync(props: ReadTransformProps): TransformCacheResult {
GARBAGE_COLLECTOR.collectIfNecessarySync(props.cacheOptions);
const cacheFilePaths = getCacheFilePaths(props);
let metadata, transformedCode;
function unlinkIfExistsSync(filePath: string) {
try {
metadata = readMetadataFileSync(cacheFilePaths.metadata);
if (metadata == null) {
return {result: null, outdatedDependencies: EMPTY_ARRAY};
}
const sourceHash = hashSourceCode(props);
if (sourceHash !== metadata.cachedSourceHash) {
return {result: null, outdatedDependencies: metadata.dependencies};
}
transformedCode = fs.readFileSync(cacheFilePaths.transformedCode, 'utf8');
const codeHash = crypto.createHash('sha1').update(transformedCode).digest('hex');
if (metadata.cachedResultHash !== codeHash) {
return {result: null, outdatedDependencies: metadata.dependencies};
}
fs.unlinkSync(filePath);
} catch (error) {
if (error.code === 'ENOENT') {
return {result: null, outdatedDependencies: EMPTY_ARRAY};
return;
}
throw error;
}
return {
result: {
code: transformedCode,
dependencies: metadata.dependencies,
dependencyOffsets: metadata.dependencyOffsets,
map: metadata.sourceMap,
},
outdatedDependencies: EMPTY_ARRAY,
};
}
module.exports = {
writeSync,
readSync(props: ReadTransformProps): TransformCacheResult {
const result = readSync(props);
const msg = result ? 'Cache hit: ' : 'Cache miss: ';
debugRead(msg + props.filePath);
return result;
},
};
module.exports = TransformCache;
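
With the class exported, callers own an instance instead of importing module-level `writeSync`/`readSync` functions. A minimal usage sketch based on the call sites in this diff (the literal values are illustrative):

const TransformCache = require('./TransformCache');

// One shared instance per process, mirroring the TRANSFORM_CACHE
// constant that Module.js introduces further down in this commit.
const transformCache = new TransformCache();

// Illustrative props; real callers derive these from the worker options.
const props = {
  filePath: '/project/index.js',
  sourceCode: 'module.exports = 42;',
  getTransformCacheKey: transformOptions => 'some-cache-key',
  transformOptions: {},
  transformOptionsKey: 'dev',
};

transformCache.writeSync({
  ...props,
  result: {code: 'module.exports=42;', dependencies: [], dependencyOffsets: [], map: null},
});

const {result, outdatedDependencies} = transformCache.readSync({
  ...props,
  cacheOptions: {reporter: {update() {}}, resetCache: false},
});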

View File

@@ -14,14 +14,6 @@ const jsonStableStringify = require('json-stable-stringify');
const transformCache = new Map();
const mock = {
lastWrite: null,
reset() {
transformCache.clear();
mock.lastWrite = null;
},
};
const transformCacheKeyOf = props =>
props.filePath + '-' + crypto.createHash('md5')
.update(props.sourceCode)
@@ -29,17 +21,27 @@ const transformCacheKeyOf = props =>
.update(jsonStableStringify(props.transformOptions || {}))
.digest('hex');
function writeSync(props) {
transformCache.set(transformCacheKeyOf(props), props.result);
mock.lastWrite = props;
class TransformCacheMock {
constructor() {
this.mock = {
lastWrite: null,
reset: () => {
transformCache.clear();
this.mock.lastWrite = null;
},
};
}
writeSync(props) {
transformCache.set(transformCacheKeyOf(props), props.result);
this.mock.lastWrite = props;
}
readSync(props) {
return {result: transformCache.get(transformCacheKeyOf(props)), outdatedDependencies: []};
}
}
function readSync(props) {
return {result: transformCache.get(transformCacheKeyOf(props)), outdatedDependencies: []};
}
module.exports = {
writeSync,
readSync,
mock,
};
module.exports = TransformCacheMock;
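
A short sketch of how a test might consume this mock, assuming Jest resolves the require to this `__mocks__` file (the require path is illustrative):

const TransformCache = require('../lib/TransformCache'); // resolves to the mock
const cache = new TransformCache();

cache.writeSync({
  filePath: '/root/a.js',
  sourceCode: 'x',
  getTransformCacheKey: () => 'key',
  transformOptions: {},
  result: {code: 'transformed'},
});
expect(cache.mock.lastWrite.filePath).toBe('/root/a.js');

// The backing Map is module-level and shared across instances, so tests
// reset it between cases.
cache.mock.reset();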

View File

@@ -50,12 +50,12 @@ function cartesianProductOf(a1, a2) {
describe('TransformCache', () => {
let TransformCache;
let transformCache;
beforeEach(() => {
jest.resetModules();
mockFS.clear();
TransformCache = require('../TransformCache');
transformCache = new (require('../TransformCache'))();
});
it('is caching different files and options separately', () => {
@@ -81,12 +81,12 @@ describe('TransformCache', () => {
[{foo: 1}, {foo: 2}],
);
allCases.forEach(
entry => TransformCache.writeSync(argsFor(entry)),
entry => transformCache.writeSync(argsFor(entry)),
);
allCases.forEach(entry => {
const args = argsFor(entry);
const {result} = args;
const cachedResult = TransformCache.readSync({
const cachedResult = transformCache.readSync({
...args,
cacheOptions: {resetCache: false},
});
@@ -116,10 +116,10 @@ describe('TransformCache', () => {
['abcd', 'efgh'],
);
allCases.forEach(entry => {
TransformCache.writeSync(argsFor(entry));
transformCache.writeSync(argsFor(entry));
const args = argsFor(entry);
const {result} = args;
const cachedResult = TransformCache.readSync({
const cachedResult = transformCache.readSync({
...args,
cacheOptions: {resetCache: false},
});
@@ -127,7 +127,7 @@ describe('TransformCache', () => {
});
allCases.pop();
allCases.forEach(entry => {
const cachedResult = TransformCache.readSync({
const cachedResult = transformCache.readSync({
...argsFor(entry),
cacheOptions: {resetCache: false},
});

View File

@@ -78,6 +78,8 @@ export type ConstructorArgs = {
type DocBlock = {+[key: string]: string};
const TRANSFORM_CACHE = new TransformCache();
class Module {
path: string;
@@ -315,7 +317,7 @@ class Module {
return;
}
invariant(result != null, 'missing result');
TransformCache.writeSync({...cacheProps, result});
TRANSFORM_CACHE.writeSync({...cacheProps, result});
callback(undefined, result);
});
}
@@ -359,7 +361,7 @@ class Module {
transformOptionsKey: string,
): CachedReadResult {
const cacheProps = this._getCacheProps(transformOptions, transformOptionsKey);
const cachedResult = TransformCache.readSync(cacheProps);
const cachedResult = TRANSFORM_CACHE.readSync(cacheProps);
if (cachedResult.result == null) {
return {result: null, outdatedDependencies: cachedResult.outdatedDependencies};
}

View File

@@ -32,6 +32,8 @@ const packageJson =
description: "A require('foo') story",
});
const TRANSFORM_CACHE = new TransformCache();
function mockFS(rootChildren) {
fs.__setMockFilesystem({root: rootChildren});
}
@@ -79,7 +81,7 @@ describe('Module', () => {
process.platform = 'linux';
cache = createCache();
transformCacheKey = 'abcdef';
TransformCache.mock.reset();
TRANSFORM_CACHE.mock.reset();
});
describe('Module ID', () => {
@@ -182,7 +184,7 @@ describe('Module', () => {
transformResult = {code: ''};
transformCode = jest.genMockFn()
.mockImplementation((module, sourceCode, options) => {
TransformCache.writeSync({
TRANSFORM_CACHE.writeSync({
filePath: module.path,
sourceCode,
transformOptions: options,