combo: begin simple parser combinator library (#1816)

Summary:
We often want to parse data from JSON files on disk into similar object
structures in memory. But `JSON.parse` is untyped both statically and
dynamically: it has type `(string) => any`, and it’s happy to accept
structures that aren’t in the shape that you expected. Whenever we write
something like `const c: MyConfig = JSON.parse(raw)` where `raw` comes
from a user-editable file on disk, we’re introducing a trivial soundness
hole. Furthermore, we often want to use a different in-memory state from
the serialized form: perhaps we use ES6 `Map`s in memory, or perhaps
we’ve refined a raw string type to an opaque validated type like
`RepoId` or `NodeAddressT`. These can be done by manually walking the
output of `JSON.parse`, but it’s not pretty: see `instanceConfig.js` or
`github/config.js`.

Parser combinators are a solution to this problem that enable building
parsers for simple primitives and composing them to form parsers for
larger structures. This patch introduces the skeleton of a parser
combinator library, supporting JSON primitives and arrays (but not
objects) along with tests that show its usage. Support for heterogeneous
object (“struct”) types will come in a subsequent patch because the
typing implementation is more complicated, though the interface to
clients is just as simple.

For comparison, this is essentially the `FromJSON` half of the Haskell
library [Aeson][aeson].

It’s possible that we’ll want to generalize this to a broader system of
profunctor optics, maybe over monad transformers, which would make it
easier to both parse and serialize these structures (using “isos” rather
than just parsers everywhere). But manually serializing the structures
is easier than manually parsing them, because they start out strongly
typed. The profunctor generalization is more complicated, and in the
meantime this solves a useful problem, so let’s defer the generality
until we decide that we need it.

[aeson]: https://hackage.haskell.org/package/aeson

Test Plan:
Unit tests included, with full coverage.

wchargin-branch: combo-init
This commit is contained in:
William Chargin 2020-05-30 15:43:11 -07:00 committed by GitHub
parent 40426f353c
commit 297c4e9156
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 226 additions and 0 deletions

104
src/util/combo.js Normal file
View File

@ -0,0 +1,104 @@
// @flow
// Simple parser combinator library for structured types rather than
// bytestring parsing.
// The recursive union of shapes that a raw JSON value may take: a
// string, number, boolean, or null; an array of such values; or an
// object whose keys are strings and whose values are such values.
// Despite the name, this covers every JSON value, not only objects.
export type JsonObject =
| string
| number
| boolean
| null
| JsonObject[]
| {[string]: JsonObject};
// Outcome of a parse, discriminated on the `ok` field: the `ok: true`
// variant carries the parsed `value`, and the `ok: false` variant
// carries an `err` string describing the failure. Both variants are
// exact object types with covariant (`+`) fields.
export type ParseResult<+T> =
| {|+ok: true, +value: T|}
| {|+ok: false, +err: string|};
export class Parser<+T> {
+_f: (JsonObject) => ParseResult<T>;
constructor(f: (JsonObject) => ParseResult<T>) {
this._f = f;
}
parse(raw: JsonObject): ParseResult<T> {
return this._f(raw);
}
parseOrThrow(raw: JsonObject): T {
const result = this.parse(raw);
if (result.ok) {
return result.value;
} else {
throw new Error(result.err);
}
}
}
// Helper to make a successful parse result. For readability.
function success<T>(t: T): ParseResult<T> {
return {ok: true, value: t};
}
// Helper to make a failed parse result. For readability.
function failure(err: string): ParseResult<empty> {
return {ok: false, err};
}
// Helper to nicely render a JSON object's typename, accounting for
// nulls and arrays.
function typename(x: JsonObject): string {
if (x === null) {
return "null";
}
if (Array.isArray(x)) {
return "array";
}
return typeof x;
}
export const string: Parser<string> = new Parser((x) => {
if (typeof x !== "string") {
return failure("expected string, got " + typename(x));
}
return success(x);
});
export const number: Parser<number> = new Parser((x) => {
if (typeof x !== "number") {
return failure("expected number, got " + typename(x));
}
return success(x);
});
export const boolean: Parser<boolean> = new Parser((x) => {
if (typeof x !== "boolean") {
return failure("expected boolean, got " + typename(x));
}
return success(x);
});
// Parser that only accepts a literal `null`. (Called `null_` rather
// than `null` to avoid conflicting with keyword.)
export const null_: Parser<null> = new Parser((x) => {
if (x !== null) {
return failure("expected null, got " + typename(x));
}
return success(x);
});
export function array<T>(p: Parser<T>): Parser<T[]> {
return new Parser((x) => {
if (!Array.isArray(x)) {
return failure("expected array, got " + typename(x));
}
const result = Array(x.length);
for (let i = 0; i < result.length; i++) {
const raw = x[i];
const parsed = p.parse(raw);
if (!parsed.ok) {
return failure(`index ${i}: ${parsed.err}`);
}
result[i] = parsed.value;
}
return success(result);
});
}

122
src/util/combo.test.js Normal file
View File

@ -0,0 +1,122 @@
// @flow
import * as C from "./combo";
describe("src/util/combo", () => {
describe("primitives", () => {
describe("string", () => {
  // A real string comes back unchanged.
  it("accepts strings", () => {
    const parsed = C.string.parseOrThrow("hey");
    expect(parsed).toEqual("hey");
  });
  // Non-strings are rejected with a message naming the actual type.
  it("rejects numbers", () => {
    expect(() => C.string.parseOrThrow(77)).toThrow(
      "expected string, got number"
    );
  });
  it("rejects nulls", () => {
    expect(() => C.string.parseOrThrow(null)).toThrow(
      "expected string, got null"
    );
  });
});
describe("number", () => {
  // A real number comes back unchanged.
  it("accepts numbers", () => {
    const parsed = C.number.parseOrThrow(77);
    expect(parsed).toEqual(77);
  });
  // Non-numbers are rejected with a message naming the actual type.
  it("rejects strings", () => {
    expect(() => C.number.parseOrThrow("hey")).toThrow(
      "expected number, got string"
    );
  });
  it("rejects arrays", () => {
    expect(() => C.number.parseOrThrow([2, 3, 4])).toThrow(
      "expected number, got array"
    );
  });
  // No coercion: a numeric-looking string is still a string.
  it("rejects strings that look like numbers", () => {
    expect(() => C.number.parseOrThrow("77")).toThrow(
      "expected number, got string"
    );
  });
});
describe("boolean", () => {
  // Both boolean values must be accepted and returned unchanged.
  it("accepts true", () => {
    expect(C.boolean.parseOrThrow(true)).toEqual(true);
  });
  it("accepts false", () => {
    // Exercise `false` itself. This case previously repeated the
    // `true` input, so the falsy boolean was never actually covered.
    expect(C.boolean.parseOrThrow(false)).toEqual(false);
  });
  // Non-booleans are rejected with a message naming the actual type.
  it("rejects null", () => {
    const thunk = () => C.boolean.parseOrThrow(null);
    expect(thunk).toThrow("expected boolean, got null");
  });
  it("rejects objects", () => {
    const thunk = () => C.boolean.parseOrThrow({});
    expect(thunk).toThrow("expected boolean, got object");
  });
});
describe("null_", () => {
it("accepts null", () => {
expect(C.null_.parseOrThrow(null)).toEqual(null);
});
it("rejects undefined", () => {
// This is a defense-in-depth test---undefined isn't actually a
// valid JSON value---so silence Flow's justified complaint.
const undef: C.JsonObject = (undefined: any);
const thunk = () => C.null_.parseOrThrow(undef);
expect(thunk).toThrow("expected null, got undefined");
});
it("rejects falsy strings", () => {
const thunk = () => C.null_.parseOrThrow("");
expect(thunk).toThrow("expected null, got string");
});
it("rejects falsy numbers", () => {
const thunk = () => C.null_.parseOrThrow(0);
expect(thunk).toThrow("expected null, got number");
});
});
});
describe("array", () => {
it("accepts an empty array", () => {
const p: C.Parser<string[]> = C.array(C.string);
expect(p.parseOrThrow([])).toEqual([]);
});
it("accepts a singleton array", () => {
const p: C.Parser<string[]> = C.array(C.string);
expect(p.parseOrThrow(["one"])).toEqual(["one"]);
});
it("accepts a long array", () => {
const p: C.Parser<string[]> = C.array(C.string);
expect(p.parseOrThrow(["a", "b", "c"])).toEqual(["a", "b", "c"]);
});
it("works for nested array types", () => {
const p: C.Parser<string[][]> = C.array(C.array(C.string));
expect(p.parseOrThrow([["a", "b"], ["c"]])).toEqual([["a", "b"], ["c"]]);
});
it("rejects on an object with numeric-string keys", () => {
const p: C.Parser<string[][]> = C.array(C.array(C.string));
const input = {"0": "hmm", "1": "hum"};
const thunk = () => p.parseOrThrow(input);
expect(thunk).toThrow("expected array, got object");
});
it("rejects arrays with elements of the wrong type", () => {
const p: C.Parser<string[]> = C.array(C.string);
const input = ["one", "two", 5];
const thunk = () => p.parseOrThrow(input);
expect(thunk).toThrow("index 2: expected string, got number");
});
it("has nice error messages on nested arrays", () => {
const p: C.Parser<string[][]> = C.array(C.array(C.string));
const input = [["one"], ["two"], [5, "---three, sir"]];
const thunk = () => p.parseOrThrow(input);
expect(thunk).toThrow("index 2: index 0: expected string, got number");
});
it("is type-safe", () => {
// $ExpectFlowError
(C.array(C.string): C.Parser<string>);
// $ExpectFlowError
(C.array(C.string): C.Parser<number[]>);
// $ExpectFlowError
(C.array(C.string): C.Parser<string[][]>);
});
});
});