From 297c4e915670fe63c171f06022fa665598b7524a Mon Sep 17 00:00:00 2001 From: William Chargin Date: Sat, 30 May 2020 15:43:11 -0700 Subject: [PATCH] combo: begin simple parser combinator library (#1816) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: We often want to parse data from JSON files on disk into similar object structures in memory. But `JSON.parse` is untyped both statically and dynamically: it has type `(string) => any`, and it’s happy to accept structures that aren’t in the shape that you expected. Whenever we write something like `const c: MyConfig = JSON.parse(raw)` where `raw` comes from a user-editable file on disk, we’re introducing a trivial soundness hole. Furthermore, we often want to use a different in-memory state from the serialized form: perhaps we use ES6 `Map`s in memory, or perhaps we’ve refined a raw string type to an opaque validated type like `RepoId` or `NodeAddressT`. These can be done by manually walking the output of `JSON.parse`, but it’s not pretty: see `instanceConfig.js` or `github/config.js`. Parser combinators are a solution to this problem that enable building parsers for simple primitives and composing them to form parsers for larger structures. This patch introduces the skeleton of a parser combinator library, supporting JSON primitives and arrays (but not objects) along with tests that show its usage. Support for heterogeneous object (“struct”) types will come in a subsequent patch because the typing implementation is more complicated, though the interface to clients is just as simple. For comparison, this is essentially the `FromJSON` half of the Haskell library [Aeson][aeson]. It’s possible that we’ll want to generalize this to a broader system of profunctor optics, maybe over monad transformers, which would make it easier to both parse and serialize these structures (using “isos” rather than just parsers everywhere). 
But manually serializing the structures is easier than manually parsing
them, because they start out strongly typed. The profunctor
generalization is more complicated, and in the meantime this solves a
useful problem, so let’s defer the generality until we decide that we
need it.

[aeson]: https://hackage.haskell.org/package/aeson

Test Plan:
Unit tests included, with full coverage.

wchargin-branch: combo-init
---
 src/util/combo.js      | 104 +++++++++++++++++++++++++++++++++++
 src/util/combo.test.js | 122 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 226 insertions(+)
 create mode 100644 src/util/combo.js
 create mode 100644 src/util/combo.test.js

diff --git a/src/util/combo.js b/src/util/combo.js
new file mode 100644
index 0000000..95093a3
--- /dev/null
+++ b/src/util/combo.js
@@ -0,0 +1,104 @@
+// @flow
+
+// Simple parser combinator library for structured types rather than
+// bytestring parsing.
+
+export type JsonObject =
+  | string
+  | number
+  | boolean
+  | null
+  | JsonObject[]
+  | {[string]: JsonObject};
+
+export type ParseResult<+T> =
+  | {|+ok: true, +value: T|}
+  | {|+ok: false, +err: string|};
+
+export class Parser<+T> {
+  +_f: (JsonObject) => ParseResult<T>;
+  constructor(f: (JsonObject) => ParseResult<T>) {
+    this._f = f;
+  }
+  parse(raw: JsonObject): ParseResult<T> {
+    return this._f(raw);
+  }
+  parseOrThrow(raw: JsonObject): T {
+    const result = this.parse(raw);
+    if (result.ok) {
+      return result.value;
+    } else {
+      throw new Error(result.err);
+    }
+  }
+}
+
+// Helper to make a successful parse result. For readability.
+function success<T>(t: T): ParseResult<T> {
+  return {ok: true, value: t};
+}
+
+// Helper to make a failed parse result. For readability.
+function failure(err: string): ParseResult<empty> {
+  return {ok: false, err};
+}
+
+// Helper to nicely render a JSON object's typename, accounting for
+// nulls and arrays.
+function typename(x: JsonObject): string {
+  if (x === null) {
+    return "null";
+  }
+  if (Array.isArray(x)) {
+    return "array";
+  }
+  return typeof x;
+}
+
+export const string: Parser<string> = new Parser((x) => {
+  if (typeof x !== "string") {
+    return failure("expected string, got " + typename(x));
+  }
+  return success(x);
+});
+
+export const number: Parser<number> = new Parser((x) => {
+  if (typeof x !== "number") {
+    return failure("expected number, got " + typename(x));
+  }
+  return success(x);
+});
+
+export const boolean: Parser<boolean> = new Parser((x) => {
+  if (typeof x !== "boolean") {
+    return failure("expected boolean, got " + typename(x));
+  }
+  return success(x);
+});
+
+// Parser that only accepts a literal `null`. (Called `null_` rather
+// than `null` to avoid conflicting with keyword.)
+export const null_: Parser<null> = new Parser((x) => {
+  if (x !== null) {
+    return failure("expected null, got " + typename(x));
+  }
+  return success(x);
+});
+
+export function array<T>(p: Parser<T>): Parser<T[]> {
+  return new Parser((x) => {
+    if (!Array.isArray(x)) {
+      return failure("expected array, got " + typename(x));
+    }
+    const result = Array(x.length);
+    for (let i = 0; i < result.length; i++) {
+      const raw = x[i];
+      const parsed = p.parse(raw);
+      if (!parsed.ok) {
+        return failure(`index ${i}: ${parsed.err}`);
+      }
+      result[i] = parsed.value;
+    }
+    return success(result);
+  });
+}
diff --git a/src/util/combo.test.js b/src/util/combo.test.js
new file mode 100644
index 0000000..bda7b6f
--- /dev/null
+++ b/src/util/combo.test.js
@@ -0,0 +1,122 @@
+// @flow
+
+import * as C from "./combo";
+
+describe("src/util/combo", () => {
+  describe("primitives", () => {
+    describe("string", () => {
+      it("accepts strings", () => {
+        expect(C.string.parseOrThrow("hey")).toEqual("hey");
+      });
+      it("rejects numbers", () => {
+        const thunk = () => C.string.parseOrThrow(77);
+        expect(thunk).toThrow("expected string, got number");
+      });
+      it("rejects nulls", () => {
+        const thunk = () => C.string.parseOrThrow(null);
+        expect(thunk).toThrow("expected string, got null");
+      });
+    });
+
+    describe("number", () => {
+      it("accepts numbers", () => {
+        expect(C.number.parseOrThrow(77)).toEqual(77);
+      });
+      it("rejects strings", () => {
+        const thunk = () => C.number.parseOrThrow("hey");
+        expect(thunk).toThrow("expected number, got string");
+      });
+      it("rejects arrays", () => {
+        const thunk = () => C.number.parseOrThrow([2, 3, 4]);
+        expect(thunk).toThrow("expected number, got array");
+      });
+      it("rejects strings that look like numbers", () => {
+        const thunk = () => C.number.parseOrThrow("77");
+        expect(thunk).toThrow("expected number, got string");
+      });
+    });
+
+    describe("boolean", () => {
+      it("accepts true", () => {
+        expect(C.boolean.parseOrThrow(true)).toEqual(true);
+      });
+      it("accepts false", () => {
+        expect(C.boolean.parseOrThrow(false)).toEqual(false);
+      });
+      it("rejects null", () => {
+        const thunk = () => C.boolean.parseOrThrow(null);
+        expect(thunk).toThrow("expected boolean, got null");
+      });
+      it("rejects objects", () => {
+        const thunk = () => C.boolean.parseOrThrow({});
+        expect(thunk).toThrow("expected boolean, got object");
+      });
+    });
+
+    describe("null_", () => {
+      it("accepts null", () => {
+        expect(C.null_.parseOrThrow(null)).toEqual(null);
+      });
+      it("rejects undefined", () => {
+        // This is a defense-in-depth test---undefined isn't actually a
+        // valid JSON value---so silence Flow's justified complaint.
+        const undef: C.JsonObject = (undefined: any);
+        const thunk = () => C.null_.parseOrThrow(undef);
+        expect(thunk).toThrow("expected null, got undefined");
+      });
+      it("rejects falsy strings", () => {
+        const thunk = () => C.null_.parseOrThrow("");
+        expect(thunk).toThrow("expected null, got string");
+      });
+      it("rejects falsy numbers", () => {
+        const thunk = () => C.null_.parseOrThrow(0);
+        expect(thunk).toThrow("expected null, got number");
+      });
+    });
+  });
+
+  describe("array", () => {
+    it("accepts an empty array", () => {
+      const p: C.Parser<string[]> = C.array(C.string);
+      expect(p.parseOrThrow([])).toEqual([]);
+    });
+    it("accepts a singleton array", () => {
+      const p: C.Parser<string[]> = C.array(C.string);
+      expect(p.parseOrThrow(["one"])).toEqual(["one"]);
+    });
+    it("accepts a long array", () => {
+      const p: C.Parser<string[]> = C.array(C.string);
+      expect(p.parseOrThrow(["a", "b", "c"])).toEqual(["a", "b", "c"]);
+    });
+    it("works for nested array types", () => {
+      const p: C.Parser<string[][]> = C.array(C.array(C.string));
+      expect(p.parseOrThrow([["a", "b"], ["c"]])).toEqual([["a", "b"], ["c"]]);
+    });
+    it("rejects on an object with numeric-string keys", () => {
+      const p: C.Parser<string[][]> = C.array(C.array(C.string));
+      const input = {"0": "hmm", "1": "hum"};
+      const thunk = () => p.parseOrThrow(input);
+      expect(thunk).toThrow("expected array, got object");
+    });
+    it("rejects arrays with elements of the wrong type", () => {
+      const p: C.Parser<string[]> = C.array(C.string);
+      const input = ["one", "two", 5];
+      const thunk = () => p.parseOrThrow(input);
+      expect(thunk).toThrow("index 2: expected string, got number");
+    });
+    it("has nice error messages on nested arrays", () => {
+      const p: C.Parser<string[][]> = C.array(C.array(C.string));
+      const input = [["one"], ["two"], [5, "---three, sir"]];
+      const thunk = () => p.parseOrThrow(input);
+      expect(thunk).toThrow("index 2: index 0: expected string, got number");
+    });
+    it("is type-safe", () => {
+      // $ExpectFlowError
+      (C.array(C.string): C.Parser<string>);
+      // $ExpectFlowError
+      (C.array(C.string): C.Parser<number[]>);
+      // $ExpectFlowError
+      (C.array(C.string): C.Parser<string[][]>);
+    });
+  });
+});