Add a Discourse hyperlink parser (parseLinks).

Summary of this patch:
  * package.json / yarn.lock: add htmlparser2 ^4.0.0 and the lockfile
    entries for it and its dependencies.
  * src/plugins/discourse/references.js: new parseLinks(cookedHtml), which
    feeds the HTML to an htmlparser2 Parser and collects the href of every
    opened "a" tag whose href matches /^https?:\/\//.
  * src/plugins/discourse/references.test.js: tests for parseLinks.
  * sharness/test_no_raw_anchor_elements.t: exclude the new test file from
    the check, since its fixtures contain raw anchor elements.

NOTE(review): the line structure of this patch was reconstructed from a
whitespace-collapsed copy. Leading indentation on context/added lines follows
the usual conventions for each file type; confirm against the target tree
before applying. The anchor fixtures in references.test.js were restored from
the expected values in each assertion; the "#foo" fragment in the last test is
a reconstruction and should be confirmed.

diff --git a/package.json b/package.json
index 5b86237..d86e197 100644
--- a/package.json
+++ b/package.json
@@ -19,6 +19,7 @@
     "express": "^4.16.3",
     "fs-extra": "8.1.0",
     "history": "^3.0.0",
+    "htmlparser2": "^4.0.0",
     "isomorphic-fetch": "^2.2.1",
     "json-stable-stringify": "^1.0.1",
     "lodash.clonedeep": "^4.5.0",
diff --git a/sharness/test_no_raw_anchor_elements.t b/sharness/test_no_raw_anchor_elements.t
index 4292be2..4263a04 100755
--- a/sharness/test_no_raw_anchor_elements.t
+++ b/sharness/test_no_raw_anchor_elements.t
@@ -23,6 +23,7 @@ test_expect_success "application components must use <Link> instead of <a>" '
         ":/src/*.js" \
         ":(exclude,top)*/__snapshots__/*" \
         ":(exclude,top)*/snapshots/*" \
+        ":(exclude,top)src/plugins/discourse/references.test.js" \
         ":(exclude,top)src/webutil/Link.js" \
         ;
 '
diff --git a/src/plugins/discourse/references.js b/src/plugins/discourse/references.js
new file mode 100644
index 0000000..a92278e
--- /dev/null
+++ b/src/plugins/discourse/references.js
@@ -0,0 +1,25 @@
+// @flow
+
+const htmlparser2 = require("htmlparser2");
+
+export type Hyperlink = string;
+
+export function parseLinks(cookedHtml: string): Hyperlink[] {
+  const links = [];
+  const httpRegex = /^https?:\/\//;
+  const parser = new htmlparser2.Parser({
+    onopentag(name, attribs) {
+      if (name === "a") {
+        const href = attribs.href;
+        if (href != null) {
+          if (href.match(httpRegex)) {
+            links.push(href);
+          }
+        }
+      }
+    },
+  });
+  parser.write(cookedHtml);
+  parser.end();
+  return links;
+}
diff --git a/src/plugins/discourse/references.test.js b/src/plugins/discourse/references.test.js
new file mode 100644
index 0000000..ed08ef2
--- /dev/null
+++ b/src/plugins/discourse/references.test.js
@@ -0,0 +1,30 @@
+// @flow
+
+import {parseLinks} from "./references";
+
+describe("plugins/discourse/references", () => {
+  describe("parseLinks", () => {
+    it("does not error on empty string", () => {
+      expect(parseLinks("")).toEqual([]);
+    });
+    it("does not error on non-html", () => {
+      expect(parseLinks("foo bar")).toEqual([]);
+    });
+    it("does not pick up raw urls", () => {
+      expect(parseLinks("https://www.google.com")).toEqual([]);
+    });
+    it("picks up a (https://) hyperlink in href", () => {
+      expect(parseLinks(`<a href="https://www.google.com">A Link</a>`)).toEqual(
+        ["https://www.google.com"]
+      );
+    });
+    it("picks up a (http://) hyperlink in href", () => {
+      expect(parseLinks(`<a href="http://www.google.com">A Link</a>`)).toEqual([
+        "http://www.google.com",
+      ]);
+    });
+    it("doesn't pick up anchor hrefs", () => {
+      expect(parseLinks(`<a href="#foo">A Link</a>`)).toEqual([]);
+    });
+  });
+});
diff --git a/yarn.lock b/yarn.lock
index 551ae48..90220ee 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2953,6 +2953,14 @@ dom-serializer@0, dom-serializer@~0.1.0, dom-serializer@~0.1.1:
     domelementtype "^1.3.0"
     entities "^1.1.1"
 
+dom-serializer@^0.2.1:
+  version "0.2.1"
+  resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.2.1.tgz#13650c850daffea35d8b626a4cfc4d3a17643fdb"
+  integrity sha512-sK3ujri04WyjwQXVoK4PU3y8ula1stq10GJZpqHIUgoGZdsGzAGu65BnU3d08aTVSvO7mGPZUc0wTEDL+qGE0Q==
+  dependencies:
+    domelementtype "^2.0.1"
+    entities "^2.0.0"
+
 domain-browser@^1.1.1:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/domain-browser/-/domain-browser-1.2.0.tgz#3d31f50191a6749dd1375a7f522e823d42e54eda"
@@ -2963,6 +2971,11 @@ domelementtype@1, domelementtype@^1.3.0, domelementtype@^1.3.1:
   resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.1.tgz#d048c44b37b0d10a7f2a3d5fee3f4333d790481f"
   integrity sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w==
 
+domelementtype@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.0.1.tgz#1f8bdfe91f5a78063274e803b4bdcedf6e94f94d"
+  integrity sha512-5HOHUDsYZWV8FGWN0Njbr/Rn7f/eWSQi1v7+HsUVwXgn8nWWlL64zKDkS0n8ZmQ3mlWOMuXOnR+7Nx/5tMO5AQ==
+
 domexception@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/domexception/-/domexception-1.0.1.tgz#937442644ca6a31261ef36e3ec677fe805582c90"
@@ -2977,6 +2990,13 @@ domhandler@^2.3.0, domhandler@^2.4.2:
   dependencies:
     domelementtype "1"
 
+domhandler@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-3.0.0.tgz#51cd13efca31da95bbb0c5bee3a48300e333b3e9"
+  integrity sha512-eKLdI5v9m67kbXQbJSNn1zjh0SDzvzWVWtX+qEI3eMjZw8daH9k8rlj1FZY9memPwjiskQFbe7vHVVJIAqoEhw==
+  dependencies:
+    domelementtype "^2.0.1"
+
 domutils@1.5.1:
   version "1.5.1"
   resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.5.1.tgz#dcd8488a26f563d61079e48c9f7b7e32373682cf"
@@ -2993,6 +3013,15 @@ domutils@^1.5.1:
     dom-serializer "0"
     domelementtype "1"
 
+domutils@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.0.0.tgz#15b8278e37bfa8468d157478c58c367718133c08"
+  integrity sha512-n5SelJ1axbO636c2yUtOGia/IcJtVtlhQbFiVDBZHKV5ReJO1ViX7sFEemtuyoAnBxk5meNSYgA8V4s0271efg==
+  dependencies:
+    dom-serializer "^0.2.1"
+    domelementtype "^2.0.1"
+    domhandler "^3.0.0"
+
 dotenv-expand@5.1.0:
   version "5.1.0"
   resolved "https://registry.yarnpkg.com/dotenv-expand/-/dotenv-expand-5.1.0.tgz#3fbaf020bfd794884072ea26b1e9791d45a629f0"
@@ -3092,6 +3121,11 @@ entities@^1.1.1, "entities@~ 1.1.1", entities@~1.1.1:
   resolved "https://registry.yarnpkg.com/entities/-/entities-1.1.2.tgz#bdfa735299664dfafd34529ed4f8522a275fea56"
   integrity sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==
 
+entities@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/entities/-/entities-2.0.0.tgz#68d6084cab1b079767540d80e56a39b423e4abf4"
+  integrity sha512-D9f7V0JSRwIxlRI2mjMqufDrRDnx8p+eEOz7aUM9SuvF8gsBzra0/6tbjl1m8eQHrZlYj6PxqE00hZ1SAIKPLw==
+
 enzyme-adapter-react-16@^1.1.1:
   version "1.14.0"
   resolved "https://registry.yarnpkg.com/enzyme-adapter-react-16/-/enzyme-adapter-react-16-1.14.0.tgz#204722b769172bcf096cb250d33e6795c1f1858f"
@@ -4221,6 +4255,16 @@ htmlparser2@^3.10.0, htmlparser2@^3.9.1:
     inherits "^2.0.1"
     readable-stream "^3.1.1"
 
+htmlparser2@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-4.0.0.tgz#6034658db65b7713a572a9ebf79f650832dceec8"
+  integrity sha512-cChwXn5Vam57fyXajDtPXL1wTYc8JtLbr2TN76FYu05itVVVealxLowe2B3IEznJG4p9HAYn/0tJaRlGuEglFQ==
+  dependencies:
+    domelementtype "^2.0.1"
+    domhandler "^3.0.0"
+    domutils "^2.0.0"
+    entities "^2.0.0"
+
 http-deceiver@^1.2.7:
   version "1.2.7"
   resolved "https://registry.yarnpkg.com/http-deceiver/-/http-deceiver-1.2.7.tgz#fa7168944ab9a519d337cb0bec7284dc3e723d87"