diff --git a/package.json b/package.json
index 5b86237..d86e197 100644
--- a/package.json
+++ b/package.json
@@ -19,6 +19,7 @@
"express": "^4.16.3",
"fs-extra": "8.1.0",
"history": "^3.0.0",
+ "htmlparser2": "^4.0.0",
"isomorphic-fetch": "^2.2.1",
"json-stable-stringify": "^1.0.1",
"lodash.clonedeep": "^4.5.0",
diff --git a/sharness/test_no_raw_anchor_elements.t b/sharness/test_no_raw_anchor_elements.t
index 4292be2..4263a04 100755
--- a/sharness/test_no_raw_anchor_elements.t
+++ b/sharness/test_no_raw_anchor_elements.t
@@ -23,6 +23,7 @@ test_expect_success "application components must use <Link> instead of <a>" '
":/src/*.js" \
":(exclude,top)*/__snapshots__/*" \
":(exclude,top)*/snapshots/*" \
+ ":(exclude,top)src/plugins/discourse/references.test.js" \
":(exclude,top)src/webutil/Link.js" \
;
'
diff --git a/src/plugins/discourse/references.js b/src/plugins/discourse/references.js
new file mode 100644
index 0000000..a92278e
--- /dev/null
+++ b/src/plugins/discourse/references.js
@@ -0,0 +1,25 @@
+// @flow
+
+const htmlparser2 = require("htmlparser2");
+
+ export type Hyperlink = string; // http(s) URL, as returned by parseLinks
+
+export function parseLinks(cookedHtml: string): Hyperlink[] {
+ const links = [];
+ const httpRegex = /^https?:\/\//;
+ const parser = new htmlparser2.Parser({
+ onopentag(name, attribs) {
+ if (name === "a") {
+ const href = attribs.href;
+ if (href != null) {
+ if (href.match(httpRegex)) {
+ links.push(href);
+ }
+ }
+ }
+ },
+ });
+ parser.write(cookedHtml);
+ parser.end();
+ return links;
+}
diff --git a/src/plugins/discourse/references.test.js b/src/plugins/discourse/references.test.js
new file mode 100644
index 0000000..ed08ef2
--- /dev/null
+++ b/src/plugins/discourse/references.test.js
@@ -0,0 +1,30 @@
+// @flow
+
+import {parseLinks} from "./references";
+
+describe("plugins/discourse/references", () => {
+ describe("parseLinks", () => {
+ it("does not error on empty string", () => {
+ expect(parseLinks("")).toEqual([]);
+ });
+ it("does not error on non-html", () => {
+ expect(parseLinks("foo bar")).toEqual([]);
+ });
+ it("does not pick up raw urls", () => {
+ expect(parseLinks("https://www.google.com")).toEqual([]);
+ });
+ it("picks up a (https://) hyperlink in href", () => {
+ expect(parseLinks(`A Link`)).toEqual(
+ ["https://www.google.com"]
+ );
+ });
+ it("picks up a (http://) hyperlink in href", () => {
+ expect(parseLinks(`A Link`)).toEqual([
+ "http://www.google.com",
+ ]);
+ });
+ it("doesn't pick up anchor hrefs", () => {
+ expect(parseLinks(`A Link`)).toEqual([]);
+ });
+ });
+});
diff --git a/yarn.lock b/yarn.lock
index 551ae48..90220ee 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2953,6 +2953,14 @@ dom-serializer@0, dom-serializer@~0.1.0, dom-serializer@~0.1.1:
domelementtype "^1.3.0"
entities "^1.1.1"
+dom-serializer@^0.2.1:
+ version "0.2.1"
+ resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.2.1.tgz#13650c850daffea35d8b626a4cfc4d3a17643fdb"
+ integrity sha512-sK3ujri04WyjwQXVoK4PU3y8ula1stq10GJZpqHIUgoGZdsGzAGu65BnU3d08aTVSvO7mGPZUc0wTEDL+qGE0Q==
+ dependencies:
+ domelementtype "^2.0.1"
+ entities "^2.0.0"
+
domain-browser@^1.1.1:
version "1.2.0"
resolved "https://registry.yarnpkg.com/domain-browser/-/domain-browser-1.2.0.tgz#3d31f50191a6749dd1375a7f522e823d42e54eda"
@@ -2963,6 +2971,11 @@ domelementtype@1, domelementtype@^1.3.0, domelementtype@^1.3.1:
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.1.tgz#d048c44b37b0d10a7f2a3d5fee3f4333d790481f"
integrity sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w==
+domelementtype@^2.0.1:
+ version "2.0.1"
+ resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.0.1.tgz#1f8bdfe91f5a78063274e803b4bdcedf6e94f94d"
+ integrity sha512-5HOHUDsYZWV8FGWN0Njbr/Rn7f/eWSQi1v7+HsUVwXgn8nWWlL64zKDkS0n8ZmQ3mlWOMuXOnR+7Nx/5tMO5AQ==
+
domexception@^1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/domexception/-/domexception-1.0.1.tgz#937442644ca6a31261ef36e3ec677fe805582c90"
@@ -2977,6 +2990,13 @@ domhandler@^2.3.0, domhandler@^2.4.2:
dependencies:
domelementtype "1"
+domhandler@^3.0.0:
+ version "3.0.0"
+ resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-3.0.0.tgz#51cd13efca31da95bbb0c5bee3a48300e333b3e9"
+ integrity sha512-eKLdI5v9m67kbXQbJSNn1zjh0SDzvzWVWtX+qEI3eMjZw8daH9k8rlj1FZY9memPwjiskQFbe7vHVVJIAqoEhw==
+ dependencies:
+ domelementtype "^2.0.1"
+
domutils@1.5.1:
version "1.5.1"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.5.1.tgz#dcd8488a26f563d61079e48c9f7b7e32373682cf"
@@ -2993,6 +3013,15 @@ domutils@^1.5.1:
dom-serializer "0"
domelementtype "1"
+domutils@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.0.0.tgz#15b8278e37bfa8468d157478c58c367718133c08"
+ integrity sha512-n5SelJ1axbO636c2yUtOGia/IcJtVtlhQbFiVDBZHKV5ReJO1ViX7sFEemtuyoAnBxk5meNSYgA8V4s0271efg==
+ dependencies:
+ dom-serializer "^0.2.1"
+ domelementtype "^2.0.1"
+ domhandler "^3.0.0"
+
dotenv-expand@5.1.0:
version "5.1.0"
resolved "https://registry.yarnpkg.com/dotenv-expand/-/dotenv-expand-5.1.0.tgz#3fbaf020bfd794884072ea26b1e9791d45a629f0"
@@ -3092,6 +3121,11 @@ entities@^1.1.1, "entities@~ 1.1.1", entities@~1.1.1:
resolved "https://registry.yarnpkg.com/entities/-/entities-1.1.2.tgz#bdfa735299664dfafd34529ed4f8522a275fea56"
integrity sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==
+entities@^2.0.0:
+ version "2.0.0"
+ resolved "https://registry.yarnpkg.com/entities/-/entities-2.0.0.tgz#68d6084cab1b079767540d80e56a39b423e4abf4"
+ integrity sha512-D9f7V0JSRwIxlRI2mjMqufDrRDnx8p+eEOz7aUM9SuvF8gsBzra0/6tbjl1m8eQHrZlYj6PxqE00hZ1SAIKPLw==
+
enzyme-adapter-react-16@^1.1.1:
version "1.14.0"
resolved "https://registry.yarnpkg.com/enzyme-adapter-react-16/-/enzyme-adapter-react-16-1.14.0.tgz#204722b769172bcf096cb250d33e6795c1f1858f"
@@ -4221,6 +4255,16 @@ htmlparser2@^3.10.0, htmlparser2@^3.9.1:
inherits "^2.0.1"
readable-stream "^3.1.1"
+htmlparser2@^4.0.0:
+ version "4.0.0"
+ resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-4.0.0.tgz#6034658db65b7713a572a9ebf79f650832dceec8"
+ integrity sha512-cChwXn5Vam57fyXajDtPXL1wTYc8JtLbr2TN76FYu05itVVVealxLowe2B3IEznJG4p9HAYn/0tJaRlGuEglFQ==
+ dependencies:
+ domelementtype "^2.0.1"
+ domhandler "^3.0.0"
+ domutils "^2.0.0"
+ entities "^2.0.0"
+
http-deceiver@^1.2.7:
version "1.2.7"
resolved "https://registry.yarnpkg.com/http-deceiver/-/http-deceiver-1.2.7.tgz#fa7168944ab9a519d337cb0bec7284dc3e723d87"