mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-03 08:13:59 +00:00
Parse Discourse references from hyperlinks (#1405)
The `discourse/references` module now has a `linksToReferences` method which extracts the parsed Discourse references from an array of hyperlinks. The method is tested. Test plan: Unit tests added; `yarn test` passes. This is progress towards [Discourse reference and mention detection][1]. [1]: https://discourse.sourcecred.io/t/discourse-reference-mention-detection/270
This commit is contained in:
parent
f725f7c47a
commit
78c34b5a36
@ -2,9 +2,34 @@
|
||||
|
||||
const htmlparser2 = require("htmlparser2");
|
||||
|
||||
export type Hyperlink = string;
|
||||
import {type TopicId} from "./fetch";
|
||||
|
||||
export function parseLinks(cookedHtml: string): Hyperlink[] {
|
||||
/**
 * A reference to a specific post within a topic on a Discourse server.
 *
 * As produced by `linksToReferences`: `serverUrl` is the `https://<host>`
 * prefix of the link, `topicId` is the numeric id that follows the topic
 * slug in a `/t/<slug>/<id>/<n>` path, and `postIndex` is the trailing
 * number `<n>` of that path.
 * NOTE(review): whether `postIndex` is 1-based is not visible here; confirm
 * against the fetch module.
 */
export type DiscoursePostReference = {|
|
||||
+type: "POST",
|
||||
+topicId: TopicId,
|
||||
+postIndex: number,
|
||||
+serverUrl: string,
|
||||
|};
|
||||
|
||||
/**
 * A reference to a topic as a whole on a Discourse server.
 *
 * As produced by `linksToReferences`: `serverUrl` is the `https://<host>`
 * prefix of the link and `topicId` is the numeric id that follows the topic
 * slug in a `/t/<slug>/<id>` path.
 */
export type DiscourseTopicReference = {|
|
||||
+type: "TOPIC",
|
||||
+topicId: TopicId,
|
||||
+serverUrl: string,
|
||||
|};
|
||||
|
||||
/**
 * A reference to a user (a mention) on a Discourse server.
 *
 * As produced by `linksToReferences`: `serverUrl` is the `https://<host>`
 * prefix of the link and `username` is the path segment that follows `/u/`.
 */
export type DiscourseUserReference = {|
|
||||
+type: "USER",
|
||||
+username: string,
|
||||
+serverUrl: string,
|
||||
|};
|
||||
|
||||
/**
 * Any reference that can be parsed out of a hyperlink: a post, a topic, or
 * a user. The `type` field ("POST" | "TOPIC" | "USER") discriminates the
 * variants.
 */
export type DiscourseReference =
|
||||
| DiscoursePostReference
|
||||
| DiscourseTopicReference
|
||||
| DiscourseUserReference;
|
||||
|
||||
/**
 * A hyperlink target represented as a plain string. This is the element type
 * returned by `parseLinks` and accepted by `linksToReferences`.
 */
export type UrlString = string;
|
||||
export function parseLinks(cookedHtml: string): UrlString[] {
|
||||
const links = [];
|
||||
const httpRegex = /^https?:\/\//;
|
||||
const parser = new htmlparser2.Parser({
|
||||
@ -23,3 +48,38 @@ export function parseLinks(cookedHtml: string): Hyperlink[] {
|
||||
parser.end();
|
||||
return links;
|
||||
}
|
||||
|
||||
export function linksToReferences(
|
||||
links: $ReadOnlyArray<UrlString>
|
||||
): DiscourseReference[] {
|
||||
const server = "(https://[\\w.-]+)";
|
||||
const topic = `(?:${server})/t/[\\w-]+/(\\d+)`;
|
||||
const post = `(?:${topic})/(\\d+)`;
|
||||
const params = "(?:\\?[\\w-=]+)?";
|
||||
|
||||
const topicRegex = new RegExp(`^(?:${topic})(?:${params})/?$`);
|
||||
const postRegex = new RegExp(`^(?:${post})(?:${params})/?$`);
|
||||
const userRegex = new RegExp(`^(?:${server})/u/([\\w-]+)(?:${params})/?$`);
|
||||
const references: DiscourseReference[] = [];
|
||||
for (const link of links) {
|
||||
let match = null;
|
||||
const decoded = decodeURI(link);
|
||||
if ((match = decoded.match(postRegex))) {
|
||||
references.push({
|
||||
type: "POST",
|
||||
topicId: +match[2],
|
||||
serverUrl: match[1],
|
||||
postIndex: +match[3],
|
||||
});
|
||||
} else if ((match = decoded.match(topicRegex))) {
|
||||
references.push({type: "TOPIC", topicId: +match[2], serverUrl: match[1]});
|
||||
} else if ((match = decoded.match(userRegex))) {
|
||||
references.push({
|
||||
type: "USER",
|
||||
username: match[2],
|
||||
serverUrl: match[1],
|
||||
});
|
||||
}
|
||||
}
|
||||
return references;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
// @flow
|
||||
|
||||
import {parseLinks} from "./references";
|
||||
import {parseLinks, linksToReferences} from "./references";
|
||||
|
||||
describe("plugins/discourse/references", () => {
|
||||
describe("parseLinks", () => {
|
||||
@ -27,4 +27,122 @@ describe("plugins/discourse/references", () => {
|
||||
expect(parseLinks(`<a href="#foo">A Link</a>`)).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("linksToReferences", () => {
|
||||
it("works for topics", () => {
|
||||
const hyperlinks = [
|
||||
"https://sourcecred-test.discourse.group/t/123-a-post-with-numbers-in-slug/20",
|
||||
"https://sourcecred-test.discourse.group/t/123-a-post-with-numbers-in-slug/20/",
|
||||
"https://sourcecred-test.discourse.group/t/123-a-post-with-numbers-in-slug/20?u=d11",
|
||||
];
|
||||
const reference = {
|
||||
type: "TOPIC",
|
||||
topicId: 20,
|
||||
serverUrl: "https://sourcecred-test.discourse.group",
|
||||
};
|
||||
expect(linksToReferences(hyperlinks)).toEqual([
|
||||
reference,
|
||||
reference,
|
||||
reference,
|
||||
]);
|
||||
});
|
||||
it("works for posts", () => {
|
||||
const hyperlinks = [
|
||||
"https://sourcecred-test.discourse.group/t/my-first-test-post/11/2?u=d11",
|
||||
"https://sourcecred-test.discourse.group/t/my-first-test-post/11/2/",
|
||||
"https://sourcecred-test.discourse.group/t/my-first-test-post/11/2",
|
||||
];
|
||||
const reference = {
|
||||
type: "POST",
|
||||
topicId: 11,
|
||||
postIndex: 2,
|
||||
serverUrl: "https://sourcecred-test.discourse.group",
|
||||
};
|
||||
expect(linksToReferences(hyperlinks)).toEqual([
|
||||
reference,
|
||||
reference,
|
||||
reference,
|
||||
]);
|
||||
});
|
||||
it("works for mentions", () => {
|
||||
const hyperlinks = ["https://sourcecred-test.discourse.group/u/d11"];
|
||||
const reference = {
|
||||
type: "USER",
|
||||
username: "d11",
|
||||
serverUrl: "https://sourcecred-test.discourse.group",
|
||||
};
|
||||
expect(linksToReferences(hyperlinks)).toEqual([reference]);
|
||||
});
|
||||
it("doesn't find bad or malformed references", () => {
|
||||
const hyperlinks = [
|
||||
// Not a reference to anything in particular.
|
||||
"https://sourcecred-test.discourse.group",
|
||||
// No https == no go. We can be more permissive if needed.
|
||||
"sourcecred-test.discourse.group/t/foo/120",
|
||||
// There's a space at the front.
|
||||
" https://sourcecred-test.discourse.group/t/foo/120",
|
||||
// unexpected trailing stuff
|
||||
"https://sourcecred-test.discourse.group/t/foo/120$$",
|
||||
];
|
||||
expect(linksToReferences(hyperlinks)).toEqual([]);
|
||||
});
|
||||
it("works on a snapshot corpus", () => {
|
||||
const hyperlinks = [
|
||||
"https://discourse.sourcecred.io/t/experiment-sourcecred-stack-lookup/287/4",
|
||||
"https://discourse.sourcecred.io/t/experiment-sourcecred-stack-lookup/287/4?u=decentralion",
|
||||
"https://talk.observablehq.com/t/having-some-trouble-with-d3-dragging/776",
|
||||
"https://talk.observablehq.com/t/package-integrity-and-yarn-lock-package-lock-json/2300/6",
|
||||
// This topic has non-ASCII characters in the topic name; seems like
|
||||
// (that particular Discourse instance) filtered it out to leave a
|
||||
// neutral topic slug.
|
||||
"https://forums.eveonline.com/t/topic/195153",
|
||||
// Shouldn't necessarily get a reference, since @-references generate
|
||||
// links that do not have the /summary suffix.
|
||||
"https://forums.eveonline.com/u/dorian_neil/summary",
|
||||
"https://discourse.sourcecred.io/u/decentralion",
|
||||
];
|
||||
const hyperlinkToReference = {};
|
||||
for (const hyperlink of hyperlinks) {
|
||||
hyperlinkToReference[hyperlink] = linksToReferences([hyperlink])[0];
|
||||
}
|
||||
expect(hyperlinkToReference).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"https://discourse.sourcecred.io/t/experiment-sourcecred-stack-lookup/287/4": Object {
|
||||
"postIndex": 4,
|
||||
"serverUrl": "https://discourse.sourcecred.io",
|
||||
"topicId": 287,
|
||||
"type": "POST",
|
||||
},
|
||||
"https://discourse.sourcecred.io/t/experiment-sourcecred-stack-lookup/287/4?u=decentralion": Object {
|
||||
"postIndex": 4,
|
||||
"serverUrl": "https://discourse.sourcecred.io",
|
||||
"topicId": 287,
|
||||
"type": "POST",
|
||||
},
|
||||
"https://discourse.sourcecred.io/u/decentralion": Object {
|
||||
"serverUrl": "https://discourse.sourcecred.io",
|
||||
"type": "USER",
|
||||
"username": "decentralion",
|
||||
},
|
||||
"https://forums.eveonline.com/t/topic/195153": Object {
|
||||
"serverUrl": "https://forums.eveonline.com",
|
||||
"topicId": 195153,
|
||||
"type": "TOPIC",
|
||||
},
|
||||
"https://forums.eveonline.com/u/dorian_neil/summary": undefined,
|
||||
"https://talk.observablehq.com/t/having-some-trouble-with-d3-dragging/776": Object {
|
||||
"serverUrl": "https://talk.observablehq.com",
|
||||
"topicId": 776,
|
||||
"type": "TOPIC",
|
||||
},
|
||||
"https://talk.observablehq.com/t/package-integrity-and-yarn-lock-package-lock-json/2300/6": Object {
|
||||
"postIndex": 6,
|
||||
"serverUrl": "https://talk.observablehq.com",
|
||||
"topicId": 2300,
|
||||
"type": "POST",
|
||||
},
|
||||
}
|
||||
`);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user