Handle encoded urls

Previously we did not account for percent-encoded characters in the URL.
We now decode the URL components first and then run the safety checks
against the decoded value.
We also reject any link that contains HTML.

Signed-off-by: Andrea Maria Piana <andrea.maria.piana@gmail.com>
This commit is contained in:
Andrea Maria Piana 2020-06-19 08:28:13 +02:00
parent c1583249f6
commit 28ebedbd16
No known key found for this signature in database
GPG Key ID: AA6CCA6DE0E06424
4 changed files with 167 additions and 3 deletions

View File

@ -1,4 +1,5 @@
(ns status-im.utils.security)
(ns status-im.utils.security
(:require [status-im.utils.security-html :as h]))
(defprotocol Unmaskable
;; Retrieve the stored value.
@ -32,8 +33,10 @@
"Check the link is safe to be handled, it is not a javavascript link or contains
an rtlo character, which might mean is a spoofed url"
[link]
(not (or (re-matches javascript-link-regex link)
(re-matches rtlo-link-regex link))))
(let [decoded-link (js/decodeURIComponent link)]
(not (or (re-matches javascript-link-regex decoded-link)
(re-matches rtlo-link-regex decoded-link)
(h/is-html? decoded-link)))))
(defn safe-link-text?
"Check the text of the message containing a link is safe to be handled

View File

@ -0,0 +1,132 @@
(ns status-im.utils.security-html
(:require [clojure.string :as string]))
; Taken from https://github.com/sindresorhus/is-html
;; Names of the HTML elements used to build the tag-detection pattern.
;; One row per initial letter; the order of the entries is preserved.
(def html-tags
  ["a" "abbr" "address" "area" "article" "aside" "audio"
   "b" "base" "bdi" "bdo" "blockquote" "body" "br" "button"
   "canvas" "caption" "cite" "code" "col" "colgroup"
   "data" "datalist" "dd" "del" "details" "dfn" "dialog" "div" "dl" "dt"
   "em" "embed"
   "fieldset" "figcaption" "figure" "footer" "form"
   "h1" "h2" "h3" "h4" "h5" "h6" "head" "header" "hgroup" "hr" "html"
   "i" "iframe" "img" "input" "ins"
   "kbd"
   "label" "legend" "li" "link"
   "main" "map" "mark" "math" "menu" "menuitem" "meta" "meter"
   "nav" "noscript"
   "object" "ol" "optgroup" "option" "output"
   "p" "param" "picture" "pre" "progress"
   "q"
   "rb" "rp" "rt" "rtc" "ruby"
   "s" "samp" "script" "section" "select" "slot" "small" "source" "span"
   "strong" "style" "sub" "summary" "sup" "svg"
   "table" "tbody" "td" "template" "textarea" "tfoot" "th" "thead" "time"
   "title" "tr" "track"
   "u" "ul"
   "var" "video"
   "wbr"])
;; Case-insensitive pattern that matches either a `<!doctype html>`
;; declaration (optionally preceded by a single whitespace character) or a
;; run of one or more consecutive `<html ...>`, `<body ...>` or `<x-...>`
;; opening tags.
(def basic-regex #"(?i)\s?<!doctype html>|(<html\b[^>]*>|<body\b[^>]*>|<x-[^>]+>)+")
;; Pattern source (a string, not a compiled regex): one `<tag\b[^>]*>`
;; alternative for every entry of `html-tags`, joined with `|`.
(def tags-regex
  (string/join "|"
               (for [tag html-tags]
                 (str "<" tag "\\b[^>]*>"))))
;; Case-insensitive JavaScript RegExp compiled from the `tags-regex` source.
(def full-regex
  (js/RegExp. tags-regex "i"))
(defn is-html?
  "Returns true when `text` contains something matched by `basic-regex`
  or `full-regex`, false otherwise.

  The result is always a boolean rather than the raw `re-find` match, as
  the `?` suffix promises. Anything that is not a string (e.g. nil) yields
  false instead of being handed to `re-find`, which only accepts strings."
  [text]
  (boolean
   (and (string? text)
        (or (re-find basic-regex text)
            (re-find full-regex text)))))

View File

@ -0,0 +1,27 @@
(ns status-im.utils.security-html-test
(:require [cljs.test :refer-macros [deftest is testing]]
[status-im.utils.security-html :as s]))
(deftest with-doctype
  ;; A doctype declaration counts as HTML, alone or after leading newlines.
  (doseq [markup ["<!doctype html>"
                  "\n\n<!doctype html><html>"]]
    (is (s/is-html? markup))))
(deftest body-html-tags
  (testing "detect HTML if it has <html>, <body> or <x-*>"
    ;; Every sample below must be reported as HTML.
    (doseq [markup ["<html>"
                    "<html></html>"
                    "<html lang=\"en\"></html>"
                    "<html><body></html>"
                    "<html><body class=\"no-js\"></html>"
                    "<x-unicorn>"]]
      (is (s/is-html? markup)))))
(deftest html-standard-tags
  (testing "detect HTML if it contains any of the standard HTML tags"
    ;; Ordinary elements, with and without attributes.
    (doseq [markup ["<p>foo</p>"
                    "<a href=\"#\">foo</a>"]]
      (is (s/is-html? markup)))))
(deftest not-matching-xml
  ;; Unknown tag names, including ones that merely start with `html` or
  ;; `body`, must not be reported as HTML.
  (doseq [xml ["<cake>foo</cake>"
               "<any>rocks</any>"
               "<htmly>not</htmly>"
               "<bodyx>not</bodyx>"]]
    (is (not (s/is-html? xml)))))

View File

@ -20,6 +20,8 @@
(is (not (security/safe-link? "JaVasCrIpt://anything"))))
(testing "a javascript link upper cases"
(is (not (security/safe-link? "JAVASCRIPT://anything"))))
(testing "an url-encoded javascript link"
(is (not (security/safe-link? "javascript:/%2F%250dalert(document.domain)"))))
(testing "rtlo links"
(is (not (security/safe-link? rtlo-link)))))