Handle encoded URLs

Previously we did not check for encoded characters in the URL. We now decode the URL components first and then match the decoded URL against the unsafe-link patterns. We also make sure that the link passed contains no HTML. Signed-off-by: Andrea Maria Piana <andrea.maria.piana@gmail.com>
2020-06-19 08:28:13 +02:00 · 2020-06-19 08:28:13 +02:00 · 28ebedbd16
parent c1583249f6
commit 28ebedbd16
4 changed files with 167 additions and 3 deletions
--- a/src/status_im/utils/security.cljs
+++ b/src/status_im/utils/security.cljs
@ -1,4 +1,5 @@
-(ns status-im.utils.security)
+(ns status-im.utils.security
+  (:require [status-im.utils.security-html :as h]))

 (defprotocol Unmaskable
  ;; Retrieve the stored value.
@ -32,8 +33,10 @@
  "Check the link is safe to be handled, it is not a javavascript link or contains
  an rtlo character, which might mean is a spoofed url"
  [link]
-  (not (or (re-matches javascript-link-regex link)
-           (re-matches rtlo-link-regex link))))
+  (let [decoded-link (js/decodeURIComponent link)]
+    (not (or (re-matches javascript-link-regex decoded-link)
+             (re-matches rtlo-link-regex decoded-link)
+             (h/is-html? decoded-link)))))

 (defn safe-link-text?
  "Check the text of the message containing a link  is safe to be handled
--- a/src/status_im/utils/security_html.cljs
+++ b/src/status_im/utils/security_html.cljs
@ -0,0 +1,132 @@
+(ns status-im.utils.security-html
+  (:require [clojure.string :as string]))
+
+;; The tag list and regexes below are taken from https://github.com/sindresorhus/is-html
+
+;; Names of the HTML elements that `tags-regex` turns into one
+;; `<tag\b[^>]*>` alternative each. Entries are written in lowercase; the
+;; case-insensitive matching comes from the "i" flag passed when `full-regex`
+;; is built from this list.
+(def html-tags ["a"
+                "abbr"
+                "address"
+                "area"
+                "article"
+                "aside"
+                "audio"
+                "b"
+                "base"
+                "bdi"
+                "bdo"
+                "blockquote"
+                "body"
+                "br"
+                "button"
+                "canvas"
+                "caption"
+                "cite"
+                "code"
+                "col"
+                "colgroup"
+                "data"
+                "datalist"
+                "dd"
+                "del"
+                "details"
+                "dfn"
+                "dialog"
+                "div"
+                "dl"
+                "dt"
+                "em"
+                "embed"
+                "fieldset"
+                "figcaption"
+                "figure"
+                "footer"
+                "form"
+                "h1"
+                "h2"
+                "h3"
+                "h4"
+                "h5"
+                "h6"
+                "head"
+                "header"
+                "hgroup"
+                "hr"
+                "html"
+                "i"
+                "iframe"
+                "img"
+                "input"
+                "ins"
+                "kbd"
+                "label"
+                "legend"
+                "li"
+                "link"
+                "main"
+                "map"
+                "mark"
+                "math"
+                "menu"
+                "menuitem"
+                "meta"
+                "meter"
+                "nav"
+                "noscript"
+                "object"
+                "ol"
+                "optgroup"
+                "option"
+                "output"
+                "p"
+                "param"
+                "picture"
+                "pre"
+                "progress"
+                "q"
+                "rb"
+                "rp"
+                "rt"
+                "rtc"
+                "ruby"
+                "s"
+                "samp"
+                "script"
+                "section"
+                "select"
+                "slot"
+                "small"
+                "source"
+                "span"
+                "strong"
+                "style"
+                "sub"
+                "summary"
+                "sup"
+                "svg"
+                "table"
+                "tbody"
+                "td"
+                "template"
+                "textarea"
+                "tfoot"
+                "th"
+                "thead"
+                "time"
+                "title"
+                "tr"
+                "track"
+                "u"
+                "ul"
+                "var"
+                "video"
+                "wbr"])
+
+;; First pass: a doctype declaration (optionally preceded by one whitespace
+;; character), or a run of <html>, <body> or custom <x-*> opening tags.
+(def basic-regex #"(?i)\s?<!doctype html>|(<html\b[^>]*>|<body\b[^>]*>|<x-[^>]+>)+")
+
+;; Regex source: an alternation with one `<tag\b[^>]*>` branch per entry of
+;; `html-tags`.
+(def tags-regex
+  (string/join "|"
+               (for [tag html-tags]
+                 (str "<" tag "\\b[^>]*>"))))
+
+;; Case-insensitive matcher for an opening tag of any entry in `html-tags`.
+(def full-regex (js/RegExp. tags-regex "i"))
+
+(defn is-html?
+  "Returns true when `text` is a string in which `basic-regex` or `full-regex`
+  finds a match (a doctype declaration or a recognised opening tag), and false
+  otherwise. Non-string input, including nil, is reported as not HTML rather
+  than throwing."
+  [text]
+  ;; `re-find` returns the raw match (a string, or a vector when the pattern
+  ;; has groups) and rejects non-string input, so guard the type first and
+  ;; coerce the result to a strict boolean, as a `?` predicate should.
+  (boolean
+   (and (string? text)
+        (or (re-find basic-regex text)
+            (re-find full-regex text)))))
--- a/src/status_im/utils/security_html_test.cljs
+++ b/src/status_im/utils/security_html_test.cljs
@ -0,0 +1,27 @@
+(ns status-im.utils.security-html-test
+  (:require [cljs.test :refer-macros [deftest is testing]]
+            [status-im.utils.security-html :as s]))
+
+(deftest with-doctype
+  ;; A doctype declaration, alone or after leading newlines, must be flagged.
+  (doseq [sample ["<!doctype html>"
+                  "\n\n<!doctype html><html>"]]
+    (is (s/is-html? sample))))
+
+(deftest body-html-tags
+  (testing "detect HTML if it has <html>, <body> or <x-*>"
+    ;; Every sample carries at least one <html>, <body> or custom <x-*> tag.
+    (doseq [sample ["<html>"
+                    "<html></html>"
+                    "<html lang=\"en\"></html>"
+                    "<html><body></html>"
+                    "<html><body class=\"no-js\"></html>"
+                    "<x-unicorn>"]]
+      (is (s/is-html? sample)))))
+
+(deftest html-standard-tags
+  (testing "detect HTML if it contains any of the standard HTML tags"
+    ;; One sample with a bare tag, one with an attribute on the tag.
+    (doseq [markup ["<p>foo</p>"
+                    "<a href=\"#\">foo</a>"]]
+      (is (s/is-html? markup)))))
+
+(deftest not-matching-xml
+  ;; Unknown element names, and names that merely start with a known tag
+  ;; ("htmly", "bodyx"), must not be reported as HTML.
+  (doseq [xml ["<cake>foo</cake>"
+               "<any>rocks</any>"
+               "<htmly>not</htmly>"
+               "<bodyx>not</bodyx>"]]
+    (is (not (s/is-html? xml)))))
--- a/src/status_im/utils/security_test.cljs
+++ b/src/status_im/utils/security_test.cljs
@ -20,6 +20,8 @@
    (is (not (security/safe-link? "JaVasCrIpt://anything"))))
  (testing "a javascript link upper cases"
    (is (not (security/safe-link? "JAVASCRIPT://anything"))))
+  (testing "an url-encoded javascript link"
+    (is (not (security/safe-link? "javascript:/%2F%250dalert(document.domain)"))))
  (testing "rtlo links"
    (is (not (security/safe-link? rtlo-link)))))