mirror of
https://github.com/status-im/status-mobile.git
synced 2025-01-26 16:39:24 +00:00
Implementation of text content parsing
This commit is contained in:
parent
c338896018
commit
acd05e2687
@ -11,6 +11,7 @@
|
||||
[status-im.group-chats.core :as group-chats]
|
||||
[status-im.chat.models :as chat-model]
|
||||
[status-im.chat.models.loading :as chat-loading]
|
||||
[status-im.chat.models.message-content :as message-content]
|
||||
[status-im.chat.commands.receiving :as commands-receiving]
|
||||
[status-im.utils.clocks :as utils.clocks]
|
||||
[status-im.utils.money :as money]
|
||||
@ -23,16 +24,12 @@
|
||||
[status-im.utils.fx :as fx]
|
||||
[taoensso.timbre :as log]))
|
||||
|
||||
(defn- emoji-only-content?
|
||||
[content]
|
||||
(and (string? content) (re-matches constants/regx-emoji content)))
|
||||
|
||||
(defn- prepare-message
|
||||
[{:keys [content] :as message} chat-id current-chat?]
|
||||
;; TODO janherich: enable the animations again once we can do them more efficiently
|
||||
(cond-> (assoc message :appearing? true)
|
||||
(not current-chat?) (assoc :appearing? false)
|
||||
(emoji-only-content? content) (assoc :content-type constants/content-type-emoji)))
|
||||
(message-content/emoji-only-content? content) (assoc :content-type constants/content-type-emoji)))
|
||||
|
||||
(fx/defn re-index-message-groups
|
||||
"Relative datemarks of message groups can get obsolete with passing time,
|
||||
|
114
src/status_im/chat/models/message_content.cljs
Normal file
114
src/status_im/chat/models/message_content.cljs
Normal file
@ -0,0 +1,114 @@
|
||||
(ns status-im.chat.models.message-content
|
||||
(:require [clojure.string :as string]
|
||||
[status-im.constants :as constants]))
|
||||
|
||||
;; Regexes detecting the actionable parts of a message, keyed by the kind they produce.
(def ^:private actions
  {:link    constants/regx-url
   :tag     constants/regx-tag
   :mention constants/regx-mention})

;; Regexes detecting the purely visual markup of a message, keyed by the kind they produce.
(def ^:private stylings
  {:bold   constants/regx-bold
   :italic constants/regx-italic})

;; Markup delimiters (`*` and `~`); they are stripped from text chunks by `build-render-recipe`.
(def ^:private styling-characters #"\*|~")

;; Every recognised kind (actions and stylings together) mapped to its detecting regex.
(def ^:private type->regex (into actions stylings))
|
||||
|
||||
(defn- right-to-left-text?
  "Returns a truthy value when the first character of `text` matches
  `constants/regx-rtl-characters`; returns nil for empty or nil `text`."
  [text]
  (when-let [[leading-character] (seq text)]
    (re-matches constants/regx-rtl-characters leading-character)))
|
||||
|
||||
(defn- query-regex
  "Finds every non-overlapping match of `regex` in `content`, scanning left to right.

  Returns a seq of `[start end]` index pairs (`end` exclusive) expressed in
  positions of the original `content`, or nil when `content` is not a string
  or nothing matches."
  [regex content]
  (when (string? content)
    (loop [input   content
           matches []
           offset  0]
      (if-let [match (.exec regex input)]
        (let [match-value    (aget match 0)
              relative-index (.-index match)
              ;; `input` is only the not yet scanned tail of `content`, so positions
              ;; found in it are shifted by `offset` to get positions in `content`
              start-index    (+ offset relative-index)
              end-index      (+ start-index (count match-value))
              ;; number of characters of `input` used up by this match
              consumed       (+ relative-index (count match-value))]
          (if (seq match-value)
            ;; cut the tail by what was consumed from `input` itself; cutting by the
            ;; absolute `end-index` would skip text once `offset` is greater than zero
            (recur (subs input consumed)
                   (conj matches [start-index end-index])
                   end-index)
            ;; a zero-length match would never advance the scan, stop here
            (seq matches)))
        (seq matches)))))
|
||||
|
||||
(defn enrich-content
  "Adds `:metadata` and `:rtl?` information to message content.

  `:metadata` is a map whose keys can be any of the `:link`, `:tag`, `:mention`
  actions or the `:bold` and `:italic` stylings. The value for each key is a
  sequence of tuples representing ranges in the original `:text`. Kinds without
  any match are left out, and `:metadata` is not added at all when nothing matched.

  `:rtl?` is set to true only when the text is detected as right-to-left."
  [{:keys [text] :as content}]
  (let [found (into {}
                    (keep (fn [[type regex]]
                            (when-let [ranges (query-regex regex text)]
                              [type ranges])))
                    type->regex)]
    (cond-> content
      (seq found)                (assoc :metadata found)
      (right-to-left-text? text) (assoc :rtl? true))))
|
||||
|
||||
(defn- sorted-ranges
  "Flattens `metadata` into `[range kind]` entries ordered by range start and
  then range end, preceded by a `[[0 text-length] :text]` entry covering the
  whole text. Entries are keyed by range, so a range reported for several
  kinds appears only once."
  [{:keys [metadata text]}]
  (let [range->kind (into {}
                          (for [[type ranges] metadata
                                span          ranges]
                            [span type]))
        whole-text  [[0 (count text)] :text]]
    (cons whole-text
          (sort-by (fn [[[from to]]] [from to]) range->kind))))
|
||||
|
||||
(defn- last-index
  "Returns the `:end` of the most recently added record in `result`,
  or 0 when there is no such record."
  [result]
  (if-let [previous-end (:end (peek result))]
    previous-end
    0))
|
||||
|
||||
(defn- start
  "Start index of a `[[start end] kind]` entry."
  [entry]
  (first (first entry)))

(defn- end
  "End index of a `[[start end] kind]` entry."
  [entry]
  (second (first entry)))

(defn- kind
  "Kind of a `[[start end] kind]` entry."
  [entry]
  (second entry))
|
||||
|
||||
(defn- result-record
  "Builds a record for the text segment from `start` to `end`, with `:kind`
  set to the kinds of all entries in `path`."
  [start end path]
  (let [kinds (set (map kind path))]
    {:start start
     :end   end
     :kind  kinds}))
|
||||
|
||||
(defn build-render-recipe
  "Builds render recipe from message text and metadata, can be used by render code
  by simply iterating over it and paying attention to `:kind` set for each segment of text.

  Returns a sequence of `[text-chunk kinds]` tuples, where styling characters are
  removed from every chunk and empty chunks are left out. Returns nil when the
  content has no `:metadata`."
  [{:keys [text metadata] :as content}]
  (letfn [;; Walks the sorted ranges while keeping a stack of the currently open
          ;; (enclosing) ranges, emitting one record per stretch of text that
          ;; shares the same set of kinds.
          (builder [[top :as stack] [input & rest-inputs :as inputs] result]
            (if (seq input)
              (cond
                ;; input is child of the top
                (and (<= (start input) (end top))
                     (<= (end input) (end top)))
                (recur (conj stack input) rest-inputs
                       (conj result (result-record (last-index result) (start input) stack)))

                ;; input overlaps top, it's neither child, nor sibling, discard input;
                ;; ranges are end-exclusive, so an input starting exactly at the end
                ;; of top does not overlap it and must be handled as a sibling below
                (and (>= (start input) (start top))
                     (< (start input) (end top)))
                (recur stack rest-inputs result)

                ;; the only remaining possibility, input is next sibling to top;
                ;; close top and retry the same input against the rest of the stack
                :else
                (recur (rest stack) inputs
                       (conj result (result-record (last-index result) (end top) stack))))
              ;; inputs consumed, unwind stack
              (loop [[top & rest-stack :as stack] stack
                     result result]
                (if top
                  (recur rest-stack
                         (conj result (result-record (last-index result) (end top) stack)))
                  result))))]
    (when metadata
      (let [[head & tail] (sorted-ranges content)]
        (->> (builder (list head) tail [])
             (keep (fn [{:keys [start end kind]}]
                     ;; select text chunk & remove styling chars
                     (let [text-content (-> (subs text start end)
                                            (string/replace styling-characters ""))]
                       ;; filter out empty text chunks
                       (when (seq text-content)
                         [text-content kind])))))))))
|
||||
|
||||
(defn emoji-only-content?
  "Determines if text is just an emoji. Content carrying `:response-to`
  or a non-string `:text` never qualifies."
  [{:keys [text response-to]}]
  (if response-to
    false
    (and (string? text)
         (re-matches constants/regx-emoji text))))
|
@ -188,6 +188,12 @@
|
||||
(ethereum/sha3 "Transfer(address,address,uint256)"))
|
||||
|
||||
(def regx-emoji #"^((?:[\u261D\u26F9\u270A-\u270D]|\uD83C[\uDF85\uDFC2-\uDFC4\uDFC7\uDFCA-\uDFCC]|\uD83D[\uDC42\uDC43\uDC46-\uDC50\uDC66-\uDC69\uDC6E\uDC70-\uDC78\uDC7C\uDC81-\uDC83\uDC85-\uDC87\uDCAA\uDD74\uDD75\uDD7A\uDD90\uDD95\uDD96\uDE45-\uDE47\uDE4B-\uDE4F\uDEA3\uDEB4-\uDEB6\uDEC0\uDECC]|\uD83E[\uDD18-\uDD1C\uDD1E\uDD1F\uDD26\uDD30-\uDD39\uDD3D\uDD3E\uDDD1-\uDDDD])(?:\uD83C[\uDFFB-\uDFFF])?|(?:[\u231A\u231B\u23E9-\u23EC\u23F0\u23F3\u25FD\u25FE\u2614\u2615\u2648-\u2653\u267F\u2693\u26A1\u26AA\u26AB\u26BD\u26BE\u26C4\u26C5\u26CE\u26D4\u26EA\u26F2\u26F3\u26F5\u26FA\u26FD\u2705\u270A\u270B\u2728\u274C\u274E\u2753-\u2755\u2757\u2795-\u2797\u27B0\u27BF\u2B1B\u2B1C\u2B50\u2B55]|\uD83C[\uDC04\uDCCF\uDD8E\uDD91-\uDD9A\uDDE6-\uDDFF\uDE01\uDE1A\uDE2F\uDE32-\uDE36\uDE38-\uDE3A\uDE50\uDE51\uDF00-\uDF20\uDF2D-\uDF35\uDF37-\uDF7C\uDF7E-\uDF93\uDFA0-\uDFCA\uDFCF-\uDFD3\uDFE0-\uDFF0\uDFF4\uDFF8-\uDFFF]|\uD83D[\uDC00-\uDC3E\uDC40\uDC42-\uDCFC\uDCFF-\uDD3D\uDD4B-\uDD4E\uDD50-\uDD67\uDD7A\uDD95\uDD96\uDDA4\uDDFB-\uDE4F\uDE80-\uDEC5\uDECC\uDED0-\uDED2\uDEEB\uDEEC\uDEF4-\uDEF8]|\uD83E[\uDD10-\uDD3A\uDD3C-\uDD3E\uDD40-\uDD45\uDD47-\uDD4C\uDD50-\uDD6B\uDD80-\uDD97\uDDC0\uDDD0-\uDDE6])|(?:[#\*0-9\xA9\xAE\u203C\u2049\u2122\u2139\u2194-\u2199\u21A9\u21AA\u231A\u231B\u2328\u23CF\u23E9-\u23F3\u23F8-\u23FA\u24C2\u25AA\u25AB\u25B6\u25C0\u25FB-\u25FE\u2600-\u2604\u260E\u2611\u2614\u2615\u2618\u261D\u2620\u2622\u2623\u2626\u262A\u262E\u262F\u2638-\u263A\u2640\u2642\u2648-\u2653\u2660\u2663\u2665\u2666\u2668\u267B\u267F\u2692-\u2697\u2699\u269B\u269C\u26A0\u26A1\u26AA\u26AB\u26B0\u26B1\u26BD\u26BE\u26C4\u26C5\u26C8\u26CE\u26CF\u26D1\u26D3\u26D4\u26E9\u26EA\u26F0-\u26F5\u26F7-\u26FA\u26FD\u2702\u2705\u2708-\u270D\u270F\u2712\u2714\u2716\u271D\u2721\u2728\u2733\u2734\u2744\u2747\u274C\u274E\u2753-\u2755\u2757\u2763\u2764\u2795-\u2797\u27A1\u27B0\u27BF\u2934\u2935\u2B05-\u2B07\u2B1B\u2B1C\u2B50\u2B55\u3030\u303D\u3297\u3299]|\uD83C[\uDC04\uDCCF\uDD70\uDD71\uDD7E\uDD7F\uDD8E\uDD91-\uDD9A\uDDE6-\uD
DFF\uDE01\uDE02\uDE1A\uDE2F\uDE32-\uDE3A\uDE50\uDE51\uDF00-\uDF21\uDF24-\uDF93\uDF96\uDF97\uDF99-\uDF9B\uDF9E-\uDFF0\uDFF3-\uDFF5\uDFF7-\uDFFF]|\uD83D[\uDC00-\uDCFD\uDCFF-\uDD3D\uDD49-\uDD4E\uDD50-\uDD67\uDD6F\uDD70\uDD73-\uDD7A\uDD87\uDD8A-\uDD8D\uDD90\uDD95\uDD96\uDDA4\uDDA5\uDDA8\uDDB1\uDDB2\uDDBC\uDDC2-\uDDC4\uDDD1-\uDDD3\uDDDC-\uDDDE\uDDE1\uDDE3\uDDE8\uDDEF\uDDF3\uDDFA-\uDE4F\uDE80-\uDEC5\uDECB-\uDED2\uDEE0-\uDEE5\uDEE9\uDEEB\uDEEC\uDEF0\uDEF3-\uDEF8]|\uD83E[\uDD10-\uDD3A\uDD3C-\uDD3E\uDD40-\uDD45\uDD47-\uDD4C\uDD50-\uDD6B\uDD80-\uDD97\uDDC0\uDDD0-\uDDE6])\uFE0F|[\t-\r \xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF])+$")
|
||||
(def regx-rtl-characters #"[^\u0591-\u06EF\u06FA-\u07FF\u200F\u202B\u202E\uFB1D-\uFDFD\uFE70-\uFEFC]*?[\u0591-\u06EF\u06FA-\u07FF\u200F\u202B\u202E\uFB1D-\uFDFD\uFE70-\uFEFC]")
|
||||
(def regx-url #"(?i)(?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9\-]+[.][a-z]{1,4}/?)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]){0,}")
|
||||
(def regx-tag #"#[a-z0-9\-]+")
|
||||
(def regx-mention #"@[a-z0-9\-]+")
|
||||
(def regx-bold #"\*[^*]+\*")
|
||||
(def regx-italic #"~[^~]+~")
|
||||
|
||||
(def ^:const dapp-permission-contact-code "contact-code")
|
||||
(def ^:const dapp-permission-web3 "web3")
|
||||
|
29
test/cljs/status_im/test/chat/models/message_content.cljs
Normal file
29
test/cljs/status_im/test/chat/models/message_content.cljs
Normal file
@ -0,0 +1,29 @@
|
||||
(ns status-im.test.chat.models.message-content
|
||||
(:require [cljs.test :refer-macros [deftest is testing]]
|
||||
[status-im.chat.models.message-content :as message-content]))
|
||||
|
||||
(deftest enrich-string-content-test
  (testing "Text content of the message is enriched correctly"
    ;; metadata computed for a message consisting of the given text
    (let [metadata-for (fn [text]
                         (:metadata (message-content/enrich-content {:text text})))]
      (is (not (metadata-for "Plain message")))
      (is (= {:bold [[5 14]]}
             (metadata-for "Some *styling* present")))
      (is (= {:bold [[5 14]]
              :tag  [[28 33] [38 43]]}
             (metadata-for "Some *styling* present with #tag1 and #tag2 as well"))))))
|
||||
|
||||
(deftest build-render-recipe-test
  (testing "Render tree is build from text"
    ;; full pipeline: wrap the text, enrich it, then build the recipe
    (let [recipe-for (fn [text]
                       (-> {:text text}
                           message-content/enrich-content
                           message-content/build-render-recipe))]
      (is (not (recipe-for "Plain message")))
      (is (= '(["Test " #{:text}]
               ["#status" #{:tag :text}]
               [" one three " #{:text}]
               ["#core-chat" #{:tag :bold :text}]
               [" (" #{:bold :text}]
               ["@developer" #{:mention :bold :text}]
               [")!" #{:bold :text}]
               [" By the way, " #{:text}]
               ["nice link(" #{:italic :text}]
               ["https://link.com" #{:link :italic :text}]
               [")" #{:italic :text}])
             (recipe-for "Test #status one three *#core-chat (@developer)!* By the way, ~nice link(https://link.com)~"))))))
|
@ -23,6 +23,7 @@
|
||||
[status-im.test.chat.models.input]
|
||||
[status-im.test.chat.models.loading]
|
||||
[status-im.test.chat.models.message]
|
||||
[status-im.test.chat.models.message-content]
|
||||
[status-im.test.chat.subs]
|
||||
[status-im.test.chat.views.message]
|
||||
[status-im.test.chat.views.photos]
|
||||
@ -88,6 +89,7 @@
|
||||
'status-im.test.chat.models.loading
|
||||
'status-im.test.chat.models.input
|
||||
'status-im.test.chat.models.message
|
||||
'status-im.test.chat.models.message-content
|
||||
'status-im.test.chat.views.message
|
||||
'status-im.test.chat.views.photos
|
||||
'status-im.test.chat.commands.core
|
||||
|
Loading…
x
Reference in New Issue
Block a user