Mikhail Rogachev 511d6bfc54
feat: add parsing for new links format (#3665)
* feat(share-links): Add protobuf and encode/decode url data methods

* feat(new-links-format): Adds generators for new links format

* feat: add parsing for new links format

* feat: add messenger-level pubkey serialization and tests

* feat: fix and test CreateCommunityURLWithChatKey

* feat: impl and test parseCommunityURLWithChatKey

* feat: fix and test CreateCommunityURLWithData

* feat: impl and test parseCommunityURLWithData (not working)

* feat: UrlDataResponse as response share urls api

* feat: impl & tested ShareCommunityChannelURLWithChatKey

* feat: impl & tested ParseCommunityChannelURLWithChatKey

* fix: bring urls to new format

* feat: add regexp for community channel urls

* feat: impl & test contact urls with chatKey, Ens and data

* fix: encodeDataURL/encodeDataURL patch from Samyoul

* fix: fix unmarshalling protobufs

* fix: fix minor issues, temporary comment TestParseUserURLWithENS

* fix: allow url to contain extra `#` in the signature

* fix: check signatures with SigToPub

* chore: lint fixes

* fix: encode the signature

* feat: Check provided channelID is Uuid

* fix(share-community-url): Remove if community encrypted scope

* fix: review fixes

* fix: use proto.Unmarshal instead of json.Marshal

* feat(share-urls): Adds TagsIndices to community data

* feat: support tag indices to community url data

---------

Co-authored-by: Boris Melnik <borismelnik@status.im>
2023-07-04 17:48:52 +04:00

183 lines
4.6 KiB
Go

package brotli
// utf8Position reports which position of a UTF-8 sequence the byte following
// c is expected to occupy. c is the most recently seen byte and last is the
// byte that preceded it.
//
// The result is 0 when the next byte is expected to be 'Byte 1' of a
// character, 1 for 'Byte 2', and 2 for 'Byte 3'. Non-zero results are limited
// to clamp through brotli_min_size_t.
func utf8Position(last uint, c uint, clamp uint) uint {
	switch {
	case c < 128:
		/* Next one is the 'Byte 1' again. */
		return 0
	case c >= 192:
		/* Next one is the 'Byte 2' of utf-8 encoding. */
		return brotli_min_size_t(1, clamp)
	case last < 0xE0:
		/* c is in [128, 192): decide over the last byte whether the sequence
		   ends here. It does: completed two or three byte coding. */
		return 0
	default:
		/* last >= 0xE0: next one is the 'Byte 3' of utf-8 encoding. */
		return brotli_min_size_t(2, clamp)
	}
}
// decideMultiByteStatsLevel chooses the UTF-8 modeling level for the len bytes
// of data that start at pos; every index is wrapped with mask.
//
// Each byte is assigned a position (0, 1 or 2) by utf8Position and the
// positions are tallied. The result is 0 when fewer than 25 bytes landed in
// positions 1 and 2 combined, and 1 otherwise.
func decideMultiByteStatsLevel(pos uint, len uint, mask uint, data []byte) uint {
	var tally [3]uint
	var prev uint
	for i := uint(0); i < len; i++ {
		cur := uint(data[(pos+i)&mask])
		tally[utf8Position(prev, cur, 2)]++
		prev = cur
	}

	if tally[1]+tally[2] < 25 {
		return 0
	}
	/* Should be 2 once tally[2] reaches 500, but 1 compresses better, so the
	   level never exceeds 1 regardless of tally[2]. */
	return 1
}
// estimateBitCostsForLiteralsUTF8 writes an estimated bit cost for each of the
// len bytes of data starting at pos into cost[0:len]. Every index into data is
// wrapped with mask.
//
// Bytes are split into up to three histograms selected by utf8Position, and
// the histograms follow a sliding window of roughly 495 bytes on either side
// of the byte being priced. cost must hold at least len elements.
func estimateBitCostsForLiteralsUTF8(pos uint, len uint, mask uint, data []byte, cost []float32) {
	const windowHalf uint = 495

	/* level is 0 (normal ASCII single byte modeling),
	   1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
	level := decideMultiByteStatsLevel(pos, len, mask, data)

	var histogram [3][256]uint
	var totals [3]uint /* Number of bytes currently counted in each histogram. */

	/* Bootstrap histograms with the first window_half bytes (or fewer). */
	bootstrap := brotli_min_size_t(windowHalf, len)
	var prev, slot uint
	for i := uint(0); i < bootstrap; i++ {
		cur := uint(data[(pos+i)&mask])
		histogram[slot][cur]++
		totals[slot]++
		slot = utf8Position(prev, cur, level)
		prev = cur
	}

	/* Compute bit costs with sliding window. */
	for i := uint(0); i < len; i++ {
		if i >= windowHalf {
			/* Remove a byte in the past. Its histogram is selected by the two
			   bytes before it, which count as 0 before the start of input. */
			var before1, before2 uint
			if i >= windowHalf+1 {
				before1 = uint(data[(pos+i-windowHalf-1)&mask])
			}
			if i >= windowHalf+2 {
				before2 = uint(data[(pos+i-windowHalf-2)&mask])
			}
			leaving := utf8Position(before2, before1, level)
			histogram[leaving][data[(pos+i-windowHalf)&mask]]--
			totals[leaving]--
		}

		if i+windowHalf < len {
			/* Add a byte in the future. */
			before1 := uint(data[(pos+i+windowHalf-1)&mask])
			before2 := uint(data[(pos+i+windowHalf-2)&mask])
			entering := utf8Position(before2, before1, level)
			histogram[entering][data[(pos+i+windowHalf)&mask]]++
			totals[entering]++
		}

		/* Price the current byte against the histogram for its position. */
		var before1, before2 uint
		if i >= 1 {
			before1 = uint(data[(pos+i-1)&mask])
		}
		if i >= 2 {
			before2 = uint(data[(pos+i-2)&mask])
		}
		current := utf8Position(before2, before1, level)
		count := histogram[current][data[(pos+i)&mask]]
		if count == 0 {
			/* Substitute 1 for an empty bucket before taking the logarithm. */
			count = 1
		}

		var litCost float64 = fastLog2(totals[current]) - fastLog2(count)
		litCost += 0.02905
		if litCost < 1.0 {
			/* Costs below one bit are pulled halfway towards one bit. */
			litCost *= 0.5
			litCost += 0.5
		}

		/* Make the first bytes more expensive -- seems to help, not sure why.
		   Perhaps because the entropy source is changing its properties
		   rapidly in the beginning of the file, perhaps because the beginning
		   of the data is a statistical "anomaly". */
		if i < 2000 {
			litCost += 0.7 - (float64(2000-i) / 2000.0 * 0.35)
		}
		cost[i] = float32(litCost)
	}
}
// estimateBitCostsForLiterals writes an estimated bit cost for each of the len
// bytes of data starting at pos into cost[0:len]. Every index into data is
// wrapped with mask.
//
// When isMostlyUTF8 accepts the input, the work is delegated to
// estimateBitCostsForLiteralsUTF8. Otherwise a single byte histogram is
// maintained over a sliding window of roughly 2000 bytes on either side of
// the byte being priced. cost must hold at least len elements.
func estimateBitCostsForLiterals(pos uint, len uint, mask uint, data []byte, cost []float32) {
	if isMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio) {
		estimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost)
		return
	}

	const windowHalf uint = 2000
	var histogram [256]uint

	/* Bootstrap histogram with the first window_half bytes (or fewer). */
	windowSize := brotli_min_size_t(windowHalf, len)
	for i := uint(0); i < windowSize; i++ {
		histogram[data[(pos+i)&mask]]++
	}

	/* Compute bit costs with sliding window. */
	for i := uint(0); i < len; i++ {
		if i >= windowHalf {
			/* Remove a byte in the past. */
			histogram[data[(pos+i-windowHalf)&mask]]--
			windowSize--
		}
		if i+windowHalf < len {
			/* Add a byte in the future. */
			histogram[data[(pos+i+windowHalf)&mask]]++
			windowSize++
		}

		count := histogram[data[(pos+i)&mask]]
		if count == 0 {
			/* Substitute 1 for an empty bucket before taking the logarithm. */
			count = 1
		}

		litCost := fastLog2(windowSize) - fastLog2(count)
		litCost += 0.029
		if litCost < 1.0 {
			/* Costs below one bit are pulled halfway towards one bit. */
			litCost *= 0.5
			litCost += 0.5
		}
		cost[i] = float32(litCost)
	}
}