research/whisper_scalability/whisper.py

128 lines
4.0 KiB
Python
Raw Normal View History

class bcolors:
    """Named ANSI escape sequences used to colour and style terminal output.

    Each attribute is a plain string meant to be prepended to text; ENDC is
    appended afterwards to reset the terminal back to its default style.
    """

    # Colour prefixes.
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Reset sequence that ends any of the styles above.
    ENDC = "\033[0m"
2019-10-16 05:13:30 +00:00
# https://web.archive.org/web/20111010015624/http://blogmag.net/blog/read/38/Print_human_readable_file_size
def sizeof_fmt(num):
    """Format a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, then rendered with no decimals and a minimum
    width of three characters, e.g. ``"  1KB"`` or ``"1000KB"``.

    Args:
        num: Size in bytes (int or float).

    Returns:
        The formatted size followed by its unit, with no separating space.
        Values of 1024 TB or more are still expressed in TB rather than
        falling off the end of the unit list and returning None.
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB']
    for unit in units[:-1]:
        if num < 1024.0:
            return "%3.0f%s" % (num, unit)
        num /= 1024.0
    # Largest unit: always return a string so callers can concatenate safely.
    return "%3.0f%s" % (num, units[-1])
def magnitude_fmt(num):
    """Format a count with a decimal magnitude suffix ('', 'k' or 'm').

    The value is divided by 1000 until it drops below 1000 or the largest
    suffix ('m') is reached, then rendered as an integer with a minimum
    width of two characters, e.g. ``"100"``, ``"10k"`` or ``" 1m"``.

    Args:
        num: Non-negative count (int or float).

    Returns:
        The formatted count followed by its suffix. Counts of 1e9 or more
        are still expressed with the 'm' suffix (e.g. ``"1000m"``) rather
        than returning None, which would break string concatenation in
        callers.
    """
    suffixes = ['', 'k', 'm']
    for suffix in suffixes[:-1]:
        if num < 1000:
            return "%2d%s" % (num, suffix)
        num /= 1000
    # Largest suffix: always return a string.
    return "%2d%s" % (num, suffixes[-1])
2019-10-16 05:42:02 +00:00
# Color format based on daily bandwidth usage
# <10mb/d = good, <30mb/d ok, <100mb/d bad, 100mb/d+ fail.
def load_color_prefix(load):
    """Pick the ANSI colour prefix for a daily bandwidth load.

    Args:
        load: Daily load, in the same unit the callers compute it in
            (bytes, given that envelope_size is 1024 for a 1 KB envelope).

    Returns:
        One of the bcolors colour strings: OKBLUE below 10 "mb", OKGREEN
        below 30, WARNING below 100, and FAIL otherwise.
    """
    # One "mb" for the purpose of these thresholds is 1024 * 1000 bytes.
    mb = 1024 * 1000

    if load < (mb * 10):
        return bcolors.OKBLUE
    if load < (mb * 30):
        return bcolors.OKGREEN
    if load < (mb * 100):
        return bcolors.WARNING
    return bcolors.FAIL
def load_color_fmt(load, string):
    """Wrap ``string`` in the colour that corresponds to ``load``.

    Args:
        load: Daily bandwidth load used to select the colour.
        string: Text to colourise.

    Returns:
        ``string`` prefixed with the colour chosen by load_color_prefix and
        followed by the bcolors.ENDC reset sequence.
    """
    prefix = load_color_prefix(load)
    colored = prefix + string
    return colored + bcolors.ENDC
# We assume an envelope is 1kb
2019-10-16 05:13:30 +00:00
# Size of a single envelope; 1024 corresponds to the assumed 1 KB, so the
# unit is bytes.
envelope_size = 1024
# Example network sizes: 100, 10k, 1m - jumping two orders of magnitude.
# NOTE(review): this module-level value is not read by the functions visible
# here; they take their own n_users argument.
n_users = 10000
# Number of envelopes sent per user-visible message.
# Due to negotiation, data sync, etc.
# Rough assumed overhead, treated as a constant factor.
envelopes_per_message = 10
# Messages a user receives per day.
# TODO: Split up by channel, etc
received_messages_per_day = 100
def bandwidth_usage(n_users):
    """Print the given number of users.

    Currently this only echoes its argument. Note that the parameter shadows
    the module-level ``n_users`` value inside this function.
    """
    print(n_users)
# We assume a node is not relaying messages, but only sending
# Goal:
# - make it user-bound, not network-bound
# - reasonable bw and fetch time
# ~1GB per month, ~ 30 mb per day, ~1 mb per hour
2019-10-16 05:13:30 +00:00
def case1():
    """Print the Case 1 report: a node only receives messages meant for it.

    The daily load is envelope_size * envelopes_per_message *
    received_messages_per_day, so it does not depend on the number of users
    in the network. The result line is coloured according to the load.

    Output is written with single-argument ``print(...)`` calls, which
    behave the same under Python 2 and Python 3.
    """
    # Case 1: only receiving messages meant for you
    load = envelope_size * envelopes_per_message * received_messages_per_day

    report = [
        bcolors.HEADER + "\nCase 1. Only receiving messages meant for you" + bcolors.ENDC,
        "",
        "Assumptions:",
        # envelope_size is a byte count (1024 for a 1 KB envelope), so it is
        # labelled as bytes rather than "kb".
        "- A1. Envelope size (static): " + str(envelope_size) + " bytes",
        "- A2. Envelopes / message (static): " + str(envelopes_per_message),
        "- A3. Received messages / day (static): " + str(received_messages_per_day),
        "- A4. Only receiving messages meant for you",
        "",
        load_color_fmt(load, "For N users, receiving bandwidth is " + sizeof_fmt(load) + "/day"),
        "",
        "------------------------------------------------------------",
    ]
    for line in report:
        print(line)
def case2():
    """Print the Case 2 report: a node receives the messages of every user.

    The daily load is the per-user load multiplied by the number of users,
    so it grows linearly with network size. The report shows the load for
    100, 10k and 1m users, each line coloured according to its load.

    Output is written with single-argument ``print(...)`` calls, which
    behave the same under Python 2 and Python 3.
    """
    # Case 2: receiving all messages
    def load_users(n_users):
        """Return the daily receive load for a network of n_users users."""
        return envelope_size * envelopes_per_message * \
            received_messages_per_day * n_users

    def usage_str(n_users):
        """Return the coloured result line for a network of n_users users."""
        # Compute the load once and reuse it for both colour and text.
        load = load_users(n_users)
        return load_color_fmt(load, "For " + magnitude_fmt(n_users) + " users, receiving bandwidth is " + sizeof_fmt(load) + "/day")

    report = [
        bcolors.HEADER + "\nCase 2. Receiving messages for everyone" + bcolors.ENDC,
        "",
        "Assumptions:",
        # envelope_size is a byte count (1024 for a 1 KB envelope), so it is
        # labelled as bytes rather than "kb".
        "- A1. Envelope size (static): " + str(envelope_size) + " bytes",
        "- A2. Envelopes / message (static): " + str(envelopes_per_message),
        "- A3. Received messages / day (static): " + str(received_messages_per_day),
        "- A4. Received messages for everyone",
        "",
        usage_str(100),
        usage_str(100 * 100),
        usage_str(100 * 100 * 100),
        "",
        "------------------------------------------------------------",
    ]
    for line in report:
        print(line)
2019-10-16 05:42:02 +00:00
# Fraction of all messages that are private, i.e. sent in 1:1 and group
# chats; assumed to be half.
# XXX: Implicitly assumes the message/envelope ratio is the same for 1:1 and
# public chats, which is probably not true due to things like key
# negotiation and data sync.
# NOTE(review): not read by any function visible here; presumably an input
# for the planned case 3 - confirm.
private_message_proportion = 0.5
# Case 3: all private messages go over one discovery topic
2019-10-16 05:13:30 +00:00
# Print the report for each implemented scenario, in order.
case1()
case2()
2019-10-16 05:42:02 +00:00
#case3()
2019-10-16 05:17:03 +00:00
# Ok, let's get serious. What assumptions do we need to encode?
# Also, what did I observe? I observed 15GB/m = 500mb per day.
2019-10-16 05:42:02 +00:00
# Things to encode:
# - Noisy topic
# - Duplicate messages
# - Bloom filter false positives
# - Bugs / invalid messages
# - Offline case dominant