// go-waku/examples/chat2-reliable/chat_reliability_test.go

package main

import (
	"chat2-reliable/pb"
	"context"
	"fmt"
	"sync"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/urfave/cli/v2"
	"github.com/waku-org/go-waku/waku/v2/node"
	"github.com/waku-org/go-waku/waku/v2/peerstore"
	"github.com/waku-org/go-waku/waku/v2/protocol/relay"
)

// TestEnvironment bundles the Waku nodes created for a test together with the
// Chat instance attached to each of them. setupTestEnvironment fills both
// slices so that chats[i] is the chat created on nodes[i].
type TestEnvironment struct {
	// nodes holds the started Waku nodes, one per participant.
	nodes []*node.WakuNode
	// chats holds the chat instances, index-aligned with nodes.
	chats []*Chat
}

// setupTestEnvironment starts nodeCount Waku nodes, attaches a Chat named
// "Node<i>" to each one, and links them in a ring: node i adds node
// (i+1) % nodeCount as a static peer on the relay topics of chat i.
//
// It returns a nil environment and an error if a node or chat cannot be
// created, if the next node in the ring reports no listen address, or if
// adding the peer fails.
func setupTestEnvironment(ctx context.Context, t *testing.T, nodeCount int) (*TestEnvironment, error) {
	t.Helper()

	t.Logf("Setting up test environment with %d nodes", nodeCount)
	env := &TestEnvironment{
		nodes: make([]*node.WakuNode, nodeCount),
		chats: make([]*Chat, nodeCount),
	}

	for i := 0; i < nodeCount; i++ {
		// Named wakuNode so the imported node package is not shadowed.
		wakuNode, err := setupTestNode(ctx, t)
		if err != nil {
			return nil, fmt.Errorf("failed to set up node %d: %w", i, err)
		}
		env.nodes[i] = wakuNode

		chat, err := setupTestChat(ctx, wakuNode, fmt.Sprintf("Node%d", i))
		if err != nil {
			return nil, fmt.Errorf("failed to set up chat for node %d: %w", i, err)
		}
		env.chats[i] = chat
	}

	t.Log("Connecting nodes in ring topology")
	for i := 0; i < nodeCount; i++ {
		nextIndex := (i + 1) % nodeCount

		// Guard the [0] access: a node without any listen address would
		// otherwise crash the whole test binary with an index panic.
		addrs := env.nodes[nextIndex].ListenAddresses()
		if len(addrs) == 0 {
			return nil, fmt.Errorf("failed to connect node %d to node %d: node %d has no listen addresses", i, nextIndex, nextIndex)
		}

		topics := env.chats[i].options.Relay.Topics.Value()
		if _, err := env.nodes[i].AddPeer(addrs[0], peerstore.Static, topics); err != nil {
			return nil, fmt.Errorf("failed to connect node %d to node %d: %w", i, nextIndex, err)
		}
	}

	t.Log("Test environment setup complete")
	return env, nil
}

// setupTestNode creates a Waku node with the relay protocol enabled and starts
// it with ctx. The node's Stop method is registered with t.Cleanup, so the
// node is shut down when the calling test finishes and callers do not need to
// stop it themselves.
//
// It returns the error from node.New or Start unchanged if either fails.
func setupTestNode(ctx context.Context, t *testing.T) (*node.WakuNode, error) {
	t.Helper()

	opts := []node.WakuNodeOption{
		node.WithWakuRelay(),
		// node.WithWakuStore(),
	}
	// Named wakuNode so the imported node package is not shadowed.
	wakuNode, err := node.New(opts...)
	if err != nil {
		return nil, err
	}
	if err := wakuNode.Start(ctx); err != nil {
		return nil, err
	}

	// Without this, every test leaves its started nodes running for the rest
	// of the test binary's lifetime.
	t.Cleanup(wakuNode.Stop)

	return wakuNode, nil
}

// PeerConnection aliases node.PeerConnection; setupTestChat uses it as the
// element type of the connection-notifier channel it passes to NewChat.
type PeerConnection = node.PeerConnection

// setupTestChat builds a Chat on top of node using the given nickname, the
// "/test/1/chat/proto" content topic, and relay enabled on the default Waku
// pubsub topic.
//
// It returns an error if the relay topic cannot be recorded in the options or
// if NewChat returns nil.
func setupTestChat(ctx context.Context, node *node.WakuNode, nickname string) (*Chat, error) {
	topics := cli.StringSlice{}
	// Set reports an error; surface it instead of silently continuing with an
	// empty topic list.
	if err := topics.Set(relay.DefaultWakuTopic); err != nil {
		return nil, fmt.Errorf("failed to set relay topic: %w", err)
	}

	options := Options{
		Nickname:     nickname,
		ContentTopic: "/test/1/chat/proto",
		Relay: RelayOptions{
			Enable: true,
			Topics: topics,
		},
	}

	// NewChat takes a connection-notifier channel; the tests never send on it.
	connNotifier := make(chan PeerConnection)

	chat := NewChat(ctx, node, connNotifier, options)
	if chat == nil {
		return nil, fmt.Errorf("failed to create chat instance")
	}
	return chat, nil
}

// areNodesConnected reports whether every node in nodes currently has exactly
// expectedPeers peers according to its host's network. It returns true for an
// empty slice.
func areNodesConnected(nodes []*node.WakuNode, expectedPeers int) bool {
	for idx := range nodes {
		peerCount := len(nodes[idx].Host().Network().Peers())
		if peerCount == expectedPeers {
			continue
		}
		return false
	}
	return true
}

// TestLamportTimestamps sends a single message through a three-node ring and
// checks that every chat ends up with a non-zero Lamport timestamp and that
// nodes 1 and 2 hold the message as the first entry of their history.
func TestLamportTimestamps(t *testing.T) {
	const (
		nodeCount = 3
		payload   = "Message from Node 0"
	)

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	t.Log("Starting TestLamportTimestamps")

	env, err := setupTestEnvironment(ctx, t, nodeCount)
	require.NoError(t, err, "Failed to set up test environment")

	ringConnected := func() bool { return areNodesConnected(env.nodes, 2) }
	require.Eventually(t, ringConnected, 30*time.Second, 1*time.Second, "Nodes failed to connect")

	for idx := range env.chats {
		t.Logf("Node %d initial Lamport timestamp: %d", idx, env.chats[idx].getLamportTimestamp())
	}

	t.Log("Sending message from Node 0")
	env.chats[0].SendMessage(payload)

	t.Log("Waiting for message propagation")
	// Propagation is considered done once no chat is still at timestamp zero.
	allClocksAdvanced := func() bool {
		for idx := range env.chats {
			if env.chats[idx].getLamportTimestamp() == 0 {
				return false
			}
		}
		return true
	}
	require.Eventually(t, allClocksAdvanced, 30*time.Second, 1*time.Second, "Message propagation failed")

	sender, first, second := env.chats[0], env.chats[1], env.chats[2]

	assert.Greater(t, sender.getLamportTimestamp(), int32(0), "Sender's Lamport timestamp should be greater than 0")
	assert.Greater(t, first.getLamportTimestamp(), int32(0), "Node 1's Lamport timestamp should be greater than 0")
	assert.Greater(t, second.getLamportTimestamp(), int32(0), "Node 2's Lamport timestamp should be greater than 0")

	assert.NotEmpty(t, first.messageHistory, "Node 1 should have received the message")
	assert.NotEmpty(t, second.messageHistory, "Node 2 should have received the message")

	// Only compare content when there is something to compare, so an empty
	// history is reported by the NotEmpty checks above rather than a panic.
	if history := first.messageHistory; len(history) > 0 {
		assert.Equal(t, payload, history[0].Content, "Node 1 should have received the correct message")
	}
	if history := second.messageHistory; len(history) > 0 {
		assert.Equal(t, payload, history[0].Content, "Node 2 should have received the correct message")
	}

	t.Log("TestLamportTimestamps completed successfully")
}

// TestCausalOrdering has nodes 0, 1 and 2 each send one message, 100ms apart,
// and then checks that every node's history holds the three messages in the
// order they were sent.
func TestCausalOrdering(t *testing.T) {
	const nodeCount = 3

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	t.Log("Starting TestCausalOrdering")

	env, err := setupTestEnvironment(ctx, t, nodeCount)
	require.NoError(t, err, "Failed to set up test environment")

	ringConnected := func() bool { return areNodesConnected(env.nodes, 2) }
	require.Eventually(t, ringConnected, 30*time.Second, 1*time.Second, "Nodes failed to connect")

	// Each step names the sending node, the text it sends, and the failure
	// message used when that text is not at the matching history position.
	steps := []struct {
		sender  int
		content string
		failure string
	}{
		{0, "Message 1 from Node 0", "Node %d: First message incorrect"},
		{1, "Message 2 from Node 1", "Node %d: Second message incorrect"},
		{2, "Message 3 from Node 2", "Node %d: Third message incorrect"},
	}

	t.Log("Sending messages from different nodes")
	for _, step := range steps {
		env.chats[step.sender].SendMessage(step.content)
		time.Sleep(100 * time.Millisecond)
	}

	t.Log("Waiting for message propagation")
	everyoneHasThree := func() bool {
		for idx, chat := range env.chats {
			received := len(chat.messageHistory)
			t.Logf("Node %d message history length: %d", idx, received)
			if received != 3 {
				return false
			}
		}
		return true
	}
	require.Eventually(t, everyoneHasThree, 30*time.Second, 1*time.Second, "Messages did not propagate to all nodes")

	for idx, chat := range env.chats {
		assert.Len(t, chat.messageHistory, 3, "Node %d should have 3 messages", idx)
		for pos, step := range steps {
			assert.Equal(t, step.content, chat.messageHistory[pos].Content, step.failure, idx)
		}
	}

	t.Log("TestCausalOrdering completed successfully")
}

// TestBloomFilterDuplicateDetection delivers one message from node 0 to node
// 1, then feeds node 1 a copy carrying the same MessageId through
// processReceivedMessage and checks that node 1's history still has exactly
// one entry.
func TestBloomFilterDuplicateDetection(t *testing.T) {
	const nodeCount = 2

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	t.Log("Starting TestBloomFilterDuplicateDetection")

	env, err := setupTestEnvironment(ctx, t, nodeCount)
	require.NoError(t, err, "Failed to set up test environment")

	pairConnected := func() bool { return areNodesConnected(env.nodes, 1) }
	require.Eventually(t, pairConnected, 30*time.Second, 1*time.Second, "Nodes failed to connect")

	t.Log("Sending a message")
	testMessage := "Test message"
	env.chats[0].SendMessage(testMessage)

	t.Log("Waiting for message propagation")
	receiver := env.chats[1]
	var receivedMsg *pb.Message
	require.Eventually(t, func() bool {
		if len(receiver.messageHistory) != 1 {
			return false
		}
		// Keep the delivered message so a duplicate can be built from it.
		receivedMsg = receiver.messageHistory[0]
		return true
	}, 30*time.Second, 1*time.Second, "Message did not propagate to second node")

	require.NotNil(t, receivedMsg, "Received message should not be nil")

	t.Log("Simulating receiving the same message again")

	// Copy every field, including MessageId, so the receiver sees a true
	// duplicate rather than a new message with the same content.
	duplicateMsg := &pb.Message{}
	duplicateMsg.SenderId = receivedMsg.SenderId
	duplicateMsg.MessageId = receivedMsg.MessageId
	duplicateMsg.LamportTimestamp = receivedMsg.LamportTimestamp
	duplicateMsg.CausalHistory = receivedMsg.CausalHistory
	duplicateMsg.ChannelId = receivedMsg.ChannelId
	duplicateMsg.BloomFilter = receivedMsg.BloomFilter
	duplicateMsg.Content = receivedMsg.Content

	receiver.processReceivedMessage(duplicateMsg)

	assert.Len(t, receiver.messageHistory, 1, "Node 1 should still have only one message (no duplicates)")

	t.Log("TestBloomFilterDuplicateDetection completed successfully")
}

// TestNetworkPartition checks that a node which misses a message while
// disconnected ends up with it after reconnecting. Node 2 is disconnected,
// Node 0 sends "Missed Message", Node 2 is reconnected, and Node 1 then sends
// "New Message". Node 2's history is expected to reach four entries, in the
// order the messages were sent.
func TestNetworkPartition(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	defer cancel()

	t.Log("Starting TestNetworkPartition")

	nodeCount := 3
	env, err := setupTestEnvironment(ctx, t, nodeCount)
	require.NoError(t, err, "Failed to set up test environment")

	nc := NewNetworkController(ctx, env.nodes, env.chats)

	require.Eventually(t, func() bool {
		return areNodesConnected(env.nodes, 2)
	}, 60*time.Second, 1*time.Second, "Nodes failed to connect")

	t.Log("Stage 1: Sending initial messages")
	env.chats[0].SendMessage("Message 1")
	time.Sleep(100 * time.Millisecond)
	env.chats[1].SendMessage("Message 2")
	time.Sleep(100 * time.Millisecond)

	t.Log("Waiting for message propagation")
	require.Eventually(t, func() bool {
		for _, chat := range env.chats {
			if len(chat.messageHistory) != 2 {
				return false
			}
		}
		return true
	}, 30*time.Second, 1*time.Second, "Messages did not propagate to all nodes")

	// Verify that Node 2 has messages before disconnection
	require.Equal(t, 2, len(env.chats[2].messageHistory), "Node 2 does not have all messages")

	t.Log("Stage 2: Simulating network partition for Node 2")
	nc.DisconnectNode(env.nodes[2])
	time.Sleep(1 * time.Second) // Allow time for disconnection to take effect

	t.Log("Stage 3: Sending message that Node 2 will miss")
	env.chats[0].SendMessage("Missed Message")
	time.Sleep(100 * time.Millisecond)

	t.Log("Stage 4: Reconnecting Node 2")
	nc.ReconnectNode(env.nodes[2])
	time.Sleep(5 * time.Second) // Allow time for reconnection to take effect

	// Verify that Node 2 didn't receive the message
	require.Equal(t, 2, len(env.chats[2].messageHistory), "Node 2 should not have received the missed message")

	t.Log("Stage 5: Sending a new message that depends on the missed message")
	env.chats[1].SendMessage("New Message")

	// Verify that Node 2 received the new message
	require.Eventually(t, func() bool {
		msgCount := len(env.chats[2].messageHistory)
		return msgCount >= 3
	}, 30*time.Second, 5*time.Second, "Node 2 should have received the new message")

	t.Log("Stage 6: Waiting for message recovery")
	require.Eventually(t, func() bool {
		msgCount := len(env.chats[2].messageHistory)
		return msgCount == 4
	}, 30*time.Second, 5*time.Second, "Message recovery failed")

	// Print the final message history for all nodes; this includes Node 2,
	// whose history is asserted on below.
	for i, chat := range env.chats {
		t.Logf("Node %d final message history:", i)
		for j, msg := range chat.messageHistory {
			t.Logf("  Message %d: %s", j+1, msg.Content)
		}
	}

	// The Eventually above guarantees exactly four entries, so indexing 0..3
	// is safe here.
	recovered := env.chats[2].messageHistory
	assert.Equal(t, "Message 1", recovered[0].Content, "First message incorrect")
	assert.Equal(t, "Message 2", recovered[1].Content, "Second message incorrect")
	assert.Equal(t, "Missed Message", recovered[2].Content, "Missed message not recovered")
	assert.Equal(t, "New Message", recovered[3].Content, "New message incorrect")
}

// TestConcurrentMessageSending has five nodes each send ten messages from
// their own goroutine, 10ms apart, and then waits until every node's history
// contains all fifty messages.
func TestConcurrentMessageSending(t *testing.T) {
	const (
		nodeCount    = 5
		messageCount = 10
	)

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	defer cancel()

	t.Log("Starting TestConcurrentMessageSending")

	env, err := setupTestEnvironment(ctx, t, nodeCount)
	require.NoError(t, err, "Failed to set up test environment")

	ringConnected := func() bool { return areNodesConnected(env.nodes, 2) }
	require.Eventually(t, ringConnected, 60*time.Second, 3*time.Second, "Nodes failed to connect")

	t.Log("Sending messages concurrently")
	var senders sync.WaitGroup
	senders.Add(len(env.chats))
	for idx, chat := range env.chats {
		// One goroutine per node; sender and id are passed in explicitly so
		// each goroutine works on its own copies.
		go func(sender *Chat, id int) {
			defer senders.Done()
			for seq := 0; seq < messageCount; seq++ {
				sender.SendMessage(fmt.Sprintf("Message %d from Node %d", seq, id))
				time.Sleep(10 * time.Millisecond)
			}
		}(chat, idx)
	}
	senders.Wait()

	t.Log("Waiting for message propagation")
	totalExpectedMessages := len(env.chats) * messageCount
	allDelivered := func() bool {
		for idx := range env.chats {
			if len(env.chats[idx].messageHistory) != totalExpectedMessages {
				return false
			}
		}
		return true
	}
	require.Eventually(t, allDelivered, 2*time.Minute, 1*time.Second, "Messages did not propagate to all nodes")

	for idx, chat := range env.chats {
		assert.Len(t, chat.messageHistory, totalExpectedMessages, "Node %d should have received all messages", idx)
	}

	t.Log("TestConcurrentMessageSending completed successfully")
}

// TestLargeGroupScaling builds a twenty-node ring, sends one message from node
// 0, and checks that every node's history ends up holding exactly that
// message.
func TestLargeGroupScaling(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	t.Log("Starting TestLargeGroupScaling")

	nodeCount := 20
	env, err := setupTestEnvironment(ctx, t, nodeCount)
	require.NoError(t, err, "Failed to set up test environment")

	require.Eventually(t, func() bool {
		return areNodesConnected(env.nodes, 2)
	}, 2*time.Minute, 3*time.Second, "Nodes failed to connect")

	// Send a message from the first node
	env.chats[0].SendMessage("Broadcast message to large group")

	// Poll for propagation instead of sleeping for a fixed interval: the test
	// finishes as soon as every node has the message and tolerates slower
	// runs. The result is non-fatal so the per-node report below still runs.
	assert.Eventually(t, func() bool {
		for _, chat := range env.chats {
			if len(chat.messageHistory) != 1 {
				return false
			}
		}
		return true
	}, 1*time.Minute, 1*time.Second, "Broadcast message did not propagate to all nodes")

	// Verify all nodes received the message
	for i, chat := range env.chats {
		// Skip the content check when the length is wrong; indexing an empty
		// history would panic and hide the results for the remaining nodes.
		if !assert.Len(t, chat.messageHistory, 1, "Node %d should have received the broadcast message", i) {
			continue
		}
		assert.Equal(t, "Broadcast message to large group", chat.messageHistory[0].Content)
	}

	t.Log("TestLargeGroupScaling completed successfully")
}

// TestEagerPushMechanism disconnects node 1, sends a message from node 0,
// reconnects node 1, and expects node 1's history to reach exactly one
// message within the polling window.
func TestEagerPushMechanism(t *testing.T) {
	const nodeCount = 2

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	env, err := setupTestEnvironment(ctx, t, nodeCount)
	require.NoError(t, err, "Failed to set up test environment")

	controller := NewNetworkController(ctx, env.nodes, env.chats)
	sender, receiver := env.chats[0], env.chats[1]
	receiverNode := env.nodes[1]

	// Take the receiving node offline before anything is sent.
	controller.DisconnectNode(receiverNode)

	sender.SendMessage("Test eager push")

	// Give the message time to be added to the outgoing buffer.
	time.Sleep(1 * time.Second)

	// Bring the receiving node back.
	controller.ReconnectNode(receiverNode)

	// Give eager push time to resend the message.
	time.Sleep(5 * time.Second)

	delivered := func() bool {
		return len(receiver.messageHistory) == 1
	}
	assert.Eventually(t, delivered, 10*time.Second, 1*time.Second, "Node 1 should have received the message via eager push")
}

// TestBloomFilterWindow shortens each chat's bloom filter window to two
// seconds, sends a message from node 0, and checks that node 1's filter
// reports the message ID at first but no longer does after the window has
// passed and Clean has been called. It then sends a second message and checks
// that node 1's filter reports the new ID.
func TestBloomFilterWindow(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	nodeCount := 2
	env, err := setupTestEnvironment(ctx, t, nodeCount)
	require.NoError(t, err, "Failed to set up test environment")

	// Reduce bloom filter window for testing
	for _, chat := range env.chats {
		chat.bloomFilter.window = 2 * time.Second
	}

	// Send a message
	env.chats[0].SendMessage("Test bloom filter window")
	// Fail cleanly instead of panicking on the index below if the sender did
	// not record the message.
	require.NotEmpty(t, env.chats[0].messageHistory, "Sender should have recorded the first message")
	messageID := env.chats[0].messageHistory[0].MessageId

	// Check if the message is in the bloom filter
	assert.Eventually(t, func() bool {
		return env.chats[1].bloomFilter.Test(messageID)
	}, 30*time.Second, 1*time.Second, "Message should be in the bloom filter")

	// Wait for the bloom filter window to pass
	time.Sleep(3 * time.Second)

	// Clean the bloom filter
	env.chats[1].bloomFilter.Clean()

	time.Sleep(3 * time.Second)

	// Check if the message is no longer in the bloom filter
	assert.False(t, env.chats[1].bloomFilter.Test(messageID), "Message should no longer be in the bloom filter")

	// Send another message to ensure the filter still works for new messages
	env.chats[0].SendMessage("New test message")
	time.Sleep(1 * time.Second)

	// Same guard as above for the second entry.
	require.GreaterOrEqual(t, len(env.chats[0].messageHistory), 2, "Sender should have recorded the second message")
	newMessageID := env.chats[0].messageHistory[1].MessageId
	// Check if the new message is in the bloom filter
	assert.Eventually(t, func() bool {
		return env.chats[1].bloomFilter.Test(newMessageID)
	}, 30*time.Second, 1*time.Second, "New message should be in the bloom filter")
}

// TestConflictResolution feeds two messages that share Lamport timestamp 1 to
// nodes 0 and 1 in opposite orders via processReceivedMessage, and checks that
// both nodes end up with the same MessageId at history positions 0 and 1.
func TestConflictResolution(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	nodeCount := 3
	env, err := setupTestEnvironment(ctx, t, nodeCount)
	require.NoError(t, err, "Failed to set up test environment")

	// Create conflicting messages with the same Lamport timestamp
	conflictingMsg1 := &pb.Message{
		SenderId:         "Node0",
		MessageId:        "msg1",
		LamportTimestamp: 1,
		Content:          "Conflict 1",
	}
	conflictingMsg2 := &pb.Message{
		SenderId:         "Node1",
		MessageId:        "msg2",
		LamportTimestamp: 1,
		Content:          "Conflict 2",
	}

	// Process the conflicting messages in different orders on different nodes
	env.chats[0].processReceivedMessage(conflictingMsg1)
	env.chats[0].processReceivedMessage(conflictingMsg2)

	env.chats[1].processReceivedMessage(conflictingMsg2)
	env.chats[1].processReceivedMessage(conflictingMsg1)

	// Fail cleanly instead of panicking on the index expressions below if a
	// node did not store both messages.
	for i := 0; i < 2; i++ {
		require.GreaterOrEqual(t, len(env.chats[i].messageHistory), 2, "Node %d should have stored both conflicting messages", i)
	}

	// Check if the messages are ordered consistently across nodes
	assert.Equal(t, env.chats[0].messageHistory[0].MessageId, env.chats[1].messageHistory[0].MessageId, "Conflicting messages should be ordered consistently")
	assert.Equal(t, env.chats[0].messageHistory[1].MessageId, env.chats[1].messageHistory[1].MessageId, "Conflicting messages should be ordered consistently")
}

// TestNewNodeSyncAndMessagePropagation starts with two connected nodes that
// exchange two messages, then adds a third node peered with node 0. It expects
// the new node's history to reach the two earlier messages, and then checks
// that a message sent by an old node and one sent by the new node both reach
// all three histories.
func TestNewNodeSyncAndMessagePropagation(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	t.Log("Starting TestNewNodeSyncAndMessagePropagation")

	// Set up initial network with 2 nodes
	initialNodeCount := 2
	env, err := setupTestEnvironment(ctx, t, initialNodeCount)
	require.NoError(t, err, "Failed to set up initial test environment")

	// Ensure initial nodes are connected
	require.Eventually(t, func() bool {
		return areNodesConnected(env.nodes, 1)
	}, 60*time.Second, 1*time.Second, "Initial nodes failed to connect")

	t.Log("Sending initial messages")
	env.chats[0].SendMessage("Initial message 1")
	env.chats[1].SendMessage("Initial message 2")

	// Wait for message propagation
	time.Sleep(5 * time.Second)

	// Verify initial messages are received by both nodes
	for i, chat := range env.chats {
		assert.Len(t, chat.messageHistory, 2, "Node %d should have 2 initial messages", i)
	}

	t.Log("Adding new node to the network")
	newNode, err := setupTestNode(ctx, t)
	require.NoError(t, err, "Failed to set up new node")
	newChat, err := setupTestChat(ctx, newNode, "NewNode")
	require.NoError(t, err, "Failed to set up new chat")

	env.nodes = append(env.nodes, newNode)
	env.chats = append(env.chats, newChat)

	// Connect new node to the network
	_, err = env.nodes[2].AddPeer(env.nodes[0].ListenAddresses()[0], peerstore.Static, env.chats[2].options.Relay.Topics.Value())
	require.NoError(t, err, "Failed to connect new node to the network")

	t.Log("Waiting for new node to sync")
	require.Eventually(t, func() bool {
		msgCount := len(env.chats[2].messageHistory)
		return msgCount == 2
	}, 1*time.Minute, 5*time.Second, "New node failed to sync message history")

	t.Log("Sending message from old node")
	env.chats[0].SendMessage("Message from old node")

	// Wait for message propagation
	time.Sleep(10 * time.Second)

	// Verify the message is received by all nodes
	for i, chat := range env.chats {
		assert.Len(t, chat.messageHistory, 3, "Node %d should have 3 messages", i)
	}

	t.Log("Sending message from new node")
	env.chats[2].SendMessage("Message from new node")

	// Wait for message propagation
	time.Sleep(10 * time.Second)

	// Verify the message from new node is received by all nodes
	for i, chat := range env.chats {
		assert.Len(t, chat.messageHistory, 4, "Node %d should have 4 messages", i)
	}

	// Range over the chats rather than a hard-coded count, and skip empty
	// histories: indexing len-1 on an empty slice would panic and mask the
	// assertion failures already recorded above.
	for i, chat := range env.chats {
		if !assert.NotEmpty(t, chat.messageHistory, "Node %d has no messages", i) {
			continue
		}
		lastMsg := chat.messageHistory[len(chat.messageHistory)-1]
		assert.Equal(t, "Message from new node", lastMsg.Content, "The last message is incorrect for node %d", i)
	}

	t.Log("TestNewNodeSyncAndMessagePropagation completed")
}