package wakuv2

import (
	"context"
	"encoding/hex"
	"errors"
	"slices"
	"sync"
	"time"

	"github.com/libp2p/go-libp2p/core/peer"
	"go.uber.org/zap"
	"google.golang.org/protobuf/proto"

	gethcommon "github.com/ethereum/go-ethereum/common"

	"github.com/waku-org/go-waku/waku/v2/protocol"
	"github.com/waku-org/go-waku/waku/v2/protocol/pb"
	"github.com/waku-org/go-waku/waku/v2/protocol/store"

	"github.com/status-im/status-go/logutils"
	"github.com/status-im/status-go/wakuv2/common"
)

const maxContentTopicsPerRequest = 10
const maxAttemptsToRetrieveHistory = 3

// delay shifts the verification window into the past, giving messages time
// to reach the store node before we check for them.
const delay = 10 * time.Second

// maxHashQueryLength caps how many message hashes are requested per store
// query. Assumed value: the original defines this constant elsewhere in the
// package.
const maxHashQueryLength = 100
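
// TopicInterest describes a single missing-message verification request: the
// store node to query, the pubsub topic and content topics to verify, and the
// lower bound of the time window already checked.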
type TopicInterest struct {
	peerID        peer.ID
	pubsubTopic   string
	contentTopics []string
	lastChecked   time.Time

	ctx    context.Context
	cancel context.CancelFunc
}
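
// Equals reports whether two TopicInterest values target the same peer,
// pubsub topic, and set of content topics, regardless of topic order.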
func (p TopicInterest) Equals(other TopicInterest) bool {
	if p.peerID != other.peerID {
		return false
	}

	if p.pubsubTopic != other.pubsubTopic {
		return false
	}

	if len(p.contentTopics) != len(other.contentTopics) {
		return false
	}

	// Sorting mutates the underlying arrays of both slices; this is
	// acceptable because content topic order carries no meaning here.
	slices.Sort(p.contentTopics)
	slices.Sort(other.contentTopics)

	for i, contentTopic := range p.contentTopics {
		if contentTopic != other.contentTopics[i] {
			return false
		}
	}

	return true
}
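
// SetTopicsToVerifyForMissingMessages registers (or replaces) the set of
// content topics that is periodically checked against a store node for
// messages this node may have missed.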
func (w *Waku) SetTopicsToVerifyForMissingMessages(peerID peer.ID, pubsubTopic string, contentTopics []string) {
	w.topicInterestMu.Lock()
	defer w.topicInterestMu.Unlock()

	ctx, cancel := context.WithCancel(w.ctx)
	newMissingMessageRequest := TopicInterest{
		peerID:        peerID,
		pubsubTopic:   pubsubTopic,
		contentTopics: contentTopics,
		lastChecked:   w.timesource.Now().Add(-delay),
		ctx:           ctx,
		cancel:        cancel,
	}

	currMessageVerificationRequest, ok := w.topicInterest[pubsubTopic]
	if ok && currMessageVerificationRequest.Equals(newMissingMessageRequest) {
		return
	}

	if ok {
		// If there is an ongoing request, cancel it before replacing it with
		// the new list. This could probably be optimized further by tracking
		// the last time each content topic was synced, but that may not be
		// necessary, since cancelling an ongoing request only cancels a
		// single page of results.
		currMessageVerificationRequest.cancel()
	}

	w.topicInterest[pubsubTopic] = newMissingMessageRequest
}
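
// checkForMissingMessages runs until the node shuts down, waking once a
// minute to fetch history for every registered topic interest. A buffered
// channel acts as a semaphore capping the number of concurrent fetches.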
func (w *Waku) checkForMissingMessages() {
	defer w.wg.Done()
	defer w.logger.Debug("checkForMissingMessages - done")

	t := time.NewTicker(time.Minute)
	defer t.Stop()

	semaphore := make(chan struct{}, 5)
	for {
		select {
		case <-t.C:
			w.logger.Debug("checking for missing messages...")
			w.topicInterestMu.Lock()
			for _, request := range w.topicInterest {
				select {
				case <-w.ctx.Done():
					w.topicInterestMu.Unlock()
					return
				default:
					semaphore <- struct{}{}
					go func(r TopicInterest) {
						w.FetchHistory(r)
						<-semaphore
					}(request)
				}
			}
			w.topicInterestMu.Unlock()
		case <-w.ctx.Done():
			return
		}
	}
}
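
// FetchHistory queries the store node for the requested content topics in
// batches of maxContentTopicsPerRequest, advancing lastChecked only for
// batches that were fetched successfully.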
func (w *Waku) FetchHistory(missingHistoryRequest TopicInterest) {
	for i := 0; i < len(missingHistoryRequest.contentTopics); i += maxContentTopicsPerRequest {
		j := i + maxContentTopicsPerRequest
		if j > len(missingHistoryRequest.contentTopics) {
			j = len(missingHistoryRequest.contentTopics)
		}

		now := w.timesource.Now()
		err := w.fetchMessagesBatch(missingHistoryRequest, i, j, now)
		if err != nil {
			if errors.Is(err, context.Canceled) {
				return
			}

			w.logger.Error("could not fetch history",
				zap.Error(err),
				zap.Stringer("peerID", missingHistoryRequest.peerID),
				zap.String("pubsubTopic", missingHistoryRequest.pubsubTopic),
				zap.Strings("contentTopics", missingHistoryRequest.contentTopics))
			continue
		}

		w.topicInterestMu.Lock()
		c := w.topicInterest[missingHistoryRequest.pubsubTopic]
		if c.Equals(missingHistoryRequest) {
			c.lastChecked = now
			w.topicInterest[missingHistoryRequest.pubsubTopic] = c
		}
		w.topicInterestMu.Unlock()
	}
}
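
// storeQueryWithRetry runs queryFunc with a 20-second timeout per attempt,
// retrying up to maxAttemptsToRetrieveHistory times with a 2-second backoff
// between attempts.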
func (w *Waku) storeQueryWithRetry(ctx context.Context, queryFunc func(ctx context.Context) (*store.Result, error), logger *zap.Logger, logMsg string) (*store.Result, error) {
	for count := 1; count <= maxAttemptsToRetrieveHistory; count++ {
		logger.Debug(logMsg, zap.Int("attempt", count))
		tCtx, cancel := context.WithTimeout(ctx, 20*time.Second)
		result, err := queryFunc(tCtx)
		cancel()
		if err == nil {
			return result, nil
		}

		logger.Error("could not query storenode", zap.Error(err), zap.Int("attempt", count))
		// Back off before the next attempt, but bail out early on shutdown.
		select {
		case <-w.ctx.Done():
			return nil, w.ctx.Err()
		case <-time.After(2 * time.Second):
		}
	}

	return nil, errors.New("storenode not available")
}
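
// fetchMessagesBatch runs the two-phase missing-message check for one batch
// of content topics: it first queries message hashes only (IncludeData is
// false) for the window since lastChecked, shifted back by delay, filters out
// hashes already present in the envelope cache, and then fetches the full
// payloads of the remaining hashes in batches of maxHashQueryLength.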
func (w *Waku) fetchMessagesBatch(missingHistoryRequest TopicInterest, batchFrom int, batchTo int, now time.Time) error {
	logger := w.logger.With(
		zap.Stringer("peerID", missingHistoryRequest.peerID),
		zap.Strings("contentTopics", missingHistoryRequest.contentTopics[batchFrom:batchTo]),
		zap.String("pubsubTopic", missingHistoryRequest.pubsubTopic),
		logutils.WakuMessageTimestamp("from", proto.Int64(missingHistoryRequest.lastChecked.UnixNano())),
		logutils.WakuMessageTimestamp("to", proto.Int64(now.UnixNano())),
	)

	result, err := w.storeQueryWithRetry(missingHistoryRequest.ctx, func(ctx context.Context) (*store.Result, error) {
		return w.node.Store().Query(ctx, store.FilterCriteria{
			ContentFilter: protocol.NewContentFilter(missingHistoryRequest.pubsubTopic, missingHistoryRequest.contentTopics[batchFrom:batchTo]...),
			TimeStart:     proto.Int64(missingHistoryRequest.lastChecked.Add(-delay).UnixNano()),
			TimeEnd:       proto.Int64(now.Add(-delay).UnixNano()),
		}, store.WithPeer(missingHistoryRequest.peerID), store.WithPaging(false, 100), store.IncludeData(false))
	}, logger, "retrieving history to check for missing messages")
	if err != nil {
		if !errors.Is(err, context.Canceled) {
			logger.Error("storenode not available", zap.Error(err))
		}

		return err
	}

	var missingHashes []pb.MessageHash
	for !result.IsComplete() {
		for _, mkv := range result.Messages() {
			hash := pb.ToMessageHash(mkv.MessageHash)

			w.poolMu.Lock()
			alreadyCached := w.envelopeCache.Has(gethcommon.Hash(hash))
			w.poolMu.Unlock()
			if alreadyCached {
				continue
			}

			missingHashes = append(missingHashes, hash)
		}

		result, err = w.storeQueryWithRetry(missingHistoryRequest.ctx, func(ctx context.Context) (*store.Result, error) {
			if err = result.Next(ctx); err != nil {
				return nil, err
			}
			return result, nil
		}, logger.With(zap.String("cursor", hex.EncodeToString(result.Cursor()))), "retrieving next page")
		if err != nil {
			if !errors.Is(err, context.Canceled) {
				logger.Error("storenode not available", zap.Error(err))
			}

			return err
		}
	}

	if len(missingHashes) == 0 {
		// Nothing to do here
		return nil
	}

	var wg sync.WaitGroup

	// Fetch the missing messages in batches of maxHashQueryLength hashes,
	// one goroutine per batch.
	for i := 0; i < len(missingHashes); i += maxHashQueryLength {
		j := i + maxHashQueryLength
		if j > len(missingHashes) {
			j = len(missingHashes)
		}

		wg.Add(1)
		go func(messageHashes []pb.MessageHash) {
			defer wg.Done()

			result, err := w.storeQueryWithRetry(missingHistoryRequest.ctx, func(ctx context.Context) (*store.Result, error) {
				return w.node.Store().QueryByHash(ctx, messageHashes, store.WithPeer(missingHistoryRequest.peerID), store.WithPaging(false, maxHashQueryLength))
			}, logger, "retrieving missing messages")
			if err != nil {
				if !errors.Is(err, context.Canceled) {
					logger.Error("storenode not available", zap.Error(err))
				}
				return
			}

			for !result.IsComplete() {
				for _, mkv := range result.Messages() {
					envelope := protocol.NewEnvelope(mkv.Message, mkv.Message.GetTimestamp(), mkv.GetPubsubTopic())
					w.logger.Info("received waku2 store message",
						zap.Stringer("envelopeHash", envelope.Hash()),
						zap.String("pubsubTopic", mkv.GetPubsubTopic()),
						zap.Int64p("timestamp", envelope.Message().Timestamp),
					)

					err = w.OnNewEnvelopes(envelope, common.StoreMessageType, false)
					if err != nil {
						logger.Error("could not process envelopes", zap.Error(err))
						return
					}
				}

				result, err = w.storeQueryWithRetry(missingHistoryRequest.ctx, func(ctx context.Context) (*store.Result, error) {
					if err = result.Next(ctx); err != nil {
						return nil, err
					}
					return result, nil
				}, logger.With(zap.String("cursor", hex.EncodeToString(result.Cursor()))), "retrieving next page")
				if err != nil {
					if !errors.Is(err, context.Canceled) {
						logger.Error("storenode not available", zap.Error(err))
					}
					return
				}
			}
		}(missingHashes[i:j])
	}

	wg.Wait()

	return nil
}