Fix/timesource offline (#4309)

* fix: add missing callback cleanup in timesource

fixes: "panic: sync: negative WaitGroup counter"

part of: status-im/status-desktop#12691

* fix: ensure timesource.GetCurrentTime is non-blocking

closes: status-im/status-desktop#12691
This commit is contained in:
osmaczko 2023-11-13 20:06:32 +01:00 committed by GitHub
parent 2076853ce3
commit a584ab086a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 73 additions and 111 deletions

View File

@ -82,13 +82,10 @@ func generateMediaTLSCert() error {
return nil return nil
} }
notBefore, err := timesource.GetCurrentTime() notBefore := timesource.GetCurrentTime()
if err != nil {
return err
}
notAfter := notBefore.Add(365 * 24 * time.Hour) notAfter := notBefore.Add(365 * 24 * time.Hour)
log.Debug("generate media cert", "system time", time.Now().String(), "cert notBefore", notBefore.String(), "cert notAfter", notAfter.String()) log.Debug("generate media cert", "system time", time.Now().String(), "cert notBefore", notBefore.String(), "cert notAfter", notAfter.String())
finalCert, certPem, err := GenerateTLSCert(*notBefore, notAfter, []net.IP{}, []string{Localhost}) finalCert, certPem, err := GenerateTLSCert(notBefore, notAfter, []net.IP{}, []string{Localhost})
if err != nil { if err != nil {
return err return err
} }

View File

@ -140,7 +140,7 @@ func NewBaseClient(c *ConnectionParams, logger *zap.Logger) (*BaseClient, error)
MinVersion: tls.VersionTLS12, MinVersion: tls.VersionTLS12,
InsecureSkipVerify: false, // MUST BE FALSE InsecureSkipVerify: false, // MUST BE FALSE
RootCAs: rootCAs, RootCAs: rootCAs,
Time: timesource.Time, Time: timesource.GetCurrentTime,
}, },
} }

View File

@ -39,10 +39,7 @@ func preflightHandler(w http.ResponseWriter, r *http.Request) {
} }
func makeCert(address net.IP) (*tls.Certificate, []byte, error) { func makeCert(address net.IP) (*tls.Certificate, []byte, error) {
now, err := timesource.GetCurrentTime() now := timesource.GetCurrentTime()
if err != nil {
return nil, nil, err
}
log.Debug("makeCert", "system time", time.Now().String(), "timesource time", now.String()) log.Debug("makeCert", "system time", time.Now().String(), "timesource time", now.String())
notBefore := now.Add(-pairing.CertificateMaxClockDrift) notBefore := now.Add(-pairing.CertificateMaxClockDrift)
notAfter := now.Add(pairing.CertificateMaxClockDrift) notAfter := now.Add(pairing.CertificateMaxClockDrift)
@ -87,7 +84,7 @@ func makeClient(certPem []byte) (*http.Client, error) {
MinVersion: tls.VersionTLS12, MinVersion: tls.VersionTLS12,
InsecureSkipVerify: false, // MUST BE FALSE InsecureSkipVerify: false, // MUST BE FALSE
RootCAs: rootCAs, RootCAs: rootCAs,
Time: timesource.Time, Time: timesource.GetCurrentTime,
}, },
} }

View File

@ -78,12 +78,9 @@ func MakeServerConfig(config *ServerConfig) error {
return err return err
} }
now, err := timesource.GetCurrentTime() now := timesource.GetCurrentTime()
if err != nil {
return err
}
log.Debug("pairing server generate cert", "system time", time.Now().String(), "timesource time", now.String()) log.Debug("pairing server generate cert", "system time", time.Now().String(), "timesource time", now.String())
tlsCert, _, err := GenerateCertFromKey(tlsKey, *now, ips, []string{}) tlsCert, _, err := GenerateCertFromKey(tlsKey, now, ips, []string{})
if err != nil { if err != nil {
return err return err
} }

View File

@ -35,10 +35,7 @@ func (api *MultiAccountsAPI) UpdateAccount(account multiaccounts.Account) error
return errors.New("UpdateAccount but account not found") return errors.New("UpdateAccount but account not found")
} }
if oldAcc.CustomizationColor != account.CustomizationColor { if oldAcc.CustomizationColor != account.CustomizationColor {
updatedAt, err := timesource.GetCurrentTimeInMillis() updatedAt := timesource.GetCurrentTimeInMillis()
if err != nil {
return err
}
account.CustomizationColorClock = updatedAt account.CustomizationColorClock = updatedAt
} }
return api.db.UpdateAccount(account) return api.db.UpdateAccount(account)

View File

@ -7,8 +7,6 @@ import (
"sync" "sync"
"time" "time"
"go.uber.org/zap"
"github.com/beevik/ntp" "github.com/beevik/ntp"
"github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/log"
@ -145,11 +143,6 @@ func Default() *NTPTimeSource {
return ntpTimeSourceCreator() return ntpTimeSourceCreator()
} }
type timeCallback struct {
wg *sync.WaitGroup
callback func(time.Time)
}
// NTPTimeSource provides source of time that tries to be resistant to time skews. // NTPTimeSource provides source of time that tries to be resistant to time skews.
// It does so by periodically querying time offset from ntp servers. // It does so by periodically querying time offset from ntp servers.
type NTPTimeSource struct { type NTPTimeSource struct {
@ -160,30 +153,12 @@ type NTPTimeSource struct {
timeQuery ntpQuery // for ease of testing timeQuery ntpQuery // for ease of testing
quit chan struct{} quit chan struct{}
wg sync.WaitGroup
started bool started bool
updatedOffset bool
callbacks []timeCallback
mu sync.RWMutex mu sync.RWMutex
latestOffset time.Duration latestOffset time.Duration
} }
// AddCallback adds callback that will be called when offset is updated.
// If offset is already updated once, callback will be called immediately.
func (s *NTPTimeSource) AddCallback(callback func(time.Time)) *sync.WaitGroup {
s.mu.Lock()
defer s.mu.Unlock()
var wg sync.WaitGroup
if s.updatedOffset {
callback(now().Add(s.latestOffset))
} else {
wg.Add(1)
s.callbacks = append(s.callbacks, timeCallback{wg: &wg, callback: callback})
}
return &wg
}
// Now returns time adjusted by latest known offset // Now returns time adjusted by latest known offset
func (s *NTPTimeSource) Now() time.Time { func (s *NTPTimeSource) Now() time.Time {
s.mu.RLock() s.mu.RLock()
@ -195,39 +170,28 @@ func (s *NTPTimeSource) updateOffset() error {
offset, err := computeOffset(s.timeQuery, s.servers, s.allowedFailures) offset, err := computeOffset(s.timeQuery, s.servers, s.allowedFailures)
if err != nil { if err != nil {
log.Error("failed to compute offset", "error", err) log.Error("failed to compute offset", "error", err)
s.mu.Lock()
defer s.mu.Unlock()
for _, c := range s.callbacks {
c.callback(now())
c.wg.Done()
}
return errUpdateOffset return errUpdateOffset
} }
log.Info("Difference with ntp servers", "offset", offset) log.Info("Difference with ntp servers", "offset", offset)
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
s.latestOffset = offset s.latestOffset = offset
s.updatedOffset = true
for _, c := range s.callbacks {
c.callback(now().Add(s.latestOffset))
c.wg.Done()
}
s.callbacks = nil
return nil return nil
} }
// runPeriodically runs periodically the given function based on NTPTimeSource // runPeriodically runs periodically the given function based on NTPTimeSource
// synchronization limits (fastNTPSyncPeriod / slowNTPSyncPeriod) // synchronization limits (fastNTPSyncPeriod / slowNTPSyncPeriod)
func (s *NTPTimeSource) runPeriodically(fn func() error) error { func (s *NTPTimeSource) runPeriodically(fn func() error, starWithSlowSyncPeriod bool) {
if s.started { if s.started {
return nil return
} }
var period time.Duration period := s.fastNTPSyncPeriod
if starWithSlowSyncPeriod {
period = s.slowNTPSyncPeriod
}
s.quit = make(chan struct{}) s.quit = make(chan struct{})
// we try to do it synchronously so that user can have reliable messages right away
s.wg.Add(1)
go func() { go func() {
for { for {
select { select {
@ -239,19 +203,29 @@ func (s *NTPTimeSource) runPeriodically(fn func() error) error {
} }
case <-s.quit: case <-s.quit:
s.wg.Done()
return return
} }
} }
}() }()
s.started = true
return nil
} }
// Start runs a goroutine that updates local offset every updatePeriod. // Start initializes the local offset and starts a goroutine that periodically updates the local offset.
func (s *NTPTimeSource) Start() error { func (s *NTPTimeSource) Start() {
return s.runPeriodically(s.updateOffset) if s.started {
return
}
// Attempt to update the offset synchronously so that user can have reliable messages right away
err := s.updateOffset()
if err != nil {
// Failure to update can occur if the node is offline.
// Instead of returning an error, continue with the process as the update will be retried periodically.
log.Error("failed to update offset", err)
}
s.runPeriodically(s.updateOffset, err == nil)
s.started = true
} }
// Stop goroutine that updates time source. // Stop goroutine that updates time source.
@ -260,39 +234,17 @@ func (s *NTPTimeSource) Stop() error {
return nil return nil
} }
close(s.quit) close(s.quit)
s.wg.Wait()
s.started = false s.started = false
return nil return nil
} }
func GetCurrentTimeInMillis() (uint64, error) { func GetCurrentTime() time.Time {
now, err := GetCurrentTime()
if err != nil {
return 0, err
}
return uint64(now.UnixNano() / int64(time.Millisecond)), nil
}
func GetCurrentTime() (*time.Time, error) {
ts := Default() ts := Default()
if err := ts.Start(); err != nil { ts.Start()
return nil, err
} return ts.Now()
var t *time.Time
ts.AddCallback(func(now time.Time) {
t = &now
}).Wait()
if ts.updatedOffset {
return t, nil
}
return nil, errUpdateOffset
} }
func Time() time.Time { func GetCurrentTimeInMillis() uint64 {
now, err := GetCurrentTime() return uint64(GetCurrentTime().UnixNano() / int64(time.Millisecond))
if err != nil {
log.Error("[timesource] error when getting current time", zap.Error(err))
return time.Now()
}
return *now
} }

View File

@ -210,7 +210,9 @@ func TestRunningPeriodically(t *testing.T) {
lastCall := time.Now() lastCall := time.Now()
// we're simulating a calls to updateOffset, testing ntp calls happens // we're simulating a calls to updateOffset, testing ntp calls happens
// on NTPTimeSource specified periods (fastNTPSyncPeriod & slowNTPSyncPeriod) // on NTPTimeSource specified periods (fastNTPSyncPeriod & slowNTPSyncPeriod)
err := source.runPeriodically(func() error { wg := sync.WaitGroup{}
wg.Add(1)
source.runPeriodically(func() error {
mu.Lock() mu.Lock()
periods = append(periods, time.Since(lastCall)) periods = append(periods, time.Since(lastCall))
mu.Unlock() mu.Unlock()
@ -219,13 +221,12 @@ func TestRunningPeriodically(t *testing.T) {
return errUpdateOffset return errUpdateOffset
} }
if hits == 6 { if hits == 6 {
source.wg.Done() wg.Done()
} }
return nil return nil
}) }, false)
source.wg.Wait() wg.Wait()
require.NoError(t, err)
mu.Lock() mu.Lock()
require.Len(t, periods, 6) require.Len(t, periods, 6)
@ -271,20 +272,41 @@ func TestGetCurrentTimeInMillis(t *testing.T) {
} }
expectedTime := uint64(11000) expectedTime := uint64(11000)
n, e := GetCurrentTimeInMillis() n := GetCurrentTimeInMillis()
require.NoError(t, e)
require.Equal(t, expectedTime, n) require.Equal(t, expectedTime, n)
// test repeat invoke GetCurrentTimeInMillis // test repeat invoke GetCurrentTimeInMillis
n, e = GetCurrentTimeInMillis() n = GetCurrentTimeInMillis()
require.NoError(t, e)
require.Equal(t, expectedTime, n) require.Equal(t, expectedTime, n)
e = Default().Stop() e := Default().Stop()
require.NoError(t, e) require.NoError(t, e)
// test invoke after stop // test invoke after stop
n, e = GetCurrentTimeInMillis() n = GetCurrentTimeInMillis()
require.NoError(t, e)
require.Equal(t, expectedTime, n) require.Equal(t, expectedTime, n)
e = Default().Stop() e = Default().Stop()
require.NoError(t, e) require.NoError(t, e)
} }
func TestGetCurrentTimeOffline(t *testing.T) {
// covers https://github.com/status-im/status-desktop/issues/12691
ntpTimeSourceCreator = func() *NTPTimeSource {
if ntpTimeSource != nil {
return ntpTimeSource
}
ntpTimeSource = &NTPTimeSource{
servers: defaultServers,
allowedFailures: DefaultMaxAllowedFailures,
fastNTPSyncPeriod: 1 * time.Millisecond,
slowNTPSyncPeriod: 1 * time.Second,
timeQuery: func(string, ntp.QueryOptions) (*ntp.Response, error) {
return nil, errors.New("offline")
},
}
return ntpTimeSource
}
// ensure there is no "panic: sync: negative WaitGroup counter"
// when GetCurrentTime() is invoked more than once when offline
_ = GetCurrentTime()
_ = GetCurrentTime()
}