# beacon_chain
# Copyright (c) 2021-2024 Status Research & Development GmbH
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

{.push raises: [].}

import ./common

const
  # Name attached to the service object and used as the module's log scope.
  ServiceName = "fallback_service"

  # Time offset thresholds. Every array is a `[lower bound, upper bound]` pair
  # derived from `MAXIMUM_GOSSIP_CLOCK_DISPARITY`; `checkOffsetStatus` treats
  # an offset as out of range when it is `<=` the lower bound or `>=` the
  # upper bound. The ranges are not symmetric around zero.

  # Leaving this range marks the beacon node as `BrokenClock`.
  FAIL_TIME_OFFSETS = [
    TimeOffset.init(-(MAXIMUM_GOSSIP_CLOCK_DISPARITY.nanoseconds)),
    TimeOffset.init(MAXIMUM_GOSSIP_CLOCK_DISPARITY.nanoseconds * 4)
  ]
  # Leaving this range produces a warning level log message.
  WARN_TIME_OFFSETS = [
    TimeOffset.init(-(MAXIMUM_GOSSIP_CLOCK_DISPARITY.nanoseconds div 2)),
    TimeOffset.init(MAXIMUM_GOSSIP_CLOCK_DISPARITY.nanoseconds * 2),
  ]
  # Leaving this range (while staying inside the warning range) produces an
  # info level log message.
  NOTE_TIME_OFFSETS = [
    TimeOffset.init(-(MAXIMUM_GOSSIP_CLOCK_DISPARITY.nanoseconds div 4)),
    TimeOffset.init(MAXIMUM_GOSSIP_CLOCK_DISPARITY.nanoseconds),
  ]

# Gauge labelled by beacon node; `checkOffsetStatus` stores the most recently
# measured offset (in milliseconds) under the node's string representation.
declareGauge validator_client_time_offset,
  "Wall clock offset(s) between validator client and beacon node(s)",
  labels = ["node"]

# Attaches the `service` property to log statements emitted by this module.
logScope: service = ServiceName

proc nodesCount*(vc: ValidatorClientRef,
                 statuses: set[RestBeaconNodeStatus],
                 roles: set[BeaconNodeRole] = {}): int =
  ## Returns the number of beacon nodes whose status is a member of
  ## `statuses`.
  ##
  ## When `roles` is not empty, a node is counted only if it also shares at
  ## least one role with `roles`; an empty `roles` disables the role check.
  let anyRole = card(roles) == 0
  for node in vc.beaconNodes:
    let roleMatches = anyRole or (node.roles * roles != {})
    if roleMatches and (node.status in statuses):
      inc(result)

proc filterNodes*(vc: ValidatorClientRef, statuses: set[RestBeaconNodeStatus],
                  roles: set[BeaconNodeRole] = {}): seq[BeaconNodeServerRef] =
  ## Returns the beacon nodes whose status is a member of `statuses`.
  ##
  ## When `roles` is not empty, a node is kept only if it also shares at
  ## least one role with `roles`; an empty `roles` disables the role check.
  let anyRole = card(roles) == 0
  vc.beaconNodes.filterIt(
    (anyRole or (it.roles * roles != {})) and (it.status in statuses))

proc nonameNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] =
  ## Returns the beacon nodes that currently have the `Noname` status.
  for node in vc.beaconNodes:
    if node.status == RestBeaconNodeStatus.Noname:
      result.add(node)

proc offlineNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] =
  ## Returns the beacon nodes that currently have the `Offline` status.
  for node in vc.beaconNodes:
    if node.status == RestBeaconNodeStatus.Offline:
      result.add(node)

proc otherNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] =
  ## Returns every beacon node whose status is anything but `Synced`.
  const Excluded = {RestBeaconNodeStatus.Synced}
  vc.beaconNodes.filterIt(it.status notin Excluded)

proc otherNodesCount*(vc: ValidatorClientRef): int =
  ## Returns the number of beacon nodes whose status is anything but
  ## `Synced`.
  for node in vc.beaconNodes:
    if node.status != RestBeaconNodeStatus.Synced:
      inc(result)

proc preGenesisNodes*(vc: ValidatorClientRef): seq[BeaconNodeServerRef] =
  ## Returns every beacon node whose status is neither `Synced` nor
  ## `OptSynced`; `checkNodes` uses this selection while the genesis event is
  ## not set.
  const Settled = {RestBeaconNodeStatus.Synced, RestBeaconNodeStatus.OptSynced}
  for node in vc.beaconNodes:
    if node.status notin Settled:
      result.add(node)

proc waitNodes*(vc: ValidatorClientRef, timeoutFut: Future[void],
                statuses: set[RestBeaconNodeStatus],
                roles: set[BeaconNodeRole], waitChanges: bool) {.
     async: (raises: [CancelledError]).} =
  ## Waits until at least one beacon node matches `statuses` and `roles`
  ## (as counted by `nodesCount`).
  ##
  ## * `timeoutFut` - optional future bounding the wait. When it finishes
  ##   before the fallback service signals a change, the procedure returns
  ##   even though no matching node was found. Pass `nil` to wait without a
  ##   time limit.
  ## * `waitChanges` - when `true` the check is skipped on the first
  ##   iteration, so the procedure always waits for at least one change
  ##   notification (or for `timeoutFut`) before looking at the nodes.
  ##
  ## Raises `CancelledError` when cancelled. `vc.fallbackService` must be set.
  doAssert(not(isNil(vc.fallbackService)))
  var iterations = 0
  while true:
    # With `waitChanges` the current state is ignored on the first pass.
    if not(waitChanges) or (iterations != 0):
      if vc.nodesCount(statuses, roles) != 0:
        break

    # Reset the event so that the wait below only reacts to a fresh `fire()`.
    if vc.fallbackService.changesEvent.isSet():
      vc.fallbackService.changesEvent.clear()

    if isNil(timeoutFut):
      await vc.fallbackService.changesEvent.wait()
    else:
      let breakLoop =
        block:
          let waitFut = vc.fallbackService.changesEvent.wait()
          try:
            discard await race(waitFut, timeoutFut)
          except CancelledError as exc:
            # Do not leave the event waiter pending when we are cancelled.
            if not(waitFut.finished()):
              await waitFut.cancelAndWait()
            raise exc

          if not(waitFut.finished()):
            # `race` finished but the event waiter did not, so `timeoutFut`
            # finished first: drop the waiter and stop waiting.
            await waitFut.cancelAndWait()
            true
          else:
            false
      if breakLoop:
        break

    inc(iterations)

proc checkName*(
       node: BeaconNodeServerRef): RestBeaconNodeStatus {.raises: [].} =
  ## Tries to create a REST client for `node.uri`.
  ##
  ## Could return only {Invalid, Noname, Offline}:
  ## * `Invalid` - client creation failed with a critical address error;
  ## * `Noname` - client creation failed with a recoverable address error;
  ## * `Offline` - the client was created and stored in `node.client`.
  logScope: endpoint = node
  let clientRes = initClient(node.uri)
  if clientRes.isErr():
    # `node.client` is left untouched when the address cannot be used.
    return
      case clientRes.error
      of CriticalHttpAddressError:
        RestBeaconNodeStatus.Invalid
      of RecoverableHttpAddressError:
        RestBeaconNodeStatus.Noname

  node.client = clientRes.get()
  RestBeaconNodeStatus.Offline

proc checkCompatible(
       vc: ValidatorClientRef,
       node: BeaconNodeServerRef
     ): Future[RestBeaconNodeStatus] {.async: (raises: [CancelledError]).} =
  ## Requests the node's network configuration and genesis and compares them
  ## with what the validator client expects.
  ##
  ## Could return only {Offline, Incompatible, Compatible}:
  ## * `Offline` - one of the two REST requests failed;
  ## * `Incompatible` - the configuration check failed, the genesis differs
  ##   from `vc.beaconGenesis`, or `updateRuntimeConfig` reported an error;
  ## * `Compatible` - otherwise.
  ##
  ## `node.config` and `node.genesis` are updated whenever both requests
  ## succeed, regardless of the verdict.
  logScope: endpoint = node
  let specData =
    try:
      debug "Requesting beacon node network configuration"
      let response = await node.client.getSpecVC()
      response.data.data
    except CancelledError as exc:
      debug "Configuration request was interrupted"
      raise exc
    except RestError as exc:
      # Failures of a node already known to be offline are not logged again.
      if node.status != RestBeaconNodeStatus.Offline:
        debug "Unable to obtain beacon node's configuration",
              error_name = exc.name, error_message = exc.msg
      return RestBeaconNodeStatus.Offline

  let genesisData =
    try:
      debug "Requesting beacon node genesis information"
      let response = await node.client.getGenesis()
      response.data.data
    except CancelledError as exc:
      debug "Genesis request was interrupted"
      raise exc
    except RestError as exc:
      if node.status != RestBeaconNodeStatus.Offline:
        debug "Unable to obtain beacon node's genesis",
              error_name = exc.name, error_message = exc.msg
      return RestBeaconNodeStatus.Offline

  let
    genesisMismatch = (genesisData != vc.beaconGenesis)
    configMismatch = not(checkConfig(specData))

  node.config = specData
  node.genesis = Opt.some(genesisData)

  if configMismatch or genesisMismatch:
    # Warn only on the transition into the `Incompatible` state.
    if node.status != RestBeaconNodeStatus.Incompatible:
      warn "Beacon node has incompatible configuration",
            genesis_flag = genesisMismatch, config_flag = configMismatch
    return RestBeaconNodeStatus.Incompatible

  let updateRes = vc.updateRuntimeConfig(node, node.config)
  if updateRes.isErr():
    warn "Beacon nodes report different configuration values",
         reason = updateRes.error
    return RestBeaconNodeStatus.Incompatible

  RestBeaconNodeStatus.Compatible

proc checkSync(
       vc: ValidatorClientRef,
       node: BeaconNodeServerRef
     ): Future[RestBeaconNodeStatus] {.async: (raises: [CancelledError]).} =
  ## Requests the node's syncing status and stores it in `node.syncInfo`.
  ##
  ## Could return only {Offline, NotSynced, Synced, OptSynced}:
  ## * `Offline` - the REST request failed;
  ## * `NotSynced` - the node is syncing and its sync distance is not below
  ##   `SYNC_TOLERANCE`;
  ## * `OptSynced` - the node is close enough but reports itself optimistic;
  ## * `Synced` - otherwise.
  logScope: endpoint = node
  let progress =
    try:
      debug "Requesting beacon node sync status"
      let response = await node.client.getSyncingStatus()
      response.data.data
    except CancelledError as exc:
      debug "Sync status request was interrupted"
      raise exc
    except RestError as exc:
      # Failures of a node already known to be offline are not logged again.
      if node.status != RestBeaconNodeStatus.Offline:
        debug "Unable to obtain beacon node's sync status",
              error_name = exc.name, error_message = exc.msg
      return RestBeaconNodeStatus.Offline

  node.syncInfo = Opt.some(progress)

  let closeEnough =
    not(progress.is_syncing) or (progress.sync_distance < SYNC_TOLERANCE)

  if not(closeEnough):
    RestBeaconNodeStatus.NotSynced
  elif progress.is_optimistic.get(false):
    # A missing `is_optimistic` value is treated as "not optimistic".
    RestBeaconNodeStatus.OptSynced
  else:
    RestBeaconNodeStatus.Synced

proc checkOnline(
       node: BeaconNodeServerRef
     ): Future[RestBeaconNodeStatus] {.async: (raises: [CancelledError]).} =
  ## Probes the node by requesting its version.
  ##
  ## Could return only {Offline, Online}. On success the reported version
  ## string is stored in `node.ident`.
  logScope: endpoint = node
  debug "Checking beacon node status"
  let version =
    try:
      let response = await node.client.getNodeVersion()
      response.data.data.version
    except CancelledError as exc:
      debug "Status request was interrupted"
      raise exc
    except RestError as exc:
      debug "Unable to check beacon node's status",
            error_name = exc.name, error_message = exc.msg
      return RestBeaconNodeStatus.Offline

  node.ident = Opt.some(version)
  RestBeaconNodeStatus.Online

func getReason(status: RestBeaconNodeStatus): string =
  ## Maps a node status to the human readable reason recorded alongside a
  ## status update. Statuses without a dedicated message share a generic one.
  if status == RestBeaconNodeStatus.Invalid:
    "Beacon node address invalid"
  elif status == RestBeaconNodeStatus.Noname:
    "Beacon node address cannot be resolved"
  elif status == RestBeaconNodeStatus.Offline:
    "Connection with node has been lost"
  elif status == RestBeaconNodeStatus.Online:
    "Connection with node has been established"
  else:
    "Beacon node reports"

proc checkNode(vc: ValidatorClientRef,
               node: BeaconNodeServerRef): Future[bool] {.
     async: (raises: [CancelledError]).} =
  ## Runs the staged health check (name -> online -> compatible -> sync) for
  ## `node` and records every stage result through `node.updateStatus`.
  ##
  ## Which stages run is decided by the status the node had on entry; a stage
  ## that does not reach its "success" status ends the check.
  ##
  ## Returns `true` when the status produced by the stage that ended the
  ## check differs from the entry status. Returns `false` when no stage ends
  ## the check: this covers entry statuses that belong to no stage, and also
  ## a `Noname` node whose name check succeeded (it becomes `Offline`, and
  ## the remaining stages are left for the next invocation).
  const
    UnreachableStatuses = {
      RestBeaconNodeStatus.Offline,
      RestBeaconNodeStatus.UnexpectedCode,
      RestBeaconNodeStatus.UnexpectedResponse,
      RestBeaconNodeStatus.InternalError}
    CompatibilityStatuses = UnreachableStatuses + {
      RestBeaconNodeStatus.Online,
      RestBeaconNodeStatus.Incompatible}
    SyncStatuses = CompatibilityStatuses + {
      RestBeaconNodeStatus.Compatible,
      RestBeaconNodeStatus.OptSynced,
      RestBeaconNodeStatus.NotSynced}

  let entryStatus = node.status
  debug "Checking beacon node", endpoint = node, status = node.status

  # Stage 1: create the REST client for nodes whose address was unresolved.
  if entryStatus == RestBeaconNodeStatus.Noname:
    let
      resolved = node.checkName()
      failure = ApiNodeFailure.init(ApiFailure.NoError, "checkName",
                                    node, resolved.getReason())
    node.updateStatus(resolved, failure)
    if resolved != RestBeaconNodeStatus.Offline:
      return entryStatus != resolved

  # Stage 2: probe nodes that were unreachable or misbehaving.
  if entryStatus in UnreachableStatuses:
    let
      reachability = await node.checkOnline()
      failure = ApiNodeFailure.init(ApiFailure.NoError, "checkOnline",
                                    node, reachability.getReason())
    node.updateStatus(reachability, failure)
    if reachability != RestBeaconNodeStatus.Online:
      return entryStatus != reachability

  # Stage 3: compare configuration and genesis.
  if entryStatus in CompatibilityStatuses:
    let
      compatibility = await vc.checkCompatible(node)
      failure = ApiNodeFailure.init(ApiFailure.NoError, "checkCompatible",
                                    node, compatibility.getReason())
    node.updateStatus(compatibility, failure)
    if compatibility != RestBeaconNodeStatus.Compatible:
      return entryStatus != compatibility

  # Stage 4: the sync check always ends the pipeline.
  if entryStatus in SyncStatuses:
    let
      syncStatus = await vc.checkSync(node)
      failure = ApiNodeFailure.init(ApiFailure.NoError, "checkSync",
                                    node, syncStatus.getReason())
    node.updateStatus(syncStatus, failure)
    return entryStatus != syncStatus

  # No stage ended the check.
  false

proc checkNodes*(service: FallbackServiceRef): Future[bool] {.
     async: (raises: [CancelledError]).} =
  ## Runs `checkNode` concurrently for every node that needs attention and
  ## returns `true` when at least one of the checks reported a change.
  ##
  ## The nodes are taken from `otherNodes()` once the genesis event is set
  ## and from `preGenesisNodes()` before that. On cancellation all unfinished
  ## checks are cancelled and awaited before `CancelledError` is re-raised.
  let
    vc = service.client
    candidates =
      if vc.genesisEvent.isSet():
        vc.otherNodes()
      else:
        vc.preGenesisNodes()
    checks = candidates.mapIt(vc.checkNode(it))

  var statusChanged = false
  try:
    await allFutures(checks)
    for check in checks:
      statusChanged = statusChanged or (check.completed() and check.value())
  except CancelledError as exc:
    let unfinished = checks.filterIt(not(it.finished()))
    # `noCancel` keeps the cleanup running even if we get cancelled again.
    await noCancel allFutures(unfinished.mapIt(it.cancelAndWait()))
    raise exc
  statusChanged

proc checkOffsetStatus(node: BeaconNodeServerRef, offset: TimeOffset) =
  ## Records the measured time `offset` for `node` (field and metric) and
  ## adjusts the node status accordingly:
  ## * outside `FAIL_TIME_OFFSETS` - the node is marked as `BrokenClock`;
  ## * otherwise a node currently marked `BrokenClock` is put back to
  ##   `Offline`.
  ##
  ## Offsets outside `WARN_TIME_OFFSETS` / `NOTE_TIME_OFFSETS` are
  ## additionally logged at warning / info level.
  logScope:
    node = node

  template outside(bounds: untyped): bool =
    # `bounds` is a `[lower, upper]` pair; both edges count as out of range.
    (offset <= bounds[0]) or (offset >= bounds[1])

  node.timeOffset = Opt.some(offset)
  validator_client_time_offset.set(float64(offset.milliseconds()), @[$node])

  debug "Beacon node time offset", time_offset = offset

  if outside(WARN_TIME_OFFSETS):
    warn "Beacon node has significant time offset",
         time_offset = offset
    if outside(FAIL_TIME_OFFSETS):
      # The beacon node's clock is outside of the acceptable range, so we mark
      # the node and thereby remove it from the list of working nodes.
      warn "Beacon node has enormous time offset",
           time_offset = offset
      let failure = ApiNodeFailure.init(ApiFailure.NoError,
        "checkTimeOffsetStatus()", node, 200,
        "Beacon node has enormous time offset")
      node.updateStatus(RestBeaconNodeStatus.BrokenClock, failure)
      return
  elif outside(NOTE_TIME_OFFSETS):
    info "Beacon node has notable time offset",
         time_offset = offset

  if node.status == RestBeaconNodeStatus.BrokenClock:
    # The beacon node's clock is back within the acceptable range, so the node
    # can be restored.
    let failure = ApiNodeFailure.init(ApiFailure.NoError,
        "checkTimeOffsetStatus()", node, 200,
        "Beacon node has acceptable time offset")
    node.updateStatus(RestBeaconNodeStatus.Offline, failure)

proc disableNimbusExtensions(node: BeaconNodeServerRef) =
  ## Flags `node` as not supporting Nimbus extensions. A node that is
  ## currently marked `BrokenClock` is put back to `Offline`.
  node.features.incl(RestBeaconNodeFeature.NoNimbusExtensions)
  if node.status != RestBeaconNodeStatus.BrokenClock:
    return

  let failure = ApiNodeFailure.init(ApiFailure.NoError,
      "disableNimbusExtensions()", node, 200,
      "Nimbus extensions no longer available")
  node.updateStatus(RestBeaconNodeStatus.Offline, failure)

proc runTimeMonitor(
    service: FallbackServiceRef,
    node: BeaconNodeServerRef
) {.async: (raises: [CancelledError]).} =
  ## Periodically measures the time offset between the validator client and
  ## `node` and passes it to `checkOffsetStatus`.
  ##
  ## The procedure returns (stops monitoring this node) when:
  ## * the node has the `NoTimeCheck` role;
  ## * the node is flagged with `NoNimbusExtensions`;
  ## * the time offset request fails with any `RestError`, in which case the
  ##   node is flagged through `disableNimbusExtensions`.
  ##
  ## Otherwise it loops until it is cancelled (`CancelledError`).
  const NimbusExtensionsLog = "Beacon node does not support Nimbus extensions"
  let
    vc = service.client
    roles = AllBeaconNodeRoles
    # The node is only queried while it is in one of these statuses.
    statuses = AllBeaconNodeStatuses - {RestBeaconNodeStatus.Offline}

  logScope:
    node = node

  if BeaconNodeRole.NoTimeCheck in node.roles:
    debug "Beacon node time offset checks disabled"
    return

  while true:
    # `waitNodes` wakes up when any node matches, so re-check this node.
    while node.status notin statuses:
      await vc.waitNodes(nil, statuses, roles, true)

    if RestBeaconNodeFeature.NoNimbusExtensions in node.features:
      return

    let tres =
      try:
        let delay = vc.processingDelay.valueOr: ZeroDuration
        await node.client.getTimeOffset(delay)
      except RestResponseError as exc:
        case exc.status
        of 400:
          debug "Beacon node returns invalid response",
                status = $exc.status, reason = $exc.msg,
                error_message = $exc.message
        else:
          notice NimbusExtensionsLog, status = $exc.status
        # Both branches above stop the monitoring of this node.
        node.disableNimbusExtensions()
        return
      except RestError as exc:
        debug "Unable to obtain beacon node's time offset", reason = $exc.msg
        notice NimbusExtensionsLog
        node.disableNimbusExtensions()
        return
      except CancelledError as exc:
        raise exc

    checkOffsetStatus(node, TimeOffset.init(tres))

    # One measurement per slot.
    await service.waitForNextSlot()

proc processTimeMonitoring(
    service: FallbackServiceRef
) {.async: (raises: [CancelledError]).} =
  ## Starts one `runTimeMonitor` task for every beacon node selected by
  ## `ResolvedBeaconNodeStatuses` / `AllBeaconNodeRoles` and waits until all
  ## of them have finished.
  ##
  ## On cancellation all unfinished monitors are cancelled and awaited before
  ## `CancelledError` is re-raised.
  let monitoredNodes = service.client.filterNodes(
    ResolvedBeaconNodeStatuses, AllBeaconNodeRoles)

  var monitors: seq[Future[void]]

  try:
    for node in monitoredNodes:
      monitors.add(service.runTimeMonitor(node))
    await allFutures(monitors)
  except CancelledError as exc:
    let unfinished = monitors.filterIt(not(it.finished()))
    # `noCancel` keeps the cleanup running even if we get cancelled again.
    await noCancel allFutures(unfinished.mapIt(it.cancelAndWait()))
    raise exc

proc mainLoop(service: FallbackServiceRef) {.async: (raises: []).} =
  ## Body of the fallback service.
  ##
  ## Starts the time monitoring task, waits for the pre-genesis event and
  ## then re-checks the beacon nodes every 2 seconds, firing
  ## `service.changesEvent` whenever `checkNodes` reports a change.
  ##
  ## The procedure never raises: cancellation stops the time monitoring task
  ## and makes the procedure return.
  let vc = service.client
  service.state = ServiceState.Running
  debug "Service started"

  # Runs concurrently with the node checks below until it is cancelled.
  let timeMonitorFut = processTimeMonitoring(service)

  try:
    await vc.preGenesisEvent.wait()
  except CancelledError:
    debug "Service interrupted"
    if not(timeMonitorFut.finished()): await timeMonitorFut.cancelAndWait()
    return

  while true:
    # This loop could look much nicer once
    # https://github.com/nim-lang/Nim/issues/19911 is fixed, as it would then
    # become safe to combine loops, breaks and exception handlers.
    # Until then the `try` only computes a flag and the `break` stays outside.
    let breakLoop =
      try:
        if await service.checkNodes(): service.changesEvent.fire()
        await sleepAsync(2.seconds)
        false
      except CancelledError:
        debug "Service interrupted"
        if not(timeMonitorFut.finished()): await timeMonitorFut.cancelAndWait()
        true

    if breakLoop:
      break

proc init*(
    t: typedesc[FallbackServiceRef],
    vc: ValidatorClientRef
): Future[FallbackServiceRef] {.async: (raises: []).} =
  ## Creates the fallback service for validator client `vc` in the
  ## `Initialized` state with a fresh change notification event. The service
  ## does nothing until `start` is called.
  logScope: service = ServiceName
  let fallback = FallbackServiceRef(
    name: ServiceName,
    client: vc,
    state: ServiceState.Initialized,
    changesEvent: newAsyncEvent()
  )
  debug "Initializing service"
  fallback

proc start*(service: FallbackServiceRef) =
  ## Launches the service's main loop and keeps its future in
  ## `service.lifeFut`.
  service.lifeFut = service.mainLoop()