transport-over-mix/reference/Transport/EC.hs


-- | Erasure coding of chunks

{-# OPTIONS_GHC -Wno-x-partial #-}
{-# LANGUAGE StrictData, RecordWildCards, DerivingVia #-}
module Transport.EC where

--------------------------------------------------------------------------------

import Data.Bits
import Data.Word
import Data.Array
import Data.List
import Data.Ord
import Data.Maybe

import Data.ByteString      (ByteString    ) ; import qualified Data.ByteString      as B
import Data.ByteString.Lazy (LazyByteString) ; import qualified Data.ByteString.Lazy as L

import Control.Monad
import System.IO.Unsafe
import System.Random

import Leopard.Binding
import Leopard.Types
import Leopard.Misc

import Transport.Chunks
import Transport.Types
import Transport.Misc

--------------------------------------------------------------------------------
-- * sanity check "testing"

findCounterExample :: IO ()
findCounterExample = do
  putStrLn "---------------------------------"
  ok <- testEC
  when ok findCounterExample

testEC :: IO Bool
testEC = withLeopard $ do
  sessionId <- randomSessionId
  msgIdx    <- randomRIO (0,99)
  len       <- randomRIO (5000,50000)     -- singleton chunks need to be handled specially...
  payload   <- randomByteString len

  let origChunksArr = chunkMsgPayload sessionId msgIdx payload
  let origChunks    = elems origChunksArr
  let ecK = length origChunks
  let ecM = deterministicParityCount ecK   -- min 5 ecK

  let parityChunks = computeParityChunks' ecM origChunks
  let allChunks    = origChunks ++ parityChunks
  let ecN          = length allChunks

  bad <- randomRIO (0,ecM+1)
  received <- catMaybes <$> maskListRandomly bad allChunks

  -- putStrLn $ "chunks received:"
  -- forM_ received $ \chunk -> do
  --   putStrLn $ " - " ++ show chunk
  -- putStrLn ""

  putStrLn $ "chunks received = " ++ show (map _chunkIndex received)

  let ei = decodeFromChunks received

  putStrLn $ "session id  = " ++ show sessionId
  putStrLn $ "message idx = " ++ show msgIdx
  putStrLn $ "original payload length = " ++ show len
  putStrLn $ "K = " ++ show ecK
  putStrLn $ "M = " ++ show ecM
  putStrLn $ "N = " ++ show ecN ++ " | N == K + M: " ++ show (ecN == ecK + ecM)
  putStrLn $ "didn't receive = " ++ show bad
  putStrLn $ "did receive    = " ++ show (ecN - bad)
  putStrLn $ "result:"

  fine <- case ei of

    Left  err -> do
      putStrLn $ " - error = " ++ show err
      return (bad > ecM)

    Right dec -> case dec of
      MkDecodedMessage reSId reIdx bs -> do
        let ok = bs == payload
        putStrLn $ " - recovered session id      = " ++ show reSId
        putStrLn $ " - recovered message idx     = " ++ show reIdx
        putStrLn $ " - recovered payload length  = " ++ show (B.length bs)
        putStrLn $ " - recovered payload matches = " ++ show ok
        return ok

  return fine

--------------------------------------------------------------------------------

-- | Unfortunately, Leopard doesn't seem to work the way I naively assumed, namely
-- that we can just add parity chunks (up to a limit)
--
-- This is not that suprising: it probably uses the smallest power-of-two subgroup
-- into which @N = K + M@ fits; so if we vary M, the subgroup can change.
--
-- Hence for any K we just deterministically figure out an M, and always use that.
--
deterministicECParams :: Int -> ECParams
deterministicECParams k = ECParams k n where
  n  = k + m
  m  = min k m'          -- we have the restriction M < K
  m' = if k < 16
    then 8
    else div k 2         -- TODO: finetune this!

deterministicParityCount :: Int -> Int
deterministicParityCount k = let ECParams k' n = deterministicECParams k in (n - k')

--------------------------------------------------------------------------------
-- * EC encoding

computeParityChunks :: [Chunk] -> [Chunk]
computeParityChunks chunks = computeParityChunks' m chunks where
  m = deterministicParityCount (length chunks)

{-# NOINLINE computeParityChunks' #-}
computeParityChunks' :: Int -> [Chunk] -> [Chunk]
computeParityChunks' 0       origChunks  = origChunks
computeParityChunks' mparity [origChunk] = [origChunk]
computeParityChunks' mparity origChunks  =
  case fromChunks origChunks of
    Left  err         -> error err
    Right (meta,ibss) -> case unsafePerformIO (unsafeEncodeIOList ecp $ map snd ibss) of
      Left  err         -> error $ decodeLeopardResult err
      Right bss         -> [ MkChunk meta (fromIntegral j) bs | (j,bs) <- zip [k..] bss ]
  where
    ecp = ECParams k (k+mparity)
    k   = length origChunks

--------------------------------------------------------------------------------
-- * EC decoding

data DecodeError
  = NotEnoughChunks Int              -- ^ how many more is required
  | InvalidChunks   String           -- ^ invalid and\/or incompatible set of chunks
  | LeopardError    LeopardResult    -- ^ Leopard reported an error
  | CannotParseMsg                   -- ^ can't parse the decoded bytestring
  deriving (Eq,Show)

data DecodedMessage = MkDecodedMessage
  { _msgSessionId  :: SessionId
  , _msgMessageIdx :: MsgIdx
  , _msgPayload    :: ByteString
  }
  deriving (Eq,Show)

----------------------------------------

{-# NOINLINE decodeFromChunks #-}
decodeFromChunks :: [Chunk] -> Either DecodeError DecodedMessage
decodeFromChunks []     = error "decodeFromChunks: fatal: empty input"
decodeFromChunks chunks =

  case fromChunks chunks of
    Left  err         -> Left $ InvalidChunks err
    Right (meta,ibss) -> handle meta ibss

  where

    handle :: ChunkMeta -> [(ChunkIdx,ByteString)] -> Either DecodeError DecodedMessage
    handle meta ibss
      | minIdx < 0        = Left $ InvalidChunks "negative chunk index"
      | maxIdx >= 2*ecK   = Left $ InvalidChunks "too big chunk index (max is 2*K because of Leopard)"
      | repeated_idxs     = Left $ InvalidChunks "repeated chunk indicies"
      | haveCnt < ecK_int = Left $ NotEnoughChunks (ecK_int - haveCnt)
      | otherwise         = case unsafePerformIO (unsafeDecodeIO ecp mbArr) of
          Left  leoRes -> Left (LeopardError leoRes)
          Right arr    -> case parseMsgPayload (B.concat (elems arr)) of
            Nothing      -> Left  $ CannotParseMsg
            Just bs      -> Right $ MkDecodedMessage sessionId msgIdx bs

      where
        MkChunkMeta sessionId msgIdx ecK = meta
        ecK_int  = fromIntegral ecK                 :: Int
        ecp      = deterministicECParams ecK_int    :: ECParams
        ecM_int  = _ecM ecp                         :: Int
        ecN_int  = _ecN ecp                         :: Int
        haveIdxs = map fst ibss                     :: [ChunkIdx]
        haveCnt  = length  haveIdxs                 :: Int
        minIdx   = minimum haveIdxs                 :: ChunkIdx
        maxIdx   = maximum haveIdxs                 :: ChunkIdx
        -- maxIdxI  = fromIntegral maxIdx              :: Int
        -- ecN_int  = maxIdxI + 1                      :: Int
        repeated_idxs = nubOrd haveIdxs /= sort haveIdxs
        mbArr    = listArray (0,ecN_int-1) (map fun' [0..ecN_int-1])

        fun' :: Int -> Maybe ByteString
        fun' j = fun (fromIntegral j)

        fun :: ChunkIdx -> Maybe ByteString
        fun j    = case find (\(i,bs) -> i == j) ibss of
          Just (_,bs) -> Just bs
          Nothing     -> Nothing

--------------------------------------------------------------------------------
-- * Shared

-- | Chunks of the same message
fromChunks :: [Chunk] -> Either String (ChunkMeta,[(ChunkIdx,ByteString)])
fromChunks [] = error "fromChunks: fatal: empty input"
fromChunks chunks
  | any (/= chunkMeta) (map _chunkMeta chunks)  = Left $ "fromChunks: chunk metadata differs from each other"
  | otherwise                                   = Right (chunkMeta, map f chunks)
  where
    chunkMeta = _chunkMeta (head chunks)
    f (MkChunk meta idx payload) = (idx, payload)

--------------------------------------------------------------------------------