{-# LANGUAGE MagicHash #-}
module Basement.String.Encoding.UTF16
    ( UTF16(..)
    , UTF16_Invalid(..)
    ) where
import GHC.Prim
import GHC.Word
import GHC.Types
import Data.Bits
import qualified Prelude
import Basement.Compat.Base
import Basement.Types.OffsetSize
import Basement.Monad
import Basement.Numerical.Additive
import Basement.UArray
import Basement.UArray.Mutable (MUArray)
import Basement.MutableBuilder
import Basement.String.Encoding.Encoding
data UTF16_Invalid
    = InvalidContinuation
    | InvalidUnicode Char
  deriving (Show, Eq, Typeable)
instance Exception UTF16_Invalid
data UTF16 = UTF16
instance Encoding UTF16 where
    type Unit UTF16 = Word16
    type Error UTF16 = UTF16_Invalid
    encodingNext  _ = next
    encodingWrite _ = write
next :: (Offset Word16 -> Word16)
     -> Offset Word16
     -> Either UTF16_Invalid (Char, Offset Word16)
next getter off
    | h <  0xd800 = Right (toChar hh, off + Offset 1)
    | h >= 0xe000 = Right (toChar hh, off + Offset 1)
    | otherwise   = nextContinuation
  where
    h :: Word16
    !h@(W16# hh) = getter off
    toChar :: Word# -> Char
    toChar w = C# (chr# (word2Int# w))
    to32 :: Word16 -> Word32
    to32 (W16# w) = W32# w
    nextContinuation
        | cont >= 0xdc00 && cont < 0xe00 =
            let !(W32# w) = ((to32 h .&. 0x3ff) `shiftL` 10)
                         .|. (to32 cont .&. 0x3ff)
             in Right (toChar w, off + Offset 2)
        | otherwise = Left InvalidContinuation
      where
        cont :: Word16
        !cont = getter $ off + Offset 1
write :: (PrimMonad st, Monad st)
      => Char
      -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write c
    | c < toEnum 0xd800   = builderAppend $ w16 c
    | c > toEnum 0x10000  = let (w1, w2) = wHigh c in builderAppend w1 >> builderAppend w2
    | c > toEnum 0x10ffff = throw $ InvalidUnicode c
    | c >= toEnum 0xe000  = builderAppend $ w16 c
    | otherwise = throw $ InvalidUnicode c
  where
    w16 :: Char -> Word16
    w16 (C# ch) = W16# (int2Word# (ord# ch))
    to16 :: Word32 -> Word16
    to16 = Prelude.fromIntegral
    wHigh :: Char -> (Word16, Word16)
    wHigh (C# ch) =
        let v = W32# (minusWord# (int2Word# (ord# ch)) 0x10000##)
         in (0xdc00 .|. to16 (v `shiftR` 10), 0xd800 .|. to16 (v .&. 0x3ff))