



-- | A dataset tagged, at the value level, with the pipeline stage it is in.
--
-- Every constructor carries the same three fields, so the selectors 'x', 'y'
-- and 'name' are defined for all of them.
data Dataset
  = Initial     { x :: Vector Double, y :: Vector Double, name :: String }
  | Cleaned     { x :: Vector Double, y :: Vector Double, name :: String }
  | Transformed { x :: Vector Double, y :: Vector Double, name :: String }




-- | Wrap two vectors and a name in the 'Initial' constructor.
createDataset :: Vector Double -> Vector Double -> String -> Dataset
createDataset xs ys label = Initial { x = xs, y = ys, name = label }

-- | Turn an 'Initial' dataset into a 'Cleaned' one.
--
-- The vectors are passed through @outlierRemovalFunction@ and the name gets
-- the suffix @"_outliersRemoved"@. Any other stage is rejected with 'error'.
removeOutliers :: Dataset -> Dataset
removeOutliers (Initial xs ys n) =
  let (xs', ys') = outlierRemovalFunction xs ys
  in Cleaned xs' ys' (n ++ "_outliersRemoved")
-- Each equation must repeat the function name; bare patterns do not parse.
removeOutliers (Cleaned {}) = error "Already been cleaned"
-- No equation for a "Scaled" stage: this 'Dataset' type has no such
-- constructor, so matching on it would not compile.
removeOutliers (Transformed {}) = error "Transformed data should have already been cleaned"

-- | Turn a 'Cleaned' dataset into a 'Transformed' one.
--
-- The vectors are passed through @logTransformFunction@ and the name gets
-- the suffix @"_logTransformed"@. Any other stage is rejected with 'error'.
logTransform :: Dataset -> Dataset
logTransform (Initial {}) = error "Need to clean first"
logTransform (Cleaned xs ys n) =
  let (xs', ys') = logTransformFunction xs ys
  in Transformed xs' ys' (n ++ "_logTransformed")
-- Explicit equation so the function covers every constructor instead of
-- failing with an anonymous pattern-match error.
logTransform (Transformed {}) = error "Already been transformed"



我考虑过为管道中的每个阶段使用单独的类型,然后将 “dataset” 接口实现为类型类,例如:

-- Build the dataset first, then run the stages right to left. createDataset
-- takes three arguments, so it cannot be the innermost link of a (.) chain.
(logTransform . removeOutliers) (createDataset init_x init_y "ourData")


-- | Read-only interface shared by every pipeline-stage type.
class Dataset d where
  -- | The @x@ vector stored in the dataset.
  x :: d -> Vector Double
  -- | The @y@ vector stored in the dataset.
  y :: d -> Vector Double
  -- | The dataset's name.
  name :: d -> String

-- | Stage type holding the raw data: two vectors and a name.
--
-- Constructor fields must be types; the lower-case @x y name@ of the sketch
-- would be read as unbound type variables.
data Initial = Initial (Vector Double) (Vector Double) String

-- | Accessors project the corresponding constructor field.
instance Dataset Initial where
  x    (Initial xs _ _) = xs
  y    (Initial _ ys _) = ys
  name (Initial _ _ n)  = n

-- | Stage wrapper marking the wrapped dataset as cleaned.
data Cleaned a = Cleaned a

-- | Accessors delegate to the wrapped dataset.
--
-- The instance head has to be @Cleaned a@ (a type), not the bare type
-- constructor @Cleaned@, and needs the wrapped type to be a 'Dataset' too.
instance Dataset a => Dataset (Cleaned a) where
  x    (Cleaned inner) = x inner
  y    (Cleaned inner) = y inner
  name (Cleaned inner) = name inner

-- | Stage wrapper marking the wrapped dataset as transformed.
data Transformed a = Transformed a

-- | Accessors delegate to the wrapped dataset.
--
-- The instance head has to be @Transformed a@ (a type), not the bare type
-- constructor @Transformed@, and needs the wrapped type to be a 'Dataset' too.
instance Dataset a => Dataset (Transformed a) where
  x    (Transformed inner) = x inner
  y    (Transformed inner) = y inner
  name (Transformed inner) = name inner


但是,看来我只是把问题“上移了一层”。我现在要处理类型变量和所有这些嵌套类型:

removeOutliers :: (Dataset a) => a -> Cleaned a
removeOutliers = ...

logTransform :: (Dataset a) => Cleaned a -> Transformed (Cleaned a)
logTransform = ...

现在,我不再需要为每个可能的管道步骤组合都提供一个 Dataset 构造函数,而是需要为每种类型的组合创建一个 Dataset 实例。






-- | Uninhabited tag types; they are only ever used as the type argument of
-- 'Dataset'.
data Initial
data Cleaned
data Scaled

-- | Two vectors plus a name. The type parameter @a@ is phantom: no field
-- mentions it, so it only exists at the type level.
data Dataset a = Dataset
  { x    :: Vector Double
  , y    :: Vector Double
  , name :: String
  }

-- | Build a dataset whose phantom tag is 'Initial'.
createDataset :: Vector Double -> Vector Double -> String -> Dataset Initial
createDataset xs ys label = Dataset { x = xs, y = ys, name = label }

-- | Run @clean@ over both vectors, append @"_clean"@ to the name, and retag
-- the result from 'Initial' to 'Cleaned'.
removeOutliers :: Dataset Initial -> Dataset Cleaned
removeOutliers (Dataset xs ys label) =
  Dataset cleanedXs cleanedYs (label ++ "_clean")
  where
    (cleanedXs, cleanedYs) = clean xs ys


{-# LANGUAGE DataKinds,KindSignatures #-}

-- | Pipeline stages. With @DataKinds@ the constructors are also available as
-- types of kind 'State'.
data State = Initial | Cleaned | Scaled

-- | Two vectors plus a name, indexed by a phantom 'State'.
--
-- The kind annotation restricts the tag to the promoted 'State' constructors.
-- The @y@ field is kept so the record has the same fields as the
-- unrestricted @Dataset a@ version.
data Dataset (a :: State) = Dataset
  { x    :: Vector Double
  , y    :: Vector Double
  , name :: String
  }





{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}


-- | Processing steps a dataset can have gone through. Used promoted (via
-- @DataKinds@) as elements of a type-level list.
data Process
  = Cleaned
  | Transformed
  | Scaled
  | Inspected
  | Analyzed


-- | Two lists of doubles plus a name, indexed by a type-level list of
-- 'Process' steps. @pipeline@ is phantom: no field mentions it.
data Dataset (pipeline :: [Process]) = Dataset
  { x    :: [Double]
  , y    :: [Double]
  , name :: String
  }



-- | Type-level boolean "or", decided by the left operand.
type family (a :: Bool) || (b :: Bool) :: Bool where
  'True  || b = 'True
  'False || b = b

-- | Type-level test: do the elements of @xs@ occur in @ys@ in the same
-- relative order (not necessarily adjacent)?
--
-- Equations are tried top to bottom. When both heads agree, the head of @ys@
-- may either be used as the match or skipped, hence the '||'.
type family Subseq (xs :: [k]) (ys :: [k]) :: Bool where
  Subseq '[]       ys        = 'True
  Subseq unmatched '[]       = 'False
  Subseq (x ': xs) (x ': ys) = Subseq xs ys || Subseq (x ': xs) ys
  Subseq xs        (y ': ys) = Subseq xs ys


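λ> :kind! Subseq '[Inspected,Transformed] '[Analyzed,Inspected,Transformed,Cleaned]
Subseq '[Inspected,Transformed] '[Analyzed,Inspected,Transformed,Cleaned] :: Bool
= 'True
λ> :kind! Subseq '[Inspected,Transformed] '[Analyzed,Transformed,Cleaned]
Subseq '[Inspected,Transformed] '[Analyzed,Transformed,Cleaned] :: Bool
= 'False
λ> :kind! Subseq '[Inspected,Transformed] '[Transformed,Inspected]
Subseq '[Inspected,Transformed] '[Transformed,Inspected] :: Bool
= 'False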


-- | Example step that pushes 'Scaled' onto the front of the pipeline list.
--
-- Only accepted when @[Cleaned, Transformed]@ is a subsequence of the
-- incoming pipeline. Remember: the pipeline type is in reverse order.
-- The body is a placeholder; only the signature matters here.
foo1
  :: (Subseq '[ 'Cleaned, 'Transformed ] pipeline ~ 'True)
  => Dataset pipeline
  -> Dataset ('Scaled ': pipeline)
foo1 = undefined


-- | Type-level membership test: does @x@ occur anywhere in the list?
--
-- Equations are tried top to bottom, so the last one only applies once the
-- head is known to differ from @x@.
type family Member (x :: k) (xs :: [k]) :: Bool where
  Member x '[]         = 'False
  Member x (x ': rest) = 'True
  Member x (y ': rest) = Member x rest


-- | Like a step requiring @[Cleaned, Transformed]@ as a subsequence of the
-- pipeline, but additionally rejects pipelines that already contain 'Scaled'.
-- The body is a placeholder; only the signature matters here.
foo2
  :: ( Subseq '[ 'Cleaned, 'Transformed ] pipeline ~ 'True
     , Member 'Scaled pipeline ~ 'False
     )
  => Dataset pipeline
  -> Dataset ('Scaled ': pipeline)
foo2 = undefined


> foo2 (Dataset [] [] "x" :: Dataset '[Transformed])
... Couldn't match type ‘'False’ with ‘'True’ ...
> foo2 (Dataset [] [] "x" :: Dataset '[Cleaned,Scaled,Transformed])
... Couldn't match type ‘'False’ with ‘'True’ ...
> foo2 (Dataset [] [] "x" :: Dataset '[Cleaned,Transformed])
-- typechecks okay
foo2 (Dataset [] [] "x" :: Dataset '[Cleaned,Transformed])
  :: Dataset '[ 'Scaled,'Cleaned,'Transformed]


import Data.Kind
import GHC.TypeLits

-- | Constraint that holds when @procs@ is a subsequence of @pipeline@.
-- Computes the 'Subseq' answer and hands it to 'Require1'.
type Require procs pipeline =
  Require1 (Subseq procs pipeline) procs pipeline

-- | Turns the boolean answer into a constraint: trivially satisfied on
-- 'True, a custom compile-time error naming both lists on 'False.
type family Require1 (b :: Bool) procs pipeline :: Constraint where
  Require1 'True  procs pipeline = ()
  Require1 'False procs pipeline =
    TypeError
      (     'Text "The pipeline "
      ':<>: 'ShowType pipeline
      ':<>: 'Text " lacks required processing "
      ':<>: 'ShowType procs
      )
-- | Constraint that holds when @proc@ does not occur in @pipeline@.
-- Computes the 'Member' answer and hands it to 'Forbid1'.
type Forbid proc pipeline =
  Forbid1 (Member proc pipeline) proc pipeline

-- | Turns the boolean answer into a constraint: trivially satisfied on
-- 'False, a custom compile-time error naming the offending step on 'True.
type family Forbid1 (b :: Bool) proc pipeline :: Constraint where
  Forbid1 'False proc pipeline = ()
  Forbid1 'True  proc pipeline =
    TypeError
      (     'Text "The pipeline "
      ':<>: 'ShowType pipeline
      ':<>: 'Text " must not include "
      ':<>: 'ShowType proc
      )

-- | Same requirements as a step needing @[Cleaned, Transformed]@ and no prior
-- 'Scaled', expressed through 'Require' and 'Forbid' so that violations are
-- reported with the custom error messages.
-- The body is a placeholder; only the signature matters here.
foo3
  :: ( Require '[ 'Cleaned, 'Transformed ] pipeline
     , Forbid 'Scaled pipeline
     )
  => Dataset pipeline
  -> Dataset ('Scaled ': pipeline)
foo3 = undefined


> foo3 (Dataset [] [] "x" :: Dataset '[Transformed])
...The pipeline '[ 'Transformed] lacks required processing '[ 'Cleaned,'Transformed]...
> foo3 (Dataset [] [] "x" :: Dataset '[Cleaned,Scaled,Transformed])
...The pipeline '[ 'Cleaned,'Scaled,'Transformed] must not include 'Scaled...
> foo3 (Dataset [] [] "x" :: Dataset '[Cleaned,Transformed])
-- typechecks okay
foo3 (Dataset [] [] "x" :: Dataset '[Cleaned,Transformed])
  :: Dataset '[ 'Scaled,'Cleaned,'Transformed]


{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}

import Data.Kind
import GHC.TypeLits

-- | Processing steps that can appear in a dataset's type-level pipeline.
data Process
  = Cleaned
  | Transformed
  | Scaled
  | Inspected
  | Analyzed

-- | Two lists of doubles plus a name, indexed by a type-level list of
-- 'Process' steps. @pipeline@ is phantom: no field mentions it.
--
-- The record needs all three fields: the usage examples construct values as
-- @Dataset [] [] "x"@.
data Dataset (pipeline :: [Process]) = Dataset
  { x    :: [Double]
  , y    :: [Double]
  , name :: String
  }

-- | Boolean disjunction on promoted 'Bool's; the result is fixed by the
-- left operand alone when it is 'True.
type family (lhs :: Bool) || (rhs :: Bool) :: Bool where
  'True  || rhs = 'True
  'False || rhs = rhs

-- | 'True when the first list can be obtained from the second by deleting
-- elements (order preserved), 'False otherwise.
--
-- On equal heads both options are explored: consume the head as a match, or
-- skip it and keep looking further down the second list.
type family Subseq (needle :: [k]) (haystack :: [k]) :: Bool where
  Subseq '[]       hs        = 'True
  Subseq leftover  '[]       = 'False
  Subseq (n ': ns) (n ': hs) = Subseq ns hs || Subseq (n ': ns) hs
  Subseq ns        (h ': hs) = Subseq ns hs

-- | 'True when the element occurs in the list, 'False otherwise.
type family Member (e :: k) (es :: [k]) :: Bool where
  Member e '[]          = 'False
  Member e (e ': rest)  = 'True
  Member e (o ': rest)  = Member e rest

-- | @Require procs pipeline@ is satisfied when 'Subseq' reduces to 'True for
-- the two lists; the decision itself is delegated to 'Require1'.
type Require procs pipeline =
  Require1 (Subseq procs pipeline) procs pipeline

-- | Maps the 'Subseq' result to a constraint: the unit constraint for 'True,
-- a 'TypeError' mentioning the pipeline and the missing steps for 'False.
type family Require1 (ok :: Bool) procs pipeline :: Constraint where
  Require1 'True  procs pipeline = ()
  Require1 'False procs pipeline =
    TypeError
      (     'Text "The pipeline "
      ':<>: 'ShowType pipeline
      ':<>: 'Text " lacks required processing "
      ':<>: 'ShowType procs
      )
-- | @Forbid proc pipeline@ is satisfied when 'Member' reduces to 'False for
-- the step and the list; the decision itself is delegated to 'Forbid1'.
type Forbid proc pipeline =
  Forbid1 (Member proc pipeline) proc pipeline

-- | Maps the 'Member' result to a constraint: the unit constraint for 'False,
-- a 'TypeError' mentioning the pipeline and the forbidden step for 'True.
type family Forbid1 (present :: Bool) proc pipeline :: Constraint where
  Forbid1 'False proc pipeline = ()
  Forbid1 'True  proc pipeline =
    TypeError
      (     'Text "The pipeline "
      ':<>: 'ShowType pipeline
      ':<>: 'Text " must not include "
      ':<>: 'ShowType proc
      )

-- | Example step: accepts a dataset whose pipeline has @[Cleaned,
-- Transformed]@ as a subsequence and pushes 'Scaled' onto the front.
-- The body is a placeholder; only the signature matters here.
foo1
  :: (Subseq '[ 'Cleaned, 'Transformed ] pipeline ~ 'True)
  => Dataset pipeline
  -> Dataset ('Scaled ': pipeline)
foo1 = undefined

-- | Example step: requires @[Cleaned, Transformed]@ as a subsequence of the
-- pipeline and rejects pipelines that already contain 'Scaled', then pushes
-- 'Scaled' onto the front.
-- The body is a placeholder; only the signature matters here.
foo2 :: ( Subseq [Cleaned,Transformed] pipeline ~ True
        , Member Scaled pipeline ~ False)
     => Dataset pipeline -> Dataset (Scaled : pipeline)
foo2 = undefined

-- | Example step with the same requirements as 'foo2', stated through
-- 'Require' and 'Forbid' so that violations produce the custom error text.
-- The body is a placeholder; only the signature matters here.
foo3 :: ( Require [Cleaned,Transformed] pipeline
        , Forbid Scaled pipeline)
     => Dataset pipeline -> Dataset (Scaled : pipeline)
foo3 = undefined

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。