如何解决线性化表达式语法树

我定义了一个这样的递归类型

(** Symbolic expression tree. *)
type t =
  | Param of int  (** a parameter carrying an integer tag *)
  | Add of t * t  (** the sum of two sub-expressions *)

它允许我写这样的符号表达式

(* Two parameters and the expression that adds them. *)
let x = Param 1
let y = Param 2
let z = Add (x, y)
# z;;
- : t = Add (Param 1,Param 2)

或者更复杂的东西

(* [fib n x y] applies [n] Fibonacci-style steps to the pair [(x, y)]; each
   step turns [(x, y)] into [(y, Add (x, y))]. The only stopping case is
   [n = 0], so [n] is expected to be non-negative. *)
let rec fib n x y =
  if n = 0 then (x, y)
  else fib (n - 1) y (Add (x, y))

(* The pair obtained after five steps starting from [x] and [y]. *)
let z5 = fib 5 x y
# z5;;
- : t * t =
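(Add (Add (Param 2,Add (Param 1,Param 2)),Add (Add (Param 1,Param 2),Add (Param 2,Add (Param 1,Param 2)))),
 Add (Add (Add (Param 1,Param 2),Add (Param 2,Add (Param 1,Param 2))),Add (Add (Param 2,Add (Param 1,Param 2)),Add (Add (Param 1,Param 2),Add (Param 2,Add (Param 1,Param 2))))))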

现在我想把这个递归的东西转换成一个线性形式，其中每个唯一的 Param 和 Add 都只出现一次——就像这样：

(Step 0,(Param 1))
(Step 1,(Param 2))
(Step 2,(Add (Step 0,Step 1)))
(Step 3,(Add (Step 1,Step 2)))
(Step 4,(Add (Step 2,Step 3)))
...

这个转换叫什么？以及如何实施？

解决方法

我通常称其为展平或展开，其他一些常用名称（有时过于具体）是 three address code 或 A-normal form。

使用变量赋值来表示步骤可能更自然，而不是使用 Step 概念，例如，

(** Variable name; variables are simply numbered. *)
type var = int

(** Integer constant. *)
type const = int

(** Expressions. Declared before [stmt] because a statement stores one. *)
type expr =
  | Cst of const        (** literal constant *)
  | Var of var          (** reference to a previously assigned variable *)
  | Add of expr * expr  (** sum of two expressions *)
  (* | ... further constructors as needed *)

(** [Set (v, e)] assigns the expression [e] to the variable [v]. *)
type stmt = Set of var * expr

这样，扁平化之后的 fib 看起来就像

Set (0,(Add (Cst 1,Cst 2)))
Set (1,(Add (Cst 2,Var 0)))
Set (2,(Add (Var 0,Var 1)))
...

使用类型为 int -> expr -> stmt list -> int * expr * stmt list 的递归函数，例如，（未经测试），

(* [flatten v expr stmts] rewrites [expr] so that every [Add] node is
   assigned to its own variable.

   [v] is the most recently allocated variable number, so [v + 1] is the
   next fresh one. [stmts] is the list of statements emitted so far.

   Returns [(v', e, stmts')]: [v'] is the last variable number allocated,
   [e] is a [Cst] or [Var] standing for [expr], and [stmts'] is [stmts]
   with the new [Set] statements consed on the front (newest first, so
   reverse it to obtain program order). *)
let rec flatten v expr stmts = match expr with
  | Cst _ | Var _ -> v, expr, stmts
  | Add (x, y) ->
    let v, x, stmts = flatten v x stmts in
    (* Flatten the right operand [y]; the state is threaded through [v]
       and [stmts] returned by the left operand. *)
    let v, y, stmts = flatten v y stmts in
    let fresh = v + 1 in
    fresh, Var fresh, Set (fresh, Add (x, y)) :: stmts

v 参数是变量名的新生成器（我们只用整数表示）。

此外，如果您想了解扁平化在现实世界中的 AST 是如何工作的，这里是 an example。

实现散列约束和扁平化

首先，我们需要定义 AST 的哈希约束表示，例如，

(** Expression whose sub-expressions are numbered nodes. *)
type exp =
  | Cst of int          (** integer constant *)
  | Ref of int          (** reference to a node by its number *)
  | Add of hexp * hexp  (** sum of two numbered nodes *)

(** An expression paired with its number. *)
and hexp = {
  ref : int;  (** number attached to the expression *)
  exp : exp;  (** the expression itself *)
}

我们用唯一的序数为每个散列约束后的表达式编号，使物理上相等的表达式具有相同的 ref 编号。这样我们才能把表达式存储在有序的数据结构中，例如映射和集合（OCaml 不允许我们按物理地址对值进行排序，这是有道理的：OCaml 使用分代 GC，值的地址会随着时间的推移而变化）。

现在，让我们编写 hashcons 函数来对表达式进行散列运算，例如，

(* Table from every expression seen so far to the number assigned to it. *)
let hashes = Hashtbl.create 100

(* [hashcons exp] pairs [exp] with its number. An expression already in
   [hashes] reuses its stored number; a new one is numbered with the table
   size plus one (so numbering starts at 1) and is recorded in the table. *)
let hashcons exp =
  let ref =
    match Hashtbl.find hashes exp with
    | known -> known
    | exception Not_found ->
      let fresh = succ (Hashtbl.length hashes) in
      Hashtbl.add hashes exp fresh;
      fresh
  in
  {ref; exp}

现在我们可以编写使用 hashconsed 表示的 fib 函数，

(* Same iteration as the plain [fib], except that every freshly built sum
   goes through [hashcons]. The only stopping case is [n = 0]. *)
let rec fib n x y =
  if n = 0 then (x, y)
  else fib (n - 1) y (hashcons (Add (x, y)))

(* Seed values, and the second component of the pair after 999 steps. *)
let x = hashcons (Cst 1)
let y = hashcons (Cst 2)
let z = snd (fib 999 x y)

没有大的变化。现在，让我们编写 flatten 函数。但在此之前，我们需要对程序进行一些表示，

(* A program is a map keyed by integers. *)
module Program = Map.Make(Int)

(* [set id x prog] binds [id] to [x] in [prog] unless [id] is already
   bound, in which case the existing binding is kept. *)
let set id x =
  Program.update id (fun existing ->
      match existing with
      | Some _ -> existing
      | None -> Some x)

(* [get ref prog] returns [Some] of a [Ref ref] node numbered [ref] when
   [ref] is bound in [prog], and [None] otherwise. The bound value itself is
   not returned, so a membership test is enough. The function is not
   recursive, hence no [rec]. *)
let get ref prog =
  if Program.mem ref prog then Some {ref; exp = Ref ref} else None

(* [node ++ prog] stores [node] under its own number (keeping any binding
   already present) and returns a [Ref] node for that number together with
   the resulting program. *)
let (++) ({ref; _} as node) prog =
  let handle = {ref; exp = Ref ref} in
  handle, set ref node prog

我们的程序是一个从引用号（它既像变量名，又像定义该变量的位置——我们的表示内置了 SSA 属性）到表达式的映射。最后，我们的 flatten 函数是，

(* [unify node exp prog] stores [exp] under [node]'s number (keeping any
   binding already present) and returns a [Ref] node for that number together
   with the resulting program. *)
let unify {ref; _} exp prog =
  let stored = {ref; exp} in
  let handle = {ref; exp = Ref ref} in
  handle, set ref stored prog

(* [flatten input prog] returns a node standing for [input] together with
   the program extended by whatever had to be stored.

   If [input]'s number is already bound in [prog], the [Ref] node from [get]
   is returned and [prog] is left unchanged. Otherwise constants and
   references are stored as they are, and a sum is stored after both
   operands have been flattened, left operand first. *)
let rec flatten input prog =
  match get input.ref prog with
  | Some known -> (known, prog)
  | None ->
    (match input.exp with
     | Add (lhs, rhs) ->
       let lhs, prog = flatten lhs prog in
       let rhs, prog = flatten rhs prog in
       unify input (Add (lhs, rhs)) prog
     | Cst _ | Ref _ -> input ++ prog)

它是线性的，因为当我们展平一个复杂的表达式时，我们使用 unify 将该表达式的展平形式存储在其 id 下，这样下次遇到相同的复杂表达式时，就可以通过整数键查找，轻松地从程序中取出它已经扁平化的版本。

输出是，

# let p = snd@@flatten z Program.empty
val p : hexp Program.t = <abstr>
# print_program p;;
#1 := 1
#2 := 2
#3 := #1 + #2
#4 := #2 + #3
#5 := #3 + #4
#6 := #4 + #5
<snip>
#1001 := #999 + #1000
- : unit = ()

其中 print_program 函数定义为，

(* [pp_exp ppf node] prints the expression held by [node] on [ppf]: a
   constant as its digits, a reference as [#n], and a sum as its two operands
   separated by [" + "]. *)
let rec pp_exp ppf node =
  match node.exp with
  | Cst n -> Format.fprintf ppf "%d" n
  | Ref r -> Format.fprintf ppf "#%d" r
  | Add (lhs, rhs) -> Format.fprintf ppf "%a + %a" pp_exp lhs pp_exp rhs

(* [print_program prog] writes one ["#id := expression"] line per binding of
   [prog] to standard output, in increasing order of [id]. *)
let print_program prog =
  Program.iter
    (fun id exp -> Format.printf "#%d := %a@\n" id pp_exp exp)
    prog

flatten 函数运行得非常快，但一如既往，这并不是没有代价的。fib 的哈希约束版本现在要慢得多，因为我们必须对每个表达式做哈希约束，在此过程中要查找哈希表，并对很大的值做结构比较。目前的 fib 函数并没有利用表达式的哈希约束表示，因此还可以写出更高效的 fib 函数。

线性化表达式语法树

如何解决线性化表达式语法树

解决方法

实现散列约束和扁平化

相关推荐