(*************************************************************************)
(*                                                                       *)
(*                     Projet      Formel                                *)
(*                                                                       *)
(*                            CAML                                       *)
(*                                                                       *)
(*************************************************************************)
(*                                                                       *)
(*                            Inria                                      *)
(*                      Domaine de Voluceau                              *)
(*                      78150  Rocquencourt                              *)
(*                            France                                     *)
(*                                                                       *)
(*************************************************************************)

(* LC_unify.ml	Unification modulo left commutativity                    *)
(*		Roberto Di Cosmo and Pierre Weis			 *)

(*\
\begin{caml_eval}
latex_set_pretty false;;
\end{caml_eval}

Next, we start by  having another, different look at the problem  of  deciding
equality in \MLisos, to  discover that it can usefully be considered a special
case of equational unification.

\*)

#standard arith true;;

module LC_unify;;

#set default grammar gtype:gtype;;

(*\
\subsection{Equality as Unification with Variable Renamings}
Since   order  and  name  of  quantified   type   variables   is  irrelevant
(Axioms~\ref{ax:05}  and  \ref{ax:06}),  we  can  consider  the  problem  of
deciding

$$\Sseq\  =  \SseqB$$

in   the   last   subtheory    consisting    of   Axioms~\ref{ax:prod-comm},
\ref{ax:prod-assoc}, \ref{ax:05}, \ref{ax:06} and  \Swap\ as  a special case
of unification of

$$  \PROD{(\usc{\Ta}{1}}{\PROD{\ldots}{\usc{\Ta}{n})}} = 
    \PROD{(\usc{\Tb}{1}}{\PROD{\ldots}{\usc{\Tb}{n})}},
$$

where we are not  allowed to substitute arbitrary types for the type variables
\Vec{\Xa} and \Vec{\Xb}, but just other type variables, with the constraint of
not  identifying  variables  that were originally different.  Essentially,  we
restrict to unifiers that are just  {\em  bound  variable renamings}.  We will
also call them in the following {\em consistent} variable renamings.

Again,  two  split  normal forms are equal iff for  some permutation $\sigma :
ARR{n}{n}$ their coordinates  $\Ta_i$  and  $\Tb_{\sigma(i)}$  can be  unified
modulo \Swap\ with a variable renaming.

Unification up to \Swap\  (left-commutativity  of \ARR{}{})  is decidable (see
\cite{KirchnerThEt}), so  we  can  perform the  necessary  unification  modulo
\Swap\  for all  permutations, and then  check  if there  exists a permutation
where unification succeeds with variable renamings.

\subsubsection{Step 2: Divide and Conquer}

Actually, since  all the  variables are distinct in  the different components,
the result  of  unification  on  $\Ta_i$  and  $\Tb_{\sigma(i)}$  for a  given
permutation $\sigma$ is completely independent  of the outcome of  unification
on the other coordinates: the variable renaming we are looking for is actually
made up of $n$ independent variable renamings (one for each coordinate), so we
can use a standard quadratic  test to check only the $n(n+1)\over 2$  relevant
permutations instead of trying all the $n!$ possible ones.

This is a  significant  cut-down on the number of  coordinates checking:  even
without  adopting  dynamic  programming  techniques,  we   can  see  that  the
complexity goes steeply  down  from  a  monstruous $m!n!S$ that corresponds to
trying equality  modulo  \Swap\  (of  cost S)  for  all  permutations  of  $m$
variables  and all  permutation  of $n$  coordinates`  to  an  average  (still
fearful,  but  much  lower)  $n^2({m\over n})!S$  that corresponds  to testing
equality  up  to \Swap\  for each relevant permutation of coordinates and each
permutation of the (average) $m\over n$ type variables in a coordinate.

But there is still room for improvements.

\subsection{Step 3: Dynamic Programming}  
We can  now try to attack also  the complexity of checking variable-renamings.
Instead of the naive approach consisting in, first, generation of all possible
variable  renaming,  and then checking  equality  up to \Swap, we  can use our
knowledge that the needed variable renaming will  have  to satisfy equality up
to  \Swap\  to significantly cut down the  number of renamings to generate and
test.

Actually, any variable occurring rightmost  in  \Ta\  cannot be moved  by left
commutativity,  and  must  be  renamed to  a  corresponding variable  in  \Tb\
occurring in the same position.  Any variable in rightmost position provides a
part of  the  renaming that we  look for,  and  rules  out  all  the  $(n-1)!$
renamings that do not agree.

For example, when trying to show equal

$$\ARR{\Ta}{\ARR{\Tb}{\Xa}}     =\ARR{\Ta'}{\ARR{\Tb'}{\Xb}},$$ 

we know  that \Xa\ must be  associated  with \Xb, so we need not try renamings
that dont do this.

In unification up  to  left  commutativity one takes this fact into account by
using  suitable  flat  normal forms~\cite{KirchnerThEt}, where all  permutable
subformulas  are flattened into  a list, and the only rightmost non permutable
subformula is  singled out.  Here we also keep track of the length of the list
to improve efficiency.

\*)

type Head_type == int * gtype;; (* record the length of premisses lists *)

type Flat_type = Fl of Head_type * Flat_type list;;

(*\ 

We bring a {\em split-normal-form} to flat normal form with a simple recursive
function

\*)
let rec flatten = function
    <<^x -> (^y -> ^z)>> -> 
        let nfx = flatten x and nfy = flatten y
        and (Fl ((lgth,exp), chain)) = flatten z
        in Fl ((lgth+2,exp), nfx::nfy::chain)
  | <<^x -> ^y>>       -> Fl ((1,y),[flatten x])
  | <<^y>>           -> Fl ((0,y),[]);;
(*\

The unification procedure scans this data structure using all the variables in
unmovable  positions  to  build partial renamings, and  stops  as  soon  as an
inconsistent variable renaming is reached  (for example, as  soon as the  same
variable is forced to be identified to more than one other variable).  Anyway,
when  such inconsistencies  are not encountered,  and  when  we find variables
whose  binding  is  not determined,  it is  necessary to examine  all possible
permutations of the flat premisses list,  and the associated renamings,  so we
need some code to produce permutations.

\*)
(* Standard code for computing permutations of a list *)

let rec perms = function
    [] -> [[]]
  | x -> list_it (function a -> function z1 ->
                   (list_it (function y -> function z2 -> (a::y)::z2)
                            (perms (except a x)) z1
                   )
                 ) x [];;
(*\

Our  algorithm  tries  to  adopt  as  much  as  possible  dynamic  programming
techniques.   We keep the current tentative  variable renaming, and we fail as
soon  as  it  is  made  inconsistent  by  variable  bindings  imposed  by  the
unification procedure.

\*)

type VarRenaming == (int * int) list;;

let var_renaming = ref ([]:VarRenaming);;

(*\

   A renaming becomes inconsistent as soon as  same variable gets bound to
   more  than  one other variable,  and  we  check  for this  event  while
   updating the variable renaming.  Notice that, morally, in the case {\tt
   (Some bind1, Some bind2)}  the test  {\tt == (bind1,var2)}  ought to be
   {\tt ==  (bind1,var2)  \&  ==  (bind2,var1)},  to insure  that  the new
   binding is correct, but we rely on  the correctness of  the preexisting
   variable renaming (where (var1,var2)  occurs \Iff\  (var2,var1) occurs)
   to be sure that {\tt == (bind1,var2)} \Iff\ {\tt == (bind2,var1)}.

\*)

let bound var = try (Some(assoc var !var_renaming)) with failure _ -> None;;

let rename_var (var1,var2) = 
    match bound var1, bound var2
    with Some bind1, Some bind2 -> 
           bind1 == var2 (* test consistency of new binding *)
      |  None, None ->
           var_renaming := (var1,var2)::(var2,var1)::!var_renaming; true
      |   _   -> false;;
(*\

   Then  we   start  to  build   our  unification  procedure  up  to  left
   commutativity on flat normal forms.   To decide if two flat type normal
   forms       {\tt      Fl((n1,mlt1),ml\_flat\_list1)}      and      {\tt
   Fl((n2,mlt2),ml\_flat\_list2)} are unifiable, we proceed as follows:

  \begin{itemize}

    \item Check if their flat lists have  the same length  and fail if it
    is not the case: \Swap\ does not change the length of flat lists.

    \item {\bf  Unify the heads},  that cannot  be  moved,  and  build  a
    partial renaming.

    \item {\bf Unify  the  flat  lists}  of  premisses starting  from the
    partial renaming built during the unification of the heads.

  \end{itemize}

   In case  of failure,  we restore the partial variable  renaming  to the
   state  before  the   call  to  unification,   in  order  to  allow  the
   backtracking  that  is necessary in order to perform unification of the
   premisses lists.

\*)

exception UNIFYLIST;;

let rec unify_LC (Fl((n1,mlt1),ml_flat_list1)) (Fl((n2,mlt2),ml_flat_list2)) =
    if not (n1 == n2)
    then false    (* Flat types of different length are not unifiable  *)
    else let saved_var_renaming = !var_renaming       (* save partial renaming  for backtracking *)
         in if unify_head (mlt1,mlt2) then            (* set  up  the correct  associative links *)
            unify_list(ml_flat_list1, ml_flat_list2)
            else (var_renaming := saved_var_renaming; false) (* restore partial renaming *)
(*\

    {\bf Unification  of  heads:} since the heads  of  flat normal forms must
    match  exactly,  we   perform standard  unification   of   the heads {\tt
    mlt1}  and  {\tt mlt2}:   this process can set   up some new bindings  in
    the  partial  variable renaming,  helping in cutting  down  the otherwise
    factorial search tree.

\*)
and unify_head Heads = (* unify_head is a predicate with side effects on var_renaming *)
  (exception unifyHead in
   (try unif_rec Heads;true with unifyHead -> false)
    where rec unif_rec = function
      <<'^r>>,<<'^s>> -> if rename_var (r,s) then () else raise unifyHead
    |      _ ,<<'^x>> -> raise unifyHead
    | <<'^x>>, _      -> raise unifyHead
    | <<(^l1) ^s1>>,<<(^l2) ^s2>>  -> if s1==s2 
                                      then do_list unif_rec (combine(l1,l2)) 
                                      else raise unifyHead
   )
(*\

    {\bf Unification of the  flat lists}: on the flat  lists of premisses
    {\tt ml\_flat\_list1} and {\tt ml\_flat\_list2} the Axiom  \Swap\ can
    act freely, so we must essentially try  all the  possibilities.  This
    amounts    to   building   all   possible    permutations   of   {\tt
    ml\_flat\_list1} and  trying  for each of them to unify componentwise
    up   to   left   commutativity   the   resulting   list   with   {\tt
    ml\_flat\_list2}.   We  use the exception {\tt found} to  succeed  as
    soon as possible.

% Again, we fail as soon as possible, since only the variable renamings that
% are {\em consistent} are built along the way.

\*)
(* unify_list is a predicate with side effects on var_renaming *)

and (unify_list : Flat_type list * Flat_type list -> bool) (a,b) =
    exception found in
     (try do_list
        (function x -> 
          let saved_var_renaming = !var_renaming in    (* save   renaming for backtracking  *)
          unify_map (x, b);                            (* try to unify componentwise  ...   *)
          var_renaming := saved_var_renaming; ()       (* restore renaming for backtracking *)
         ) (perms a); false
     with found -> true)
     where rec unify_map = function
          ([],[]) -> raise found
        | (a::resta,b::restb) -> if unify_LC a b
                               then unify_map (resta,restb) else false
        | _ -> raise UNIFYLIST (* incorrect state: should never be executed *)

(*\

Finally, our unification function will return a boolean to report unifiability
and the complete variable renaming associated in case of success

\*)

in
let unify_Left_Commutative flat1 flat2 =
    var_renaming:=[];
    let unifiable = unify_LC flat1 flat2 in
    unifiable,!var_renaming;;

end module with
 abstype Flat_type;
 value unify_Left_Commutative (* : Flat_type -> Flat_type -> bool * (int * int) list *)
 and   flatten                (* : gtype -> Flat_type *)
;; 
