Require Import Reals Coq.Lists.List Coquelicot.Series Coquelicot.Hierarchy Coquelicot.SF_seq.
Require Import pmf_monad Permutation fixed_point Finite LibUtils.
Require Import Sums Coq.Reals.ROrderedType.
Require Import micromega.Lra.
Require Import Coq.Logic.FunctionalExtensionality.
Require Import Equivalence RelationClasses EquivDec Morphisms.
Require Import Streams StreamAdd.
Require Import Utils.
Import ListNotations.
Set Bullet Behavior "
Strict Subproofs".
Import ListNotations.
Section extra.
Open Scope list_scope.
Lemma abs_convg_implies_convg :
forall (
a :
nat ->
R),
ex_series (
fun n =>
Rabs(
a n)) ->
ex_series a.
Proof.
End extra.
Class NonEmpty (
A :
Type) :=
ex :
A.
Section Rmax_list.
Open Scope list_scope.
Open Scope R_scope.
Import ListNotations.
Fixpoint Rmax_list (
l :
list R) :
R :=
match l with
|
nil => 0
|
a ::
nil =>
a
|
a ::
l1 =>
Rmax a (
Rmax_list l1)
end.
Lemma Rmax_spec_map {
A} (
l :
list A) (
f :
A ->
R) :
forall a:
A,
In a l ->
f a <=
Rmax_list (
List.map f l).
Proof.
intros a Ha.
induction l.
-
simpl ;
firstorder.
-
simpl in *.
intuition.
+
rewrite H.
destruct l.
simpl.
right;
reflexivity.
simpl.
apply Rmax_l.
+
destruct l.
simpl in *;
firstorder.
simpl in *.
eapply Rle_trans ;
eauto.
apply Rmax_r.
Qed.
Lemma Rmax_spec {
l :
list R} :
forall a:
R,
In a l ->
a <=
Rmax_list l.
Proof.
intros a Ha.
induction l.
-
simpl ;
firstorder.
-
simpl in *.
intuition.
+
rewrite H.
destruct l.
simpl.
right;
reflexivity.
simpl.
apply Rmax_l.
+
destruct l.
simpl in *;
firstorder.
simpl in *.
eapply Rle_trans ;
eauto.
apply Rmax_r.
Qed.
Lemma Rmax_list_map_const_mul {
A} (
l :
list A) (
f :
A ->
R) {
r :
R} (
hr : 0 <=
r) :
Rmax_list (
List.map (
fun a =>
r*
f(
a))
l) =
r*(
Rmax_list (
List.map f l)).
Proof.
induction l.
-
simpl ;
lra.
-
simpl.
rewrite IHl.
rewrite RmaxRmult ;
trivial.
destruct l.
+
simpl ;
reflexivity.
+
simpl in *.
f_equal ;
trivial.
Qed.
Lemma Rmax_list_const_add (
l :
list R) (
d :
R) :
Rmax_list (
List.map (
fun x =>
x +
d)
l) =
if (
l <> [])
then ((
Rmax_list l) +
d)
else 0.
Proof.
induction l.
- simpl ; reflexivity.
- simpl in *.
destruct l.
+ simpl ; reflexivity.
+ simpl in * ; rewrite IHl.
now rewrite Rcomplements.Rplus_max_distr_r.
Qed.
Lemma Rmax_list_zero {
A} (
l :
list A) :
Rmax_list (
List.map (
fun x => 0)
l) = 0.
Proof.
induction l.
--
simpl ;
reflexivity.
--
simpl in *.
rewrite IHl.
replace (
Rmax 0 0)
with 0.
destruct l ; [
simpl ;
reflexivity |
simpl ;
reflexivity] .
symmetry.
apply Rmax_left ;
lra.
Qed.
Lemma Rmax_list_ge (
l :
list R) (
r :
R) :
forall x,
In x l ->
r <=
x ->
r <=
Rmax_list l.
Proof.
Lemma Rmax_list_le (
l :
list R) (
r :
R) :
Rmax_list l <=
r ->
forall x,
In x l ->
x <=
r.
Proof.
Lemma Rmax_list_In (
l :
list R):
([] <>
l) ->
In (
Rmax_list l)
l.
Proof.
induction l.
-
simpl ;
firstorder.
-
intros H.
simpl in *.
destruct l.
--
now left.
--
assert ([] <>
r ::
l)
by apply nil_cons.
specialize (
IHl H0) ;
clear H0.
destruct (
Rle_dec a (
Rmax_list (
r ::
l))).
++
rewrite Rmax_right.
now right ;
assumption.
assumption.
++
rewrite Rmax_left.
now left.
left ;
apply ROrder.not_ge_lt ;
assumption.
Qed.
Lemma Rmax_list_lub (
l :
list R) (
r :
R):
([] <>
l) -> (
forall x,
In x l ->
x <=
r) ->
Rmax_list l <=
r.
Proof.
Lemma Rmax_list_le_iff {
l :
list R} (
hl : [] <>
l) (
r :
R):
Rmax_list l <=
r <-> (
forall x,
In x l ->
x <=
r) .
Proof.
Lemma Rmax_list_lt_iff {
l :
list R} (
hl : [] <>
l) (
r :
R):
Rmax_list l <
r <-> (
forall x,
In x l ->
x <
r) .
Proof.
Definition Rmax_list_incl l1 l2 :
nil <>
l1 ->
incl l1 l2 ->
Rmax_list l1 <=
Rmax_list l2.
Proof.
Global Instance Rmax_list_equivlist :
Proper (
equivlist ==>
eq)
Rmax_list.
Proof.
unfold Proper,
respectful;
intros x y equivs.
destruct x.
-
symmetry in equivs.
apply equivlist_nil in equivs.
subst;
simpl;
trivial.
-
destruct y.
+
apply equivlist_nil in equivs.
discriminate.
+
apply equivlist_incls in equivs.
destruct equivs.
generalize (
Rmax_list_incl (
r::
x) (
r0::
y));
intros HH1.
generalize (
Rmax_list_incl (
r0::
y) (
r::
x));
intros HH2.
cut_to HH1;
trivial;
try discriminate.
cut_to HH2;
trivial;
try discriminate.
lra.
Qed.
Lemma Rmax_list_sum {
A B} {
la :
list A} (
lb :
list B) (
f :
A ->
B ->
R) (
Hla : [] <>
la):
Rmax_list (
List.map (
fun a =>
list_sum (
List.map (
f a)
lb))
la) <=
list_sum (
List.map (
fun b =>
Rmax_list (
List.map (
fun a =>
f a b)
la))
lb).
Proof.
Lemma Rmax_list_cons_cons (
l :
list R) (
a b :
R) :
Rmax_list (
a ::
b ::
l) =
Rmax a (
Rmax_list (
b ::
l)).
Proof.
constructor.
Qed.
Lemma Rmax_list_Rmax_swap (
l :
list R) (
a b :
R) :
Rmax a (
Rmax_list (
b ::
l)) =
Rmax b (
Rmax_list (
a ::
l)).
Proof.
Lemma Rmax_list_cons (
x0 :
R) (
l1 l2 :
list R) :
Permutation l1 l2 -> (
Rmax_list l1 =
Rmax_list l2) ->
Rmax_list (
x0 ::
l1) =
Rmax_list(
x0 ::
l2).
Proof.
intros Hpl Hrl.
case_eq l1.
*
intro Hl.
rewrite Hl in Hpl.
set (
Permutation_nil Hpl).
now rewrite e.
*
case_eq l2.
++
intro Hl2.
rewrite Hl2 in Hpl.
symmetry in Hpl.
set (
Permutation_nil Hpl).
now rewrite e.
++
intros r l H r0 l0 H0.
rewrite <-
H0, <-
H.
simpl ;
rewrite Hrl.
now rewrite H0,
H.
Qed.
Lemma Rmax_list_cons_swap (
x0 y0 :
R) (
l1 l2 :
list R) :
Permutation l1 l2 -> (
Rmax_list l1 =
Rmax_list l2) ->
Rmax_list (
x0 ::
y0 ::
l1) =
Rmax_list(
y0 ::
x0 ::
l2).
Proof.
Global Instance Rmax_list_Proper :
Proper (@
Permutation R ++>
eq)
Rmax_list.
Proof.
Definition Rmax_list_map {
A} (
l :
list A) (
f :
A ->
R) :=
Rmax_list (
List.map f l).
Declare Scope rmax_scope.
Notation "
Max_{
l } (
f )" := (
Rmax_list (
List.map f l)) (
at level 50) :
rmax_scope.
Open Scope rmax_scope.
Delimit Scope rmax_scope with rmax.
Lemma Rmax_list_map_exist {
A} (
f :
A ->
R) (
l :
list A) :
[] <>
l ->
exists a:
A,
In a l /\
f a =
Max_{
l}(
f).
Proof.
Lemma exists_in_strengthen_dec {
A} (
P:
A->
Prop)
l (
dec:
forall x, {
P x} + {~
P x})
(
ex:
exists x,
In x l /\
P x) : {
x |
In x l /\
P x}.
Proof.
induction l;
simpl.
-
elimtype False.
destruct ex ;
intuition.
-
destruct (
dec a).
+
exists a ;
eauto.
+
destruct IHl as [
x [
inx px]].
*
destruct ex as [
x [
inx px]].
destruct inx.
--
congruence.
--
eauto.
*
eauto.
Qed.
Lemma Rmax_list_map_exist_sig {
A} (
f :
A ->
R) {
l :
list A} :
[] <>
l -> {
a:
A |
In a l /\
f a =
Max_{
l}(
f)}.
Proof.
Definition argmax {
A} {
l :
list A} (
hl : [] <>
l)(
f :
A ->
R) :
A :=
proj1_sig (
Rmax_list_map_exist_sig f hl).
Lemma argmax_is_max {
A} {
l :
list A} (
hl : [] <>
l) (
f :
A->
R) :
f (
argmax hl f) =
Max_{
l}(
f).
Proof.
Lemma argmax_in_list {
A} {
l :
list A} (
hl : [] <>
l) (
f :
A ->
R):
In (
argmax hl f)
l.
Proof.
Global Instance Rmax_eq_Proper {
A} {
l :
list A} (
hl : [] <>
l) :
Proper (
pointwise_relation _ eq ++>
eq) (@
Rmax_list_map A l).
Proof.
Lemma Rmax_list_prod_le {
A B} (
f :
A ->
B ->
R) {
la :
list A} {
lb :
list B}
(
Hla : [] <>
la) (
Hlb : [] <>
lb) :
Max_{
la}(
fun a =>
Max_{
lb} (
fun b =>
f a b)) =
Max_{
list_prod la lb} (
fun ab =>
f (
fst ab) (
snd ab)).
Proof.
Lemma Rmax_list_prod_le' {
A B} (
f :
A ->
B ->
R) {
la :
list A} {
lb :
list B}
(
Hla : [] <>
la) (
Hlb : [] <>
lb) :
Max_{
lb}(
fun b =>
Max_{
la} (
fun a =>
f a b)) =
Max_{
list_prod la lb} (
fun ab =>
f (
fst ab) (
snd ab)).
Proof.
Lemma Rmax_list_map_comm {
A B} (
f :
A ->
B ->
R) {
la :
list A} {
lb :
list B}
(
Hla : [] <>
la) (
Hlb : [] <>
lb) :
Max_{
la}(
fun a =>
Max_{
lb} (
fun b =>
f a b)) =
Max_{
lb}(
fun b =>
Max_{
la} (
fun a =>
f a b)) .
Proof.
etransitivity; [|
symmetry].
-
apply Rmax_list_prod_le ;
trivial.
-
apply Rmax_list_prod_le';
trivial.
Qed.
Lemma Rmax_list_prod_combine {
A B} (
f :
A ->
B ->
R) {
la :
list A} {
lb :
list B}
(
Hla : [] <>
la) (
Hlb : [] <>
lb) :
equivlist (
list_prod la lb) (
combine la lb) ->
Max_{
la}(
fun a =>
Max_{
lb} (
fun b =>
f a b)) =
Max_{
combine la lb} (
fun ab =>
f (
fst ab) (
snd ab)).
Proof.
Lemma Rmax_list_minus_le {
A} {
B :
A ->
Type} (
f g :
forall a,
B a ->
R) (
la :
forall a,
list (
B a)):
forall a:
A, (
Max_{
la a}(
f a) -
Max_{
la a}(
g a)) <=
Max_{
la a}(
fun x =>
f a x -
g a x).
Proof.
Lemma Rmax_list_minus_le_abs {
A} (
f g :
A ->
R) (
la :
list A):
Rabs (
Max_{
la}(
f) -
Max_{
la}(
g)) <=
Max_{
la}(
fun a =>
Rabs(
f a -
g a)).
Proof.
Lemma Rmax_list_minus_le_abs' {
A} {
B :
A ->
Type} (
f g :
forall a,
B a ->
R) (
la :
forall a,
list (
B a)):
forall a:
A,
Rabs (
Max_{
la a}(
f a) -
Max_{
la a}(
g a)) <=
Max_{
la a}(
fun x =>
Rabs(
f a x -
g a x)).
Proof.
Lemma Rmax_list_fun_swap {
A B} {
lf :
list(
A ->
B)}{
la :
list A}
(
g :
B -> (
A ->
B) ->
R)
(
hl : [] <>
lf) (
hla : [] <>
la) :
Max_{
la} (
fun s =>
Max_{
lf} (
fun f =>
g (
f s)
f)) =
Max_{
lf} (
fun f =>
Max_{
List.map f la} (
fun b =>
g b f)).
Proof.
Lemma Rmax_list_le_range {
A B} (
f :
A ->
B) (
g :
B ->
R) {
la :
list A} {
lb :
list B}
(
hla : [] <>
la)
(
hf :
forall {
a},
In a la ->
In (
f a)
lb) :
Max_{
la} (
fun a =>
g(
f a)) <=
Max_{
lb} (
fun b =>
g b).
Proof.
Lemma Rmax_list_le_range' {
A B} (
g :
B ->
R)
lf {
lb :
list B}
(
hlf : [] <>
lf){
a :
A}
(
hfa :
forall f,
In (
f a)
lb) :
Max_{
lf} (
fun f =>
g(
f a)) <=
Max_{
lb} (
fun b =>
g b).
Proof.
Lemma Rmax_list_fun_le {
A} {
la :
list A}
(
f :
A ->
R) (
g :
A ->
R) :
(
forall a,
f a <=
g a) ->
Max_{
la} (
fun a =>
f a) <=
Max_{
la} (
fun a =>
g a).
Proof.
Lemma Rmax_list_fun_le' {
A B} {
la :
list A} {
lb :
list B}
(
f :
A ->
R) (
g :
B ->
R)
(
hla : [] <>
la) (
hlb : [] <>
lb) :
(
forall a b,
f a <=
g b) ->
Max_{
la} (
fun a =>
f a) <=
Max_{
lb} (
fun b =>
g b).
Proof.
Lemma Rmax_list_map_transf {
A B} (
l :
list A) (
f :
A ->
R) (
f' :
B ->
R) (
g :
A ->
B) :
(
List.Forall (
fun x =>
f x =
f'(
g x))
l) ->
Max_{
l}(
f) =
Max_{
List.map g l}(
f').
Proof.
Lemma Rmax_list_pairs {
A} (
ls :
list (
Stream A)) (
f :
Stream A ->
R) (
f':
A*
Stream A ->
R) :
(
List.Forall(
fun x =>
f x =
f'(
hd x,
tl x))
ls) ->
Max_{
ls}(
f) =
Max_{(
List.map (
fun x => (
hd x,
tl x))
ls)}(
fun x =>
f'(
x)).
Proof.
Lemma fin_fun_bounded {
A} (
finA :
Finite A) (
f :
A ->
R) : {
D |
forall a,
f a <=
D}.
Proof.
exists (
Max_{@
elms _ finA}(
f)).
intro a.
apply Rmax_spec.
rewrite in_map_iff.
exists a ;
split ;
trivial.
destruct finA ;
eauto.
Qed.
Lemma fin_fun_bounded_Rabs {
A} (
finA :
Finite A) (
f :
A ->
R) : {
D |
forall a,
Rabs(
f a) <=
D }.
Proof.
End Rmax_list.
Notation "
Max_{
l } (
f )" := (
Rmax_list (
List.map f l)) (
at level 50).
Section MDPs.
Open Scope monad_scope.
Open Scope R_scope.
Record MDP :=
mkMDP {
State and action spaces.
state :
Type;
act :
forall s:
state,
Type;
The state space has decidable equality.
st_eqdec :>
EqDec state eq;
The state and action spaces are finite.
fs :>
Finite (
state) ;
fa :>
forall s,
Finite (
act s);
The state space and the fibered action spaces are nonempty.
ne :
NonEmpty (
state) ;
na :
forall s,
NonEmpty (
act s);
Probabilistic transition structure.
t(s,a,s') is the probability that the next state is s' given that you take action a in state s.
One can also consider to to be an act-indexed collection of Kliesli arrows of Pmf.
t :
forall s :
state, (
act s ->
Pmf state);
Reward when you move to s' from s by taking action a.
reward :
forall s :
state, (
act s ->
state ->
R)
}.
Arguments outcomes {
_}.
Arguments t {
_}.
Arguments reward {
_}.
Global Existing Instance fs.
Definition dec_rule (
M :
MDP) :=
forall s :
M.(
state), (
M.(
act))
s.
Global Instance dec_rule_finite (
M :
MDP) :
Finite (
dec_rule M).
Proof.
Global Instance nonempty_dec_rule (
M :
MDP) :
NonEmpty (
dec_rule M)
:=
fun s =>
na M s.
Lemma bdd {
M} : {
D | (
forall s s':
M.(
state),
forall σ :
dec_rule M,
Rabs (
reward s (σ
s)
s') <=
D)}.
Proof.
Definition act_list (
M :
MDP) :
forall s,
list (
act M s) :=
fun s =>
let (
la,
Hla) :=
fa M s in la.
Lemma act_list_not_nil (
M :
MDP) :
forall s, [] <>
act_list M s.
Proof.
Definition policy (
M :
MDP) :=
Stream (
dec_rule M).
Context {
M :
MDP}.
Context (σ :
dec_rule M).
Definition bind_stoch_iter (
n :
nat) (
init :
M.(
state)) :
Pmf M.(
state):=
applyn (
Pmf_pure init) (
fun y =>
Pmf_bind y (
fun s =>
t s (σ
s)))
n.
Definition bind_stoch_iter_str (π :
Stream(
dec_rule M)) (
n :
nat) (
init :
M.(
state))
:
Pmf M.(
state):=
applyn (
Pmf_pure init) (
fun y =>
Pmf_bind y (
fun s =>
t s (
Str_nth n π
s)))
n.
Lemma bind_stoch_iter_1 (
init :
M.(
state)) :
bind_stoch_iter 1
init =
t init (σ
init).
Proof.
Definition step_expt_reward :
state M ->
R :=
(
fun s =>
expt_value (
t s (σ
s)) (
reward s (σ
s))).
Definition expt_reward (
init :
M.(
state)) (
n :
nat) :
R :=
expt_value (
bind_stoch_iter n init) (
step_expt_reward).
Lemma expt_reward0_eq_reward :
forall init :
M.(
state),
expt_reward init 0 = (
step_expt_reward init).
Proof.
Lemma expt_reward_succ (
n :
nat) :
forall init :
M.(
state),
expt_reward init (
S n) =
expt_value (
bind_stoch_iter n init) (
fun s :
state M =>
expt_value (
t s (σ
s)) (
step_expt_reward)).
Proof.
Lemma expt_reward_le_max_Rabs {
D :
R} (
init :
M.(
state)) :
(
forall s s':
M.(
state),
Rabs (
reward s (σ
s)
s') <=
D) ->
(
forall n:
nat,
Rabs (
expt_reward init n) <=
D).
Proof.
End MDPs.
Section Rfct_AbelianGroup.
Definition Rfct (
A :
Type) {
fin :
Finite A} :=
A ->
R.
Context (
A :
Type) {
finA :
Finite A}.
Definition Rfct_zero :
Rfct A :=
fun x => 0.
Definition Rfct_plus (
f :
Rfct A) (
g :
Rfct A) :=
fun x => (
f x) + (
g x).
Definition Rfct_opp (
f :
Rfct A) :=
fun x =>
opp (
f x).
Definition Rfct_scal (
r :
R) (
f :
Rfct A) :=
fun x =>
scal r (
f x).
Definition Rfct_le (
f g :
Rfct A) :=
forall a :
A,
f a <=
g a.
Definition Rfct_ge (
f g :
Rfct A) :=
forall a :
A,
f a >=
g a.
Global Instance Rfct_le_pre :
PreOrder Rfct_le.
Proof.
unfold Rfct_le.
split;
red;
intros.
-
lra.
-
specialize (
H a);
specialize (
H0 a);
lra.
Qed.
Global Instance Rfct_ge_pre :
PreOrder Rfct_ge.
Proof.
unfold Rfct_ge.
split;
red;
intros.
-
lra.
-
specialize (
H a);
specialize (
H0 a);
lra.
Qed.
Lemma Rfct_not_ge_lt (
f g :
Rfct A) :
not (
Rfct_ge f g) <-> (
exists a :
A,
f a <
g a).
Proof.
Lemma Rfct_not_le_gt (
f g :
Rfct A) :
not (
Rfct_le f g) <-> (
exists a :
A,
f a >
g a).
Proof.
Lemma Rfct_ge_not_lt (
f g :
Rfct A) : (
Rfct_ge f g) <->
not (
exists a :
A,
f a <
g a).
Proof.
Lemma Rfct_le_not_gt (
f g :
Rfct A) : (
Rfct_le f g) <->
not (
exists a :
A,
f a >
g a).
Proof.
Lemma Rfct_le_ge_symm (
f g :
Rfct A) :
Rfct_le f g <->
Rfct_ge g f.
Proof.
split;
intros.
-
intro a.
specialize (
H a).
now apply Rle_ge.
-
intro a.
specialize (
H a).
now apply Rge_le.
Qed.
Definition monotone_le (
F :
Rfct A ->
Rfct A) :=
forall f g,
Rfct_le f g ->
Rfct_le (
F f) (
F g).
Definition monotone_ge (
F :
Rfct A ->
Rfct A) :=
forall f g,
Rfct_ge f g ->
Rfct_ge (
F f) (
F g).
Lemma Rfct_eq_ext (
f g:
Rfct A) : (
forall x,
f x =
g x) ->
f =
g.
Proof.
Lemma Rfct_le_antisym (
f g :
Rfct A) :
f =
g <->
Rfct_le f g /\
Rfct_le g f.
Proof.
split ;
intros.
-
split.
++
intros a.
set (
equal_f H).
specialize (
e a).
now right.
++
intros a.
set (
equal_f H).
specialize (
e a).
now right.
-
destruct H as [
Hl Hr].
apply Rfct_eq_ext.
intro a.
specialize (
Hl a).
specialize (
Hr a).
apply Rle_antisym ;
auto.
Qed.
Lemma Rfct_plus_comm (
f g:
Rfct A) :
Rfct_plus f g =
Rfct_plus g f.
Proof.
Lemma Rfct_plus_assoc (
x y z:
Rfct A) :
Rfct_plus x (
Rfct_plus y z) =
Rfct_plus (
Rfct_plus x y)
z.
Proof.
Lemma Rfct_plus_zero_r (
x:
Rfct A) :
Rfct_plus x Rfct_zero =
x.
Proof.
Lemma Rfct_plus_opp_r (
f:
Rfct A) :
Rfct_plus f (
Rfct_opp f) =
Rfct_zero.
Proof.
Definition Rfct_AbelianGroup_mixin :=
AbelianGroup.Mixin (
Rfct A)
Rfct_plus Rfct_opp Rfct_zero Rfct_plus_comm
Rfct_plus_assoc Rfct_plus_zero_r Rfct_plus_opp_r.
Canonical Rfct_AbelianGroup :=
AbelianGroup.Pack (
Rfct A) (
Rfct_AbelianGroup_mixin) (
Rfct A).
End Rfct_AbelianGroup.
Section Rfct_ModuleSpace.
Context (
A :
Type) {
finA :
Finite A}.
Lemma Rfct_scal_assoc (
x y :
R) (
u:
Rfct A) :
Rfct_scal A x (
Rfct_scal A y u) =
Rfct_scal A (
x*
y)
u.
Proof.
Lemma Rfct_scal_one (
u:
Rfct A) :
Rfct_scal A R1 u =
u.
Proof.
Lemma Rfct_scal_distr_l x (
u v:
Rfct A) :
Rfct_scal A x (
Rfct_plus A u v)
=
Rfct_plus A (
Rfct_scal A x u) (
Rfct_scal A x v).
Proof.
Lemma Rfct_scal_distr_r (
x y:
R) (
u:
Rfct A) :
Rfct_scal A (
Rplus x y)
u
=
Rfct_plus A (
Rfct_scal A x u) (
Rfct_scal A y u).
Proof.
Definition Rfct_ModuleSpace_mixin :=
ModuleSpace.Mixin R_Ring (
Rfct_AbelianGroup A) (
Rfct_scal A)
Rfct_scal_assoc
Rfct_scal_one Rfct_scal_distr_l Rfct_scal_distr_r.
Canonical Rfct_ModuleSpace :=
ModuleSpace.Pack R_Ring (
Rfct A)
(
ModuleSpace.Class R_Ring (
Rfct A)
_ Rfct_ModuleSpace_mixin) (
Rfct A).
End Rfct_ModuleSpace.
Section Rfct_UniformSpace.
Context (
A :
Type) {
finA :
Finite A}.
Definition Rmax_norm :
Rfct A ->
R :=
let (
ls,
_) :=
finA in fun (
f:
Rfct A) =>
Max_{
ls}(
fun s =>
Rabs (
f s)).
Definition Rmax_ball :=
fun (
f:
Rfct A)
eps g =>
Rmax_norm (
fun s =>
minus (
g s) (
f s)) <
eps.
Lemma Rmax_ball_le (
f g :
Rfct A) {
eps1 eps2 :
R} :
eps1 <=
eps2 ->
Rmax_ball f eps1 g ->
Rmax_ball f eps2 g.
Proof.
Lemma Rmax_ball_center (
f :
Rfct A) (
e :
posreal) :
Rmax_ball f e f.
Proof.
Lemma Rmax_ball_sym (
f g :
Rfct A) (
e :
R) :
Rmax_ball f e g ->
Rmax_ball g e f.
Proof.
Lemma Rmax_ball_triangle (
f g h :
Rfct A) (
e1 e2 :
R) :
Rmax_ball f e1 g ->
Rmax_ball g e2 h ->
Rmax_ball f (
e1 +
e2)
h.
Proof.
Definition Rfct_UniformSpace_mixin :=
UniformSpace.Mixin (
Rfct A)
Rmax_ball Rmax_ball_center Rmax_ball_sym Rmax_ball_triangle.
Definition Rfct_UniformSpace :
UniformSpace :=
UniformSpace.Pack (
Rfct A)
Rfct_UniformSpace_mixin (
Rfct A).
End Rfct_UniformSpace.
Section Rfct_NormedModule.
Context (
A :
Type) {
finA :
Finite A}.
Canonical Rfct_NormedModuleAux :=
NormedModuleAux.Pack R_AbsRing (
Rfct A)
(
NormedModuleAux.Class R_AbsRing (
Rfct A)
(
ModuleSpace.class _ (
Rfct_ModuleSpace A))
(
Rfct_UniformSpace_mixin A)) (
Rfct A).
Lemma Rfct_norm_triangle f g:
(
Rmax_norm A (
Rfct_plus A f g)) <= (
Rmax_norm A f) + (
Rmax_norm A g).
Proof.
Lemma Rfct_norm_ball1 :
forall (
f g :
Rfct A) (
eps :
R),
Rmax_norm A (
minus g f) <
eps -> @
Hierarchy.ball (
NormedModuleAux.UniformSpace R_AbsRing Rfct_NormedModuleAux)
f eps g.
Proof.
intros f g eps H.
unfold ball ; simpl. apply H.
Qed.
Lemma Rfct_norm_ball2 (
f g :
Rfct A) (
eps :
posreal) :
@
Hierarchy.ball (
NormedModuleAux.UniformSpace R_AbsRing Rfct_NormedModuleAux)
f eps g ->
Rmax_norm A (
minus g f) < 1 *
eps.
Proof.
intros Hball.
repeat red in Hball.
rewrite Rmult_1_l.
apply Hball.
Qed.
Lemma Rfct_norm_scal_aux:
forall (
l :
R) (
f :
Rfct A),
Rmax_norm A (
scal l f) <=
Rabs l *
Rmax_norm A f .
Proof.
Lemma Rfct_norm_eq_0:
forall (
f:
Rfct A), (
Rmax_norm A f) = 0 ->
f =
zero.
Proof.
Definition Rfct_NormedModule_mixin :=
NormedModule.Mixin R_AbsRing Rfct_NormedModuleAux (
Rmax_norm A) 1%
R Rfct_norm_triangle Rfct_norm_scal_aux
Rfct_norm_ball1 Rfct_norm_ball2 Rfct_norm_eq_0.
Canonical Rfct_NormedModule :=
NormedModule.Pack R_AbsRing (
Rfct A)
(
NormedModule.Class _ _ _ Rfct_NormedModule_mixin) (
Rfct A).
End Rfct_NormedModule.
Section Rfct_open_closed.
Context (
A :
Type) {
finA :
Finite A}.
Lemma Rmax_ball_compat (
f g :
Rfct A) (
eps :
posreal) :
ball f eps g <->
Rmax_ball A f eps g.
Proof.
Lemma Rmax_open_compat (ϕ :
Rfct A ->
Prop) :
open ϕ <-> @
open (
Rfct_UniformSpace A) ϕ.
Proof.
unfold open,
locally,
ball,
fct_ball.
simpl.
setoid_rewrite Rmax_ball_compat.
split ;
trivial.
Qed.
Lemma forall_lt {
f g} {
ne :
NonEmpty A} : (
forall a :
A,
g a <
f a) -> (0 <
Rmax_norm A (
fun a =>
minus (
f a) (
g a))).
Proof.
Lemma le_max {
f h} : (
forall a :
A,
h a >
Rmax_norm A f) -> (
forall a :
A,
h a >
f a).
Proof.
Global Instance closed_Proper :
Proper (
pointwise_relation (
Rfct_UniformSpace A)
iff ==>
Basics.impl)
closed.
Proof.
intros x y H H0.
eapply closed_ext ; eauto.
Qed.
Global Instance closed_Proper_flip :
Proper (
pointwise_relation (
Rfct_UniformSpace A)
iff ==>
Basics.flip Basics.impl)
closed.
Proof.
intros x y H H0.
eapply closed_ext ; eauto. symmetry.
apply H.
Qed.
Global Instance open_Proper :
Proper (
pointwise_relation (
Rfct_UniformSpace A)
iff ==>
Basics.impl)
open.
Proof.
intros x y H H0.
eapply open_ext ; eauto.
Qed.
Global Instance open_Proper_flip :
Proper (
pointwise_relation (
Rfct_UniformSpace A)
iff ==>
Basics.flip Basics.impl)
open.
Proof.
intros x y H H0.
eapply open_ext ; eauto. symmetry.
apply H.
Qed.
Theorem lt_open (
f :
Rfct A) : @
open (
Rfct_UniformSpace A) (
fun g => (
exists a,
g a <
f a)).
Proof.
rewrite <-
Rmax_open_compat.
unfold open,
locally,
ball.
simpl.
unfold fct_ball,
ball.
simpl.
intros g Hgf.
unfold AbsRing_ball.
simpl.
setoid_rewrite Rminus_lt_0 in Hgf.
destruct Hgf as [
a0 Ha0].
pose (
eps :=
mkposreal _ Ha0).
exists (
mkposreal _ (
is_pos_div_2 eps)).
simpl.
intros y Hyg.
exists a0.
rewrite Rminus_lt_0.
assert (
h1 : (
f a0 -
g a0) = ((
f a0 -
y a0) + (
y a0 -
g a0)))
by ring.
clear eps.
rewrite h1 in Ha0.
assert (
h2 : (
f a0 -
y a0) + (
y a0 -
g a0) <= (
f a0 -
y a0) +
Rabs(
y a0 -
g a0)).
{
apply Rplus_le_compat_l.
apply Rle_abs.
}
assert (
h3 : (
f a0 -
y a0) +
Rabs (
y a0 -
g a0) <= (
f a0 -
y a0) + (
f a0 -
g a0) / 2).
{
apply Rplus_le_compat_l.
left.
apply Hyg.
}
assert (
h4 :
f a0 -
g a0 <=
f a0 -
y a0 + (
f a0 -
g a0) / 2).
{
eapply Rle_trans.
rewrite h1.
apply h2.
apply h3.
}
assert (
h5 : (
f a0 -
g a0)/2 <=
f a0 -
y a0).
{
rewrite <-
Rle_minus_l in h4.
now field_simplify in h4.
}
specialize (
Hyg a0).
eapply Rlt_le_trans ;
last apply h5.
eapply Rle_lt_trans ;
last apply Hyg.
apply Rabs_pos.
Qed.
Theorem ge_closed (
f :
Rfct A) :
@
closed (
Rfct_UniformSpace A) (
fun g =>
Rfct_ge A g f).
Proof.
Theorem gt_open (
f :
Rfct A) : @
open (
Rfct_UniformSpace A) (
fun g => (
exists a,
g a >
f a)).
Proof.
rewrite <-
Rmax_open_compat.
unfold open,
locally,
ball.
simpl.
unfold fct_ball,
ball.
simpl.
intros g Hgf.
unfold AbsRing_ball.
setoid_rewrite Rminus_lt_0 in Hgf.
destruct Hgf as [
a0 Ha0].
pose (
eps :=
mkposreal _ Ha0).
exists (
mkposreal _ (
is_pos_div_2 eps)).
simpl.
intros y Hyg.
exists a0.
apply Rminus_gt.
assert (
h1 : (
g a0 -
f a0) = ((
g a0 -
y a0) + (
y a0 -
f a0)))
by ring.
clear eps.
rewrite h1 in Ha0.
assert (
h2 : (
g a0 -
y a0) + (
y a0 -
f a0) <=
Rabs(
g a0 -
y a0) + (
y a0 -
f a0)).
{
apply Rplus_le_compat_r.
apply Rle_abs.
}
assert (
h3 :
Rabs(
g a0 -
y a0) + (
y a0 -
f a0) <= ((
g a0 -
f a0) / 2) + (
y a0 -
f a0)).
{
rewrite Rabs_minus_sym.
apply Rplus_le_compat_r.
left.
apply Hyg.
}
assert (
h4 : (
g a0 -
f a0) <= (
g a0 -
f a0) / 2 + (
y a0 -
f a0)).
{
eapply Rle_trans.
rewrite h1.
apply h2.
apply h3.
}
assert (
h5 : (
g a0 -
f a0)/2 <=
y a0 -
f a0).
{
rewrite Rplus_comm in h4.
rewrite <-
Rle_minus_l in h4.
field_simplify in h4.
lra.
}
specialize (
Hyg a0).
eapply Rlt_le_trans ;
last apply h5.
eapply Rle_lt_trans ;
last apply Hyg.
apply Rabs_pos.
Qed.
Theorem le_closed (
f :
Rfct A) :
@
closed (
Rfct_UniformSpace A) (
fun g =>
Rfct_le A g f).
Proof.
End Rfct_open_closed.
Global Instance Filter_prop {
A} (
fin :
Finite A) (
F : (
Rfct_UniformSpace A ->
Prop) ->
Prop) (
ff :
Filter F) :
Proper (
pointwise_relation (
Rfct_UniformSpace A)
iff ==>
Basics.flip Basics.impl)
F.
Proof.
intros A'
B'
Ha.
unfold Basics.flip,
Basics.impl.
apply filter_imp.
intro a.
specialize (
Ha a).
now destruct Ha.
Qed.
Section Rfct_CompleteSpace.
Context (
A :
Type) {
finA :
Finite A}.
Lemma close_lim_Rfct :
forall F1 F2,
filter_le F1 F2 ->
filter_le F2 F1 -> @
close (
Rfct_UniformSpace A) (
lim_fct F1) (
lim_fct F2).
Proof.
intros F1 F2 H H0.
unfold close.
intros eps.
apply Rmax_ball_compat.
apply (
close_lim_fct F1 F2 H H0 eps).
Qed.
Lemma complete_cauchy_Rfct :
forall F : (
Rfct_UniformSpace A ->
Prop) ->
Prop,
ProperFilter F ->
(
forall (
eps :
posreal),
exists f,
F (
Rmax_ball A f eps))
->
forall eps :
posreal,
F (
Rmax_ball A (
lim_fct F)
eps).
Proof.
intros F ProperF CauchyF eps.
eapply filter_imp.
-
intro f.
now rewrite <-
Rmax_ball_compat.
-
setoid_rewrite <-
Rmax_ball_compat.
set (
complete_cauchy_fct F).
apply f ;
trivial.
intros eps'.
destruct (
CauchyF eps')
as [
f'
Hf'].
exists f'.
eapply filter_imp ;
last apply Hf'.
intros h Hmax.
now apply Rmax_ball_compat.
Qed.
Definition Rfct_CompleteSpace_mixin :=
CompleteSpace.Mixin (
Rfct_UniformSpace A)
lim_fct complete_cauchy_Rfct close_lim_Rfct.
Canonical Rfct_CompleteSpace :=
CompleteSpace.Pack (
Rfct A) (
CompleteSpace.Class _ _ Rfct_CompleteSpace_mixin) (
Rfct A).
Canonical Rfct_CompleteNormedModule :=
CompleteNormedModule.Pack _ (
Rfct A) (
CompleteNormedModule.Class R_AbsRing _ (
NormedModule.class _ (
Rfct_NormedModule A))
Rfct_CompleteSpace_mixin) (
Rfct A).
End Rfct_CompleteSpace.
Section fixpt.
Context {
K :
AbsRing}{
X :
CompleteNormedModule K}.
Definition fixpt (
F :
X ->
X) (
init :
X) :=
lim (
fun P =>
eventually (
fun n =>
P (
fixed_point.iter F n init))).
Context {
F :
X ->
X} (
hF :
is_contraction F) (
phi :
X ->
Prop)
(
fphi :
forall x :
X,
phi x ->
phi (
F x))
(
phic :
closed phi).
Lemma fixpt_init_unique {
init1 init2 :
X} (
Hphi1 :
phi init1) (
Hphi2 :
phi init2):
fixpt F init1 =
fixpt F init2.
Proof.
Context (
init:
NonEmpty X) (
init_phi:
phi init).
Lemma fixpt_is_fixpt :
F (
fixpt F init) =
fixpt F init.
Proof.
Lemma fixpt_is_unique :
forall g :
X,
phi g ->
F g =
g ->
g =
fixpt F init.
Proof.
Theorem metric_coinduction :
phi (
fixpt F init).
Proof.
End fixpt.
Section contraction_coinduction.
Context (
A :
Type) {
finm :
Finite A}.
Lemma monotone_le_preserv (
F :
Rfct A ->
Rfct A) (
f :
Rfct A) :
monotone_le A F -> (
Rfct_le A (
F f)
f) -> (
forall g,
Rfct_le A g f ->
Rfct_le A (
F g)
f).
Proof.
intros Hm Hle g Hgf.
unfold monotone_le in Hm.
unfold Rfct_le in *.
specialize (
Hm g f Hgf) ;
clear Hgf.
intro a.
specialize (
Hm a).
specialize (
Hle a).
eapply Rle_trans ;
first apply Hm.
assumption.
Qed.
Lemma monotone_ge_preserv (
F :
Rfct A ->
Rfct A) (
f :
Rfct A) :
monotone_ge A F -> (
Rfct_ge A (
F f)
f) -> (
forall g,
Rfct_ge A g f ->
Rfct_ge A (
F g)
f).
Proof.
intros Hm Hge g Hgf.
unfold monotone_ge in Hm.
unfold Rfct_ge in *.
specialize (
Hm g f Hgf) ;
clear Hgf.
intro a.
specialize (
Hm a).
specialize (
Hge a).
eapply Rge_trans ;
first apply Hm.
assumption.
Qed.
Theorem contraction_coinduction_Rfct_le
{
F} (
hF : @
is_contraction (
Rfct_CompleteSpace A) (
Rfct_CompleteSpace A)
F)
(
hM :
monotone_le A F)
:
forall f init,
Rfct_le A (
F f)
f ->
Rfct_le A (
fixpt F init)
f.
Proof.
Theorem contraction_coinduction_Rfct_ge
{
F} (
hF : @
is_contraction (
Rfct_CompleteSpace A) (
Rfct_CompleteSpace A)
F)
(
hM :
monotone_ge A F)
:
forall f init,
Rfct_ge A (
F f)
f ->
Rfct_ge A (
fixpt F init)
f.
Proof.
Corollary contraction_coinduction_Rfct_le'
{
F} (
hF : @
is_contraction (
Rfct_CompleteSpace A) (
Rfct_CompleteSpace A)
F)
(
hM :
monotone_le A F)
:
forall f init,
Rfct_ge A f (
F f) ->
Rfct_ge A f (
fixpt F init).
Proof.
Corollary contraction_coinduction_Rfct_ge'
{
F} (
hF : @
is_contraction (
Rfct_CompleteSpace A) (
Rfct_CompleteSpace A)
F)
(
hM :
monotone_ge A F)
:
forall f init,
Rfct_le A f (
F f) ->
Rfct_le A f (
fixpt F init).
Proof.
End contraction_coinduction.
Section ltv.
Open Scope R_scope.
Context {
M :
MDP} (γ :
R).
Context (σ :
dec_rule M) (
init :
M.(
state)) (
hγ : (0 <= γ < 1)%
R).
Arguments reward {
_}.
Arguments outcomes {
_}.
Arguments t {
_}.
Global Instance Series_proper :
Proper (
pointwise_relation _ eq ==>
eq) (
Series).
Proof.
Definition ltv_part (
N :
nat) :=
sum_n (
fun n => γ^
n * (
expt_reward σ
init n))
N.
Lemma ltv_part0_eq_reward :
ltv_part 0 = (
step_expt_reward σ
init).
Proof.
Lemma sum_mult_geom (
D :
R) :
infinite_sum (
fun n =>
D*γ^
n) (
D/(1 - γ)).
Proof.
rewrite infinite_sum_infinite_sum'.
apply infinite_sum'
_mult_const.
rewrite <-
infinite_sum_infinite_sum'.
apply is_series_Reals.
apply is_series_geom.
rewrite Rabs_pos_eq.
lra.
lra.
Qed.
Lemma ex_series_mult_geom (
D:
R) :
ex_series (
fun n =>
D*γ^
n).
Proof.
exists (
D/(1-γ)).
rewrite is_series_Reals.
apply sum_mult_geom.
Qed.
Lemma ltv_part_le_norm (
N :
nat) :
Rabs(
ltv_part N) <=
sum_f_R0 (
fun n => γ^
n * (
proj1_sig (@
bdd M)))
N.
Proof.
Theorem ex_series_ltv s0 :
ex_series (
fun n => γ^
n * (
expt_reward σ
s0 n)).
Proof.
Definition ltv :
M.(
state) ->
R:=
fun s =>
Series (
fun n => γ^
n * (
expt_reward σ
s n)).
Definition expt_ltv (
p :
Pmf M.(
state)) :
R :=
expt_value p ltv.
Lemma Pmf_bind_comm_stoch_bind (
n :
nat) :
Pmf_bind (
bind_stoch_iter σ
n init) (
fun a :
state M =>
t a (σ
a)) =
Pmf_bind (
t init (σ
init)) (
fun a :
state M =>
bind_stoch_iter σ
n a).
Proof.
Lemma ltv_corec :
ltv init = (
step_expt_reward σ
init) + γ*
expt_value (
t init (σ
init))
ltv.
Proof.
End ltv.
Section operator.
Open Scope R_scope.
Context {
M :
MDP} (γ :
R).
Context (
hγ : (0 <= γ < 1)%
R).
Arguments reward {
_}.
Arguments outcomes {
_}.
Arguments t {
_}.
Definition bellman_op (π :
dec_rule M) : @
Rfct M.(
state) (
fs M) -> @
Rfct M.(
state) (
fs M) :=
fun W =>
fun s => (
step_expt_reward π
s + γ*(
expt_value (
t s (π
s))
W)).
Theorem is_contraction_bellman_op (π :
dec_rule M) :
@
is_contraction (
Rfct_UniformSpace M.(
state)) (
Rfct_UniformSpace M.(
state)) (
bellman_op π).
Proof.
Lemma ltv_bellman_eq_ltv :
forall π,
ltv γ π =
bellman_op π (
ltv γ π).
Proof.
Lemma ltv_bellman_op_fixpt :
forall π
init,
ltv γ π =
fixpt (
bellman_op π)
init.
Proof.
Lemma bellman_op_monotone_le (π :
dec_rule M) :
monotone_le M.(
state) (
bellman_op π).
Proof.
Lemma bellman_op_monotone_ge (π :
dec_rule M) :
monotone_ge M.(
state) (
bellman_op π).
Proof.
Definition bellman_max_op : @
Rfct M.(
state) (
fs M) -> @
Rfct M.(
state) (
fs M)
:=
fun W =>
fun s =>
let (
la,
_) :=
fa M s in
Max_{
la}(
fun a =>
expt_value (
t s a) (
reward s a) + γ*(
expt_value (
t s a)
W)).
Lemma bellman_max_op_monotone_le :
(
monotone_le M.(
state) (
bellman_max_op)).
Proof.
Lemma bellman_max_op_monotone_ge :
(
monotone_ge M.(
state)
bellman_max_op).
Proof.
Lemma bellman_op_bellman_max_le (π :
dec_rule M) :
forall W,
Rfct_le M.(
state) (
bellman_op π
W) (
bellman_max_op W).
Proof.
Theorem is_contraction_bellman_max_op
:
@
is_contraction (
Rfct_UniformSpace M.(
state)) (
Rfct_UniformSpace M.(
state))
bellman_max_op.
Proof.
Lemma ltv_Rfct_le_fixpt (π :
dec_rule M) :
forall init,
Rfct_le M.(
state) (
ltv γ π) (
fixpt (
bellman_max_op)
init).
Proof.
Definition greedy init:
dec_rule M :=
fun s =>
let V' :=
fixpt bellman_max_op in
let (
la,
Hla) :=
fa M s in
let pt :=
na M s in
@
argmax (
act M s)
la (
proj2 (
not_nil_exists _) (
ex_intro _ pt (
Hla pt)))
(
fun a =>
expt_value (
t s a) (
reward s a) +
γ*(
expt_value (
t s a) (
V'
init))).
Lemma greedy_argmax_is_max :
forall init,
let V' :=
fixpt bellman_max_op in
let σ' :=
greedy init in
bellman_op σ' (
V'
init) =
V'
init.
Proof.
Lemma exists_fixpt_policy :
forall init,
let V' :=
fixpt (
bellman_max_op)
in
let σ' :=
greedy init in
ltv γ σ' =
V'
init.
Proof.
End operator.
Section order.
Open Scope R_scope.
Context {
M :
MDP} (γ :
R).
Context (
hγ : (0 <= γ < 1)%
R).
Arguments reward {
_}.
Arguments outcomes {
_}.
Arguments t {
_}.
Definition policy_eq (σ τ :
forall s :
state M ,
act M s) :
Prop
:=
forall s, (@
ltv M γ σ
s) = (@
ltv M γ τ
s).
Global Instance policy_eq_equiv :
Equivalence policy_eq.
Proof.
constructor; repeat red; intros.
reflexivity.
now symmetry.
etransitivity; eauto.
Qed.
Definition policy_le (σ τ :
forall s :
state M ,
act M s) :
Prop
:=
forall s, (
ltv γ σ
s) <= (
ltv γ τ
s).
Global Instance policy_equiv_sub :
subrelation policy_eq policy_le.
Proof.
Global Instance Rle_trans :
Transitive Rle.
Proof.
repeat red;
intros.
eapply Rle_trans;
eauto.
Qed.
Global Instance policy_le_pre :
PreOrder policy_le.
Proof.
unfold policy_eq,
policy_le.
constructor;
red;
intros.
-
lra.
-
etransitivity;
eauto.
Qed.
Global Instance policy_le_part :
PartialOrder policy_eq policy_le.
Proof.
Definition max_ltv :
M.(
state) ->
R :=
fun s =>
let (
ld,
_) :=
dec_rule_finite M in
Max_{
ld} (
fun σ =>
ltv γ σ
s).
Theorem max_ltv_eq_fixpt :
forall init,
fixpt (
bellman_max_op γ)
init =
max_ltv.
Proof.
Theorem bellman_iterate :
forall init,
lim (
fun P =>
eventually (
fun n :
nat =>
P (@
fixed_point.iter (
CompleteNormedModule.UniformSpace R_AbsRing (@
Rfct_CompleteNormedModule (
state M) (
fs M)))
(
bellman_max_op γ)
n init))) =
max_ltv.
Proof.
Theorem bellman_op_iterate :
forall init π,
lim (
fun P =>
eventually (
fun n :
nat =>
P (@
fixed_point.iter (
CompleteNormedModule.UniformSpace R_AbsRing (@
Rfct_CompleteNormedModule (
state M) (
fs M))) (
bellman_op γ π)
n init))) =
ltv γ π.
Proof.
End order.
Section improve.
Context {
M :
MDP} (γ :
R).
Arguments reward {
_}.
Arguments outcomes {
_}.
Arguments t {
_}.
Context (
hγ : 0<=γ<1).
Theorem policy_improvement_1 (σ τ :
dec_rule M) :
(
forall s,
bellman_op γ τ (
ltv γ σ)
s >=
bellman_op γ σ (
ltv γ σ)
s)
->
forall s,
ltv γ τ
s >=
ltv γ σ
s.
Proof.
Theorem policy_improvement_2 (σ τ :
dec_rule M) :
(
forall s,
bellman_op γ τ (
ltv γ σ)
s <=
bellman_op γ σ (
ltv γ σ)
s)
->
forall s,
ltv γ τ
s <=
ltv γ σ
s.
Proof.
Definition improved (σ :
dec_rule M) {
la :
forall s,
list (
act M s)} (
hla :
forall s, [] <>
la s) :
dec_rule M :=
fun s =>
argmax (
hla s) (
fun a =>
expt_value (
t s a) (
fun s' =>
reward s a s' + γ*(
ltv γ σ
s'))).
Lemma improved_is_better_aux (σ :
dec_rule M)
la (
hla :
forall s, [] <>
la s) :
forall s,
bellman_op γ (
improved σ
hla) (
ltv γ σ)
s =
Max_{
la s}(
fun a :
act M s =>
expt_value (
t s a) (
fun s' :
state M =>
reward s a s' + γ *
ltv γ σ
s')).
Proof.
Lemma improved_is_better (σ :
dec_rule M)
la (
hla :
forall s, [] <>
la s)
(
H :
forall σ :
dec_rule M,
forall s,
In (σ
s) (
la s)):
forall s,
bellman_op γ (
improved σ
hla) (
ltv γ σ)
s >=
bellman_op γ σ (
ltv γ σ)
s.
Proof.
Definition improved_tot (σ :
dec_rule M) :
dec_rule M :=
improved σ (
act_list_not_nil M).
Lemma improved_tot_better (σ :
dec_rule M) :
forall s,
bellman_op γ (
improved_tot σ) (
ltv γ σ)
s >=
bellman_op γ σ (
ltv γ σ)
s.
Proof.
Theorem improved_has_better_value (σ :
dec_rule M) :
forall s,
ltv γ (
improved_tot σ)
s >=
ltv γ σ
s.
Proof.
End improve.