irs gan doc
TRANSCRIPT
!"##$!%&"# '$%($$# ) *+)$#$,-%&.$ /.$,0-,&-1
*$%(",203 -#/ 456+4#.$,0$ 5$",!$8$#% 6$-,#] -#/
:#$,9;<'-0$/ ="/$1
) * 456
!"#$%&'()*
!"
!+ ,-. )',(
!"#$"#%
!"# $%$&'()*$ !+*$&,)'- "$(./&0,1
!
234#2%*$&,$ 3$)%5/&6$7$%( 4$'&%)%81
! "
!"# $%$&'()*$ !+*$&,)'- "$(./&0,1
!"!#$%&# '()*#(+("$%!#
!"!#$%&# '()*#(+("$%&#
'()*(#(+("$%&# !"!#$%&#
!"#$%& ' ()* !+,"%
! "#
!"# $%&'(& !&)$*+',&-&$. "&/'$)$01
!"#$%&
!"#$%& ' ()* !+,"%
! "#
!"
!
τ −cθ(τ)
$$p_\theta(\tau) = \frac{1}{Z(\theta)}\exp\left(-c_\theta(\tau)\right)$$
$$c_\theta(\tau) = \sum_t c_\theta(x_t, u_t)$$
$$\tau = \begin{pmatrix} x_1, x_2, \cdots, x_T \\ u_1, u_2, \cdots, u_T \end{pmatrix}$$
−cθ(τ) cθ(τ)xt t xut t uτ
! "#
!"
!
τ −cθ(τ)cθ(τ)
$$p_\theta(\tau) = \frac{1}{Z(\theta)}\exp\left(-c_\theta(\tau)\right)$$
!"#$%& '() (* '( ()
! "#
!"
!"
!" #$%&'() −∫
pθ(τ) log pθ(τ) dτ
*!+,-!$ .&'/0,
*+-10 2',% 30!&$-$4
*56 *+-1 2',% 30!&$-$4 *56
! "#
!"#$ %&'( )$*+,",- .&+ /0)
!" #$%&'( Lcost(p) pθ )*%+&,-
!"#$ "% &'()
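$$L_{\text{cost}}(p) = E_{\tau\sim p}\left[-\log p_\theta(\tau)\right] \tag{1}$$
$$= E_{\tau\sim p}\left[c_\theta(\tau)\right] + \log Z(\theta) \tag{2}$$
$$= E_{\tau\sim p}\left[c_\theta(\tau)\right] + \log\left(E_{\tau\sim q}\left[\frac{\exp(-c_\theta(\tau))}{q(\tau)}\right]\right) \tag{3}$$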
!" #$%,&'( Z(θ)q q Lsampler(q)
! "#
!"#$ %&'( )$*+,",- .&+ /0)
$c_\theta$   $Z = \int \exp(-c_\theta(\tau))\,d\tau$
$q(\tau)$   $\frac{1}{Z}\exp(-c_\theta(\tau))$ !
Lsampler(q) q(τ) "#$%&'(
!"#$%&' () *+,-
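$$L_{\text{sampler}}(q) = \mathrm{KL}\left(q(\tau)\,\Big\|\,\frac{1}{Z}\exp(-c_\theta(\tau))\right) \tag{4}$$
$$= \int q(\tau)\,\log\frac{q(\tau)}{\frac{1}{Z}\exp(-c_\theta(\tau))}\,d\tau \tag{5}$$
$$= E_{\tau\sim q}\left[c_\theta(\tau)\right] + E_{\tau\sim q}\left[\log q(\tau)\right] + \log Z \tag{6}$$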
./01# 2&3$ !#4'5056 p qLcost(p) pθLsampler(q) q
! "#
!"#$ %&'( )$*+,",- .&+ /0)
q(τ) !"#$%&'() *&!"+,'- *&!"+)
p(τ)
$$\mu = \frac{1}{2}p(\tau) + \frac{1}{2}q(\tau)$$
p(τ) p̃(τ)./0 .)')$&%#$ p(τ)
!"#$ %
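$$L_{\text{cost}}(p) = E_{\tau\sim p}\left[c_\theta(\tau)\right] + \log\left(E_{\tau\sim\mu}\left[\frac{\exp(-c_\theta(\tau))}{\frac{1}{2}\tilde{p}(\tau) + \frac{1}{2}q(\tau)}\right]\right) \tag{7}$$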
! " #!
!" #$%&$'$()*+,
p(τ) q(τ) !"
#$%&'$($)*+,' D∗
!"# $%&'(%)%*+,-(.
$$D^*(\tau) = \frac{\frac{1}{2}p(\tau)}{\frac{1}{2}p(\tau) + \frac{1}{2}q(\tau)} = \frac{p(\tau)}{p(\tau) + q(\tau)} \tag{8}$$
p(τ)
$$p(\tau) = \frac{1}{Z}\exp\left(-c_\theta(\tau)\right)$$
!"# $%&'(%)%*+,-( /0( θ.
$$D_\theta(\tau) = \frac{\frac{1}{2Z}\exp(-c_\theta(\tau))}{\frac{1}{2Z}\exp(-c_\theta(\tau)) + \frac{1}{2}q(\tau)} \tag{9}$$
! "#
!" #$%&$'$()*+, -+%%
!"## "$ %&#'(&)*+,(-
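$$L_{\text{discriminator}}(D_\theta) = E_{\tau\sim p}\left[-\log D_\theta(\tau)\right] + E_{\tau\sim q}\left[-\log\left(1 - D_\theta(\tau)\right)\right] \tag{10}$$
$$= E_{\tau\sim p}\left[-\log\frac{\frac{1}{Z}\exp(-c_\theta(\tau))}{\frac{1}{2Z}\exp(-c_\theta(\tau)) + \frac{1}{2}q(\tau)}\right] + E_{\tau\sim q}\left[-\log\frac{q(\tau)}{\frac{1}{2Z}\exp(-c_\theta(\tau)) + \frac{1}{2}q(\tau)}\right] \tag{11}$$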
$%&'(%)%*+,-( ./,0-(1 2-&&
! " !#
!"#$%&"% '
$$\tilde{\mu}(\tau) = \frac{1}{2Z}\exp(-c_\theta(\tau)) + \frac{1}{2}q(\tau)$$
!"#$%"&"'()*%+
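$$L_{\text{discriminator}}(D_\theta) = E_{\tau\sim p}\left[-\log D_\theta(\tau)\right] + E_{\tau\sim q}\left[-\log\left(1 - D_\theta(\tau)\right)\right] \tag{10}$$
$$= E_{\tau\sim p}\left[-\log\frac{\frac{1}{Z}\exp(-c_\theta(\tau))}{\tilde{\mu}(\tau)}\right] + E_{\tau\sim q}\left[-\log\frac{q(\tau)}{\tilde{\mu}(\tau)}\right] \tag{12}$$
$$= \log Z + E_{\tau\sim p}\left[c_\theta(\tau)\right] + E_{\tau\sim p}\left[\log\tilde{\mu}(\tau)\right] - E_{\tau\sim q}\left[\log q(\tau)\right] + E_{\tau\sim q}\left[\log\tilde{\mu}(\tau)\right] \tag{13}$$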
! " #$
!"#$%&"% '
!"#
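$$L_{\text{discriminator}}(D_\theta) = \log Z + E_{\tau\sim p}\left[c_\theta(\tau)\right] + E_{\tau\sim p}\left[\log\tilde{\mu}(\tau)\right] - E_{\tau\sim q}\left[\log q(\tau)\right] + E_{\tau\sim q}\left[\log\tilde{\mu}(\tau)\right]$$
$L_{\text{discriminator}}(D_\theta)$ Z Z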
!"#$%&#$! '#()"#*#+%&!" ,#-& z
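$$\partial_Z L_{\text{discriminator}}(D_\theta) = \frac{1}{Z} - E_{\tau\sim\mu}\left[\frac{\frac{1}{Z^2}\exp(-c_\theta(\tau))}{\tilde{\mu}(\tau)}\right] \tag{14}$$
$$\partial_Z L_{\text{discriminator}}(D_\theta) = 0 \tag{15}$$
$$Z = E_{\tau\sim\mu}\left[\frac{\exp(-c_\theta(\tau))}{\tilde{\mu}(\tau)}\right] \tag{16}$$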
! " #$
!"#$%&#$! #'("#)#*%&!"
!"#
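$$L_{\text{discriminator}}(D_\theta) = \log Z + E_{\tau\sim p}\left[c_\theta(\tau)\right] + E_{\tau\sim p}\left[\log\tilde{\mu}(\tau)\right] - E_{\tau\sim q}\left[\log q(\tau)\right] + E_{\tau\sim q}\left[\log\tilde{\mu}(\tau)\right]$$
$L_{\text{discriminator}}(D_\theta)$ θ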
!"#$%&#$! '#()"#*#+%&!" ,#&- θ
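$$\partial_\theta L_{\text{discriminator}}(D_\theta) = E_{\tau\sim p}\left[\partial_\theta c_\theta(\tau)\right] - E_{\tau\sim\mu}\left[\frac{\frac{1}{Z}\exp(-c_\theta(\tau))\,\partial_\theta c_\theta(\tau)}{\tilde{\mu}(\tau)}\right] \tag{17}$$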
! " #$
!"#$%&#$! '() *+,&
!"
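$$L_{\text{cost}}(\theta) = E_{\tau\sim p}\left[c_\theta(\tau)\right] + \log\left(E_{\tau\sim\mu}\left[\frac{\exp(-c_\theta(\tau))}{\tilde{\mu}(\tau)}\right]\right)$$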
#$% Lcost(θ) θ &!" '
!"#$%&#$! '()& *#&+ θ
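$$\partial_\theta L_{\text{cost}}(\theta) = E_{\tau\sim p}\left[\partial_\theta c_\theta(\tau)\right] + \partial_\theta \log E_{\tau\sim\mu}\left[\frac{\exp(-c_\theta(\tau))}{\tilde{\mu}(\tau)}\right] \tag{18}$$
$$= E_{\tau\sim p}\left[\partial_\theta c_\theta(\tau)\right] - \left(E_{\tau\sim\mu}\left[\frac{\exp(-c_\theta(\tau))\,\partial_\theta c_\theta(\tau)}{\tilde{\mu}(\tau)}\right] \Big/ E_{\tau\sim\mu}\left[\frac{\exp(-c_\theta(\tau))}{\tilde{\mu}(\tau)}\right]\right) \tag{20}$$
$$= E_{\tau\sim p}\left[\partial_\theta c_\theta(\tau)\right] - \left(E_{\tau\sim\mu}\left[\frac{\exp(-c_\theta(\tau))\,\partial_\theta c_\theta(\tau)}{\tilde{\mu}(\tau)}\right] \Big/ Z\right) \tag{21}$$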
! " #$
!"#$%&'!" ()* #!&+ ,"- ./0 -''2'",+!1
!"#$%&#$! '() *+,& - !"#$%&#$! ./0 1#,*"#2#3%&+"
!"#
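$$\partial_\theta L_{\text{discriminator}}(D_\theta) = E_{\tau\sim p}\left[\partial_\theta c_\theta(\tau)\right] - E_{\tau\sim\mu}\left[\frac{\frac{1}{Z}\exp(-c_\theta(\tau))\,\partial_\theta c_\theta(\tau)}{\tilde{\mu}(\tau)}\right]$$
$$\partial_\theta L_{\text{cost}}(\theta) = E_{\tau\sim p}\left[\partial_\theta c_\theta(\tau)\right] - \left(E_{\tau\sim\mu}\left[\frac{\exp(-c_\theta(\tau))\,\partial_\theta c_\theta(\tau)}{\tilde{\mu}(\tau)}\right] \Big/ Z\right)$$
$$= E_{\tau\sim p}\left[\partial_\theta c_\theta(\tau)\right] - E_{\tau\sim\mu}\left[\frac{\frac{1}{Z}\exp(-c_\theta(\tau))\,\partial_\theta c_\theta(\tau)}{\tilde{\mu}(\tau)}\right] \tag{22}$$
$$= \partial_\theta L_{\text{discriminator}}(D_\theta) \tag{23}$$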
! " #$
!"#$%&'!" ()* &+,-$./ +"0 123 4."./+5!/
!" #$%&'()
!"
$$L_{\text{sampler}}(q) = E_{\tau\sim q}\left[c_\theta(\tau)\right] + E_{\tau\sim q}\left[\log q(\tau)\right]$$
*+, -(.()$/0) 1 !2 #$%&'() 3 40.#/$./
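$$L_{\text{generator}}(q) = E_{\tau\sim q}\left[\log\left(1 - D(\tau)\right) - \log D(\tau)\right] \tag{24}$$
$$= E_{\tau\sim q}\left[\log\frac{q(\tau)}{\tilde{\mu}(\tau)} - \log\frac{\frac{1}{Z}\exp(-c_\theta(\tau))}{\tilde{\mu}(\tau)}\right] \tag{25}$$
$$= E_{\tau\sim q}\left[\log q(\tau) + \log Z + c_\theta(\tau)\right] \tag{26}$$
$$= \log Z + E_{\tau\sim q}\left[c_\theta(\tau)\right] + E_{\tau\sim q}\left[\log q(\tau)\right] \tag{27}$$
$$= \log Z + L_{\text{sampler}}(q) \tag{28}$$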
! " #$
!"#$%&'!"
!"#$%&
!" Lcost qθ Lsampler q−cθ(τ)
#$% Lgenerator Ldiscriminator
q(τ) = p(τ)
'() *+,
!" #$% ∂θLcost = ∂θLdiscriminator
!" #$% Lsampler(q) + logZ = Lgenerator(q)
! " #$
!"#"$%&
!"
!"#$%&
$$p_\theta(\tau) = \frac{1}{Z(\theta)}\exp\left(-c_\theta(\tau)\right)$$
Z(θ) cθ ! " !