Initial commit

This commit is contained in:
2026-03-31 13:28:59 +02:00
commit 7ec43ca17d
314 changed files with 189852 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
# Apprentissage par renforcement
## Processus de décision markovien
La vidéo de ce tutoriel est disponible à l'adresse suivante:<br>
https://www.youtube.com/watch?v=Rgxs8lfoG4I

View File

@@ -0,0 +1,35 @@
import numpy as np
import time
# Q-value iteration on a small Markov decision process (3 states, 2 actions).
#
# Q[state][action]             : current action-value estimate, updated in place.
# T[state][action][next_state] : transition weight towards next_state.
# R[state][action][next_state] : reward attached to that transition.
# gamma                        : discount factor applied to future values.
Q = [[0, 0], [0, 0], [0, 0]]
T = [
    [[0.50, 0.00, 0.50], [0.00, 0.00, 1.00]],
    [[0.70, 0.10, 0.20], [0.00, 0.95, 0.05]],
    [[0.40, 0.00, 0.60], [0.30, 0.30, 0.40]],
]
R = [
    [[0.00, 0.00, 0.00], [0.00, 0.00, 0.00]],
    [[+5.00, 0.00, 0.00], [0.00, 0.00, 0.00]],
    [[0.00, 0.00, 0.00], [-1.00, 0.00, 0.00]],
]
gamma = 0.95


def sweep_q_values(q, transitions, rewards, discount):
    """Perform one in-place update sweep over every (state, action) pair.

    Each entry is replaced by
    ``sum_s transitions[S][A][s] * (rewards[S][A][s] + discount * max(q[s]))``.

    The table is updated in place while sweeping, so entries written earlier
    in the sweep are already visible to entries computed later in the same
    sweep.

    Args:
        q: Nested list ``q[state][action]``; modified in place.
        transitions: Nested list ``transitions[state][action][next_state]``.
        rewards: Nested list with the same layout as ``transitions``.
        discount: Multiplier applied to the best value of the next state.

    Returns:
        The same ``q`` object, for convenience.
    """
    for state, (state_transitions, state_rewards) in enumerate(
        zip(transitions, rewards)
    ):
        for action, (weights, gains) in enumerate(
            zip(state_transitions, state_rewards)
        ):
            # Explicit accumulation keeps the exact left-to-right float
            # summation order of a plain "+=" loop.
            total = 0
            for next_state, (weight, gain) in enumerate(zip(weights, gains)):
                total += weight * (gain + discount * np.max(q[next_state]))
            q[state][action] = total
    return q


def format_q_table(q):
    """Return the display lines for a Q table.

    Each state contributes one empty line followed by one line per action;
    the action holding the row maximum (first one on ties, as given by
    ``np.argmax``) is flagged with a trailing `` <-``.

    Args:
        q: Nested list ``q[state][action]`` of numeric values.

    Returns:
        A list of strings, one per output line.
    """
    lines = []
    for state, row in enumerate(q):
        lines.append("")
        # The best action depends only on the row: compute it once per state.
        best_action = np.argmax(row)
        for action, value in enumerate(row):
            text = "Q[etat:{}, action:{}]={:+10.4f}".format(state, action, value)
            if action == best_action:
                text = text + " <-"
            lines.append(text)
    return lines


def run_value_iteration(iterations=200, delay=0.05):
    """Run the update sweeps on the module-level ``Q`` and print each step.

    Args:
        iterations: Number of sweeps to perform.
        delay: Seconds to sleep before each sweep (slows the console output).
    """
    for i in range(iterations):
        time.sleep(delay)
        sweep_q_values(Q, T, R, gamma)
        print("---------------------------------")
        print("Iteration:", i)
        for line in format_q_table(Q):
            print(line)
        # NOTE(review): the original indentation was lost in extraction; this
        # closing separator is assumed to be printed after every iteration
        # rather than once after the loop -- confirm against the real file.
        print("---------------------------------")


if __name__ == "__main__":
    run_value_iteration()