Initialversion: Forschungsprojekte 'HRT meets' und 'Grundsymbole' integriert
This commit is contained in:
33
Algorithmen/Code MDP Algorithmus.md
Normal file
33
Algorithmen/Code MDP Algorithmus.md
Normal file
@ -0,0 +1,33 @@
|
||||
```python
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Beispielhafte Definitionen (stark vereinfacht)
|
||||
states = np.arange(7) # NRS 4-10, Schmerz kontrolliert als Zustand 7
|
||||
actions = np.array([0, 1, 2, 3]) # Esketamin, Fentanyl, Anpassen, Diagnostik
|
||||
P = np.zeros((len(states), len(actions), len(states))) # Übergangsmatrix
|
||||
R = np.zeros((len(states), len(actions))) # Belohnungsmatrix
|
||||
|
||||
# Beispielhafte Füllung von P und R mit fiktiven Daten
|
||||
|
||||
# Value Iteration
|
||||
V = np.zeros(len(states))
|
||||
gamma = 0.99
|
||||
threshold = 0.01
|
||||
while True:
|
||||
delta = 0
|
||||
for s in range(len(states)):
|
||||
v = V[s]
|
||||
V[s] = max(sum(P[s, a, s_prime] * (R[s, a] + gamma * V[s_prime]) for s_prime in range(len(states))) for a in actions)
|
||||
delta = max(delta, abs(v - V[s]))
|
||||
if delta < threshold:
|
||||
break
|
||||
|
||||
# Optimale Politik berechnen
|
||||
policy = np.zeros(len(states), dtype=int)
|
||||
for s in range(len(states)):
|
||||
policy[s] = np.argmax([sum(P[s, a, s_prime] * (R[s, a] + gamma * V[s_prime]) for s_prime in range(len(states))) for a in actions])
|
||||
|
||||
print("Optimale Politik:", policy)
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user