34 lines
1.0 KiB
Markdown
34 lines
1.0 KiB
Markdown
```python
import numpy as np

# Illustrative definitions (heavily simplified).
# The seven pain levels NRS 4-10 map to indices 0-6; "pain controlled" is the
# additional state with index 7, so eight states are required in total.
states = np.arange(8)
actions = np.array([0, 1, 2, 3])  # esketamine, fentanyl, adjust, diagnostics
P = np.zeros((len(states), len(actions), len(states)))  # transitions P[s, a, s']
R = np.zeros((len(states), len(actions)))  # rewards R[s, a]

# Placeholder: fill P and R with (fictitious) example data here.
# NOTE: as long as P is all zeros every action value is 0, so the loop below
# stops after a single sweep and the policy selects action 0 in every state.

# Value iteration
V = np.zeros(len(states))
gamma = 0.99  # discount factor
threshold = 0.01  # stop once the largest per-sweep change drops below this


def _action_values(state, values):
    """Return the expected discounted return of each action in ``state``.

    Entry ``i`` equals ``sum(P[state, a, s2] * (R[state, a] + gamma *
    values[s2]) for s2 in states)`` with ``a = actions[i]``, computed for all
    actions at once via broadcasting.
    """
    transitions = P[state, actions]  # shape (n_actions, n_states)
    rewards = R[state, actions][:, None]  # column vector, broadcast over s2
    return (transitions * (rewards + gamma * values)).sum(axis=1)


while True:
    delta = 0.0
    for s in states:
        previous = V[s]
        # In-place update: later states in this sweep already see the new
        # value of earlier ones.
        V[s] = _action_values(s, V).max()
        delta = max(delta, abs(previous - V[s]))
    if delta < threshold:
        break

# Derive the greedy (optimal) policy from the converged value function.
policy = np.array([np.argmax(_action_values(s, V)) for s in states], dtype=int)

print("Optimale Politik:", policy)
```
|
|
|