1.0 KiB
1.0 KiB
import numpy as np
# Beispielhafte Definitionen (stark vereinfacht)
states = np.arange(7) # NRS 4-10, Schmerz kontrolliert als Zustand 7
actions = np.array([0, 1, 2, 3]) # Esketamin, Fentanyl, Anpassen, Diagnostik
P = np.zeros((len(states), len(actions), len(states))) # Übergangsmatrix
R = np.zeros((len(states), len(actions))) # Belohnungsmatrix
# Beispielhafte Füllung von P und R mit fiktiven Daten
# Value Iteration
V = np.zeros(len(states))
gamma = 0.99
threshold = 0.01
while True:
delta = 0
for s in range(len(states)):
v = V[s]
V[s] = max(sum(P[s, a, s_prime] * (R[s, a] + gamma * V[s_prime]) for s_prime in range(len(states))) for a in actions)
delta = max(delta, abs(v - V[s]))
if delta < threshold:
break
# Optimale Politik berechnen
policy = np.zeros(len(states), dtype=int)
for s in range(len(states)):
policy[s] = np.argmax([sum(P[s, a, s_prime] * (R[s, a] + gamma * V[s_prime]) for s_prime in range(len(states))) for a in actions])
print("Optimale Politik:", policy)