```python

import numpy as np

# Illustrative definitions (heavily simplified)
# NRS 4-10; "pain controlled" as state 7 (translated from the original note).
# NOTE(review): np.arange(7) only provides indices 0..6, so an index 7 does
# not exist -- confirm whether an additional eighth state is intended.
states = np.arange(7)
actions = np.array([0, 1, 2, 3])  # esketamine, fentanyl, adjust, diagnostics
P = np.zeros((len(states), len(actions), len(states)))  # transition matrix, indexed P[s, a, s']
R = np.zeros((len(states), len(actions)))  # reward matrix, indexed R[s, a]

# Placeholder: fill P and R with fictitious example data here.
# While both arrays are all zeros, every value stays 0 and the greedy policy
# selects action index 0 in every state (argmax returns the first maximum).


def _action_values(transitions, rewards, values, gamma, s):
    """Return the one-step Bellman backup of state ``s`` for every action.

    Computes ``sum_s' P[s, a, s'] * (R[s, a] + gamma * V[s'])`` for all
    action indices ``a`` at once and returns them as a 1-D array.
    """
    # rewards[s][:, None] broadcasts R[s, a] across the successor-state axis.
    backed_up = rewards[s][:, None] + gamma * values
    return (transitions[s] * backed_up).sum(axis=1)


def value_iteration(transitions, rewards, gamma=0.99, threshold=0.01,
                    max_iterations=10_000):
    """Run in-place (Gauss-Seidel) value iteration and return the state values.

    Args:
        transitions: array indexed ``[s, a, s']`` with transition weights.
        rewards: array indexed ``[s, a]`` with rewards.
        gamma: discount factor applied to successor values.
        threshold: the sweep loop stops once the largest per-state change
            within one sweep is below this value.
        max_iterations: upper bound on the number of sweeps.

    Returns:
        1-D float array with one value per state.

    Raises:
        RuntimeError: if the change never drops below ``threshold`` within
            ``max_iterations`` sweeps (instead of looping forever).
    """
    transitions = np.asarray(transitions, dtype=float)
    rewards = np.asarray(rewards, dtype=float)
    n_states = transitions.shape[0]
    values = np.zeros(n_states)
    for _ in range(max_iterations):
        delta = 0.0
        for s in range(n_states):
            previous = values[s]
            # Updated values are used immediately by later states in the sweep.
            values[s] = _action_values(transitions, rewards, values, gamma, s).max()
            delta = max(delta, abs(previous - values[s]))
        if delta < threshold:
            return values
    raise RuntimeError(
        f"value iteration did not converge within {max_iterations} sweeps"
    )


def greedy_policy(transitions, rewards, values, gamma=0.99):
    """Return, per state, the action index with the highest Bellman backup.

    Entries are indices along the action axis of ``transitions``; ties are
    resolved in favour of the lowest index (``np.argmax`` semantics).
    """
    transitions = np.asarray(transitions, dtype=float)
    rewards = np.asarray(rewards, dtype=float)
    n_states = transitions.shape[0]
    best = [
        np.argmax(_action_values(transitions, rewards, values, gamma, s))
        for s in range(n_states)
    ]
    return np.array(best, dtype=int)


# Value iteration
gamma = 0.99
threshold = 0.01
V = value_iteration(P, R, gamma, threshold)

# Compute the optimal policy
policy = greedy_policy(P, R, V, gamma)

print("Optimale Politik:", policy)
```