Systemtheoretische Modelle und Algorithmen zur Analyse angedacht.

2025-09-03 21:38:18 +02:00
parent 6d8e50e2a3
commit 55812bc5d6
16 changed files with 4033 additions and 505 deletions
--- a/systemtheoretisch.py
+++ b/systemtheoretisch.py
@@ -0,0 +1,410 @@
+"""
+Visible Learning – systems-theoretical view (after Luhmann / education system)
+---------------------------------------------------------------------------
+Goal
+- Load the data (thermometers)
+- Identify psychic and social addressing (mapping file / heuristic)
+- Compute a coupling index (communication ↔ thought) as an indicator of learning capability
+- Produce CI-conformant visualisations (2D & 3D)
+
+CI
+- Uses the same CI as the statistical script (ci_template.plotly_template)
+- Colours/styles are meant to come from the template (a neutral fallback exists
+  further down in this module for the case that the template cannot be loaded)
+"""
+# The docstring must be the first statement to become the module's __doc__;
+# a __future__ import is still legal directly after it.
+from __future__ import annotations
+
+# -----------------------------------------
+# Imports
+# -----------------------------------------
+import json
+import math
+import os
+import warnings
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objs as go
+import plotly.io as pio
+
+# -----------------------------------------
+# Konfiguration laden (identischer Mechanismus)
+# -----------------------------------------
+# Erwartete Felder: csv_file, theme, export_fig_visual, export_fig_png
+from config_visible_learning import (
+    csv_file,
+    theme,
+    export_fig_visual,
+    export_fig_png,
+)
+
+# -----------------------------------------
+# CI-Template (identisch zu statistischer Datei)
+# -----------------------------------------
+try:
+    from ci_template import plotly_template
+    plotly_template.set_theme(theme)
+
+    def _layout(title, x, y, z='Z'):
+        """Return the CI standard layout for a title and the axis titles."""
+        return plotly_template.get_standard_layout(
+            title=title, x_title=x, y_title=y, z_title=z
+        )
+
+    _styles = plotly_template.get_plot_styles()
+    _colors = plotly_template.get_colors()
+except Exception as _e:
+    # Best-effort fallback: the script should still produce plots when the CI
+    # template is missing or incompatible, but the user must be told that the
+    # output is not CI-styled.
+    warnings.warn(
+        f"CI-Template nicht verfügbar ({_e!r}); neutrales Fallback wird verwendet.",
+        RuntimeWarning,
+    )
+
+    def _layout(title, x, y, z='Z'):
+        """Return a neutral layout dict; 3D axis titles go into ``scene``."""
+        return dict(
+            title=title,
+            xaxis_title=x,
+            yaxis_title=y,
+            # 3D traces read their axis titles from layout.scene, not from the
+            # cartesian xaxis/yaxis, so z would otherwise be dropped.
+            scene=dict(xaxis_title=x, yaxis_title=y, zaxis_title=z),
+        )
+
+    _styles = {
+        "marker_accent": dict(color="#1f77b4", size=8, symbol="circle"),
+        "marker_positiveHighlight": dict(color="#2ca02c", size=8, symbol="circle"),
+        "marker_negativeHighlight": dict(color="#d62728", size=8, symbol="circle"),
+        "linie_primaryLine": dict(color="#1f77b4", width=2),
+        "linie_secondaryLine": dict(color="#ff7f0e", width=2),
+        "balken_accent": dict(color="#1f77b4"),
+    }
+    _colors = {
+        "accent": "#1f77b4",
+        "brightArea": "#66CCCC",
+        "depthArea": "#006666",
+        "positiveHighlight": "#2ca02c",
+        "negativeHighlight": "#d62728",
+        "text": "#333333",
+        "background": "#ffffff",
+        "white": "#ffffff",
+    }
+
+# -----------------------------------------
+# Export-Helfer
+# -----------------------------------------
+# Export directory next to this script; created at import time.
+EXPORT_DIR = os.path.join(os.path.dirname(__file__), "export")
+os.makedirs(EXPORT_DIR, exist_ok=True)
+
+def export_figure(fig, name: str, do_html: bool, do_png: bool):
+    """
+    Write a figure to EXPORT_DIR as HTML and/or PNG.
+
+    Parameters
+    ----------
+    fig : figure object handed to plotly.io
+    name : file name without extension
+    do_html : write ``<name>.html`` (plotly.js is referenced via CDN)
+    do_png : write ``<name>.png`` at scale 2
+
+    HTML export errors propagate. PNG export stays best-effort, but a failure
+    is reported as a RuntimeWarning instead of being dropped silently.
+    """
+    base = os.path.join(EXPORT_DIR, name)
+    if do_html:
+        pio.write_html(fig, file=f"{base}.html", auto_open=False, include_plotlyjs="cdn")
+    if do_png:
+        try:
+            pio.write_image(fig, f"{base}.png", scale=2)
+        except Exception as exc:
+            # Presumably the usual cause is a missing static-image backend;
+            # do not abort the analysis for that, but make it visible.
+            warnings.warn(
+                f"PNG-Export für '{name}' fehlgeschlagen: {exc}",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+
+def export_json(obj: dict | list, name: str):
+    """
+    Serialise ``obj`` as UTF-8 JSON (indent 2, non-ASCII kept) into EXPORT_DIR.
+
+    ``obj`` may be a dict or a list of records. ``name`` is the full file name
+    including extension. The export stays best-effort: a failure does not
+    raise, but it is reported as a RuntimeWarning instead of being ignored.
+    """
+    p = os.path.join(EXPORT_DIR, name)
+    try:
+        with open(p, "w", encoding="utf-8") as f:
+            json.dump(obj, f, ensure_ascii=False, indent=2)
+    except Exception as exc:
+        warnings.warn(
+            f"JSON-Export nach '{p}' fehlgeschlagen: {exc}",
+            RuntimeWarning,
+            stacklevel=2,
+        )
+
+# -----------------------------------------
+# Daten laden und vorbereiten
+# -----------------------------------------
+# Columns that must be present in the thermometer CSV.
+REQUIRED = ["Thermometer_ID", "Stichwort", "Effektstärke"]
+
+def load_data(path: str) -> pd.DataFrame:
+    """
+    Load the thermometer CSV and normalise it.
+
+    Steps
+    - require the columns listed in REQUIRED
+    - keep Thermometer_ID as text
+    - parse Effektstärke (decimal comma accepted) and drop rows where it is
+      not numeric
+    - derive "Kapitel" (integer prefix of Thermometer_ID) and "Kapitelname"
+      when those columns are absent
+
+    Raises
+    ------
+    ValueError
+        If required columns are missing, or if "Kapitel" has to be derived and
+        a Thermometer_ID does not start with an integer chapter number.
+    """
+    # Read the ID column as text: with dtype inference "5.10" is parsed as the
+    # float 5.1 and would become indistinguishable from "5.1".
+    df = pd.read_csv(path, dtype={"Thermometer_ID": str})
+    missing = [c for c in REQUIRED if c not in df.columns]
+    if missing:
+        raise ValueError(f"Fehlende Spalten in CSV: {missing}")
+
+    df["Thermometer_ID"] = df["Thermometer_ID"].astype(str)
+    df["Effektstärke"] = (
+        df["Effektstärke"].astype(str).str.replace(",", ".", regex=False).str.strip()
+    )
+    df["Effektstärke"] = pd.to_numeric(df["Effektstärke"], errors="coerce")
+    df = df.dropna(subset=["Effektstärke"]).copy()
+
+    # Create chapter number and name if they are not supplied
+    if "Kapitel" not in df.columns:
+        prefix = df["Thermometer_ID"].str.split(".").str[0]
+        chapter = pd.to_numeric(prefix, errors="coerce")
+        bad = chapter.isna()
+        if bad.any():
+            # Name the offending IDs instead of failing inside astype(int).
+            raise ValueError(
+                "Kapitel nicht aus Thermometer_ID ableitbar: "
+                f"{sorted(set(df.loc[bad, 'Thermometer_ID']))}"
+            )
+        df["Kapitel"] = chapter.astype(int)
+    if "Kapitelname" not in df.columns:
+        kapitel_map = {
+            5: "Lernende",
+            6: "Elternhaus und Familie",
+            7: "Schule und Gesellschaft",
+            8: "Klassenzimmer",
+            9: "Lehrperson",
+            10: "Curriculum",
+            11: "Zielorientiertes Unterrichten",
+            12: "Lernstrategien",
+            13: "Lehrstrategien",
+            14: "Nutzung von Technologien",
+            15: "Schulische und außerschulische Einflüsse",
+        }
+        # Unknown chapters get a generic "Kapitel <n>" name.
+        df["Kapitelname"] = df["Kapitel"].map(kapitel_map).fillna(df["Kapitel"].map(lambda k: f"Kapitel {k}"))
+    return df
+
+# -----------------------------------------
+# System-Mapping: Psychisch / Sozial
+# -----------------------------------------
+# Erwartete optionale Datei: system_mapping.csv
+# Spalten: "Term","Psych","Sozial"  (0/1), wobei "Term" gegen Stichwort gematcht wird (Teilstring, case-insensitive)
+
+def load_system_mapping(map_csv: str = "system_mapping.csv") -> pd.DataFrame | None:
+    """
+    Read the optional term mapping that sits next to this script.
+
+    Returns None when the file does not exist. Otherwise returns the table
+    with "Term" stripped of surrounding whitespace and "Psych"/"Sozial" cast
+    to int and clipped to the range 0..1.
+
+    Raises ValueError when one of the columns Term/Psych/Sozial is missing.
+    """
+    mapping_path = os.path.join(os.path.dirname(__file__), map_csv)
+    if not os.path.exists(mapping_path):
+        return None
+
+    table = pd.read_csv(mapping_path)
+    if any(required not in table.columns for required in ("Term", "Psych", "Sozial")):
+        raise ValueError("system_mapping.csv muss die Spalten 'Term','Psych','Sozial' enthalten.")
+
+    table["Term"] = table["Term"].astype(str).str.strip()
+    for flag in ("Psych", "Sozial"):
+        table[flag] = table[flag].astype(int).clip(0, 1)
+    return table
+
+def classify_systems(df: pd.DataFrame, mapping: pd.DataFrame | None = None) -> pd.DataFrame:
+    """
+    Add the 0/1 flags "Psych" and "Sozial" to a copy of ``df``.
+
+    1) Mapping table (precise): every non-empty "Term" is matched as a
+       case-insensitive literal substring of "Stichwort"; matching rows take
+       the maximum of their current flag and the mapped flag.
+    2) Heuristic (conservative): rows that are still 0/0 after the mapping are
+       flagged through the token lists below. A row may receive both flags.
+
+    Parameters
+    ----------
+    df : frame with a "Stichwort" column
+    mapping : optional frame with the columns "Term", "Psych", "Sozial"
+    """
+    out = df.copy()
+    out["Psych"] = 0
+    out["Sozial"] = 0
+    keywords = out["Stichwort"].astype(str).str.lower()
+
+    # 1) Mapping file (precise)
+    if mapping is not None and not mapping.empty:
+        for _, row in mapping.iterrows():
+            term = str(row["Term"]).lower().strip()
+            if not term:
+                continue
+            # Terms are plain substrings, not patterns: regex=False keeps terms
+            # such as "c++" or "(a)" from being interpreted (or rejected) as regex.
+            mask = keywords.str.contains(term, regex=False, na=False)
+            out.loc[mask, "Psych"] = np.maximum(out.loc[mask, "Psych"], int(row["Psych"]))
+            out.loc[mask, "Sozial"] = np.maximum(out.loc[mask, "Sozial"], int(row["Sozial"]))
+
+    # 2) Heuristic (only where the mapping left 0/0), deliberately conservative
+    #    - psychic markers
+    psych_tokens = [
+        "intelligenz","kognition","exekutiv","gedächtnis","selbstwirksam",
+        "selbstbild","emotion","angst","depress","wut","frustration","konzentration",
+        "ausdauer","beharrlichkeit","zuversicht","mindset","kreativ","neugier",
+        "arbeitsgedächtnis","einstellung","motivation","willen"
+    ]
+    #    - social markers
+    # NOTE(review): "bulling" looks like a typo for "bullying" – confirm against the data.
+    sozial_tokens = [
+        "klasse","klassen","beziehung","lehrer","schüler","unterricht",
+        "klima","team","gruppe","beratung","schulleitung","schule","familie",
+        "eltern","konflikt","zusammenhalt","zugehörigkeit","tracking","sommerschule",
+        "curriculum","kalender","stundenplan","pause","bulling","ausschluss"
+    ]
+
+    # The unset mask is taken once, before either flag is written, so a row can
+    # be flagged psychic and social by the heuristic at the same time.
+    unset_mask = (out["Psych"] == 0) & (out["Sozial"] == 0)
+    # Match on the full-length series so both masks share one index and no
+    # implicit index alignment is needed.
+    psych_hit = keywords.str.contains("|".join(psych_tokens), na=False)
+    sozial_hit = keywords.str.contains("|".join(sozial_tokens), na=False)
+
+    out.loc[unset_mask & psych_hit, "Psych"] = 1
+    out.loc[unset_mask & sozial_hit, "Sozial"] = 1
+
+    return out
+
+# -----------------------------------------
+# Kopplungsindex (Erziehungssystem: Lernfähig/nicht lernfähig)
+# -----------------------------------------
+def minmax_norm(a: np.ndarray) -> np.ndarray:
+    """
+    Scale values linearly to [0, 1] using the NaN-ignoring minimum and maximum.
+
+    An empty input is returned as an empty float array. If the bounds are not
+    finite or the range is at most 1e-12, an all-zero array is returned.
+    """
+    values = np.asarray(a, dtype=float)
+    if values.size == 0:
+        return values
+
+    lower = np.nanmin(values)
+    upper = np.nanmax(values)
+    # Range is only evaluated once both bounds are known to be finite.
+    usable = np.isfinite(lower) and np.isfinite(upper) and (upper - lower) > 1e-12
+    if not usable:
+        return np.zeros_like(values)
+    return (values - lower) / (upper - lower)
+
+def compute_coupling_index(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Add coupling index, addressing label and ranks to a copy of ``df``.
+
+    Kopplungsindex = sign(d) * norm(|d|) * w
+      w = 1.0 for coupling (Psych=1 & Sozial=1)
+          0.6 for exactly one of Psych=1 / Sozial=1
+          0.2 otherwise (unspecific)
+    norm is the min-max normalisation of |d| over all rows.
+
+    Added columns: "Kopplungsindex", "Adressierung", "Rank_abs_d",
+    "Rank_kopplung" (rank 1 = largest magnitude, ties share the lowest rank).
+    Expects the columns "Effektstärke", "Psych" and "Sozial".
+    """
+    out = df.copy()
+    d = out["Effektstärke"]
+    abs_d_norm = minmax_norm(d.abs().values)
+
+    # Compare once and reuse the masks for the weights.
+    is_psych = out["Psych"] == 1
+    is_sozial = out["Sozial"] == 1
+    both = is_psych & is_sozial
+    single = is_psych ^ is_sozial
+
+    w = np.where(both, 1.0, np.where(single, 0.6, 0.2))
+    out["Kopplungsindex"] = np.sign(d.values) * abs_d_norm * w
+
+    # Label for quick readability; any flag pair outside the table is "Unspezifisch".
+    labels = {
+        (1, 1): "Kopplung (Psych+Sozial)",
+        (1, 0): "Psychisch adressiert",
+        (0, 1): "Sozial adressiert",
+    }
+    out["Adressierung"] = [
+        labels.get((p, s), "Unspezifisch") for p, s in zip(out["Psych"], out["Sozial"])
+    ]
+
+    # Ranks (negated so that the largest magnitude gets rank 1)
+    out["Rank_abs_d"] = (-d.abs()).rank(method="min").astype(int)
+    out["Rank_kopplung"] = (-out["Kopplungsindex"].abs()).rank(method="min").astype(int)
+    return out
+
+# -----------------------------------------
+# Visualisierungen (CI-konform, keine Hardcodierung)
+# -----------------------------------------
+def plot_sign_system_2d(df: pd.DataFrame):
+    """
+    Show and export the 2D addressing view.
+
+    X is the Psych flag and Y the Sozial flag (both cast to int). Marker size
+    grows with |Kopplungsindex|; marker colour encodes its sign. The figure is
+    shown and then exported as "sys_erziehung_2d".
+    """
+    coupling = df["Kopplungsindex"]
+    marker_sizes = (coupling.abs() * 22.0 + 6.0).astype(float)
+
+    positive = _colors.get("positiveHighlight", "#2ca02c")
+    negative = _colors.get("negativeHighlight", "#d62728")
+    marker_colors = np.where(coupling >= 0, positive, negative)
+
+    # Column order fixes the customdata indices used in the hover template.
+    hover_columns = ["Thermometer_ID", "Effektstärke", "Adressierung", "Kopplungsindex", "Kapitelname"]
+    hover_data = np.stack([df[column] for column in hover_columns], axis=-1)
+    hover = (
+        "Thermometer: %{customdata[0]}<br>"
+        "Stichwort: %{text}<br>"
+        "d: %{customdata[1]:.2f}<br>"
+        "Adressierung: %{customdata[2]}<br>"
+        "Kopplungsindex: %{customdata[3]:.3f}<br>"
+        "Kapitel: %{customdata[4]}<extra></extra>"
+    )
+
+    trace = go.Scatter(
+        x=df["Psych"].astype(int),
+        y=df["Sozial"].astype(int),
+        mode="markers",
+        marker=dict(size=marker_sizes, color=marker_colors),
+        text=df["Stichwort"],
+        customdata=hover_data,
+        hovertemplate=hover,
+        name="Thermometer",
+    )
+    fig = go.Figure()
+    fig.add_trace(trace)
+
+    # Discrete axes (0/1) on top of the CI layout
+    fig.update_layout(_layout(
+        "Erziehungssystem – Adressierung & Kopplung (2D)",
+        "Psychisch (0/1)", "Sozial (0/1)"
+    ))
+    binary_ticks = dict(tickmode="array", tickvals=[0, 1], ticktext=["0", "1"])
+    fig.update_xaxes(**binary_ticks)
+    fig.update_yaxes(**binary_ticks)
+    fig.show()
+    export_figure(fig, "sys_erziehung_2d", export_fig_visual, export_fig_png)
+
+def plot_sign_system_3d(df: pd.DataFrame):
+    """
+    Show and export the 3D view.
+
+    X is the Psych flag, Y the Sozial flag (both cast to int), Z the effect
+    size. Marker size grows with |Kopplungsindex|; colour encodes its sign.
+    The figure is shown and then exported as "sys_erziehung_3d".
+    """
+    size = (df["Kopplungsindex"].abs() * 8.0 + 4.0).astype(float)
+    color_pos = _colors.get("positiveHighlight", "#2ca02c")
+    color_neg = _colors.get("negativeHighlight", "#d62728")
+    point_colors = np.where(df["Kopplungsindex"] >= 0, color_pos, color_neg)
+
+    # %{text} carries the Stichwort, so it is labelled as such; the thermometer
+    # ID comes from customdata, matching the hover of the 2D plot.
+    hover = (
+        "Thermometer: %{customdata[0]}<br>"
+        "Stichwort: %{text}<br>"
+        "Kapitel: %{customdata[1]}<br>"
+        "Psych: %{x} | Sozial: %{y}<br>"
+        "d: %{z:.2f}<br>"
+        "Kopplungsindex: %{customdata[2]:.3f}<extra></extra>"
+    )
+
+    fig = go.Figure()
+    fig.add_trace(go.Scatter3d(
+        x=df["Psych"].astype(int),
+        y=df["Sozial"].astype(int),
+        z=df["Effektstärke"],
+        mode="markers",
+        # Template marker style first; size and colour are data-driven overrides.
+        marker={**_styles.get("marker_accent", {}), "size": size, "color": point_colors},
+        text=df["Stichwort"],
+        customdata=np.stack(
+            [df["Thermometer_ID"], df["Kapitelname"], df["Kopplungsindex"]], axis=-1
+        ),
+        hovertemplate=hover,
+        name="Thermometer"
+    ))
+
+    fig.update_layout(_layout(
+        "Erziehungssystem – 3D-Sicht (Psych × Sozial × d)",
+        "Psychisch (0/1)", "Sozial (0/1)", "Cohen d"
+    ))
+    fig.update_scenes(
+        xaxis=dict(tickmode="array", tickvals=[0,1], ticktext=["0","1"]),
+        yaxis=dict(tickmode="array", tickvals=[0,1], ticktext=["0","1"])
+    )
+    fig.show()
+    export_figure(fig, "sys_erziehung_3d", export_fig_visual, export_fig_png)
+
+def plot_rank_tables(df: pd.DataFrame, top_n: int = 15):
+    """
+    Show and export two tables:
+    - top ``top_n`` rows by |Effektstärke| (largest magnitude)
+    - top ``top_n`` rows by |Kopplungsindex| (strongest systemic coupling)
+
+    Exported as "sys_top_absd" and "sys_top_kopplung".
+    """
+    cols = ["Thermometer_ID", "Stichwort", "Kapitelname", "Effektstärke", "Psych", "Sozial", "Kopplungsindex", "Adressierung"]
+
+    def top_by_abs(column: str) -> pd.DataFrame:
+        # sort_values needs a column label in `by`; the magnitude ordering is
+        # expressed through `key`. Passing an abs() Series as `by` is an error.
+        return df.sort_values(column, key=lambda s: s.abs(), ascending=False).head(top_n)
+
+    def table(data: pd.DataFrame, title: str, fname: str):
+        data = data[cols].copy()
+        data["Effektstärke"] = data["Effektstärke"].round(2)
+        data["Kopplungsindex"] = data["Kopplungsindex"].round(3)
+
+        headers = list(data.columns)
+        # Table cells are column-wise lists of strings.
+        values = [data[c].astype(str).tolist() for c in headers]
+        fig = go.Figure(data=[go.Table(
+            header=dict(values=headers, fill_color=_colors["brightArea"], font=dict(color=_colors["white"])),
+            cells=dict(values=values, fill_color=_colors["depthArea"], font=dict(color=_colors["white"]))
+        )])
+        fig.update_layout(_layout(title, "", ""))
+        fig.show()
+        export_figure(fig, fname, export_fig_visual, export_fig_png)
+
+    table(top_by_abs("Effektstärke"), f"Top {top_n} nach |d|", "sys_top_absd")
+    table(top_by_abs("Kopplungsindex"), f"Top {top_n} nach |Kopplungsindex|", "sys_top_kopplung")
+
+# -----------------------------------------
+# Pipeline
+# -----------------------------------------
+def analyse_system(path_csv: str, map_csv: str = "system_mapping.csv"):
+    """
+    Run the whole pipeline: load, classify, compute the coupling index,
+    export the core table (CSV + JSON), draw the plots and print a summary.
+
+    Parameters
+    ----------
+    path_csv : path of the thermometer CSV
+    map_csv : file name of the optional term mapping next to this script
+
+    The core export is best-effort: a failure is reported as a RuntimeWarning
+    and the visualisations and the console report still run.
+    """
+    # Load
+    df = load_data(path_csv)
+
+    # System classification
+    mapping = load_system_mapping(map_csv)
+    df = classify_systems(df, mapping=mapping)
+
+    # Coupling index
+    df = compute_coupling_index(df)
+
+    # Export core output
+    out_cols = ["Thermometer_ID","Stichwort","Kapitel","Kapitelname","Effektstärke","Psych","Sozial","Adressierung","Kopplungsindex"]
+    try:
+        df[out_cols].to_csv(os.path.join(EXPORT_DIR, "system_view.csv"), index=False)
+        export_json(df[out_cols].to_dict(orient="records"), "system_view.json")
+    except Exception as exc:
+        warnings.warn(
+            f"Export von system_view fehlgeschlagen: {exc}",
+            RuntimeWarning,
+            stacklevel=2,
+        )
+
+    # Visualisations
+    plot_sign_system_2d(df)
+    plot_sign_system_3d(df)
+    plot_rank_tables(df, top_n=15)
+
+    # Console report
+    print("—" * 60)
+    print("SYSTEMTHEORETISCHE SICHT – Zusammenfassung")
+    print(df.groupby("Adressierung")["Effektstärke"].agg(n="count", mean="mean").round(3))
+    print("\nTop 10 Kopplung (|Index|):")
+    print(
+        df.loc[:, ["Thermometer_ID", "Stichwort", "Kapitelname", "Effektstärke", "Kopplungsindex"]]
+          .assign(abs_idx=lambda t: t["Kopplungsindex"].abs())
+          .sort_values("abs_idx", ascending=False)
+          .head(10)
+          .drop(columns=["abs_idx"])
+          .to_string(index=False)
+    )
+
+# -----------------------------------------
+# Main
+# -----------------------------------------
+if __name__ == "__main__":
+    # Resolve the configured CSV relative to this script, not the working directory.
+    _script_dir = os.path.dirname(__file__)
+    analyse_system(os.path.join(_script_dir, csv_file))