"""
Visible Learning: a systems-theoretical view (after Luhmann / the education system)
---------------------------------------------------------------------------
Goal
- Load the data (thermometers)
- Identify psychic and social addressing (mapping/heuristic)
- Compute a coupling index (communication ↔ thought) as an indicator of learnability
- Produce CI-compliant visualizations (2D & 3D)
CI
- Uses the same CI as the statistics file (ci_template.plotly_template)
- Colors/styles exclusively via the template (no hardcoding)
"""
from __future__ import annotations
# -----------------------------------------
# Imports
# -----------------------------------------
import os
import json
import math
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
# -----------------------------------------
# Load the configuration (identical mechanism)
# -----------------------------------------
# Expected fields: csv_file, theme, export_fig_visual, export_fig_png
from config_visible_learning import (
csv_file,
theme,
export_fig_visual,
export_fig_png,
)
# -----------------------------------------
# CI template (identical to the statistics file)
# -----------------------------------------
try:
from ci_template import plotly_template
plotly_template.set_theme(theme)
    def _layout(title, x, y, z="Z"):
        return plotly_template.get_standard_layout(
            title=title, x_title=x, y_title=y, z_title=z
        )
_styles = plotly_template.get_plot_styles()
_colors = plotly_template.get_colors()
except Exception as _e:
# Fallback (neutral)
def _layout(title, x, y, z='Z'):
return dict(title=title, xaxis_title=x, yaxis_title=y)
_styles = {
"marker_accent": dict(color="#1f77b4", size=8, symbol="circle"),
"marker_positiveHighlight": dict(color="#2ca02c", size=8, symbol="circle"),
"marker_negativeHighlight": dict(color="#d62728", size=8, symbol="circle"),
"linie_primaryLine": dict(color="#1f77b4", width=2),
"linie_secondaryLine": dict(color="#ff7f0e", width=2),
"balken_accent": dict(color="#1f77b4"),
}
_colors = {
"accent": "#1f77b4",
"brightArea": "#66CCCC",
"depthArea": "#006666",
"positiveHighlight": "#2ca02c",
"negativeHighlight": "#d62728",
"text": "#333333",
"background": "#ffffff",
"white": "#ffffff",
}
# -----------------------------------------
# Export helpers
# -----------------------------------------
EXPORT_DIR = os.path.join(os.path.dirname(__file__), "export")
os.makedirs(EXPORT_DIR, exist_ok=True)
def export_figure(fig, name: str, do_html: bool, do_png: bool):
base = os.path.join(EXPORT_DIR, name)
if do_html:
pio.write_html(fig, file=f"{base}.html", auto_open=False, include_plotlyjs="cdn")
if do_png:
try:
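            # Static PNG export needs an image engine (e.g. kaleido); if it is
            # missing, the except below skips the PNG silently.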
pio.write_image(fig, f"{base}.png", scale=2)
except Exception:
pass
def export_json(obj: dict, name: str):
p = os.path.join(EXPORT_DIR, name)
try:
with open(p, "w", encoding="utf-8") as f:
json.dump(obj, f, ensure_ascii=False, indent=2)
except Exception:
pass
# -----------------------------------------
# Load and prepare the data
# -----------------------------------------
REQUIRED = ["Thermometer_ID", "Stichwort", "Effektstärke"]
def load_data(path: str) -> pd.DataFrame:
df = pd.read_csv(path)
missing = [c for c in REQUIRED if c not in df.columns]
if missing:
raise ValueError(f"Fehlende Spalten in CSV: {missing}")
df["Thermometer_ID"] = df["Thermometer_ID"].astype(str)
df["Effektstärke"] = (
df["Effektstärke"].astype(str).str.replace(",", ".", regex=False).str.strip()
)
df["Effektstärke"] = pd.to_numeric(df["Effektstärke"], errors="coerce")
df = df.dropna(subset=["Effektstärke"]).copy()
    # Derive chapter number & name if not already present
if "Kapitel" not in df.columns:
df["Kapitel"] = df["Thermometer_ID"].str.split(".").str[0].astype(int)
if "Kapitelname" not in df.columns:
kapitel_map = {
5: "Lernende",
6: "Elternhaus und Familie",
7: "Schule und Gesellschaft",
8: "Klassenzimmer",
9: "Lehrperson",
10: "Curriculum",
11: "Zielorientiertes Unterrichten",
12: "Lernstrategien",
13: "Lehrstrategien",
14: "Nutzung von Technologien",
15: "Schulische und außerschulische Einflüsse",
}
df["Kapitelname"] = df["Kapitel"].map(kapitel_map).fillna(df["Kapitel"].map(lambda k: f"Kapitel {k}"))
return df
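# Expected input shape (illustrative rows only; the real file name comes from
# config_visible_learning.csv_file). German decimal commas are normalized by load_data:
#
#   Thermometer_ID,Stichwort,Effektstärke
#   5.1,Selbstwirksamkeit,"0,92"
#   8.2,Klassenklima,"0,52"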
# -----------------------------------------
# System mapping: psychic / social
# -----------------------------------------
# Optional file: system_mapping.csv
# Columns: "Term","Psych","Sozial" (0/1); "Term" is matched against Stichwort (substring, case-insensitive)
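# Example content for system_mapping.csv (illustrative only, not shipped with the repo):
#
#   Term,Psych,Sozial
#   selbstwirksam,1,0
#   klassenklima,0,1
#   feedback,1,1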
def load_system_mapping(map_csv: str = "system_mapping.csv") -> pd.DataFrame | None:
path = os.path.join(os.path.dirname(__file__), map_csv)
if os.path.exists(path):
m = pd.read_csv(path)
for col in ["Term", "Psych", "Sozial"]:
if col not in m.columns:
raise ValueError("system_mapping.csv muss die Spalten 'Term','Psych','Sozial' enthalten.")
m["Term"] = m["Term"].astype(str).str.strip()
m["Psych"] = m["Psych"].astype(int).clip(0, 1)
m["Sozial"] = m["Sozial"].astype(int).clip(0, 1)
return m
return None
def classify_systems(df: pd.DataFrame, mapping: pd.DataFrame | None = None) -> pd.DataFrame:
out = df.copy()
out["Psych"] = 0
out["Sozial"] = 0
    # 1) Mapping file (precise)
if mapping is not None and not mapping.empty:
sw = out["Stichwort"].astype(str).str.lower()
for _, row in mapping.iterrows():
term = str(row["Term"]).lower().strip()
if not term:
continue
            # plain substring match; mapping terms are not regular expressions
            mask = sw.str.contains(term, na=False, regex=False)
out.loc[mask, "Psych"] = np.maximum(out.loc[mask, "Psych"], int(row["Psych"]))
out.loc[mask, "Sozial"] = np.maximum(out.loc[mask, "Sozial"], int(row["Sozial"]))
    # 2) Heuristic (where still 0/0 after the mapping), deliberately conservative
    # - psychic markers
psych_tokens = [
"intelligenz","kognition","exekutiv","gedächtnis","selbstwirksam",
"selbstbild","emotion","angst","depress","wut","frustration","konzentration",
"ausdauer","beharrlichkeit","zuversicht","mindset","kreativ","neugier",
"arbeitsgedächtnis","einstellung","motivation","willen"
]
    # - social markers
sozial_tokens = [
"klasse","klassen","beziehung","lehrer","schüler","unterricht",
"klima","team","gruppe","beratung","schulleitung","schule","familie",
"eltern","konflikt","zusammenhalt","zugehörigkeit","tracking","sommerschule",
"curriculum","kalender","stundenplan","pause","bulling","ausschluss"
]
    # apply the heuristic only where nothing has been set yet:
    unset_mask = (out["Psych"] == 0) & (out["Sozial"] == 0)
    # match on the full column, then combine with unset_mask (avoids index misalignment)
    sw2 = out["Stichwort"].astype(str).str.lower()
    out.loc[unset_mask & sw2.str.contains("|".join(psych_tokens), na=False), "Psych"] = 1
    out.loc[unset_mask & sw2.str.contains("|".join(sozial_tokens), na=False), "Sozial"] = 1
return out
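# Minimal sketch of the heuristic (no mapping file): "Selbstwirksamkeit" contains
# the token "selbstwirksam" -> Psych=1; "Klassenklima" contains "klima" -> Sozial=1;
# a keyword that matches tokens from both lists is coupled (Psych=1 and Sozial=1).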
# -----------------------------------------
# Soft scores via text similarity (TF-IDF + cosine)
# -----------------------------------------
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def _normalize_01(a: np.ndarray) -> np.ndarray:
a = np.asarray(a, dtype=float)
if a.size == 0:
return a
lo, hi = np.nanmin(a), np.nanmax(a)
if not np.isfinite(lo) or not np.isfinite(hi) or hi - lo <= 1e-12:
return np.zeros_like(a, dtype=float)
return (a - lo) / (hi - lo)
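# Example: _normalize_01(np.array([1.0, 2.0, 3.0])) -> array([0. , 0.5, 1. ]);
# constant or empty input maps to all zeros instead of dividing by zero.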
def build_lexicons(df: pd.DataFrame, mapping: pd.DataFrame | None) -> tuple[str, str]:
"""
Erzeugt zwei 'Pseudodokumente' (Lexika) für psychische vs. soziale Marker.
Präferenz: Mapping → bereits klassifizierte Stichwörter → konservative Heuristik.
"""
    # 1) From the mapping (explicit terms)
psych_terms, sozial_terms = [], []
if mapping is not None and not mapping.empty:
psych_terms = mapping.loc[mapping["Psych"] == 1, "Term"].astype(str).tolist()
sozial_terms = mapping.loc[mapping["Sozial"] == 1, "Term"].astype(str).tolist()
    # 2) Extend with keywords that are already classified
if "Psych" in df.columns and "Sozial" in df.columns:
psych_terms += df.loc[df["Psych"] == 1, "Stichwort"].astype(str).tolist()
sozial_terms += df.loc[df["Sozial"] == 1, "Stichwort"].astype(str).tolist()
    # 3) Fallback heuristic
if not psych_terms:
psych_terms = [
"Intelligenz","Kognition","Exekutive Funktionen","Gedächtnis","Selbstwirksamkeit",
"Selbstbild","Emotion","Motivation","Ausdauer","Beharrlichkeit","Zuversicht",
"Mindset","Kreativität","Neugier","Arbeitsgedächtnis","Einstellung","Wille"
]
if not sozial_terms:
sozial_terms = [
"Klasse","Beziehung","Lehrer","Schüler","Unterricht","Klima","Team","Gruppe",
"Beratung","Schulleitung","Schule","Familie","Eltern","Zusammenhalt",
"Zugehörigkeit","Curriculum","Stundenplan","Pause","Konflikt","Sommerschule"
]
    # Join into pseudo-documents
doc_psych = " ".join(map(str, psych_terms))
doc_sozial = " ".join(map(str, sozial_terms))
return doc_psych, doc_sozial
def compute_soft_system_scores(df: pd.DataFrame, mapping: pd.DataFrame | None) -> pd.DataFrame:
"""
Berechnet kontinuierliche Scores (0..1) für Psychisch/Sozial via TF-IDF + Cosine-Similarity
zu zwei Pseudodokumenten (Lexika).
"""
out = df.copy()
    # Build the pseudo-documents
doc_psych, doc_sozial = build_lexicons(out, mapping)
    # Corpus = all keywords + the 2 pseudo-documents
corpus = out["Stichwort"].astype(str).tolist() + [doc_psych, doc_sozial]
vect = TfidfVectorizer(max_features=1000, ngram_range=(1,2))
X = vect.fit_transform(corpus)
    # Indices of the pseudo-docs
idx_psych = X.shape[0] - 2
idx_sozial = X.shape[0] - 1
    # Cosine similarity of each keyword to the pseudo-docs
S_psych = cosine_similarity(X[:-2], X[idx_psych])
S_sozial = cosine_similarity(X[:-2], X[idx_sozial])
    # Scale to [0,1] (row vectors → 1D)
p_raw = S_psych.ravel()
s_raw = S_sozial.ravel()
out["Psych_Score"] = _normalize_01(p_raw)
out["Sozial_Score"] = _normalize_01(s_raw)
return out
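# Minimal sketch of the soft scoring on synthetic keywords (assumes scikit-learn
# is installed); call it by hand if needed. Scores are min-max normalized within
# the corpus, so exact values depend on the full set of keywords passed in.
def _demo_soft_scores():
    demo = pd.DataFrame({
        "Stichwort": ["Selbstwirksamkeit", "Klassenklima", "Feedback"],
        "Effektstärke": [0.92, 0.52, 0.70],
    })
    scored = compute_soft_system_scores(demo, mapping=None)
    print(scored[["Stichwort", "Psych_Score", "Sozial_Score"]])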
# -----------------------------------------
# Coupling index (education system code: able to learn / not able to learn)
# -----------------------------------------
# Identical to _normalize_01 above; kept as an alias so existing callers keep working.
minmax_norm = _normalize_01
def compute_coupling_index(df: pd.DataFrame) -> pd.DataFrame:
"""
Kontinuierlicher Kopplungsindex:
- p = Psych_Score (0..1), s = Sozial_Score (0..1); Fallback auf binäre 'Psych'/'Sozial'
- H = harmonisches Mittel = 2ps/(p+s) (0, wenn p+s=0)
- |d| min-max-normalisiert
- Index = sign(d) * norm(|d|) * H
"""
out = df.copy()
    # Soft scores, or the binary fallback
if "Psych_Score" in out.columns and "Sozial_Score" in out.columns:
p = out["Psych_Score"].astype(float).values
s = out["Sozial_Score"].astype(float).values
else:
p = out.get("Psych", pd.Series(0, index=out.index)).astype(float).clip(0,1).values
s = out.get("Sozial", pd.Series(0, index=out.index)).astype(float).clip(0,1).values
    # Harmonic mean (numerically stable)
denom = p + s
H = np.zeros_like(denom, dtype=float)
mask = denom > 0
H[mask] = 2 * p[mask] * s[mask] / denom[mask]
    # Normalize |d|
abs_d = out["Effektstärke"].abs().values
abs_d_norm = _normalize_01(abs_d)
signed_index = np.sign(out["Effektstärke"].values) * abs_d_norm * H
out["Kopplungsindex"] = signed_index
    # Addressing label based on the soft scores
def addr_lab(pp, ss):
if pp >= 0.5 and ss >= 0.5:
return "Kopplung (Psych+Sozial)"
if pp >= 0.5 and ss < 0.5:
return "Psychisch adressiert"
if pp < 0.5 and ss >= 0.5:
return "Sozial adressiert"
return "Unspezifisch"
    # p and s already hold the soft scores where available (see above)
    out["Adressierung"] = [addr_lab(pp, ss) for pp, ss in zip(p, s)]
    # Ranks
out["Rank_abs_d"] = (-out["Effektstärke"].abs()).rank(method="min").astype(int)
out["Rank_kopplung"] = (-np.abs(out["Kopplungsindex"])).rank(method="min").astype(int)
return out
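# Worked example (hypothetical numbers): for p = 0.8 and s = 0.4 the harmonic
# mean is H = 2*0.8*0.4/(0.8+0.4) ≈ 0.533; with norm(|d|) = 1.0 and d > 0 the
# index is ≈ +0.533. A minimal sketch on synthetic rows (call by hand if needed):
def _demo_coupling_index():
    demo = pd.DataFrame({
        "Thermometer_ID": ["5.1", "8.2"],
        "Stichwort": ["Selbstwirksamkeit", "Klassenklima"],
        "Effektstärke": [0.92, 0.52],
        "Psych_Score": [0.8, 0.3],
        "Sozial_Score": [0.4, 0.9],
    })
    print(compute_coupling_index(demo)[["Stichwort", "Kopplungsindex", "Adressierung"]])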
# -----------------------------------------
# Visualizations (CI-compliant, no hardcoding)
# -----------------------------------------
def plot_sign_system_2d(df: pd.DataFrame):
"""
2D-Sicht: X=Psych (0/1), Y=Sozial (0/1), Markergröße|Farbe ~ Kopplungsindex
"""
x = (df["Psych_Score"] if "Psych_Score" in df.columns else df["Psych"].astype(float).clip(0,1))
y = (df["Sozial_Score"] if "Sozial_Score" in df.columns else df["Sozial"].astype(float).clip(0,1))
size = (df["Kopplungsindex"].abs() * 22.0 + 6.0).astype(float)
color_pos = _colors.get("positiveHighlight", "#2ca02c")
color_neg = _colors.get("negativeHighlight", "#d62728")
point_colors = np.where(df["Kopplungsindex"] >= 0, color_pos, color_neg)
hover = (
"Thermometer: %{customdata[0]}<br>"
"Stichwort: %{text}<br>"
"d: %{customdata[1]:.2f}<br>"
"Adressierung: %{customdata[2]}<br>"
"Kopplungsindex: %{customdata[3]:.3f}<br>"
"Kapitel: %{customdata[4]}<extra></extra>"
)
fig = go.Figure()
fig.add_trace(go.Scatter(
x=x, y=y,
mode="markers",
marker=dict(size=size, color=point_colors),
text=df["Stichwort"],
customdata=np.stack([
df["Thermometer_ID"],
df["Effektstärke"],
df["Adressierung"],
df["Kopplungsindex"],
df["Kapitelname"]
], axis=-1),
hovertemplate=hover,
name="Thermometer"
))
    # Continuous axes (0..1) with the CI layout
fig.update_layout(_layout(
"Erziehungssystem Adressierung & Kopplung (2D)",
"Psychisch (0..1)", "Sozial (0..1)"
))
fig.update_xaxes(range=[0,1], tickmode="array", tickvals=[0,0.25,0.5,0.75,1.0])
fig.update_yaxes(range=[0,1], tickmode="array", tickvals=[0,0.25,0.5,0.75,1.0])
fig.show()
export_figure(fig, "sys_erziehung_2d", export_fig_visual, export_fig_png)
def plot_sign_system_3d(df: pd.DataFrame):
"""
3D-Sicht: X=Psych_Score (0..1), Y=Sozial_Score (0..1), Z=Effektstärke; Farbe/Größe ~ Kopplungsindex
"""
size = (df["Kopplungsindex"].abs() * 8.0 + 4.0).astype(float)
color_pos = _colors.get("positiveHighlight", "#2ca02c")
color_neg = _colors.get("negativeHighlight", "#d62728")
point_colors = np.where(df["Kopplungsindex"] >= 0, color_pos, color_neg)
hover = (
"Thermometer: %{text}<br>"
"Kapitel: %{customdata[0]}<br>"
"Psych: %{x} | Sozial: %{y}<br>"
"d: %{z:.2f}<br>"
"Kopplungsindex: %{customdata[1]:.3f}<extra></extra>"
)
fig = go.Figure()
fig.add_trace(go.Scatter3d(
x=(df["Psych_Score"] if "Psych_Score" in df.columns else df["Psych"].astype(float).clip(0,1)),
y=(df["Sozial_Score"] if "Sozial_Score" in df.columns else df["Sozial"].astype(float).clip(0,1)),
z=df["Effektstärke"],
mode="markers",
marker={**_styles.get("marker_accent", {}), "size": size, "color": point_colors},
text=df["Stichwort"],
customdata=np.stack([df["Kapitelname"], df["Kopplungsindex"]], axis=-1),
hovertemplate=hover,
name="Thermometer"
))
fig.update_layout(_layout(
"Erziehungssystem 3D-Sicht (Psych × Sozial × d)",
"Psychisch (0..1)", "Sozial (0..1)", "Cohen d"
))
fig.update_scenes(
xaxis=dict(range=[0,1], tickmode="array", tickvals=[0,0.25,0.5,0.75,1.0]),
yaxis=dict(range=[0,1], tickmode="array", tickvals=[0,0.25,0.5,0.75,1.0])
)
fig.show()
export_figure(fig, "sys_erziehung_3d", export_fig_visual, export_fig_png)
def plot_rank_tables(df: pd.DataFrame, top_n: int = 15):
"""
Zwei tabellarische Sichten:
- Top |d| (stärkste Magnitude)
- Top |Kopplungsindex| (stärkste systemische Kopplung)
"""
from plotly.graph_objs import Table, Figure
def table(data: pd.DataFrame, title: str, fname: str):
cols = ["Thermometer_ID", "Stichwort", "Kapitelname", "Effektstärke", "Psych", "Sozial", "Kopplungsindex", "Adressierung"]
data = data[cols].copy()
data["Effektstärke"] = data["Effektstärke"].round(2)
data["Kopplungsindex"] = data["Kopplungsindex"].round(3)
headers = list(data.columns)
values = [data[c].astype(str).tolist() for c in headers]
fig = Figure(data=[Table(
header=dict(values=headers, fill_color=_colors["brightArea"], font=dict(color=_colors["white"])),
cells=dict(values=values, fill_color=_colors["depthArea"], font=dict(color=_colors["white"]))
)])
fig.update_layout(_layout(title, "", ""))
fig.show()
export_figure(fig, fname, export_fig_visual, export_fig_png)
top_abs = (df.assign(_absd=lambda t: t["Effektstärke"].abs())
.sort_values("_absd", ascending=False)
.head(top_n)
.drop(columns=["_absd"]))
    table(top_abs, f"Top {top_n} by |d|", "sys_top_absd")
top_coup = (df.assign(_absi=lambda t: t["Kopplungsindex"].abs())
.sort_values("_absi", ascending=False)
.head(top_n)
.drop(columns=["_absi"]))
    table(top_coup, f"Top {top_n} by |Kopplungsindex|", "sys_top_kopplung")
# -----------------------------------------
# Pipeline
# -----------------------------------------
def analyse_system(path_csv: str, map_csv: str = "system_mapping.csv"):
    # Load
    df = load_data(path_csv)
    # System classification
    mapping = load_system_mapping(map_csv)
    df = classify_systems(df, mapping=mapping)
    # Soft scores from text similarity
    df = compute_soft_system_scores(df, mapping=mapping)
    # Coupling index
    df = compute_coupling_index(df)
    # Export the core output
try:
out_cols = [
"Thermometer_ID","Stichwort","Kapitel","Kapitelname","Effektstärke",
"Psych","Sozial","Psych_Score","Sozial_Score",
"Adressierung","Kopplungsindex"
]
df[out_cols].to_csv(os.path.join(EXPORT_DIR, "system_view.csv"), index=False)
export_json(df[out_cols].to_dict(orient="records"), "system_view.json")
except Exception:
pass
    # Quick diagnostics
    print("Soft score quartiles (Psych, Sozial):")
for col in ["Psych_Score","Sozial_Score"]:
if col in df.columns:
q = df[col].quantile([0.25,0.5,0.75]).round(3).to_dict()
print(f" {col}: q25={q.get(0.25)}, q50={q.get(0.5)}, q75={q.get(0.75)}")
    # Visualizations
plot_sign_system_2d(df)
plot_sign_system_3d(df)
plot_rank_tables(df, top_n=15)
    # Console report
    print("=" * 60)
    print("SYSTEMS-THEORETICAL VIEW: Summary")
print(df.groupby("Adressierung")["Effektstärke"].agg(n="count", mean="mean").round(3))
print("\nTop 10 Kopplung (|Index|):")
print(
df.loc[:, ["Thermometer_ID", "Stichwort", "Kapitelname", "Effektstärke", "Kopplungsindex"]]
.assign(abs_idx=lambda t: t["Kopplungsindex"].abs())
.sort_values("abs_idx", ascending=False)
.head(10)
.drop(columns=["abs_idx"])
.to_string(index=False)
)
# -----------------------------------------
# Main
# -----------------------------------------
if __name__ == "__main__":
analyse_system(os.path.join(os.path.dirname(__file__), csv_file))