Compare commits

..

1 Commits

6 changed files with 3847 additions and 139827 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,4 @@
import os import os
# Neue Exportfunktion: HTML in /tmp speichern, per SCP übertragen, PNG lokal speichern # Neue Exportfunktion: HTML in /tmp speichern, per SCP übertragen, PNG lokal speichern
@ -288,7 +289,6 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
abs_corr = abs(corr) abs_corr = abs(corr)
significance = 'Signifikant' if p_value < 0.05 else 'Nicht signifikant' significance = 'Signifikant' if p_value < 0.05 else 'Nicht signifikant'
hover_color = colors['brightArea'] if p_value < 0.05 else colors['depthArea'] hover_color = colors['brightArea'] if p_value < 0.05 else colors['depthArea']
cooccurrence_count = int(((df[x_term] == 1) & (df[y_term] == 1)).sum())
correlations.append({ correlations.append({
'x_term': x_term, 'x_term': x_term,
'y_term': y_term, 'y_term': y_term,
@ -297,8 +297,6 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
'p_value': p_value, 'p_value': p_value,
'significance': significance, 'significance': significance,
'hover_color': hover_color, 'hover_color': hover_color,
'n_observations': int(len(x_valid)),
'cooccurrence_count': cooccurrence_count,
'interpretation': ( 'interpretation': (
f"Die Korrelation zwischen '{x_term}' und '{y_term}' beträgt {corr:.2f}. " f"Die Korrelation zwischen '{x_term}' und '{y_term}' beträgt {corr:.2f}. "
f"p-Wert: {p_value:.3e} ({significance})" f"p-Wert: {p_value:.3e} ({significance})"
@ -364,25 +362,12 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
line=dict(width=1, color=colors['background']) line=dict(width=1, color=colors['background'])
), ),
hovertemplate=( hovertemplate=(
'<b>%{customdata[0]}</b> ↔ <b>%{customdata[1]}</b><br>' '<b>%{customdata[0]}</b><br>'
'Korrelation: %{marker.color:.2f}<br>' 'Korrelation: %{marker.color:.2f}<br>'
'p-Wert: %{customdata[3]:.3e}<br>' 'p-Wert: %{customdata[1]:.3e}<br>'
'Signifikanz: %{customdata[4]}<br>' 'Signifikanz: %{customdata[2]}'
'Stichprobe (n): %{customdata[5]}<br>'
'Gemeinsame Treffer: %{customdata[6]}<br>'
'%{customdata[7]}'
'<extra></extra>'
), ),
customdata=np.array(list(zip( customdata=correlation_df[['x_term', 'p_value', 'significance']].to_numpy()
correlation_df['x_term'],
correlation_df['y_term'],
correlation_df['correlation'],
correlation_df['p_value'],
correlation_df['significance'],
correlation_df['n_observations'],
correlation_df['cooccurrence_count'],
correlation_df['interpretation']
)), dtype=object)
) )
# Standardlayout verwenden und ggf. ergänzen, Margin dynamisch für Responsivität # Standardlayout verwenden und ggf. ergänzen, Margin dynamisch für Responsivität
@ -392,7 +377,6 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
x_title=x_label, x_title=x_label,
y_title=y_label y_title=y_label
), ),
hovermode='closest',
xaxis=dict( xaxis=dict(
tickangle=-45, tickangle=-45,
automargin=True automargin=True
@ -507,6 +491,11 @@ df['X_Dimension'] = df[[tag for tag in tags_to_search_processed if tag in df.col
df['Y_Dimension'] = df[[cat for cat in categories_processed if cat in df.columns]].sum(axis=1) df['Y_Dimension'] = df[[cat for cat in categories_processed if cat in df.columns]].sum(axis=1)
df['Z_Dimension'] = df[[rq for rq in research_questions_processed if rq in df.columns]].sum(axis=1) df['Z_Dimension'] = df[[rq for rq in research_questions_processed if rq in df.columns]].sum(axis=1)
# Clusteranalyse mit K-Means basierend auf den deduktiven Dimensionen
features = df[['X_Dimension', 'Y_Dimension', 'Z_Dimension']]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Clusteranalyse mit K-Means basierend auf den deduktiven Dimensionen # Clusteranalyse mit K-Means basierend auf den deduktiven Dimensionen
# Prüfung auf konstante deduktive Dimensionen # Prüfung auf konstante deduktive Dimensionen
if df[['X_Dimension', 'Y_Dimension', 'Z_Dimension']].nunique().eq(1).all(): if df[['X_Dimension', 'Y_Dimension', 'Z_Dimension']].nunique().eq(1).all():
@ -579,19 +568,6 @@ for cluster in cluster_means.index:
# Statische Cluster-Beschriftungen in den DataFrame einfügen # Statische Cluster-Beschriftungen in den DataFrame einfügen
df['Cluster_Label'] = df['KMeans_Cluster'].map(cluster_labels) df['Cluster_Label'] = df['KMeans_Cluster'].map(cluster_labels)
df['Cluster_Label'] = df['Cluster_Label'].fillna(df['KMeans_Cluster'])
# Farbzuordnung für die Clusterlabels aus den CI-Farben ableiten
fallback_color = cluster_colors.get("0", colors.get('primaryLine', '#1f77b4'))
color_map = {}
for cluster_key, label in cluster_labels.items():
base_color = cluster_colors.get(str(cluster_key), fallback_color)
color_map[label] = base_color
# Sicherstellen, dass auch eventuelle Restlabels (z.B. "Nicht gültig") erfasst werden
for label in df['Cluster_Label'].dropna().unique():
if label not in color_map:
color_map[label] = cluster_colors.get(str(label), fallback_color)
# Ausgabe der statischen Cluster-Beschriftungen # Ausgabe der statischen Cluster-Beschriftungen
print("Cluster-Beschriftungen (inhaltlich):") print("Cluster-Beschriftungen (inhaltlich):")
@ -608,7 +584,7 @@ fig_cluster = px.scatter_3d(
color='Cluster_Label', color='Cluster_Label',
size='Point_Size', size='Point_Size',
size_max=100, size_max=100,
color_discrete_map=color_map, color_discrete_sequence=list(cluster_colors.values()),
hover_data={ hover_data={
'Cluster_Label': True, 'Cluster_Label': True,
'X_Dimension': True, 'X_Dimension': True,
@ -626,6 +602,7 @@ fig_cluster = px.scatter_3d(
} }
) )
# Layout mit Standardlayout und konsistenten CI-konformen Ergänzungen # Layout mit Standardlayout und konsistenten CI-konformen Ergänzungen
layout_cluster = get_standard_layout( layout_cluster = get_standard_layout(
title=plot_title, title=plot_title,
@ -710,7 +687,6 @@ correlation_quality_results = {
"Forschungsunterfragen & Kategorien": analyze_correlation_quality(df, research_questions_processed, categories_processed), "Forschungsunterfragen & Kategorien": analyze_correlation_quality(df, research_questions_processed, categories_processed),
"Forschungsunterfragen & Suchbegriffe": analyze_correlation_quality(df, research_questions_processed, tags_to_search_processed), "Forschungsunterfragen & Suchbegriffe": analyze_correlation_quality(df, research_questions_processed, tags_to_search_processed),
"Forschungsunterfragen & Indizes": analyze_correlation_quality(df, research_questions_processed, index_terms_processed), "Forschungsunterfragen & Indizes": analyze_correlation_quality(df, research_questions_processed, index_terms_processed),
"Forschungsunterfragen & Forschungsunterfragen": analyze_correlation_quality(df, research_questions_processed, research_questions_processed),
"Indizes & Kategorien": analyze_correlation_quality(df, index_terms_processed, categories_processed), "Indizes & Kategorien": analyze_correlation_quality(df, index_terms_processed, categories_processed),
"Indizes & Suchbegriffe": analyze_correlation_quality(df, index_terms_processed, tags_to_search_processed), "Indizes & Suchbegriffe": analyze_correlation_quality(df, index_terms_processed, tags_to_search_processed),
"Suchbegriffe & Kategorien": analyze_correlation_quality(df, tags_to_search_processed, categories_processed), "Suchbegriffe & Kategorien": analyze_correlation_quality(df, tags_to_search_processed, categories_processed),
@ -777,17 +753,8 @@ def plot_average_correlation_plotly(summary_df):
) )
# PNG-Export ergänzen # PNG-Export ergänzen
png_path = os.path.join(export_path_png, f"{slugify('summary_plot_' + global_bib_filename.replace('.bib', ''))}.png") png_path = os.path.join(export_path_png, f"{slugify('summary_plot_' + global_bib_filename.replace('.bib', ''))}.png")
try:
fig.write_image(png_path, width=1200, height=800, scale=2) fig.write_image(png_path, width=1200, height=800, scale=2)
print(f"✅ PNG-Summary-Datei gespeichert unter: {png_path}") print(f"✅ PNG-Summary-Datei gespeichert unter: {png_path}")
except ValueError as err:
if "kaleido" in str(err).lower():
print("⚠️ PNG-Export übersprungen: Plotly benötigt das Paket 'kaleido'.")
print(" Installation (falls gewünscht): pip install -U kaleido")
else:
print(f"⚠️ PNG-Export fehlgeschlagen: {err}")
except Exception as err:
print(f"⚠️ PNG-Export fehlgeschlagen: {err}")
#============================ #============================
# Aufruf Alle möglichen bivariaten Korrelationen visualisieren # Aufruf Alle möglichen bivariaten Korrelationen visualisieren

View File

@ -1,6 +1,7 @@
from config_netzwerk import theme, export_fig_visual, bib_filename from config_netzwerk import theme, export_fig_visual, bib_filename
import os import os
# Clear the terminal # Clear the terminal
@ -59,14 +60,11 @@ from config_netzwerk import (
export_fig_visualize_sources_status, export_fig_visualize_sources_status,
export_fig_create_wordcloud_from_titles, export_fig_create_wordcloud_from_titles,
export_fig_visualize_languages, export_fig_visualize_languages,
export_fig_visualize_relevance_fu,
export_fig_visualize_relevance_categories,
export_fig_visualize_relevance_search_terms,
) )
from config_netzwerk import export_fig_png from config_netzwerk import export_fig_png
def export_figure_local(fig, name, flag): def export_figure_local(fig, name, flag, bib_filename=None):
from config_netzwerk import export_path_html, export_path_png from config_netzwerk import export_path_html, export_path_png
# Einmalige Definition von safe_filename am Anfang der Funktion # Einmalige Definition von safe_filename am Anfang der Funktion
safe_filename = prepare_figure_export(fig, name).replace(".html", "") safe_filename = prepare_figure_export(fig, name).replace(".html", "")
@ -102,23 +100,6 @@ word_colors = [
colors["negativeHighlight"] colors["negativeHighlight"]
] ]
# Relevanz-Stufen (1 = gering, 5 = sehr hoch)
RELEVANCE_LEVELS = [5, 4, 3, 2, 1]
RELEVANCE_LEVEL_LABELS = {
5: "Relevanz 5",
4: "Relevanz 4",
3: "Relevanz 3",
2: "Relevanz 2",
1: "Relevanz 1",
}
RELEVANCE_COLOR_MAP = {
"Relevanz 5": colors['positiveHighlight'],
"Relevanz 4": colors['accent'],
"Relevanz 3": colors['brightArea'],
"Relevanz 2": colors['depthArea'],
"Relevanz 1": colors['negativeHighlight'],
}
# Aktuelles Datum # Aktuelles Datum
current_date = datetime.now().strftime("%Y-%m-%d") current_date = datetime.now().strftime("%Y-%m-%d")
@ -139,13 +120,6 @@ with open('en_complete.txt', 'r', encoding='utf-8') as file:
# Kombinierte Stoppliste # Kombinierte Stoppliste
stop_words = stop_words_de.union(stop_words_en) stop_words = stop_words_de.union(stop_words_en)
# Hilfsfunktion: Relevanzstufe aus Keywords extrahieren
def extract_relevance_level(entry_keywords):
for level in RELEVANCE_LEVELS:
if f'promotion:relevanz:{level}' in entry_keywords:
return level
return None
# Funktion zur Berechnung der Stichprobengröße # Funktion zur Berechnung der Stichprobengröße
def calculate_sample_size(N, Z=1.96, p=0.5, e=0.05): def calculate_sample_size(N, Z=1.96, p=0.5, e=0.05):
n_0 = (Z**2 * p * (1 - p)) / (e**2) n_0 = (Z**2 * p * (1 - p)) / (e**2)
@ -175,10 +149,8 @@ def visualize_network(bib_database):
'Buch', 'Buch',
'Buchteil', 'Buchteil',
'Bericht', 'Bericht',
'Konferenz-Paper', 'Konferenz-Paper'
'Studienbrief'
] ]
tags_to_search = set() tags_to_search = set()
for number, type_ in product(numbers, types): for number, type_ in product(numbers, types):
search_term = search_terms[number] search_term = search_terms[number]
@ -314,15 +286,11 @@ def visualize_network(bib_database):
secondary_nodes = [] secondary_nodes = []
tertiary_nodes = [] tertiary_nodes = []
total_fundzahlen = sum(fundzahlen.values())
for node in G.nodes(): for node in G.nodes():
color = G.nodes[node]['color'] color = G.nodes[node]['color']
size = math.log(G.nodes[node].get('size', 10) + 1) * 10 size = math.log(G.nodes[node].get('size', 10) + 1) * 10
x, y = pos[node] x, y = pos[node]
count = fundzahlen.get(node, 0) hovertext = f"{node}<br>Anzahl Funde: {fundzahlen.get(node, 0)}"
percentage = (count / total_fundzahlen * 100) if total_fundzahlen else 0
hovertext = f"{node}<br>Anzahl Funde: {count}<br>Anteil: {percentage:.1f}%"
node_data = dict(x=x, y=y, text=node, size=size, hovertext=hovertext) node_data = dict(x=x, y=y, text=node, size=size, hovertext=hovertext)
if color == colors['primaryLine']: if color == colors['primaryLine']:
primary_nodes.append(node_data) primary_nodes.append(node_data)
@ -363,7 +331,7 @@ def visualize_network(bib_database):
fig = go.Figure(data=[edge_trace, primary_trace, secondary_trace, tertiary_trace]) fig = go.Figure(data=[edge_trace, primary_trace, secondary_trace, tertiary_trace])
layout = get_standard_layout( layout = get_standard_layout(
title=f"Suchbegriff-Netzwerk nach Relevanz und Semantik (n={total_fundzahlen}, Stand: {current_date})", title=f"Suchbegriff-Netzwerk nach Relevanz und Semantik (n={sum(fundzahlen.values())}, Stand: {current_date})",
x_title="Technologische Dimension", x_title="Technologische Dimension",
y_title="Pädagogische Dimension" y_title="Pädagogische Dimension"
) )
@ -375,7 +343,7 @@ def visualize_network(bib_database):
fig.update_layout(**layout) fig.update_layout(**layout)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_network", export_fig_visualize_network) export_figure_local(fig, "visualize_network", export_fig_visualize_network, bib_filename)
# Einfache Pfadanalyse nach dem Anzeigen der Figur # Einfache Pfadanalyse nach dem Anzeigen der Figur
if 'e-learning' in G and 'online:lernen' in G: if 'e-learning' in G and 'online:lernen' in G:
@ -410,11 +378,10 @@ def visualize_tags(bib_database):
'Buch', 'Buch',
'Buchteil', 'Buchteil',
'Bericht', 'Bericht',
'Konferenz-Paper', 'Konferenz-Paper'
'Studienbrief'
] ]
tags_to_search = set( tags_to_search = set(
f"#{number}:{type_}:{search_terms[number]}".lower() f"#{number}:{type_}:{search_terms[number]}"
for number, type_ in product(numbers, types) for number, type_ in product(numbers, types)
) )
@ -436,46 +403,36 @@ def visualize_tags(bib_database):
tag_counts[tag] += 1 tag_counts[tag] += 1
# Daten für Visualisierung aufbereiten # Daten für Visualisierung aufbereiten
data_rows = [ data = [
{ {'Tag': tag, 'Count': count, 'Type': tag.split(':')[1].lower()}
'Tag': tag,
'Count': count,
'Type': tag.split(':')[1].lower()
}
for tag, count in tag_counts.items() for tag, count in tag_counts.items()
if count > 0 if count > 0
] ]
if not data_rows: if not data:
print("Warnung: Keine Tags gefunden, die den Suchkriterien entsprechen.") print("Warnung: Keine Tags gefunden, die den Suchkriterien entsprechen.")
return return
df = pd.DataFrame(data_rows)
df['TypeLabel'] = df['Type'].str.replace('-', ' ').str.title()
total_count = df['Count'].sum()
df['Percentage'] = df['Count'] / total_count * 100 if total_count else 0
# Farbzuordnung # Farbzuordnung
color_map = { color_map = {
'zeitschriftenartikel': colors['primaryLine'], 'zeitschriftenartikel': colors['primaryLine'],
'konferenz-paper': colors['secondaryLine'], 'konferenz-paper': colors['secondaryLine'],
'buch': colors['depthArea'], 'buch': colors['depthArea'],
'buchteil': colors['brightArea'], 'buchteil': colors['brightArea'],
'bericht': colors['accent'], 'bericht': colors['accent']
'studienbrief': colors['positiveHighlight']
} }
# Visualisierung erstellen # Visualisierung erstellen
total_count = sum(tag_counts.values())
fig = px.bar( fig = px.bar(
df, data,
x='Tag', x='Tag',
y='Count', y='Count',
title=f'Häufigkeit der Suchbegriffe in der Literaturanalyse (n={total_count}, Stand: {current_date})', title=f'Häufigkeit der Suchbegriffe in der Literaturanalyse (n={total_count}, Stand: {current_date})',
labels={'Tag': 'Tag', 'Count': 'Anzahl der Vorkommen'}, labels={'Tag': 'Tag', 'Count': 'Anzahl der Vorkommen'},
color='Type', color='Type',
color_discrete_map=color_map, color_discrete_map=color_map,
text_auto=True, text_auto=True
custom_data=['TypeLabel', 'Percentage']
) )
layout = get_standard_layout( layout = get_standard_layout(
@ -491,17 +448,9 @@ def visualize_tags(bib_database):
layout["xaxis"]["automargin"] = True layout["xaxis"]["automargin"] = True
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Typ: %{customdata[0]}<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[1]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_tags", export_fig_visualize_tags) export_figure_local(fig, "visualize_tags", export_fig_visualize_tags, bib_filename)
# Visualisierung 3: Häufigkeit Index # Visualisierung 3: Häufigkeit Index
def visualize_index(bib_database): def visualize_index(bib_database):
@ -529,21 +478,11 @@ def visualize_index(bib_database):
index_data = [{'Index': index, 'Count': count} for index, count in index_counts.items()] index_data = [{'Index': index, 'Count': count} for index, count in index_counts.items()]
index_data = sorted(index_data, key=lambda x: x['Count'], reverse=True) index_data = sorted(index_data, key=lambda x: x['Count'], reverse=True)
index_df = pd.DataFrame(index_data) total_count = sum(index_counts.values())
total_count = index_df['Count'].sum()
index_df['Percentage'] = index_df['Count'] / total_count * 100 if total_count else 0
print(f"Häufigkeit Indizes (Gesamtanzahl: {total_count}):") print(f"Häufigkeit Indizes (Gesamtanzahl: {total_count}):")
print(tabulate(index_df.to_dict('records'), headers="keys", tablefmt="grid")) print(tabulate(index_data, headers="keys", tablefmt="grid"))
fig = px.bar( fig = px.bar(index_data, x='Index', y='Count', title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})', labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
index_df,
x='Index',
y='Count',
title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})',
labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'},
text_auto=True,
custom_data=['Percentage']
)
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
x_title='Index', x_title='Index',
@ -558,15 +497,8 @@ def visualize_index(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(marker=plot_styles['balken_primaryLine']) fig.update_traces(marker=plot_styles['balken_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_index", export_fig_visualize_index) export_figure_local(fig, "visualize_index", export_fig_visualize_index, bib_filename)
# Visualisierung 4: Häufigkeit Forschungsunterfragen # Visualisierung 4: Häufigkeit Forschungsunterfragen
def visualize_research_questions(bib_database): def visualize_research_questions(bib_database):
@ -593,22 +525,13 @@ def visualize_research_questions(bib_database):
rq_data = [{'Research_Question': research_questions[keyword], 'Count': count} for keyword, count in rq_counts.items()] rq_data = [{'Research_Question': research_questions[keyword], 'Count': count} for keyword, count in rq_counts.items()]
rq_data = sorted(rq_data, key=lambda x: x['Count'], reverse=True) rq_data = sorted(rq_data, key=lambda x: x['Count'], reverse=True)
rq_data_df = pd.DataFrame(rq_data, columns=['Research_Question', 'Count']) rq_data_df = pd.DataFrame(rq_data)
total_count = rq_data_df['Count'].sum() total_count = rq_data_df['Count'].sum()
rq_data_df['Percentage'] = rq_data_df['Count'] / total_count * 100 if total_count else 0
print(f"Häufigkeit Forschungsunterfragen (Gesamtanzahl: {total_count}):") print(f"Häufigkeit Forschungsunterfragen (Gesamtanzahl: {total_count}):")
print(tabulate(rq_data, headers="keys", tablefmt="grid")) print(tabulate(rq_data, headers="keys", tablefmt="grid"))
fig = px.bar( fig = px.bar(rq_data_df, x='Research_Question', y='Count', title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})', labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
rq_data_df,
x='Research_Question',
y='Count',
title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})',
labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'},
text_auto=True,
custom_data=['Percentage']
)
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
x_title='Forschungsunterfrage', x_title='Forschungsunterfrage',
@ -623,15 +546,8 @@ def visualize_research_questions(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(marker=plot_styles['balken_primaryLine']) fig.update_traces(marker=plot_styles['balken_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_research_questions", export_fig_visualize_research_questions) export_figure_local(fig, "visualize_research_questions", export_fig_visualize_research_questions, bib_filename)
# Visualisierung 5: Häufigkeit spezifischer Kategorien # Visualisierung 5: Häufigkeit spezifischer Kategorien
def visualize_categories(bib_database): def visualize_categories(bib_database):
@ -653,22 +569,13 @@ def visualize_categories(bib_database):
cat_data = [{'Category': categories[keyword], 'Count': count} for keyword, count in cat_counts.items()] cat_data = [{'Category': categories[keyword], 'Count': count} for keyword, count in cat_counts.items()]
cat_data = sorted(cat_data, key=lambda x: x['Count'], reverse=True) cat_data = sorted(cat_data, key=lambda x: x['Count'], reverse=True)
cat_data_df = pd.DataFrame(cat_data, columns=['Category', 'Count']) cat_data_df = pd.DataFrame(cat_data)
total_count = cat_data_df['Count'].sum() total_count = cat_data_df['Count'].sum()
cat_data_df['Percentage'] = cat_data_df['Count'] / total_count * 100 if total_count else 0
print(f"Häufigkeit Kategorien (Gesamtanzahl: {total_count}):") print(f"Häufigkeit Kategorien (Gesamtanzahl: {total_count}):")
print(tabulate(cat_data, headers="keys", tablefmt="grid")) print(tabulate(cat_data, headers="keys", tablefmt="grid"))
fig = px.bar( fig = px.bar(cat_data_df, x='Category', y='Count', title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})', labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
cat_data_df,
x='Category',
y='Count',
title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})',
labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'},
text_auto=True,
custom_data=['Percentage']
)
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
x_title='Kategorie', x_title='Kategorie',
@ -683,179 +590,8 @@ def visualize_categories(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(marker=plot_styles['balken_primaryLine']) fig.update_traces(marker=plot_styles['balken_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_categories", export_fig_visualize_categories) export_figure_local(fig, "visualize_categories", export_fig_visualize_categories, bib_filename)
# Relevanz-Auswertungen
def build_relevance_distribution(bib_database, tag_to_label):
records = []
for entry in bib_database.entries:
keywords_raw = entry.get('keywords', '')
if not keywords_raw:
continue
entry_keywords = set(map(str.lower, map(str.strip, keywords_raw.replace('\\#', '#').split(','))))
relevance_level = extract_relevance_level(entry_keywords)
if relevance_level is None:
continue
for tag, label in tag_to_label.items():
if tag in entry_keywords:
records.append({
'Kategorie': label,
'Relevanzstufe': RELEVANCE_LEVEL_LABELS[relevance_level]
})
if not records:
return pd.DataFrame()
df = pd.DataFrame(records)
df = (
df.groupby(['Kategorie', 'Relevanzstufe'])
.size()
.reset_index(name='Count')
)
df['Relevanzstufe'] = pd.Categorical(
df['Relevanzstufe'],
categories=[RELEVANCE_LEVEL_LABELS[level] for level in RELEVANCE_LEVELS],
ordered=True
)
return df.sort_values(['Kategorie', 'Relevanzstufe'])
def plot_relevance_distribution(df, title, x_title, export_flag, filename):
if df.empty:
print(f"⚠️ Keine Relevanzdaten verfügbar für: {title}")
return
total_count = df['Count'].sum()
df['Percentage'] = df['Count'] / total_count * 100 if total_count else 0
fig = px.bar(
df,
x='Kategorie',
y='Count',
color='Relevanzstufe',
color_discrete_map=RELEVANCE_COLOR_MAP,
category_orders={'Relevanzstufe': [RELEVANCE_LEVEL_LABELS[level] for level in RELEVANCE_LEVELS]},
title=f"{title} (n={total_count}, Stand: {current_date})",
labels={'Kategorie': x_title, 'Count': 'Anzahl', 'Relevanzstufe': 'Relevanzstufe'},
custom_data=['Relevanzstufe', 'Percentage']
)
layout = get_standard_layout(
title=fig.layout.title.text,
x_title=x_title,
y_title='Anzahl'
)
layout['barmode'] = 'stack'
layout['font'] = {"size": 14, "color": colors['text']}
layout['title'] = {"font": {"size": 16}}
layout['margin'] = dict(b=160, t=60, l=40, r=40)
layout['xaxis'] = layout.get('xaxis', {})
layout['xaxis']['tickangle'] = -45
layout['xaxis']['automargin'] = True
layout['autosize'] = True
fig.update_layout(**layout)
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Relevanzstufe: %{customdata[0]}<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[1]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True})
export_figure_local(fig, filename, export_flag)
def visualize_relevance_vs_research_questions(bib_database):
research_questions = {
'promotion:fu1': 'Akzeptanz und Nützlichkeit (FU1)',
'promotion:fu2a': 'Effekt für Lernende (FU2a)',
'promotion:fu2b': 'Effekt-Faktoren für Lehrende (FU2b)',
'promotion:fu3': 'Konzeption und Merkmale (FU3)',
'promotion:fu4a': 'Bildungswissenschaftliche Mechanismen (FU4a)',
'promotion:fu4b': 'Technisch-gestalterische Mechanismen (FU4b)',
'promotion:fu5': 'Möglichkeiten und Grenzen (FU5)',
'promotion:fu6': 'Beurteilung als Kompetenzerwerbssystem (FU6)',
'promotion:fu7': 'Inputs und Strategien (FU7)'
}
tag_to_label = {key.lower(): value for key, value in research_questions.items()}
df = build_relevance_distribution(bib_database, tag_to_label)
plot_relevance_distribution(
df,
"Relevanzverteilung nach Forschungsunterfragen",
"Forschungsunterfragen",
export_fig_visualize_relevance_fu,
"visualize_relevance_fu"
)
def visualize_relevance_vs_categories(bib_database):
categories = {
'promotion:argumentation': 'Argumentation',
'promotion:kerngedanke': 'Kerngedanke',
'promotion:weiterführung': 'Weiterführung',
'promotion:schlussfolgerung': 'Schlussfolgerung'
}
tag_to_label = {key.lower(): value for key, value in categories.items()}
df = build_relevance_distribution(bib_database, tag_to_label)
plot_relevance_distribution(
df,
"Relevanzverteilung nach Kategorien",
"Kategorien",
export_fig_visualize_relevance_categories,
"visualize_relevance_categories"
)
def visualize_relevance_vs_search_terms(bib_database):
search_terms = {
'0': 'digital:learning',
'1': 'learning:management:system',
'2': 'online:lernplattform',
'3': 'online:lernumgebung',
'4': 'mooc',
'5': 'e-learning',
'6': 'bildung:technologie',
'7': 'digital:medien',
'8': 'blended:learning',
'9': 'digital:lernen',
'a': 'online:lernen',
'b': 'online:learning'
}
types = [
'Zeitschriftenartikel',
'Buch',
'Buchteil',
'Bericht',
'Konferenz-Paper',
'Studienbrief'
]
tag_to_label = {}
for number, term in search_terms.items():
for type_ in types:
tag = f'#{number}:{type_}:{term}'.lower()
tag_to_label[tag] = f"#{number}:{term}"
df = build_relevance_distribution(bib_database, tag_to_label)
plot_relevance_distribution(
df,
"Relevanzverteilung nach Suchbegriffen",
"Suchbegriffe",
export_fig_visualize_relevance_search_terms,
"visualize_relevance_search_terms"
)
# Zeitreihenanalyse der Veröffentlichungen # Zeitreihenanalyse der Veröffentlichungen
def visualize_time_series(bib_database): def visualize_time_series(bib_database):
@ -878,16 +614,13 @@ def visualize_time_series(bib_database):
if publication_years: if publication_years:
year_counts = Counter(publication_years) year_counts = Counter(publication_years)
df = pd.DataFrame(year_counts.items(), columns=['Year', 'Count']).sort_values('Year') df = pd.DataFrame(year_counts.items(), columns=['Year', 'Count']).sort_values('Year')
total_publications = df['Count'].sum()
df['Percentage'] = df['Count'] / total_publications * 100 if total_publications else 0
fig = px.line( fig = px.line(
df, df,
x='Year', x='Year',
y='Count', y='Count',
title=f'Jährliche Veröffentlichungen in der Literaturanalyse (n={sum(year_counts.values())}, Stand: {current_date})', title=f'Jährliche Veröffentlichungen in der Literaturanalyse (n={sum(year_counts.values())}, Stand: {current_date})',
labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'}, labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'}
custom_data=['Percentage']
) )
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
@ -904,15 +637,8 @@ def visualize_time_series(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(line=plot_styles['linie_primaryLine']) fig.update_traces(line=plot_styles['linie_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_time_series", export_fig_visualize_time_series) export_figure_local(fig, "visualize_time_series", export_fig_visualize_time_series, bib_filename)
else: else:
print("Keine gültigen Veröffentlichungsjahre gefunden.") print("Keine gültigen Veröffentlichungsjahre gefunden.")
@ -929,18 +655,8 @@ def visualize_top_authors(bib_database):
top_authors = Counter(author_counts).most_common(top_n) top_authors = Counter(author_counts).most_common(top_n)
if top_authors: if top_authors:
df = pd.DataFrame(top_authors, columns=['Author', 'Count']) df = pd.DataFrame(top_authors, columns=['Author', 'Count'])
overall_total = sum(author_counts.values())
df['Percentage'] = df['Count'] / overall_total * 100 if overall_total else 0
fig = px.bar( fig = px.bar(df, x='Author', y='Count', title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={sum(author_counts.values())}, Stand: {current_date})', labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'}, text_auto=True)
df,
x='Author',
y='Count',
title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={overall_total}, Stand: {current_date})',
labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'},
text_auto=True,
custom_data=['Percentage']
)
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
x_title='Autor', x_title='Autor',
@ -955,18 +671,55 @@ def visualize_top_authors(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(marker=plot_styles['balken_primaryLine']) fig.update_traces(marker=plot_styles['balken_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_top_authors", export_fig_visualize_top_authors) export_figure_local(fig, "visualize_top_authors", export_fig_visualize_top_authors, bib_filename)
else: else:
print("Keine Autoren gefunden.") print("Keine Autoren gefunden.")
# Top Titel nach Anzahl der Werke
def normalize_title(title):
    """Normalize a publication title for duplicate detection.

    Lowercases the title, strips common punctuation, removes frequent
    German filler phrases that carry no distinguishing information, and
    collapses all runs of whitespace.

    Parameters:
        title (str): Raw title string from a BibTeX entry.

    Returns:
        str: The normalized title.
    """
    # Lowercase and drop punctuation in a single C-level pass.
    title = title.lower().translate(str.maketrans('', '', ",.!?\"'()[]{}:;"))
    # Remove common filler phrases that do not help distinguish titles.
    common_phrases = ['eine studie', 'untersuchung der', 'analyse von']
    for phrase in common_phrases:
        title = title.replace(phrase, '')
    # Collapse whitespace AFTER phrase removal: the original collapsed it
    # first, so removing an interior phrase left a double space behind
    # ("x eine studie y" -> "x  y"), splitting otherwise-identical titles.
    return " ".join(title.split())
def visualize_top_publications(bib_database):
    """Visualize the most frequently cited publications as a bar chart.

    Normalizes every entry title (see normalize_title), counts the
    occurrences, and renders the top 25 as a Plotly bar chart with long
    titles truncated for legible x-axis labels. The figure is shown and
    exported via export_figure_local.

    Parameters:
        bib_database: Parsed BibTeX database exposing an ``entries``
            list of dicts (bibtexparser-style).

    Returns:
        None. Prints a notice and skips plotting when no titles exist.
    """
    top_n = 25  # number of top publications to display
    publication_counts = defaultdict(int)
    for entry in bib_database.entries:
        if 'title' in entry:
            title = normalize_title(entry['title'])
            publication_counts[title] += 1
    top_publications = sorted(publication_counts.items(), key=lambda x: x[1], reverse=True)[:top_n]
    # Truncate long titles so the rotated x-axis labels stay readable.
    publication_data = [{'Title': title[:50] + '...' if len(title) > 50 else title, 'Count': count} for title, count in top_publications]
    # Guard against an empty bibliography: px.bar would raise on a
    # DataFrame without 'Title'/'Count' columns. Mirrors the guard style
    # of the sibling visualizations (e.g. "Keine Autoren gefunden.").
    if not publication_data:
        print("Keine Publikationen gefunden.")
        return
    df = pd.DataFrame(publication_data)
    fig = px.bar(df, x='Title', y='Count', title=f'Häufig zitierte Publikationen in der Analyse (Top {top_n}, n={sum(publication_counts.values())}, Stand: {current_date})', labels={'Title': 'Titel', 'Count': 'Anzahl der Nennungen'})
    layout = get_standard_layout(
        title=fig.layout.title.text,
        x_title='Titel',
        y_title='Anzahl der Nennungen'
    )
    layout["font"] = {"size": 14, "color": colors['text']}
    layout["title"] = {"font": {"size": 16}}
    # Large bottom margin leaves room for the -45° rotated tick labels.
    layout["margin"] = dict(b=160, t=60, l=40, r=40)
    layout["xaxis"] = layout.get("xaxis", {})
    layout["xaxis"]["tickangle"] = -45
    layout["xaxis"]["automargin"] = True
    layout["autosize"] = True
    fig.update_layout(**layout)
    fig.update_traces(marker=plot_styles['balken_primaryLine'])
    fig.show(config={"responsive": True})
    export_figure_local(fig, "visualize_top_publications", export_fig_visualize_top_publications, bib_filename)
########## ##########
# Daten vorbereiten # Daten vorbereiten
@ -1008,8 +761,7 @@ def prepare_path_data(bib_database):
'Buch', 'Buch',
'Buchteil', 'Buchteil',
'Bericht', 'Bericht',
'Konferenz-Paper', 'Konferenz-Paper'
'Studienbrief'
] ]
data = [] data = []
@ -1048,14 +800,12 @@ def create_path_diagram(data):
sources = [] sources = []
targets = [] targets = []
values = [] values = []
node_counts = Counter()
color_map = { color_map = {
'zeitschriftenartikel': colors['primaryLine'], 'zeitschriftenartikel': colors['primaryLine'],
'konferenz-paper': colors['secondaryLine'], 'konferenz-paper': colors['secondaryLine'],
'buch': colors['depthArea'], 'buch': colors['depthArea'],
'buchteil': colors['brightArea'], 'buchteil': colors['brightArea'],
'bericht': colors['accent'], 'bericht': colors['accent']
'studienbrief': colors['positiveHighlight']
} }
def add_to_labels(label): def add_to_labels(label):
@ -1072,19 +822,8 @@ def create_path_diagram(data):
sources.extend([fu_idx, category_idx, index_idx]) sources.extend([fu_idx, category_idx, index_idx])
targets.extend([category_idx, index_idx, type_idx]) targets.extend([category_idx, index_idx, type_idx])
values.extend([1, 1, 1]) values.extend([1, 1, 1])
node_counts.update([entry['FU'], entry['Category'], entry['Index'], entry['Type']])
node_colors = [color_map.get(label, colors['primaryLine']) for label in labels] node_colors = [color_map.get(label, colors['primaryLine']) for label in labels]
total_paths = len(data)
total_flows = sum(values)
node_percentages = [
node_counts.get(label, 0) / total_paths * 100 if total_paths else 0
for label in labels
]
link_percentages = [
value / total_flows * 100 if total_flows else 0
for value in values
]
fig = go.Figure(data=[go.Sankey( fig = go.Figure(data=[go.Sankey(
node=dict( node=dict(
@ -1092,24 +831,12 @@ def create_path_diagram(data):
thickness=20, thickness=20,
line=dict(color="black", width=0.5), line=dict(color="black", width=0.5),
label=labels, label=labels,
color=node_colors, color=node_colors
customdata=node_percentages,
hovertemplate=(
"%{label}<br>"
"Anzahl: %{value}<br>"
"Anteil der Pfade: %{customdata:.1f}%<extra></extra>"
)
), ),
link=dict( link=dict(
source=sources, source=sources,
target=targets, target=targets,
value=values, value=values
customdata=link_percentages,
hovertemplate=(
"%{source.label}%{target.label}<br>"
"Anzahl: %{value}<br>"
"Anteil der Verbindungen: %{customdata:.1f}%<extra></extra>"
)
) )
)]) )])
layout = get_standard_layout( layout = get_standard_layout(
@ -1123,7 +850,7 @@ def create_path_diagram(data):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "create_path_diagram", export_fig_create_path_diagram) export_figure_local(fig, "create_path_diagram", export_fig_create_path_diagram, bib_filename)
############# #############
@ -1227,54 +954,22 @@ def create_sankey_diagram(bib_database):
colors['positiveHighlight'] # Ausgewählte Quellen colors['positiveHighlight'] # Ausgewählte Quellen
] ]
node_values = [
initial_sources,
screened_sources,
quality_sources,
relevance_sources,
thematic_sources,
recent_sources,
classic_sources,
selected_sources
]
node_percentages = [
value / initial_sources * 100 if initial_sources else 0
for value in node_values
]
link_percentages = [
value / initial_sources * 100 if initial_sources else 0
for value in values
]
# Sankey-Diagramm erstellen # Sankey-Diagramm erstellen
node_config = { node_config = {
**plot_styles["sankey_node"], **plot_styles["sankey_node"],
"label": node_labels, "label": node_labels,
"color": node_colors, "color": node_colors
"customdata": node_percentages,
"hovertemplate": (
"%{label}<br>"
"Anzahl: %{value}<br>"
"Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>"
)
} }
# Remove any invalid 'font' key if present # Remove any invalid 'font' key if present
node_config.pop("font", None) node_config.pop("font", None)
link_config = {
**plot_styles["sankey_link"],
"source": sources,
"target": targets,
"value": values,
"customdata": link_percentages,
"hovertemplate": (
"%{source.label}%{target.label}<br>"
"Anzahl: %{value}<br>"
"Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>"
)
}
fig = go.Figure(go.Sankey( fig = go.Figure(go.Sankey(
node=node_config, node=node_config,
link=link_config link=dict(
**plot_styles["sankey_link"],
source=sources,
target=targets,
value=values
)
)) ))
# Layout anpassen # Layout anpassen
layout = get_standard_layout( layout = get_standard_layout(
@ -1288,7 +983,7 @@ def create_sankey_diagram(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "create_sankey_diagram", export_fig_create_sankey_diagram) export_figure_local(fig, "create_sankey_diagram", export_fig_create_sankey_diagram, bib_filename)
########## ##########
@ -1306,33 +1001,31 @@ def visualize_sources_status(bib_database):
""" """
Visualisiert den Status der analysierten und nicht analysierten Quellen pro Suchordner. Visualisiert den Status der analysierten und nicht analysierten Quellen pro Suchordner.
""" """
search_terms = {
'0': 'digital:learning',
'1': 'learning:management:system',
'2': 'online:lernplattform',
'3': 'online:lernumgebung',
'4': 'mooc',
'5': 'e-learning',
'6': 'bildung:technologie',
'7': 'digital:medien',
'8': 'blended:learning',
'9': 'digital:lernen',
'a': 'online:lernen',
'b': 'online:learning'
}
numbers_order = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b']
type_order = [
'Zeitschriftenartikel',
'Buch',
'Buchteil',
'Bericht',
'Konferenz-Paper',
'Studienbrief'
]
search_folder_tags = [ search_folder_tags = [
f"#{number}:{type_}:{search_terms[number]}".lower() "#1:zeitschriftenartikel:learning:management:system",
for type_ in type_order "#2:zeitschriftenartikel:online:lernplattform",
for number in numbers_order "#3:zeitschriftenartikel:online:lernumgebung",
"#4:zeitschriftenartikel:mooc",
"#5:zeitschriftenartikel:e-learning",
"#6:zeitschriftenartikel:bildung:technologie",
"#7:zeitschriftenartikel:digital:medien",
"#8:zeitschriftenartikel:blended:learning",
"#9:zeitschriftenartikel:digital:lernen",
"#a:zeitschriftenartikel:online:lernen",
"#b:zeitschriftenartikel:online:learning",
"#0:zeitschriftenartikel:digital:learning",
"#1:konferenz-paper:learning:management:system",
"#2:konferenz-paper:online:lernplattform",
"#3:konferenz-paper:online:lernumgebung",
"#4:konferenz-paper:mooc",
"#5:konferenz-paper:e-learning",
"#6:konferenz-paper:bildung:technologie",
"#7:konferenz-paper:digital:medien",
"#8:konferenz-paper:blended:learning",
"#9:konferenz-paper:digital:lernen",
"#a:konferenz-paper:online:lernen",
"#b:konferenz-paper:online:learning",
"#0:konferenz-paper:digital:learning"
] ]
category_tags = {"promotion:argumentation", "promotion:kerngedanke", "promotion:weiterführung", "promotion:schlussfolgerung"} category_tags = {"promotion:argumentation", "promotion:kerngedanke", "promotion:weiterführung", "promotion:schlussfolgerung"}
@ -1387,45 +1080,21 @@ def visualize_sources_status(bib_database):
tablefmt='grid' tablefmt='grid'
)) ))
total_identifiziert = sum(counts["Identifiziert"] for counts in source_data.values())
analysiert_percentages = [
value / total_identifiziert * 100 if total_identifiziert else 0
for value in analysiert_values
]
nicht_analysiert_percentages = [
value / total_identifiziert * 100 if total_identifiziert else 0
for value in nicht_analysiert_values
]
fig = go.Figure() fig = go.Figure()
fig.add_trace(go.Bar( fig.add_trace(go.Bar(
x=tags, x=tags,
y=analysiert_values, y=analysiert_values,
name='Analysiert', name='Analysiert',
marker=dict(color=analysiert_colors), marker=dict(color=analysiert_colors)
customdata=analysiert_percentages,
hovertemplate=(
"<b>%{x}</b><br>"
"Status: Analysiert<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata:.1f}%<extra></extra>"
)
)) ))
fig.add_trace(go.Bar( fig.add_trace(go.Bar(
x=tags, x=tags,
y=nicht_analysiert_values, y=nicht_analysiert_values,
name='Nicht-Analysiert', name='Nicht-Analysiert',
marker=plot_styles['balken_primaryLine'], marker=plot_styles['balken_primaryLine']
customdata=nicht_analysiert_percentages,
hovertemplate=(
"<b>%{x}</b><br>"
"Status: Nicht-Analysiert<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata:.1f}%<extra></extra>"
)
)) ))
layout = get_standard_layout( layout = get_standard_layout(
title=f'Analyse- und Stichprobenstatus je Suchordner (n={total_identifiziert}, Stand: {current_date})', title=f'Analyse- und Stichprobenstatus je Suchordner (n={sum(counts["Identifiziert"] for counts in source_data.values())}, Stand: {current_date})',
x_title='Suchbegriffsordner', x_title='Suchbegriffsordner',
y_title='Anzahl der Quellen' y_title='Anzahl der Quellen'
) )
@ -1442,7 +1111,7 @@ def visualize_sources_status(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_sources_status", export_fig_visualize_sources_status) export_figure_local(fig, "visualize_sources_status", export_fig_visualize_sources_status, bib_filename)
############# #############
@ -1528,8 +1197,8 @@ def visualize_languages(bib_database):
color='Gruppe', color='Gruppe',
color_discrete_map=color_discrete_map, color_discrete_map=color_discrete_map,
title=f'Sprachverteilung der analysierten Quellen (n={sum(norm_counts.values())}, Stand: {current_date})', title=f'Sprachverteilung der analysierten Quellen (n={sum(norm_counts.values())}, Stand: {current_date})',
barmode="stack", hover_data=["Sprache", "Gruppe", "Anzahl", "Anteil (%)"],
custom_data=['Gruppe', 'Anteil (%)'] barmode="stack"
) )
layout = get_standard_layout( layout = get_standard_layout(
@ -1544,18 +1213,10 @@ def visualize_languages(bib_database):
# Ergänzung: Y-Achse logarithmisch skalieren # Ergänzung: Y-Achse logarithmisch skalieren
layout["yaxis_type"] = "log" layout["yaxis_type"] = "log"
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Sprachgruppe: %{customdata[0]}<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[1]:.2f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
# Tabelle ausgeben # Tabelle ausgeben
print(tabulate(df.sort_values("Anzahl", ascending=False), headers="keys", tablefmt="grid", showindex=False)) print(tabulate(df.sort_values("Anzahl", ascending=False), headers="keys", tablefmt="grid", showindex=False))
export_figure_local(fig, "visualize_languages", export_fig_visualize_languages) export_figure_local(fig, "visualize_languages", export_fig_visualize_languages, bib_filename)
# Visualisierung der Verteilung von ENTRYTYPE innerhalb jeder Sprache # Visualisierung der Verteilung von ENTRYTYPE innerhalb jeder Sprache
def visualize_language_entrytypes(bib_database): def visualize_language_entrytypes(bib_database):
@ -1605,8 +1266,6 @@ def visualize_language_entrytypes(bib_database):
grouped.rename(columns={'ENTRYTYPE': 'Eintragstyp'}, inplace=True) grouped.rename(columns={'ENTRYTYPE': 'Eintragstyp'}, inplace=True)
# Anteil innerhalb Sprache (%) # Anteil innerhalb Sprache (%)
grouped["Anteil innerhalb Sprache (%)"] = grouped.groupby("Sprache")["Anzahl"].transform(lambda x: (x / x.sum() * 100).round(2)) grouped["Anteil innerhalb Sprache (%)"] = grouped.groupby("Sprache")["Anzahl"].transform(lambda x: (x / x.sum() * 100).round(2))
total_entrytypes = grouped['Anzahl'].sum()
grouped["Anteil Gesamt (%)"] = grouped['Anzahl'] / total_entrytypes * 100 if total_entrytypes else 0
# Mapping Eintragstyp zu Typgruppe # Mapping Eintragstyp zu Typgruppe
eintragstyp_gruppen = { eintragstyp_gruppen = {
@ -1643,8 +1302,7 @@ def visualize_language_entrytypes(bib_database):
barmode="group", barmode="group",
title=f'Verteilung der Eintragstypen pro Sprache (n={len(df)}, Stand: {current_date})', title=f'Verteilung der Eintragstypen pro Sprache (n={len(df)}, Stand: {current_date})',
text='Anzahl', text='Anzahl',
labels={'Sprache': 'Sprache', 'Eintragstyp': 'Eintragstyp', 'Anzahl': 'Anzahl', 'Typgruppe': 'Typgruppe'}, labels={'Sprache': 'Sprache', 'Eintragstyp': 'Eintragstyp', 'Anzahl': 'Anzahl', 'Typgruppe': 'Typgruppe'}
custom_data=['Eintragstyp', 'Typgruppe', 'Anteil Gesamt (%)', 'Anteil innerhalb Sprache (%)']
) )
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
@ -1658,19 +1316,9 @@ def visualize_language_entrytypes(bib_database):
# Ergänzung: Y-Achse logarithmisch skalieren # Ergänzung: Y-Achse logarithmisch skalieren
layout["yaxis_type"] = "log" layout["yaxis_type"] = "log"
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Eintragstyp: %{customdata[0]}<br>"
"Typgruppe: %{customdata[1]}<br>"
"Anzahl: %{y}<br>"
"Anteil gesamt: %{customdata[2]:.2f}%<br>"
"Anteil innerhalb Sprache: %{customdata[3]:.2f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
print(tabulate(grouped.sort_values(["Sprache", "Eintragstyp"]), headers=["Sprache", "Eintragstyp", "Anzahl", "Anteil innerhalb Sprache (%)", "Typgruppe"], tablefmt="grid", showindex=False)) print(tabulate(grouped.sort_values(["Sprache", "Eintragstyp"]), headers=["Sprache", "Eintragstyp", "Anzahl", "Anteil innerhalb Sprache (%)", "Typgruppe"], tablefmt="grid", showindex=False))
export_figure_local(fig, "visualize_language_entrytypes", export_fig_visualize_languages) export_figure_local(fig, "visualize_language_entrytypes", export_fig_visualize_languages, bib_filename)
############# #############
@ -1713,11 +1361,9 @@ visualize_tags(bib_database)
visualize_index(bib_database) visualize_index(bib_database)
visualize_research_questions(bib_database) visualize_research_questions(bib_database)
visualize_categories(bib_database) visualize_categories(bib_database)
visualize_relevance_vs_research_questions(bib_database)
visualize_relevance_vs_categories(bib_database)
visualize_relevance_vs_search_terms(bib_database)
visualize_time_series(bib_database) visualize_time_series(bib_database)
visualize_top_authors(bib_database) visualize_top_authors(bib_database)
visualize_top_publications(bib_database)
data = prepare_path_data(bib_database) data = prepare_path_data(bib_database)
create_path_diagram(data) create_path_diagram(data)
create_sankey_diagram(bib_database) create_sankey_diagram(bib_database)

File diff suppressed because it is too large Load Diff

View File

@ -218,13 +218,14 @@ layout["legend"] = dict(
itemdoubleclick="toggle" itemdoubleclick="toggle"
) )
layout["yaxis3"] = dict( layout["yaxis3"] = dict(
title=dict(text="Abweichung (ΔSCₙ)", font=dict(color=colors["text"])), title="Abweichung (ΔSCₙ)",
overlaying="y", overlaying="y",
side="right", side="right",
showgrid=False, showgrid=False,
zeroline=True, zeroline=True,
zerolinewidth=2, zerolinewidth=2,
zerolinecolor='grey', zerolinecolor='grey',
titlefont=dict(color=colors["text"]),
tickfont=dict(color=colors["text"]), tickfont=dict(color=colors["text"]),
anchor="free", anchor="free",
position=1.0 position=1.0