Promotion: Suchergebnisse optimieren

Promotion: Bibliothek aktualisiert und in Netzwerkanalyse prozentualen Anteil hinzugefügt
Promotion:
2025-10-19 23:44:33 +02:00 · 2025-10-19 16:37:27 +02:00 · 2025-10-11 20:26:08 +02:00 · 2025-10-11 18:21:17 +02:00 · 2025-10-11 17:57:54 +02:00 · 2025-10-06 00:46:52 +02:00
6 changed files with 139840 additions and 3859 deletions
--- a/Suchergebnisse.bib
+++ b/Suchergebnisse.bib
--- a/Bibliothek/cleaned_Literaturverzeichnis.bib
+++ b/Bibliothek/cleaned_Literaturverzeichnis.bib
--- a/analyse_korrelation.py
+++ b/analyse_korrelation.py
@ -1,4 +1,3 @@
-
 import os

 # Neue Exportfunktion: HTML in /tmp speichern, per SCP übertragen, PNG lokal speichern
@ -289,6 +288,7 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
                            abs_corr = abs(corr)
                            significance = 'Signifikant' if p_value < 0.05 else 'Nicht signifikant'
                            hover_color = colors['brightArea'] if p_value < 0.05 else colors['depthArea']
+                            cooccurrence_count = int(((df[x_term] == 1) & (df[y_term] == 1)).sum())
                            correlations.append({
                                'x_term': x_term,
                                'y_term': y_term,
@ -297,6 +297,8 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
                                'p_value': p_value,
                                'significance': significance,
                                'hover_color': hover_color,
+                                'n_observations': int(len(x_valid)),
+                                'cooccurrence_count': cooccurrence_count,
                                'interpretation': (
                                    f"Die Korrelation zwischen '{x_term}' und '{y_term}' beträgt {corr:.2f}. "
                                    f"p-Wert: {p_value:.3e} ({significance})"
@ -362,12 +364,25 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
            line=dict(width=1, color=colors['background'])
        ),
        hovertemplate=(
-            '<b>%{customdata[0]}</b><br>'
+            '<b>%{customdata[0]}</b> ↔ <b>%{customdata[1]}</b><br>'
            'Korrelation: %{marker.color:.2f}<br>'
-            'p-Wert: %{customdata[1]:.3e}<br>'
-            'Signifikanz: %{customdata[2]}'
+            'p-Wert: %{customdata[3]:.3e}<br>'
+            'Signifikanz: %{customdata[4]}<br>'
+            'Stichprobe (n): %{customdata[5]}<br>'
+            'Gemeinsame Treffer: %{customdata[6]}<br>'
+            '%{customdata[7]}'
+            '<extra></extra>'
        ),
-        customdata=correlation_df[['x_term', 'p_value', 'significance']].to_numpy()
+        customdata=np.array(list(zip(
+            correlation_df['x_term'],
+            correlation_df['y_term'],
+            correlation_df['correlation'],
+            correlation_df['p_value'],
+            correlation_df['significance'],
+            correlation_df['n_observations'],
+            correlation_df['cooccurrence_count'],
+            correlation_df['interpretation']
+        )), dtype=object)
    )

    # Standardlayout verwenden und ggf. ergänzen, Margin dynamisch für Responsivität
@ -377,6 +392,7 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
            x_title=x_label,
            y_title=y_label
        ),
+        hovermode='closest',
        xaxis=dict(
            tickangle=-45,
            automargin=True
@ -491,11 +507,6 @@ df['X_Dimension'] = df[[tag for tag in tags_to_search_processed if tag in df.col
 df['Y_Dimension'] = df[[cat for cat in categories_processed if cat in df.columns]].sum(axis=1)
 df['Z_Dimension'] = df[[rq for rq in research_questions_processed if rq in df.columns]].sum(axis=1)

-# Clusteranalyse mit K-Means basierend auf den deduktiven Dimensionen
-features = df[['X_Dimension', 'Y_Dimension', 'Z_Dimension']]
-scaler = StandardScaler()
-scaled_features = scaler.fit_transform(features)
-
 # Clusteranalyse mit K-Means basierend auf den deduktiven Dimensionen
 # Prüfung auf konstante deduktive Dimensionen
 if df[['X_Dimension', 'Y_Dimension', 'Z_Dimension']].nunique().eq(1).all():
@ -568,6 +579,19 @@ for cluster in cluster_means.index:

 # Statische Cluster-Beschriftungen in den DataFrame einfügen
 df['Cluster_Label'] = df['KMeans_Cluster'].map(cluster_labels)
+df['Cluster_Label'] = df['Cluster_Label'].fillna(df['KMeans_Cluster'])
+
+# Farbzuordnung für die Clusterlabels aus den CI-Farben ableiten
+fallback_color = cluster_colors.get("0", colors.get('primaryLine', '#1f77b4'))
+color_map = {}
+for cluster_key, label in cluster_labels.items():
+    base_color = cluster_colors.get(str(cluster_key), fallback_color)
+    color_map[label] = base_color
+
+# Sicherstellen, dass auch eventuelle Restlabels (z.B. "Nicht gültig") erfasst werden
+for label in df['Cluster_Label'].dropna().unique():
+    if label not in color_map:
+        color_map[label] = cluster_colors.get(str(label), fallback_color)

 # Ausgabe der statischen Cluster-Beschriftungen
 print("Cluster-Beschriftungen (inhaltlich):")
@ -584,7 +608,7 @@ fig_cluster = px.scatter_3d(
    color='Cluster_Label',
    size='Point_Size',
    size_max=100,
-    color_discrete_sequence=list(cluster_colors.values()),
+    color_discrete_map=color_map,
    hover_data={
        'Cluster_Label': True,
        'X_Dimension': True,
@ -602,7 +626,6 @@ fig_cluster = px.scatter_3d(
    }
 )

-
 # Layout mit Standardlayout und konsistenten CI-konformen Ergänzungen
 layout_cluster = get_standard_layout(
    title=plot_title,
@ -687,6 +710,7 @@ correlation_quality_results = {
    "Forschungsunterfragen & Kategorien": analyze_correlation_quality(df, research_questions_processed, categories_processed),
    "Forschungsunterfragen & Suchbegriffe": analyze_correlation_quality(df, research_questions_processed, tags_to_search_processed),
    "Forschungsunterfragen & Indizes": analyze_correlation_quality(df, research_questions_processed, index_terms_processed),
+    "Forschungsunterfragen & Forschungsunterfragen": analyze_correlation_quality(df, research_questions_processed, research_questions_processed),
    "Indizes & Kategorien": analyze_correlation_quality(df, index_terms_processed, categories_processed),
    "Indizes & Suchbegriffe": analyze_correlation_quality(df, index_terms_processed, tags_to_search_processed),
    "Suchbegriffe & Kategorien": analyze_correlation_quality(df, tags_to_search_processed, categories_processed),
@ -753,8 +777,17 @@ def plot_average_correlation_plotly(summary_df):
    )
    # PNG-Export ergänzen
    png_path = os.path.join(export_path_png, f"{slugify('summary_plot_' + global_bib_filename.replace('.bib', ''))}.png")
-    fig.write_image(png_path, width=1200, height=800, scale=2)
-    print(f"✅ PNG-Summary-Datei gespeichert unter: {png_path}")
+    try:
+        fig.write_image(png_path, width=1200, height=800, scale=2)
+        print(f"✅ PNG-Summary-Datei gespeichert unter: {png_path}")
+    except ValueError as err:
+        if "kaleido" in str(err).lower():
+            print("⚠️ PNG-Export übersprungen: Plotly benötigt das Paket 'kaleido'.")
+            print("   Installation (falls gewünscht): pip install -U kaleido")
+        else:
+            print(f"⚠️ PNG-Export fehlgeschlagen: {err}")
+    except Exception as err:
+        print(f"⚠️ PNG-Export fehlgeschlagen: {err}")

 #============================
 # Aufruf Alle möglichen bivariaten Korrelationen visualisieren
@ -773,4 +806,4 @@ plot_average_correlation_plotly(summary_df)

 # Visualisierungsoption für Plotly: Immer im Browser öffnen
 import plotly.io as pio
-pio.renderers.default = 'browser'
+pio.renderers.default = 'browser'
--- a/analyse_netzwerk.py
+++ b/analyse_netzwerk.py
@ -1,7 +1,6 @@

 from config_netzwerk import theme, export_fig_visual, bib_filename

-
 import os

 # Clear the terminal
@ -60,11 +59,14 @@ from config_netzwerk import (
    export_fig_visualize_sources_status,
    export_fig_create_wordcloud_from_titles,
    export_fig_visualize_languages,
+    export_fig_visualize_relevance_fu,
+    export_fig_visualize_relevance_categories,
+    export_fig_visualize_relevance_search_terms,
 )

 from config_netzwerk import export_fig_png

-def export_figure_local(fig, name, flag, bib_filename=None):
+def export_figure_local(fig, name, flag):
    from config_netzwerk import export_path_html, export_path_png
    # Einmalige Definition von safe_filename am Anfang der Funktion
    safe_filename = prepare_figure_export(fig, name).replace(".html", "")
@ -100,6 +102,23 @@ word_colors = [
    colors["negativeHighlight"]
 ]

+# Relevance levels (1 = low, 5 = very high), listed from highest to lowest.
+RELEVANCE_LEVELS = [5, 4, 3, 2, 1]
+# Display label for every relevance level, in the same order as the levels.
+RELEVANCE_LEVEL_LABELS = {level: f"Relevanz {level}" for level in RELEVANCE_LEVELS}
+# Colour from the shared `colors` palette for every display label.
+RELEVANCE_COLOR_MAP = dict(zip(
+    (RELEVANCE_LEVEL_LABELS[level] for level in RELEVANCE_LEVELS),
+    (
+        colors['positiveHighlight'],
+        colors['accent'],
+        colors['brightArea'],
+        colors['depthArea'],
+        colors['negativeHighlight'],
+    ),
+))
+
 # Aktuelles Datum
 current_date = datetime.now().strftime("%Y-%m-%d")

@ -120,6 +139,13 @@ with open('en_complete.txt', 'r', encoding='utf-8') as file:
 # Kombinierte Stoppliste
 stop_words = stop_words_de.union(stop_words_en)

+# Helper: extract the relevance level from an entry's keywords
+def extract_relevance_level(entry_keywords):
+    """Return the first level of RELEVANCE_LEVELS tagged in *entry_keywords*.
+
+    A level counts as tagged when 'promotion:relevanz:<level>' is contained
+    in *entry_keywords*. Levels are probed in the order of RELEVANCE_LEVELS;
+    None is returned when no level matches.
+    """
+    tagged_levels = (
+        candidate
+        for candidate in RELEVANCE_LEVELS
+        if f'promotion:relevanz:{candidate}' in entry_keywords
+    )
+    return next(tagged_levels, None)
+
 # Funktion zur Berechnung der Stichprobengröße
 def calculate_sample_size(N, Z=1.96, p=0.5, e=0.05):
    n_0 = (Z**2 * p * (1 - p)) / (e**2)
@ -149,8 +175,10 @@ def visualize_network(bib_database):
        'Buch',
        'Buchteil',
        'Bericht',
-        'Konferenz-Paper'
+        'Konferenz-Paper',
+        'Studienbrief'
    ]
+    
    tags_to_search = set()
    for number, type_ in product(numbers, types):
        search_term = search_terms[number]
@ -166,13 +194,6 @@ def visualize_network(bib_database):
                    if tag in keyword:
                        tag_counts[tag] += 1

-    fundzahlen = defaultdict(int)
-    for tag, count in tag_counts.items():
-        search_term = tag.split(':')[-1]
-        for key, value in search_terms.items():
-            if search_term == value:
-                fundzahlen[value] += count
-
    search_terms_network = {
        "Primäre Begriffe": {
            "learning:management:system": [
@ -181,7 +202,7 @@ def visualize_network(bib_database):
                "online:lernplattform",
                "online:lernumgebung",
                "digital:learning",
-                "digitales:lernen"
+                "digital:lernen"
            ]
        },
        "Sekundäre Begriffe": {
@ -191,15 +212,15 @@ def visualize_network(bib_database):
            ],
            "bildung:technologie": [
                "digital:learning",
-                "digitales:lernen",
+                "digital:lernen",
                "blended:learning"
            ],
            "digital:learning": [
-                "digitale:medien",
+                "digital:medien",
                "online:learning"
            ],
-            "digitales:lernen": [
-                "digitale:medien",
+            "digital:lernen": [
+                "digital:medien",
                "online:lernen"
            ],
            "blended:learning": ["mooc"]
@ -210,6 +231,14 @@ def visualize_network(bib_database):
        }
    }

+    # Fundzählung exakt entlang der search_terms-Definition
+    fundzahlen = defaultdict(int)
+
+    for number, suchbegriff in search_terms.items():
+        for typ in types:
+            tag = f'#{number}:{typ}:{suchbegriff}'.lower()
+            fundzahlen[suchbegriff.lower()] += tag_counts.get(tag, 0)
+
    G = nx.Graph()

    hierarchy_colors = {
@ -285,11 +314,15 @@ def visualize_network(bib_database):
    secondary_nodes = []
    tertiary_nodes = []

+    total_fundzahlen = sum(fundzahlen.values())
+
    for node in G.nodes():
        color = G.nodes[node]['color']
        size = math.log(G.nodes[node].get('size', 10) + 1) * 10
        x, y = pos[node]
-        hovertext = f"{node}<br>Anzahl Funde: {fundzahlen.get(node, 0)}"
+        count = fundzahlen.get(node, 0)
+        percentage = (count / total_fundzahlen * 100) if total_fundzahlen else 0
+        hovertext = f"{node}<br>Anzahl Funde: {count}<br>Anteil: {percentage:.1f}%"
        node_data = dict(x=x, y=y, text=node, size=size, hovertext=hovertext)
        if color == colors['primaryLine']:
            primary_nodes.append(node_data)
@ -330,7 +363,7 @@ def visualize_network(bib_database):

    fig = go.Figure(data=[edge_trace, primary_trace, secondary_trace, tertiary_trace])
    layout = get_standard_layout(
-        title=f"Suchbegriff-Netzwerk nach Relevanz und Semantik (n={sum(fundzahlen.values())}, Stand: {current_date})",
+        title=f"Suchbegriff-Netzwerk nach Relevanz und Semantik (n={total_fundzahlen}, Stand: {current_date})",
        x_title="Technologische Dimension",
        y_title="Pädagogische Dimension"
    )
@ -342,7 +375,7 @@ def visualize_network(bib_database):
    fig.update_layout(**layout)

    fig.show(config={"responsive": True})
-    export_figure_local(fig, "visualize_network", export_fig_visualize_network, bib_filename)
+    export_figure_local(fig, "visualize_network", export_fig_visualize_network)

    # Einfache Pfadanalyse nach dem Anzeigen der Figur
    if 'e-learning' in G and 'online:lernen' in G:
@ -377,10 +410,11 @@ def visualize_tags(bib_database):
        'Buch',
        'Buchteil',
        'Bericht',
-        'Konferenz-Paper'
+        'Konferenz-Paper',
+        'Studienbrief'
    ]
    tags_to_search = set(
-        f"#{number}:{type_}:{search_terms[number]}"
+        f"#{number}:{type_}:{search_terms[number]}".lower()
        for number, type_ in product(numbers, types)
    )

@ -402,36 +436,46 @@ def visualize_tags(bib_database):
                        tag_counts[tag] += 1

    # Daten für Visualisierung aufbereiten
-    data = [
-        {'Tag': tag, 'Count': count, 'Type': tag.split(':')[1].lower()}
+    data_rows = [
+        {
+            'Tag': tag,
+            'Count': count,
+            'Type': tag.split(':')[1].lower()
+        }
        for tag, count in tag_counts.items()
        if count > 0
    ]

-    if not data:
+    if not data_rows:
        print("Warnung: Keine Tags gefunden, die den Suchkriterien entsprechen.")
        return

+    df = pd.DataFrame(data_rows)
+    df['TypeLabel'] = df['Type'].str.replace('-', ' ').str.title()
+    total_count = df['Count'].sum()
+    df['Percentage'] = df['Count'] / total_count * 100 if total_count else 0
+
    # Farbzuordnung
    color_map = {
        'zeitschriftenartikel': colors['primaryLine'],
        'konferenz-paper': colors['secondaryLine'],
        'buch': colors['depthArea'],
        'buchteil': colors['brightArea'],
-        'bericht': colors['accent']
+        'bericht': colors['accent'],
+        'studienbrief': colors['positiveHighlight']
    }

    # Visualisierung erstellen
-    total_count = sum(tag_counts.values())
    fig = px.bar(
-        data,
+        df,
        x='Tag',
        y='Count',
        title=f'Häufigkeit der Suchbegriffe in der Literaturanalyse (n={total_count}, Stand: {current_date})',
        labels={'Tag': 'Tag', 'Count': 'Anzahl der Vorkommen'},
        color='Type',
        color_discrete_map=color_map,
-        text_auto=True
+        text_auto=True,
+        custom_data=['TypeLabel', 'Percentage']
    )

    layout = get_standard_layout(
@ -447,9 +491,17 @@ def visualize_tags(bib_database):
    layout["xaxis"]["automargin"] = True
    layout["autosize"] = True
    fig.update_layout(**layout)
+    fig.update_traces(
+        hovertemplate=(
+            "<b>%{x}</b><br>"
+            "Typ: %{customdata[0]}<br>"
+            "Anzahl: %{y}<br>"
+            "Anteil: %{customdata[1]:.1f}%<extra></extra>"
+        )
+    )

    fig.show(config={"responsive": True})
-    export_figure_local(fig, "visualize_tags", export_fig_visualize_tags, bib_filename)
+    export_figure_local(fig, "visualize_tags", export_fig_visualize_tags)

 # Visualisierung 3: Häufigkeit Index
 def visualize_index(bib_database):
@ -477,11 +529,21 @@ def visualize_index(bib_database):
    index_data = [{'Index': index, 'Count': count} for index, count in index_counts.items()]
    index_data = sorted(index_data, key=lambda x: x['Count'], reverse=True)

-    total_count = sum(index_counts.values())
+    index_df = pd.DataFrame(index_data)
+    total_count = index_df['Count'].sum()
+    index_df['Percentage'] = index_df['Count'] / total_count * 100 if total_count else 0
    print(f"Häufigkeit Indizes (Gesamtanzahl: {total_count}):")
-    print(tabulate(index_data, headers="keys", tablefmt="grid"))
+    print(tabulate(index_df.to_dict('records'), headers="keys", tablefmt="grid"))

-    fig = px.bar(index_data, x='Index', y='Count', title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})', labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
+    fig = px.bar(
+        index_df,
+        x='Index',
+        y='Count',
+        title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})',
+        labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'},
+        text_auto=True,
+        custom_data=['Percentage']
+    )
    layout = get_standard_layout(
        title=fig.layout.title.text,
        x_title='Index',
@ -496,8 +558,15 @@ def visualize_index(bib_database):
    layout["autosize"] = True
    fig.update_layout(**layout)
    fig.update_traces(marker=plot_styles['balken_primaryLine'])
+    fig.update_traces(
+        hovertemplate=(
+            "<b>%{x}</b><br>"
+            "Anzahl: %{y}<br>"
+            "Anteil: %{customdata[0]:.1f}%<extra></extra>"
+        )
+    )
    fig.show(config={"responsive": True})
-    export_figure_local(fig, "visualize_index", export_fig_visualize_index, bib_filename)
+    export_figure_local(fig, "visualize_index", export_fig_visualize_index)

 # Visualisierung 4: Häufigkeit Forschungsunterfragen
 def visualize_research_questions(bib_database):
@ -524,13 +593,22 @@ def visualize_research_questions(bib_database):
    rq_data = [{'Research_Question': research_questions[keyword], 'Count': count} for keyword, count in rq_counts.items()]
    rq_data = sorted(rq_data, key=lambda x: x['Count'], reverse=True)

-    rq_data_df = pd.DataFrame(rq_data)
+    rq_data_df = pd.DataFrame(rq_data, columns=['Research_Question', 'Count'])

    total_count = rq_data_df['Count'].sum()
+    rq_data_df['Percentage'] = rq_data_df['Count'] / total_count * 100 if total_count else 0
    print(f"Häufigkeit Forschungsunterfragen (Gesamtanzahl: {total_count}):")
    print(tabulate(rq_data, headers="keys", tablefmt="grid"))

-    fig = px.bar(rq_data_df, x='Research_Question', y='Count', title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})', labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
+    fig = px.bar(
+        rq_data_df,
+        x='Research_Question',
+        y='Count',
+        title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})',
+        labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'},
+        text_auto=True,
+        custom_data=['Percentage']
+    )
    layout = get_standard_layout(
        title=fig.layout.title.text,
        x_title='Forschungsunterfrage',
@ -545,8 +623,15 @@ def visualize_research_questions(bib_database):
    layout["autosize"] = True
    fig.update_layout(**layout)
    fig.update_traces(marker=plot_styles['balken_primaryLine'])
+    fig.update_traces(
+        hovertemplate=(
+            "<b>%{x}</b><br>"
+            "Anzahl: %{y}<br>"
+            "Anteil: %{customdata[0]:.1f}%<extra></extra>"
+        )
+    )
    fig.show(config={"responsive": True})
-    export_figure_local(fig, "visualize_research_questions", export_fig_visualize_research_questions, bib_filename)
+    export_figure_local(fig, "visualize_research_questions", export_fig_visualize_research_questions)

 # Visualisierung 5: Häufigkeit spezifischer Kategorien
 def visualize_categories(bib_database):
@ -568,13 +653,22 @@ def visualize_categories(bib_database):
    cat_data = [{'Category': categories[keyword], 'Count': count} for keyword, count in cat_counts.items()]
    cat_data = sorted(cat_data, key=lambda x: x['Count'], reverse=True)

-    cat_data_df = pd.DataFrame(cat_data)
+    cat_data_df = pd.DataFrame(cat_data, columns=['Category', 'Count'])

    total_count = cat_data_df['Count'].sum()
+    cat_data_df['Percentage'] = cat_data_df['Count'] / total_count * 100 if total_count else 0
    print(f"Häufigkeit Kategorien (Gesamtanzahl: {total_count}):")
    print(tabulate(cat_data, headers="keys", tablefmt="grid"))

-    fig = px.bar(cat_data_df, x='Category', y='Count', title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})', labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
+    fig = px.bar(
+        cat_data_df,
+        x='Category',
+        y='Count',
+        title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})',
+        labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'},
+        text_auto=True,
+        custom_data=['Percentage']
+    )
    layout = get_standard_layout(
        title=fig.layout.title.text,
        x_title='Kategorie',
@ -589,8 +683,179 @@ def visualize_categories(bib_database):
    layout["autosize"] = True
    fig.update_layout(**layout)
    fig.update_traces(marker=plot_styles['balken_primaryLine'])
+    fig.update_traces(
+        hovertemplate=(
+            "<b>%{x}</b><br>"
+            "Anzahl: %{y}<br>"
+            "Anteil: %{customdata[0]:.1f}%<extra></extra>"
+        )
+    )
    fig.show(config={"responsive": True})
-    export_figure_local(fig, "visualize_categories", export_fig_visualize_categories, bib_filename)
+    export_figure_local(fig, "visualize_categories", export_fig_visualize_categories)
+
+# Relevanz-Auswertungen
+def build_relevance_distribution(bib_database, tag_to_label):
+    """Count keyword-tagged entries per label and relevance level.
+
+    Every entry of ``bib_database.entries`` contributes one record for each
+    tag of ``tag_to_label`` found among its comma-separated keywords (a
+    backslash-escaped '#' is unescaped first; keywords are stripped and
+    lower-cased). Entries without keywords, or for which
+    ``extract_relevance_level`` returns None, are skipped.
+
+    Returns:
+        An empty DataFrame when no record was collected; otherwise a
+        DataFrame with the columns 'Kategorie', 'Relevanzstufe' (ordered
+        categorical following RELEVANCE_LEVELS) and 'Count', sorted by
+        'Kategorie' and then 'Relevanzstufe'.
+    """
+    collected = []
+
+    for bib_entry in bib_database.entries:
+        raw_keywords = bib_entry.get('keywords', '')
+        if not raw_keywords:
+            continue
+
+        normalized_keywords = {
+            part.strip().lower()
+            for part in raw_keywords.replace('\\#', '#').split(',')
+        }
+        level = extract_relevance_level(normalized_keywords)
+        if level is None:
+            continue
+
+        # One record per matching tag; several tags may share the same label.
+        collected.extend(
+            {'Kategorie': label, 'Relevanzstufe': RELEVANCE_LEVEL_LABELS[level]}
+            for tag, label in tag_to_label.items()
+            if tag in normalized_keywords
+        )
+
+    if not collected:
+        return pd.DataFrame()
+
+    counts = pd.DataFrame(collected)
+    counts = counts.groupby(['Kategorie', 'Relevanzstufe']).size().reset_index(name='Count')
+    ordered_labels = [RELEVANCE_LEVEL_LABELS[lvl] for lvl in RELEVANCE_LEVELS]
+    counts['Relevanzstufe'] = pd.Categorical(
+        counts['Relevanzstufe'],
+        categories=ordered_labels,
+        ordered=True
+    )
+    return counts.sort_values(['Kategorie', 'Relevanzstufe'])
+
+
+def plot_relevance_distribution(df, title, x_title, export_flag, filename):
+    """Show and export a stacked bar chart of relevance counts per category.
+
+    Args:
+        df: DataFrame with the columns 'Kategorie', 'Relevanzstufe' and
+            'Count'. It is not modified; the percentage column used for the
+            hover text is added to a derived frame.
+        title: Chart title; sample size and current date are appended.
+        x_title: Axis title and legend label for the 'Kategorie' column.
+        export_flag: Forwarded unchanged to ``export_figure_local``.
+        filename: Figure name forwarded to ``export_figure_local``.
+
+    Returns:
+        None. Prints a warning and returns early when *df* is empty.
+    """
+    if df.empty:
+        print(f"⚠️ Keine Relevanzdaten verfügbar für: {title}")
+        return
+
+    total_count = df['Count'].sum()
+    # assign() returns a new frame, so the caller's DataFrame stays untouched.
+    plot_df = df.assign(
+        Percentage=(df['Count'] / total_count * 100) if total_count else 0
+    )
+    level_order = [RELEVANCE_LEVEL_LABELS[level] for level in RELEVANCE_LEVELS]
+    fig = px.bar(
+        plot_df,
+        x='Kategorie',
+        y='Count',
+        color='Relevanzstufe',
+        color_discrete_map=RELEVANCE_COLOR_MAP,
+        category_orders={'Relevanzstufe': level_order},
+        title=f"{title} (n={total_count}, Stand: {current_date})",
+        labels={'Kategorie': x_title, 'Count': 'Anzahl', 'Relevanzstufe': 'Relevanzstufe'},
+        custom_data=['Relevanzstufe', 'Percentage']
+    )
+
+    layout = get_standard_layout(
+        title=fig.layout.title.text,
+        x_title=x_title,
+        y_title='Anzahl'
+    )
+    layout['barmode'] = 'stack'
+    layout['font'] = {"size": 14, "color": colors['text']}
+    layout['title'] = {"font": {"size": 16}}
+    layout['margin'] = dict(b=160, t=60, l=40, r=40)
+    # Reuse an x-axis config from the standard layout if one exists.
+    layout['xaxis'] = layout.get('xaxis', {})
+    layout['xaxis']['tickangle'] = -45
+    layout['xaxis']['automargin'] = True
+    layout['autosize'] = True
+    fig.update_layout(**layout)
+    # customdata[0] is the relevance label, customdata[1] the share in percent.
+    fig.update_traces(
+        hovertemplate=(
+            "<b>%{x}</b><br>"
+            "Relevanzstufe: %{customdata[0]}<br>"
+            "Anzahl: %{y}<br>"
+            "Anteil: %{customdata[1]:.1f}%<extra></extra>"
+        )
+    )
+
+    fig.show(config={"responsive": True})
+    export_figure_local(fig, filename, export_flag)
+
+
+def visualize_relevance_vs_research_questions(bib_database):
+    """Plot the relevance distribution across the research sub-question tags."""
+    fu_labels = {
+        'promotion:fu1': 'Akzeptanz und Nützlichkeit (FU1)',
+        'promotion:fu2a': 'Effekt für Lernende (FU2a)',
+        'promotion:fu2b': 'Effekt-Faktoren für Lehrende (FU2b)',
+        'promotion:fu3': 'Konzeption und Merkmale (FU3)',
+        'promotion:fu4a': 'Bildungswissenschaftliche Mechanismen (FU4a)',
+        'promotion:fu4b': 'Technisch-gestalterische Mechanismen (FU4b)',
+        'promotion:fu5': 'Möglichkeiten und Grenzen (FU5)',
+        'promotion:fu6': 'Beurteilung als Kompetenzerwerbssystem (FU6)',
+        'promotion:fu7': 'Inputs und Strategien (FU7)'
+    }
+    # Keywords are compared in lower case, so the lookup keys are lower-cased too.
+    lowered_lookup = {tag.lower(): label for tag, label in fu_labels.items()}
+    distribution = build_relevance_distribution(bib_database, lowered_lookup)
+    plot_relevance_distribution(
+        distribution,
+        title="Relevanzverteilung nach Forschungsunterfragen",
+        x_title="Forschungsunterfragen",
+        export_flag=export_fig_visualize_relevance_fu,
+        filename="visualize_relevance_fu"
+    )
+
+
+def visualize_relevance_vs_categories(bib_database):
+    """Plot the relevance distribution across the content category tags."""
+    category_labels = {
+        'promotion:argumentation': 'Argumentation',
+        'promotion:kerngedanke': 'Kerngedanke',
+        'promotion:weiterführung': 'Weiterführung',
+        'promotion:schlussfolgerung': 'Schlussfolgerung'
+    }
+    # Keywords are compared in lower case, so the lookup keys are lower-cased too.
+    lowered_lookup = {tag.lower(): label for tag, label in category_labels.items()}
+    distribution = build_relevance_distribution(bib_database, lowered_lookup)
+    plot_relevance_distribution(
+        distribution,
+        title="Relevanzverteilung nach Kategorien",
+        x_title="Kategorien",
+        export_flag=export_fig_visualize_relevance_categories,
+        filename="visualize_relevance_categories"
+    )
+
+
+def visualize_relevance_vs_search_terms(bib_database):
+    """Plot the relevance distribution across the numbered search-term tags.
+
+    Tags have the form '#<number>:<type>:<term>' (lower-cased); all source
+    types of one search term share the label '#<number>:<term>'.
+    """
+    search_terms = {
+        '0': 'digital:learning',
+        '1': 'learning:management:system',
+        '2': 'online:lernplattform',
+        '3': 'online:lernumgebung',
+        '4': 'mooc',
+        '5': 'e-learning',
+        '6': 'bildung:technologie',
+        '7': 'digital:medien',
+        '8': 'blended:learning',
+        '9': 'digital:lernen',
+        'a': 'online:lernen',
+        'b': 'online:learning'
+    }
+    types = [
+        'Zeitschriftenartikel',
+        'Buch',
+        'Buchteil',
+        'Bericht',
+        'Konferenz-Paper',
+        'Studienbrief'
+    ]
+
+    # Same iteration order as nested loops: search term outer, source type inner.
+    lookup = {
+        f'#{number}:{source_type}:{term}'.lower(): f"#{number}:{term}"
+        for number, term in search_terms.items()
+        for source_type in types
+    }
+
+    distribution = build_relevance_distribution(bib_database, lookup)
+    plot_relevance_distribution(
+        distribution,
+        title="Relevanzverteilung nach Suchbegriffen",
+        x_title="Suchbegriffe",
+        export_flag=export_fig_visualize_relevance_search_terms,
+        filename="visualize_relevance_search_terms"
+    )

 # Zeitreihenanalyse der Veröffentlichungen
 def visualize_time_series(bib_database):
@ -613,13 +878,16 @@ def visualize_time_series(bib_database):
    if publication_years:
        year_counts = Counter(publication_years)
        df = pd.DataFrame(year_counts.items(), columns=['Year', 'Count']).sort_values('Year')
+        total_publications = df['Count'].sum()
+        df['Percentage'] = df['Count'] / total_publications * 100 if total_publications else 0

        fig = px.line(
            df,
            x='Year',
            y='Count',
            title=f'Jährliche Veröffentlichungen in der Literaturanalyse (n={sum(year_counts.values())}, Stand: {current_date})',
-            labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'}
+            labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'},
+            custom_data=['Percentage']
        )
        layout = get_standard_layout(
            title=fig.layout.title.text,
@ -636,8 +904,15 @@ def visualize_time_series(bib_database):
        layout["autosize"] = True
        fig.update_layout(**layout)
        fig.update_traces(line=plot_styles['linie_primaryLine'])
+        fig.update_traces(
+            hovertemplate=(
+                "<b>%{x}</b><br>"
+                "Anzahl: %{y}<br>"
+                "Anteil: %{customdata[0]:.1f}%<extra></extra>"
+            )
+        )
        fig.show(config={"responsive": True})
-        export_figure_local(fig, "visualize_time_series", export_fig_visualize_time_series, bib_filename)
+        export_figure_local(fig, "visualize_time_series", export_fig_visualize_time_series)
    else:
        print("Keine gültigen Veröffentlichungsjahre gefunden.")

@ -654,8 +929,18 @@ def visualize_top_authors(bib_database):
    top_authors = Counter(author_counts).most_common(top_n)
    if top_authors:
        df = pd.DataFrame(top_authors, columns=['Author', 'Count'])
+        overall_total = sum(author_counts.values())
+        df['Percentage'] = df['Count'] / overall_total * 100 if overall_total else 0

-        fig = px.bar(df, x='Author', y='Count', title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={sum(author_counts.values())}, Stand: {current_date})', labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'}, text_auto=True)
+        fig = px.bar(
+            df,
+            x='Author',
+            y='Count',
+            title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={overall_total}, Stand: {current_date})',
+            labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'},
+            text_auto=True,
+            custom_data=['Percentage']
+        )
        layout = get_standard_layout(
            title=fig.layout.title.text,
            x_title='Autor',
@ -670,55 +955,18 @@ def visualize_top_authors(bib_database):
        layout["autosize"] = True
        fig.update_layout(**layout)
        fig.update_traces(marker=plot_styles['balken_primaryLine'])
+        fig.update_traces(
+            hovertemplate=(
+                "<b>%{x}</b><br>"
+                "Anzahl: %{y}<br>"
+                "Anteil: %{customdata[0]:.1f}%<extra></extra>"
+            )
+        )
        fig.show(config={"responsive": True})
-        export_figure_local(fig, "visualize_top_authors", export_fig_visualize_top_authors, bib_filename)
+        export_figure_local(fig, "visualize_top_authors", export_fig_visualize_top_authors)
    else:
        print("Keine Autoren gefunden.")

- # Top Titel nach Anzahl der Werke
-def normalize_title(title):
-    # Entfernen von Sonderzeichen und Standardisierung auf Kleinbuchstaben
-    title = title.lower().translate(str.maketrans('', '', ",.!?\"'()[]{}:;"))
-    # Zusammenführen ähnlicher Titel, die sich nur in geringfügigen Details unterscheiden
-    title = " ".join(title.split())
-    # Entfernen häufiger Füllwörter oder Standardphrasen, die die Unterscheidung nicht unterstützen
-    common_phrases = ['eine studie', 'untersuchung der', 'analyse von']
-    for phrase in common_phrases:
-        title = title.replace(phrase, '')
-    return title.strip()
-
-def visualize_top_publications(bib_database):
-    top_n = 25  # Anzahl der Top-Publikationen, die angezeigt werden sollen
-    publication_counts = defaultdict(int)
-    
-    for entry in bib_database.entries:
-        if 'title' in entry:
-            title = normalize_title(entry['title'])
-            publication_counts[title] += 1
-
-    top_publications = sorted(publication_counts.items(), key=lambda x: x[1], reverse=True)[:top_n]
-    publication_data = [{'Title': title[:50] + '...' if len(title) > 50 else title, 'Count': count} for title, count in top_publications]
-
-    df = pd.DataFrame(publication_data)
-    
-    fig = px.bar(df, x='Title', y='Count', title=f'Häufig zitierte Publikationen in der Analyse (Top {top_n}, n={sum(publication_counts.values())}, Stand: {current_date})', labels={'Title': 'Titel', 'Count': 'Anzahl der Nennungen'})
-    layout = get_standard_layout(
-        title=fig.layout.title.text,
-        x_title='Titel',
-        y_title='Anzahl der Nennungen'
-    )
-    layout["font"] = {"size": 14, "color": colors['text']}
-    layout["title"] = {"font": {"size": 16}}
-    layout["margin"] = dict(b=160, t=60, l=40, r=40)
-    layout["xaxis"] = layout.get("xaxis", {})
-    layout["xaxis"]["tickangle"] = -45
-    layout["xaxis"]["automargin"] = True
-    layout["autosize"] = True
-    fig.update_layout(**layout)
-    fig.update_traces(marker=plot_styles['balken_primaryLine'])
-    fig.show(config={"responsive": True})
-    export_figure_local(fig, "visualize_top_publications", export_fig_visualize_top_publications, bib_filename)
-
 ##########

 # Daten vorbereiten
@ -760,7 +1008,8 @@ def prepare_path_data(bib_database):
        'Buch',
        'Buchteil',
        'Bericht',
-        'Konferenz-Paper'
+        'Konferenz-Paper',
+        'Studienbrief'
    ]

    data = []
@ -799,12 +1048,14 @@ def create_path_diagram(data):
    sources = []
    targets = []
    values = []
+    node_counts = Counter()
    color_map = {
        'zeitschriftenartikel': colors['primaryLine'],
        'konferenz-paper': colors['secondaryLine'],
        'buch': colors['depthArea'],
        'buchteil': colors['brightArea'],
-        'bericht': colors['accent']
+        'bericht': colors['accent'],
+        'studienbrief': colors['positiveHighlight']
    }

    def add_to_labels(label):
@ -821,8 +1072,19 @@ def create_path_diagram(data):
        sources.extend([fu_idx, category_idx, index_idx])
        targets.extend([category_idx, index_idx, type_idx])
        values.extend([1, 1, 1])
+        node_counts.update([entry['FU'], entry['Category'], entry['Index'], entry['Type']])

    node_colors = [color_map.get(label, colors['primaryLine']) for label in labels]
+    total_paths = len(data)
+    total_flows = sum(values)
+    node_percentages = [
+        node_counts.get(label, 0) / total_paths * 100 if total_paths else 0
+        for label in labels
+    ]
+    link_percentages = [
+        value / total_flows * 100 if total_flows else 0
+        for value in values
+    ]

    fig = go.Figure(data=[go.Sankey(
        node=dict(
@ -830,12 +1092,24 @@ def create_path_diagram(data):
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
-            color=node_colors
+            color=node_colors,
+            customdata=node_percentages,
+            hovertemplate=(
+                "%{label}<br>"
+                "Anzahl: %{value}<br>"
+                "Anteil der Pfade: %{customdata:.1f}%<extra></extra>"
+            )
        ),
        link=dict(
            source=sources,
            target=targets,
-            value=values
+            value=values,
+            customdata=link_percentages,
+            hovertemplate=(
+                "%{source.label} → %{target.label}<br>"
+                "Anzahl: %{value}<br>"
+                "Anteil der Verbindungen: %{customdata:.1f}%<extra></extra>"
+            )
        )
    )])
    layout = get_standard_layout(
@ -849,7 +1123,7 @@ def create_path_diagram(data):
    layout["autosize"] = True
    fig.update_layout(**layout)
    fig.show(config={"responsive": True})
-    export_figure_local(fig, "create_path_diagram", export_fig_create_path_diagram, bib_filename)
+    export_figure_local(fig, "create_path_diagram", export_fig_create_path_diagram)

 #############

@ -953,22 +1227,54 @@ def create_sankey_diagram(bib_database):
        colors['positiveHighlight']     # Ausgewählte Quellen
    ]

+    node_values = [
+        initial_sources,
+        screened_sources,
+        quality_sources,
+        relevance_sources,
+        thematic_sources,
+        recent_sources,
+        classic_sources,
+        selected_sources
+    ]
+    node_percentages = [
+        value / initial_sources * 100 if initial_sources else 0
+        for value in node_values
+    ]
+    link_percentages = [
+        value / initial_sources * 100 if initial_sources else 0
+        for value in values
+    ]
+
    # Sankey-Diagramm erstellen
    node_config = {
        **plot_styles["sankey_node"],
        "label": node_labels,
-        "color": node_colors
+        "color": node_colors,
+        "customdata": node_percentages,
+        "hovertemplate": (
+            "%{label}<br>"
+            "Anzahl: %{value}<br>"
+            "Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>"
+        )
    }
    # Remove any invalid 'font' key if present
    node_config.pop("font", None)
+    link_config = {
+        **plot_styles["sankey_link"],
+        "source": sources,
+        "target": targets,
+        "value": values,
+        "customdata": link_percentages,
+        "hovertemplate": (
+            "%{source.label} → %{target.label}<br>"
+            "Anzahl: %{value}<br>"
+            "Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>"
+        )
+    }
    fig = go.Figure(go.Sankey(
        node=node_config,
-        link=dict(
-            **plot_styles["sankey_link"],
-            source=sources,
-            target=targets,
-            value=values
-        )
+        link=link_config
    ))
    # Layout anpassen
    layout = get_standard_layout(
@ -982,7 +1288,7 @@ def create_sankey_diagram(bib_database):
    layout["autosize"] = True
    fig.update_layout(**layout)
    fig.show(config={"responsive": True})
-    export_figure_local(fig, "create_sankey_diagram", export_fig_create_sankey_diagram, bib_filename)
+    export_figure_local(fig, "create_sankey_diagram", export_fig_create_sankey_diagram)

 ##########

@ -1000,31 +1306,33 @@ def visualize_sources_status(bib_database):
    """
    Visualisiert den Status der analysierten und nicht analysierten Quellen pro Suchordner.
    """
+    search_terms = {
+        '0': 'digital:learning',
+        '1': 'learning:management:system',
+        '2': 'online:lernplattform',
+        '3': 'online:lernumgebung',
+        '4': 'mooc',
+        '5': 'e-learning',
+        '6': 'bildung:technologie',
+        '7': 'digital:medien',
+        '8': 'blended:learning',
+        '9': 'digital:lernen',
+        'a': 'online:lernen',
+        'b': 'online:learning'
+    }
+    numbers_order = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b']
+    type_order = [
+        'Zeitschriftenartikel',
+        'Buch',
+        'Buchteil',
+        'Bericht',
+        'Konferenz-Paper',
+        'Studienbrief'
+    ]
    search_folder_tags = [
-        "#1:zeitschriftenartikel:learning:management:system",
-        "#2:zeitschriftenartikel:online:lernplattform",
-        "#3:zeitschriftenartikel:online:lernumgebung",
-        "#4:zeitschriftenartikel:mooc",
-        "#5:zeitschriftenartikel:e-learning",
-        "#6:zeitschriftenartikel:bildung:technologie",
-        "#7:zeitschriftenartikel:digital:medien",
-        "#8:zeitschriftenartikel:blended:learning",
-        "#9:zeitschriftenartikel:digital:lernen",
-        "#a:zeitschriftenartikel:online:lernen",
-        "#b:zeitschriftenartikel:online:learning",
-        "#0:zeitschriftenartikel:digital:learning",
-        "#1:konferenz-paper:learning:management:system",
-        "#2:konferenz-paper:online:lernplattform",
-        "#3:konferenz-paper:online:lernumgebung",
-        "#4:konferenz-paper:mooc",
-        "#5:konferenz-paper:e-learning",
-        "#6:konferenz-paper:bildung:technologie",
-        "#7:konferenz-paper:digital:medien",
-        "#8:konferenz-paper:blended:learning",
-        "#9:konferenz-paper:digital:lernen",
-        "#a:konferenz-paper:online:lernen",
-        "#b:konferenz-paper:online:learning",
-        "#0:konferenz-paper:digital:learning"
+        f"#{number}:{type_}:{search_terms[number]}".lower()
+        for type_ in type_order
+        for number in numbers_order
    ]

    category_tags = {"promotion:argumentation", "promotion:kerngedanke", "promotion:weiterführung", "promotion:schlussfolgerung"}
@ -1079,21 +1387,45 @@ def visualize_sources_status(bib_database):
        tablefmt='grid'
    ))

+    total_identifiziert = sum(counts["Identifiziert"] for counts in source_data.values())
+    analysiert_percentages = [
+        value / total_identifiziert * 100 if total_identifiziert else 0
+        for value in analysiert_values
+    ]
+    nicht_analysiert_percentages = [
+        value / total_identifiziert * 100 if total_identifiziert else 0
+        for value in nicht_analysiert_values
+    ]
+
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=tags,
        y=analysiert_values,
        name='Analysiert',
-        marker=dict(color=analysiert_colors)
+        marker=dict(color=analysiert_colors),
+        customdata=analysiert_percentages,
+        hovertemplate=(
+            "<b>%{x}</b><br>"
+            "Status: Analysiert<br>"
+            "Anzahl: %{y}<br>"
+            "Anteil: %{customdata:.1f}%<extra></extra>"
+        )
    ))
    fig.add_trace(go.Bar(
        x=tags,
        y=nicht_analysiert_values,
        name='Nicht-Analysiert',
-        marker=plot_styles['balken_primaryLine']
+        marker=plot_styles['balken_primaryLine'],
+        customdata=nicht_analysiert_percentages,
+        hovertemplate=(
+            "<b>%{x}</b><br>"
+            "Status: Nicht-Analysiert<br>"
+            "Anzahl: %{y}<br>"
+            "Anteil: %{customdata:.1f}%<extra></extra>"
+        )
    ))
    layout = get_standard_layout(
-        title=f'Analyse- und Stichprobenstatus je Suchordner (n={sum(counts["Identifiziert"] for counts in source_data.values())}, Stand: {current_date})',
+        title=f'Analyse- und Stichprobenstatus je Suchordner (n={total_identifiziert}, Stand: {current_date})',
        x_title='Suchbegriffsordner',
        y_title='Anzahl der Quellen'
    )
@ -1110,7 +1442,7 @@ def visualize_sources_status(bib_database):
    layout["autosize"] = True
    fig.update_layout(**layout)
    fig.show(config={"responsive": True})
-    export_figure_local(fig, "visualize_sources_status", export_fig_visualize_sources_status, bib_filename)
+    export_figure_local(fig, "visualize_sources_status", export_fig_visualize_sources_status)

 #############

@ -1196,8 +1528,8 @@ def visualize_languages(bib_database):
        color='Gruppe',
        color_discrete_map=color_discrete_map,
        title=f'Sprachverteilung der analysierten Quellen (n={sum(norm_counts.values())}, Stand: {current_date})',
-        hover_data=["Sprache", "Gruppe", "Anzahl", "Anteil (%)"],
-        barmode="stack"
+        barmode="stack",
+        custom_data=['Gruppe', 'Anteil (%)']
    )

    layout = get_standard_layout(
@ -1212,10 +1544,18 @@ def visualize_languages(bib_database):
    # Ergänzung: Y-Achse logarithmisch skalieren
    layout["yaxis_type"] = "log"
    fig.update_layout(**layout)
+    fig.update_traces(
+        hovertemplate=(
+            "<b>%{x}</b><br>"
+            "Sprachgruppe: %{customdata[0]}<br>"
+            "Anzahl: %{y}<br>"
+            "Anteil: %{customdata[1]:.2f}%<extra></extra>"
+        )
+    )
    fig.show(config={"responsive": True})
    # Tabelle ausgeben
    print(tabulate(df.sort_values("Anzahl", ascending=False), headers="keys", tablefmt="grid", showindex=False))
-    export_figure_local(fig, "visualize_languages", export_fig_visualize_languages, bib_filename)
+    export_figure_local(fig, "visualize_languages", export_fig_visualize_languages)

 # Visualisierung der Verteilung von ENTRYTYPE innerhalb jeder Sprache
 def visualize_language_entrytypes(bib_database):
@ -1265,6 +1605,8 @@ def visualize_language_entrytypes(bib_database):
    grouped.rename(columns={'ENTRYTYPE': 'Eintragstyp'}, inplace=True)
    # Anteil innerhalb Sprache (%)
    grouped["Anteil innerhalb Sprache (%)"] = grouped.groupby("Sprache")["Anzahl"].transform(lambda x: (x / x.sum() * 100).round(2))
+    total_entrytypes = grouped['Anzahl'].sum()
+    grouped["Anteil Gesamt (%)"] = grouped['Anzahl'] / total_entrytypes * 100 if total_entrytypes else 0

    # Mapping Eintragstyp zu Typgruppe
    eintragstyp_gruppen = {
@ -1301,7 +1643,8 @@ def visualize_language_entrytypes(bib_database):
        barmode="group",
        title=f'Verteilung der Eintragstypen pro Sprache (n={len(df)}, Stand: {current_date})',
        text='Anzahl',
-        labels={'Sprache': 'Sprache', 'Eintragstyp': 'Eintragstyp', 'Anzahl': 'Anzahl', 'Typgruppe': 'Typgruppe'}
+        labels={'Sprache': 'Sprache', 'Eintragstyp': 'Eintragstyp', 'Anzahl': 'Anzahl', 'Typgruppe': 'Typgruppe'},
+        custom_data=['Eintragstyp', 'Typgruppe', 'Anteil Gesamt (%)', 'Anteil innerhalb Sprache (%)']
    )
    layout = get_standard_layout(
        title=fig.layout.title.text,
@ -1315,9 +1658,19 @@ def visualize_language_entrytypes(bib_database):
    # Ergänzung: Y-Achse logarithmisch skalieren
    layout["yaxis_type"] = "log"
    fig.update_layout(**layout)
+    fig.update_traces(
+        hovertemplate=(
+            "<b>%{x}</b><br>"
+            "Eintragstyp: %{customdata[0]}<br>"
+            "Typgruppe: %{customdata[1]}<br>"
+            "Anzahl: %{y}<br>"
+            "Anteil gesamt: %{customdata[2]:.2f}%<br>"
+            "Anteil innerhalb Sprache: %{customdata[3]:.2f}%<extra></extra>"
+        )
+    )
    fig.show(config={"responsive": True})
    print(tabulate(grouped.sort_values(["Sprache", "Eintragstyp"]), headers=["Sprache", "Eintragstyp", "Anzahl", "Anteil innerhalb Sprache (%)", "Typgruppe"], tablefmt="grid", showindex=False))
-    export_figure_local(fig, "visualize_language_entrytypes", export_fig_visualize_languages, bib_filename)
+    export_figure_local(fig, "visualize_language_entrytypes", export_fig_visualize_languages)

 #############

@ -1360,9 +1713,11 @@ visualize_tags(bib_database)
 visualize_index(bib_database)
 visualize_research_questions(bib_database)
 visualize_categories(bib_database)
+visualize_relevance_vs_research_questions(bib_database)
+visualize_relevance_vs_categories(bib_database)
+visualize_relevance_vs_search_terms(bib_database)
 visualize_time_series(bib_database)
 visualize_top_authors(bib_database)
-visualize_top_publications(bib_database)
 data = prepare_path_data(bib_database)
 create_path_diagram(data)
 create_sankey_diagram(bib_database)
--- a/cleaned_Literaturverzeichnis.bib
+++ b/cleaned_Literaturverzeichnis.bib
--- a/deskriptive-literaturauswahl.py
+++ b/deskriptive-literaturauswahl.py
@ -218,14 +218,13 @@ layout["legend"] = dict(
    itemdoubleclick="toggle"
 )
 layout["yaxis3"] = dict(
-    title="Abweichung (ΔSCₙ)",
+    title=dict(text="Abweichung (ΔSCₙ)", font=dict(color=colors["text"])),
    overlaying="y",
    side="right",
    showgrid=False,
    zeroline=True,
    zerolinewidth=2,
    zerolinecolor='grey',
-    titlefont=dict(color=colors["text"]),
    tickfont=dict(color=colors["text"]),
    anchor="free",
    position=1.0
Author	SHA1	Message	Date
Jochen Hanisch-Johannsen	1abaf478cd	Promotion: Suchergebnisse optimieren	2025-10-19 23:44:33 +02:00
Jochen Hanisch-Johannsen	02ec176582	Promotion: Bibliothek aktualisiert und in Netzwerkanalyse prozentualen Anteil hinzugefügt	2025-10-19 16:37:27 +02:00
Jochen Hanisch-Johannsen	127fc78b81	Promotion: Plotly 6 Kompatibilität, verbesserte Korrelation-Hovers und CI‑Layout-Fixes Motivation Plotly 6 hat veraltete Achsenattribute (titlefont) entfernt; dadurch brachen 2D/3D‑Layouts und Sekundärachsen. Korrelationen sollten im Hover klarer und einzeln (closest) erklärbar sein. PNG‑Export bleibt funktional mit Kaleido 1.x; Code sollte robust bleiben. Änderungen (nach Datei) Jochen-Hanisch/CI/ci_template/plotly_template.py Achsentitel auf Plotly‑6‑Konvention umgestellt: 2D: xaxis.title/yaxis.title jetzt als dict(text, font) statt titlefont. 3D: scene.xaxis\|yaxis\|zaxis.title auf dict(text, font) umgestellt. Sekundärachse (yaxis2): Titelobjekt sauber erzeugt, Defaults für tickfont, Linienfarben etc. gesetzt. Keine API-Änderung der Helper‑Signaturen, nur interne Struktur aktualisiert. Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/analyse_korrelation.py Hovers angereichert: n_observations (Stichprobe), cooccurrence_count (gemeinsame Treffer), prägnante „Interpretation“-Zeile. Hovertemplate über customdata neu strukturiert (zeigt X ↔ Y, r, p, Signifikanz, n, gemeinsame Treffer, Interpretation). hovermode='closest' gesetzt, damit pro Punkt gezielt der Hover angezeigt wird (kein „x unified“). Farbskala dynamisch, 0 wird weiß verankert. Auf Wunsch: Artikelliste im Hover entfernt; Punktgröße wieder direkt size='abs_correlation'. Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/deskriptive-literaturauswahl.py yaxis3 auf Plotly‑6‑Notation umgestellt: title=dict(text=..., font=...); übrige Layoutwerte unverändert. Research/Eigene Forschungsprojekte/Kompetenzentwicklung/simulation-bildungswirkgefuege.py Alle 3D‑Achsen (Scatter3D, Trajektorie, Morphologie) auf title=dict(text, font) migriert. Sekundärachsentitel (yaxis2) auf title=dict(...) umgestellt. Keine inhaltlichen Änderungen an Daten/Logik; reines Layout‑Refactoring für Plotly‑6‑Kompatibilität. 
Auswirkungen Keine Breaking Changes in den Helper‑APIs, aber visuelle Kleinigkeiten (Achsentitel/Spacing) können sich leicht ändern. Sekundärachsen werden konsistent im CI‑Farbschema gerendert. Korrelationen: Alle Punkte sind über Hover eindeutig erfassbar; keine Zusammenfassung pro X‑Spalte mehr. Validierung Korrelationen: Hovers zeigen r/p/Signifikanz/n/Gemeinsame Treffer korrekt; CSV‑Export bleibt unverändert. Suche nach Alt‑Attributen: Im betroffenen Ordner keine titlefont‑Vorkommen mehr. Simulation: Läuft durch; PNG‑Export lokal mit Plotly 6.3.1 + Kaleido 1.1.0 + installiertem Chrome erfolgreich. Deployment‑Hinweise (Umgebung) Für PNG‑Export: Plotly ≥6,<7 und Kaleido ≥1.1.0 sowie einmalig plotly_get_chrome -y. Keine Code‑Änderungen nötig für HTML‑Export. Follow‑ups Falls außerhalb der angepassten Ordner noch Skripte titlefont nutzen, bitte melden; Migration ist analog trivial.	2025-10-11 20:26:08 +02:00
Jochen Hanisch-Johannsen	835f58de3f	Promotion: Verbesserung Hover in der Korrelationsanalyse	2025-10-11 18:21:17 +02:00
Jochen Hanisch-Johannsen	4e7e479304	Promotion: Aktualisierung Suchergebnisse	2025-10-11 17:57:54 +02:00
Jochen Hanisch-Johannsen	7913d842f6	Netzwerkanalyse: Visualisierung der Relevanz	2025-10-06 00:46:52 +02:00
Jochen Hanisch-Johannsen	6f6475b7d0	Promotion: Anpassungen an den Visualisierungen; Bibliothek aktualisiert.	2025-10-06 00:13:19 +02:00
Jochen Hanisch-Johannsen	1e9b84bae2	Promotion: 02-01 Suchergebnisse aktualisiert	2025-10-05 22:04:55 +02:00
Jochen Hanisch-Johannsen	a2e7055e14	Netzwerkanalyse: Änderungen Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/analyse_netzwerk.py:678-699 bereinigt – Hilfsfunktionen und visualize_top_publications komplett entfernt, damit die wenig aussagekräftige Grafik entfällt. Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/analyse_netzwerk.py:1314-1320 Funktionsaufruf gestrichen, sodass das Skript nach den verbleibenden Visualisierungen endet. Tests Nicht ausgeführt (nur Code entfernt). --- Bibliothek: Aktualisiert	2025-10-05 00:50:11 +02:00
Jochen Hanisch-Johannsen	3a69f9d3ed	Promotion: Geo-Literatur-bib und Suchergebnisse aktualisiert	2025-10-04 23:52:22 +02:00
Jochen Hanisch-Johannsen	f7b41790b8	Netzwerkanalyse: Anpassungen export_figure_local nutzt nun konsistent die globale Bib-Datei und erwartet keinen ungenutzten Parameter mehr; sämtliche Aufrufe wurden daraufhin bereinigt (analyse_netzwerk.py:66, analyse_netzwerk.py:345, analyse_netzwerk.py:452, analyse_netzwerk.py:517, analyse_netzwerk.py:586, analyse_netzwerk.py:659, analyse_netzwerk.py:724). Die Tag-Kombinationen für die Häufigkeitsanalyse werden direkt kleingeschrieben erzeugt, sodass sie zuverlässig mit den bereits normalisierten Keyword-Strings matchen und die Zählung nun greift (analyse_netzwerk.py:382-402).	2025-10-04 16:00:07 +02:00
Jochen Hanisch-Johannsen	32639222c3	Korrelation: CI-Verbesserung und Konsistenzprüfung	2025-10-04 15:46:24 +02:00
Jochen Hanisch-Johannsen	f30480b297	Korrelation: Farbanpassung und Konsistenzprüfung	2025-10-04 15:45:05 +02:00