From c60d7789e31ce3e833a584075a233dc96a432d12 Mon Sep 17 00:00:00 2001
From: Jochen Hanisch-Johannsen
Date: Sun, 11 May 2025 14:55:15 +0200
Subject: [PATCH] Testpush

---
 Automatisierte Literaturanalyse.py                 |    0
 Geovisualization                                   |   80 +
 Geovisualization copy.py                           |   78 +
 ...ranalyse db | Netzwerk- und Pfadanalyse.py      | 1482 +++++++++++++++++
 4 files changed, 1640 insertions(+)
 create mode 100644 Automatisierte Literaturanalyse.py
 create mode 100644 Geovisualization
 create mode 100644 Geovisualization copy.py
 create mode 100644 Systematische Literaturanalyse db | Netzwerk- und Pfadanalyse.py

diff --git a/Automatisierte Literaturanalyse.py b/Automatisierte Literaturanalyse.py
new file mode 100644
index 0000000..e69de29
diff --git a/Geovisualization b/Geovisualization
new file mode 100644
index 0000000..eef7ef8
--- /dev/null
+++ b/Geovisualization
@@ -0,0 +1,80 @@
+import os
+import pandas as pd
+import bibtexparser
+import folium
+from tqdm import tqdm
+
+# Clear the terminal
+os.system('cls' if os.name == 'nt' else 'clear')
+
+# File paths
+geonames_file_path = 'allCountries.txt'
+bib_file_path = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/Literaturverzeichnis.bib'
+cleaned_bib_file_path = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/cleaned_Literaturverzeichnis.bib'
+
+# Column names per the GeoNames README
+columns = [
+    'geonameid', 'name', 'asciiname', 'alternatenames', 'latitude', 'longitude',
+    'feature class', 'feature code', 'country code', 'cc2', 'admin1 code',
+    'admin2 code', 'admin3 code', 'admin4 code', 'population', 'elevation', 'dem',
+    'timezone', 'modification date'
+]
+
+# Load the GeoNames data
+print("Loading GeoNames data...")
+geo_df = pd.read_csv(geonames_file_path, sep='\t', header=None, names=columns, usecols=['name', 'asciiname', 'latitude', 'longitude'])
+geo_df.dropna(subset=['latitude', 'longitude'], inplace=True)
+print("GeoNames data loaded.")
+
+# Load the BibTeX file
+print("Loading BibTeX file...")
+with open(bib_file_path, encoding='utf-8') as bibtex_file:
+    bib_database = bibtexparser.load(bibtex_file)
+print("BibTeX file loaded.")
+
+# Extract and clean place names
+print("Extracting and cleaning place names...")
+locations = set()
+for entry in bib_database.entries:
+    if 'address' in entry:
+        for loc in entry['address'].split(';'):
+            locations.update(loc.split(','))
+
+cleaned_locations = {loc.strip() for loc in locations}
+print("Place names extracted and cleaned.")
+
+# Resolve geo-coordinates (exact, case-insensitive match on name or asciiname)
+def find_coordinates(location):
+    match = geo_df[(geo_df['name'].str.lower() == location.lower()) | (geo_df['asciiname'].str.lower() == location.lower())]
+    if not match.empty:
+        return match.iloc[0]['latitude'], match.iloc[0]['longitude']
+    return None, None
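+# Performance note (a sketch, not part of the original script): find_coordinates
+# scans the full frame once per location. For many lookups a one-off index is
+# cheaper, e.g.
+#
+#     geo_lookup = (geo_df.assign(key=geo_df['name'].str.lower())
+#                   .drop_duplicates('key').set_index('key'))
+#     lat, lon = geo_lookup.loc['berlin', ['latitude', 'longitude']]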
+
+print("Searching for geo-coordinates...")
+location_coords = {}
+
+for location in tqdm(cleaned_locations, desc="Processing place names"):
+    latitude, longitude = find_coordinates(location)
+    if latitude is not None and longitude is not None:
+        location_coords[location] = (latitude, longitude)
+
+print("Geo-coordinates resolved.")
+
+# Build the map
+print("Creating map...")
+map_center = [geo_df['latitude'].mean(), geo_df['longitude'].mean()]
+literature_map = folium.Map(location=map_center, zoom_start=2)
+
+for location, coords in location_coords.items():
+    folium.Marker(location=coords, popup=location).add_to(literature_map)
+
+map_file_path = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/literature_map.html'
+literature_map.save(map_file_path)
+print(f"Map saved to {map_file_path}")
+
+# Save the cleaned BibTeX file
+print("Saving cleaned BibTeX file...")
+with open(cleaned_bib_file_path, 'w', encoding='utf-8') as bibtex_file:
+    bibtexparser.dump(bib_database, bibtex_file)
+print(f"Cleaned BibTeX file saved to {cleaned_bib_file_path}")
diff --git a/Geovisualization copy.py b/Geovisualization copy.py
new file mode 100644
index 0000000..6f272dd
--- /dev/null
+++ b/Geovisualization copy.py
@@ -0,0 +1,78 @@
+import os
+import bibtexparser
+import pandas as pd
+from tqdm import tqdm
+import folium
+from folium.plugins import Fullscreen
+
+# Clear the terminal
+os.system('cls' if os.name == 'nt' else 'clear')
+
+# File paths
+geonames_file = 'allCountries.txt'
+bib_file = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/Literaturverzeichnis.bib'
+
+# Load the GeoNames data
+print("Loading GeoNames data...")
+geonames_columns = [
+    'geonameid', 'name', 'asciiname', 'alternatenames', 'latitude',
+    'longitude', 'feature class', 'feature code', 'country code', 'cc2',
+    'admin1 code', 'admin2 code', 'admin3 code', 'admin4 code', 'population',
+    'elevation', 'dem', 'timezone', 'modification date'
+]
+
+chunksize = 10**6
+# Collect the chunks first and concatenate once; concatenating inside the
+# loop copies the accumulated frame on every iteration.
+chunks = []
+for chunk in tqdm(pd.read_csv(geonames_file, sep='\t', header=None, names=geonames_columns, chunksize=chunksize, dtype=str, encoding='utf-8')):
+    chunks.append(chunk)
+geonames_data = pd.concat(chunks, ignore_index=True)
+
+# Load the BibTeX data
+print("Loading BibTeX data...")
+with open(bib_file, encoding='utf-8') as bibtex_file:
+    bib_database = bibtexparser.load(bibtex_file)
+
+# Extract and clean place names
+print("Extracting and cleaning place names...")
+locations = set()
+for entry in bib_database.entries:
+    if 'address' in entry:
+        locations.update(entry['address'].replace(';', ',').replace('&', 'and').split(','))
+
+locations = {loc.strip() for loc in locations}
+print(f"Cleaned place names: {locations}")
+
+# Assign geo-coordinates
+print("Assigning geo-coordinates...")
+geo_data = []
+for location in tqdm(locations):
+    # regex=False: place names such as 'Washington (D.C.)' would otherwise be
+    # interpreted as regular expressions and can raise errors
+    matching_rows = geonames_data[geonames_data['name'].str.contains(location, case=False, na=False, regex=False)]
+    if not matching_rows.empty:
+        best_match = matching_rows.iloc[0]
+        geo_data.append({
+            'name': location,
+            'latitude': best_match['latitude'],
+            'longitude': best_match['longitude']
+        })
+
+if not geo_data:
+    print("No valid coordinates found.")
+else:
+    df = pd.DataFrame(geo_data)
+
+    # Build the map
+    print("Creating map...")
+    m = folium.Map(location=[0, 0], zoom_start=2)
+
+    for _, row in df.iterrows():
+        # The CSV was read with dtype=str, so cast the coordinates to float
+        folium.Marker(
+            location=[float(row['latitude']), float(row['longitude'])],
+            popup=row['name']
+        ).add_to(m)
+
+    # Add fullscreen control and LayerControl
+    Fullscreen(position='topright').add_to(m)
+    folium.LayerControl().add_to(m)
+
+    # Save the map
+    m.save('literature_map_with_zoom.html')
+    print("Map saved as 'literature_map_with_zoom.html'.")
diff --git a/Systematische Literaturanalyse db | Netzwerk- und Pfadanalyse.py b/Systematische Literaturanalyse db | Netzwerk- und Pfadanalyse.py
new file mode 100644
index 0000000..fffd2e0
--- /dev/null
+++ b/Systematische Literaturanalyse db | Netzwerk- und Pfadanalyse.py
@@ -0,0 +1,1482 @@
+"""
+CAVE!!!!!
+
+The library must be exported from Zotero as BibTeX!
+"""
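+# Note: despite the warning above, this script reads the Zotero SQLite
+# database directly (see load_zotero_entries below). A minimal BibTeX-based
+# fallback, assuming an exported file next to the script, would be:
+#
+#     import bibtexparser
+#     with open('Literaturverzeichnis.bib', encoding='utf-8') as f:
+#         bib_database = bibtexparser.load(f)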
+""" + +import os + +# Clear the terminal +os.system('cls' if os.name == 'nt' else 'clear') + +import bibtexparser +import pandas as pd +import numpy as np +import networkx as nx +import matplotlib.pyplot as plt +from datetime import datetime +from collections import defaultdict, Counter +from itertools import product +from wordcloud import WordCloud +from tabulate import tabulate +import plotly.express as px +import plotly.graph_objects as go +import random +import math +import re +import subprocess + +# Export-Flags für Visualisierungen +export_fig_visualize_network = False +export_fig_visualize_tags = False +export_fig_visualize_index = False +export_fig_visualize_research_questions = False +export_fig_visualize_categories = False +export_fig_visualize_time_series = False +export_fig_visualize_top_authors = False +export_fig_visualize_top_publications = False +export_fig_create_path_diagram = False +export_fig_create_sankey_diagram = False +export_fig_visualize_sources_status = False +export_fig_create_wordcloud_from_titles = False +export_fig_visualize_search_term_distribution = False + +# Optional: slugify-Funktion +def slugify(value): + return re.sub(r'[^a-zA-Z0-9_-]', '', value.replace(' ', '_').lower()) + +# Exportfunktionen für jede Visualisierung +def export_visualize_network(fig): + if export_fig_visualize_network: + safe_filename = slugify("visualize_network") + export_path = f"{safe_filename}.html" + fig.write_html(export_path, full_html=True, include_plotlyjs="cdn") + remote_path = "jochen-hanisch@sternenflottenakademie.local:/mnt/deep-space-nine/public/plot/promotion/" + try: + subprocess.run(["scp", export_path, remote_path], check=True, capture_output=True, text=True) + print(f"✅ Datei '{export_path}' erfolgreich übertragen.") + except subprocess.CalledProcessError as e: + print("❌ Fehler beim Übertragen:") + print(e.stderr) + +def export_visualize_tags(fig): + if export_fig_visualize_tags: + safe_filename = slugify("visualize_tags") + export_path = f"{safe_filename}.html" + fig.write_html(export_path, full_html=True, include_plotlyjs="cdn") + remote_path = "jochen-hanisch@sternenflottenakademie.local:/mnt/deep-space-nine/public/plot/promotion/" + try: + subprocess.run(["scp", export_path, remote_path], check=True, capture_output=True, text=True) + print(f"✅ Datei '{export_path}' erfolgreich übertragen.") + except subprocess.CalledProcessError as e: + print("❌ Fehler beim Übertragen:") + print(e.stderr) + +def export_visualize_index(fig): + if export_fig_visualize_index: + safe_filename = slugify("visualize_index") + export_path = f"{safe_filename}.html" + fig.write_html(export_path, full_html=True, include_plotlyjs="cdn") + remote_path = "jochen-hanisch@sternenflottenakademie.local:/mnt/deep-space-nine/public/plot/promotion/" + try: + subprocess.run(["scp", export_path, remote_path], check=True, capture_output=True, text=True) + print(f"✅ Datei '{export_path}' erfolgreich übertragen.") + except subprocess.CalledProcessError as e: + print("❌ Fehler beim Übertragen:") + print(e.stderr) + +def export_visualize_research_questions(fig): + if export_fig_visualize_research_questions: + safe_filename = slugify("visualize_research_questions") + export_path = f"{safe_filename}.html" + fig.write_html(export_path, full_html=True, include_plotlyjs="cdn") + remote_path = "jochen-hanisch@sternenflottenakademie.local:/mnt/deep-space-nine/public/plot/promotion/" + try: + subprocess.run(["scp", export_path, remote_path], check=True, capture_output=True, text=True) + print(f"✅ Datei 
+
+# One thin wrapper per figure, so the existing call sites keep their names.
+def export_visualize_network(fig):
+    _export_figure(fig, "visualize_network", export_fig_visualize_network)
+
+def export_visualize_tags(fig):
+    _export_figure(fig, "visualize_tags", export_fig_visualize_tags)
+
+def export_visualize_index(fig):
+    _export_figure(fig, "visualize_index", export_fig_visualize_index)
+
+def export_visualize_research_questions(fig):
+    _export_figure(fig, "visualize_research_questions", export_fig_visualize_research_questions)
+
+def export_visualize_categories(fig):
+    _export_figure(fig, "visualize_categories", export_fig_visualize_categories)
+
+def export_visualize_time_series(fig):
+    _export_figure(fig, "visualize_time_series", export_fig_visualize_time_series)
+
+def export_visualize_top_authors(fig):
+    _export_figure(fig, "visualize_top_authors", export_fig_visualize_top_authors)
+
+def export_visualize_top_publications(fig):
+    _export_figure(fig, "visualize_top_publications", export_fig_visualize_top_publications)
+
+def export_create_path_diagram(fig):
+    _export_figure(fig, "create_path_diagram", export_fig_create_path_diagram)
+
+def export_create_sankey_diagram(fig):
+    _export_figure(fig, "create_sankey_diagram", export_fig_create_sankey_diagram)
+
+def export_visualize_sources_status(fig):
+    _export_figure(fig, "visualize_sources_status", export_fig_visualize_sources_status)
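+# Adding an exporter for a future figure takes a flag and a one-line wrapper
+# (names here are hypothetical):
+#
+#     export_fig_my_plot = False
+#     def export_my_plot(fig):
+#         _export_figure(fig, "my_plot", export_fig_my_plot)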
+
+# Colour definitions
+colors = {
+    "background": "#003366",          # background
+    "text": "#333333",                # text
+    "accent": "#663300",              # accent
+    "primaryLine": "#660066",         # Bildungswirkfaktor
+    "secondaryLine": "#cc6600",       # Bildungswirkindikator
+    "depthArea": "#006666",           # Kompetenzmessunsicherheit
+    "brightArea": "#66CCCC",          # Kompetenzentwicklungsunsicherheit
+    "positiveHighlight": "#336600",   # positive highlight
+    "negativeHighlight": "#990000",   # negative highlight
+    "white": "#ffffff"                # white
+}
+
+# Colours used for the word-cloud words
+word_colors = [
+    colors["white"],
+    colors["brightArea"],
+    colors["positiveHighlight"],
+    colors["negativeHighlight"]
+]
+
+# Current date
+current_date = datetime.now().strftime("%Y-%m-%d")
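+
+# The query below assumes Zotero's standard schema (items, itemData, fields,
+# itemDataValues, creators, tags, collections). A quick way to check the
+# available collection names before running the full analysis (sketch):
+#
+#     con = sqlite3.connect('/Users/jochen_hanisch-johannsen/Zotero/zotero.sqlite')
+#     print([r[0] for r in con.execute("SELECT collectionName FROM collections")])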
+
+# Load the Zotero SQLite database and build a structure compatible with
+# bibtexparser's bib_database.entries
+def load_zotero_entries(sqlite_path):
+    conn = sqlite3.connect(sqlite_path)
+    cursor = conn.cursor()
+
+    query = """
+    SELECT
+        items.itemID,
+        COALESCE(value_title.value, '') AS title,
+        COALESCE(value_year.value, '') AS year,
+        COALESCE(creators.lastName || ', ' || creators.firstName, '') AS author,
+        GROUP_CONCAT(DISTINCT tags.name) AS keywords,
+        itemTypes.typeName AS type
+    FROM items
+
+    -- Title
+    LEFT JOIN itemData AS title_data ON items.itemID = title_data.itemID
+    LEFT JOIN fields AS title_field ON title_data.fieldID = title_field.fieldID AND title_field.fieldName = 'title'
+    LEFT JOIN itemDataValues AS value_title ON title_data.valueID = value_title.valueID
+
+    -- Year
+    LEFT JOIN itemData AS year_data ON items.itemID = year_data.itemID
+    LEFT JOIN fields AS year_field ON year_data.fieldID = year_field.fieldID AND year_field.fieldName = 'date'
+    LEFT JOIN itemDataValues AS value_year ON year_data.valueID = value_year.valueID
+
+    -- Authors
+    LEFT JOIN itemCreators ON items.itemID = itemCreators.itemID
+    LEFT JOIN creators ON itemCreators.creatorID = creators.creatorID
+
+    -- Tags
+    LEFT JOIN itemTags ON items.itemID = itemTags.itemID
+    LEFT JOIN tags ON itemTags.tagID = tags.tagID
+
+    -- Item type
+    LEFT JOIN itemTypes ON items.itemTypeID = itemTypes.itemTypeID
+
+    -- Collections
+    LEFT JOIN collectionItems ON items.itemID = collectionItems.itemID
+    LEFT JOIN collections ON collectionItems.collectionID = collections.collectionID
+
+    WHERE collections.collectionName IN (
+        'S:01 Learning Management System',
+        'S:02 Online-Lernplattform',
+        'S:03 Online-Lernumgebung',
+        'S:05 eLearning',
+        'S:04 MOOC',
+        'S:06 Bildungstechnologie',
+        'S:07 Digitale Medien',
+        'S:08 Blended Learning',
+        'S:09 Digitales Lernen',
+        'S:12 Digital Learning',
+        'S:10 Online Lernen',
+        'S:11 Online Learning',
+        'S:13 Berichte',
+        'S:14 Agiles Lernen',
+        'S:15 Learning Analytics',
+        'S:16 Dissertationen',
+        'S:17 ePortfolio'
+    )
+    GROUP BY items.itemID
+    """
+
+    cursor.execute(query)
+    rows = cursor.fetchall()
+
+    # Convert the rows into bib_database.entries-compatible dictionaries
+    entries = []
+    for row in rows:
+        item = {
+            'ID': str(row[0]),
+            'title': row[1],
+            'year': row[2],
+            'author': row[3],
+            'keywords': row[4] if row[4] else '',
+            'ENTRYTYPE': row[5]
+        }
+        entries.append(item)
+
+    conn.close()
+    return entries
+
+bib_database = type("BibDatabase", (object,), {})()
+bib_database.entries = load_zotero_entries('/Users/jochen_hanisch-johannsen/Zotero/zotero.sqlite')
+
+# Load the stop word lists
+with open('de_complete.txt', 'r', encoding='utf-8') as file:
+    stop_words_de = set(file.read().split())
+
+with open('en_complete.txt', 'r', encoding='utf-8') as file:
+    stop_words_en = set(file.read().split())
+
+# Combined stop word list
+stop_words = stop_words_de.union(stop_words_en)
+
+# The sample-size helper calculate_sample_size is defined once, with an
+# N <= 0 guard, further below next to the selection diagrams.
+
+# Visualization 1: network analysis
+def visualize_network(bib_database):
+    search_terms = {
+        '0': 'digital:learning',
+        '1': 'learning:management:system',
+        '2': 'online:Lernplattform',
+        '3': 'online:Lernumgebung',
+        '4': 'MOOC',
+        '5': 'e-learning',
+        '6': 'Bildung:Technologie',
+        '7': 'digital:Medien',
+        '8': 'blended:learning',
+        '9': 'digital:lernen',
+        'a': 'online:lernen',
+        'b': 'online:learning'
+    }
+
+    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b']
+    types = [
+        'Zeitschriftenartikel',
+        'Buch',
+        'Buchteil',
+        'Bericht',
+        'Konferenz-Paper'
+    ]
+    tags_to_search = set()
+    for number, type_ in product(numbers, types):
+        search_term = search_terms[number]
+        tag = f'#{number}:{type_}:{search_term}'
+        tags_to_search.add(tag.lower())
+
+    tag_counts = defaultdict(int)
+    for entry in bib_database.entries:
+        if 'keywords' in entry:
+            entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(','))))
+            for keyword in entry_keywords:
+                for tag in tags_to_search:
+                    if tag in keyword:
+                        tag_counts[tag] += 1
+
+    fundzahlen = defaultdict(int)
+    for tag, count in tag_counts.items():
+        # A tag has the shape '#<number>:<type>:<search term>'; the search term
+        # itself may contain colons, so rejoin everything after the second
+        # colon (tag.split(':')[-1] would only return the last token).
+        search_term = ':'.join(tag.split(':')[2:])
+        for key, value in search_terms.items():
+            # tags_to_search was lower-cased above, so compare and store
+            # lower-cased; the graph nodes below use lower-case names too
+            if search_term == value.lower():
+                fundzahlen[value.lower()] += count
+
+    search_terms_network = {
+        "Primäre Begriffe": {
+            "learning:management:system": [
+                "e-learning",
+                "bildung:technologie",
+                "online:lernplattform",
+                "online:lernumgebung",
+                "digital:learning",
+                "digitales:lernen"
+            ]
+        },
+        "Sekundäre Begriffe": {
+            "e-learning": [
+                "mooc",
+                "online:lernplattform"
+            ],
+            "bildung:technologie": [
+                "digital:learning",
+                "digitales:lernen",
+                "blended:learning"
+            ],
+            "digital:learning": [
+                "digitale:medien",
+                "online:learning"
+            ],
+            "digitales:lernen": [
+                "digitale:medien",
+                "online:lernen"
+            ],
+            "blended:learning": ["mooc"]
+        },
+        "Tertiäre Begriffe": {
+            "online:learning": [],
+            "online:lernen": []
+        }
+    }
+
+    G = nx.Graph()
+
+    hierarchy_colors = {
+        "Primäre Begriffe": colors['primaryLine'],
+        "Sekundäre Begriffe": colors['secondaryLine'],
+        "Tertiäre Begriffe": colors['brightArea']
+    }
+
+    def add_terms_to_graph(level, terms):
+        for primary_term, related_terms in terms.items():
+            if primary_term not in G:
+                G.add_node(primary_term, 
color=hierarchy_colors[level], size=fundzahlen.get(primary_term, 10)) + else: + if level == "Tertiäre Begriffe": + G.nodes[primary_term]['color'] = hierarchy_colors[level] + for related_term in related_terms: + if related_term not in G: + G.add_node(related_term, color=hierarchy_colors[level], size=fundzahlen.get(related_term, 10)) + else: + if level == "Tertiäre Begriffe": + G.nodes[related_term]['color'] = hierarchy_colors[level] + G.add_edge(primary_term, related_term) + + for level, terms in search_terms_network.items(): + add_terms_to_graph(level, terms) + + np.random.seed(42) + pos = nx.spring_layout(G) + + x_scale_min, x_scale_max = 0, 10 + y_scale_min, y_scale_max = 0, 10 + + min_x = min(pos[node][0] for node in pos) + max_x = max(pos[node][0] for node in pos) + min_y = min(pos[node][1] for node in pos) + max_y = max(pos[node][1] for node in pos) + + scale_x_range = x_scale_max - x_scale_min + scale_y_range = y_scale_max - y_scale_min + + for node in pos: + x, y = pos[node] + norm_x = scale_x_range * (x - min_x) / (max_x - min_x) + x_scale_min + norm_y = scale_y_range * (y - min_y) / (max_y - min_y) + y_scale_min + pos[node] = (norm_x, norm_y) + + for node in pos: + x, y = pos[node] + x = max(min(x, x_scale_max), x_scale_min) + y = max(min(y, y_scale_max), y_scale_min) + pos[node] = (x, y) + + edge_x = [] + edge_y = [] + for edge in G.edges(): + x0, y0 = pos[edge[0]] + x1, y1 = pos[edge[1]] + edge_x.append(x0) + edge_x.append(x1) + edge_x.append(None) + edge_y.append(y0) + edge_y.append(y1) + edge_y.append(None) + + edge_trace = go.Scatter( + x=edge_x, y=edge_y, + line=dict(width=0.5, color=colors['white']), + hoverinfo='none', + mode='lines') + + # Knoten in drei Traces aufteilen: Primär, Sekundär, Tertiär + primary_nodes = [] + secondary_nodes = [] + tertiary_nodes = [] + + for node in G.nodes(): + color = G.nodes[node]['color'] + size = math.log(G.nodes[node].get('size', 10) + 1) * 10 + x, y = pos[node] + hovertext = f"{node}
Anzahl Funde: {fundzahlen.get(node, 0)}" + node_data = dict(x=x, y=y, text=node, size=size, hovertext=hovertext) + if color == colors['primaryLine']: + primary_nodes.append(node_data) + elif color == colors['secondaryLine']: + secondary_nodes.append(node_data) + elif color == colors['brightArea']: + tertiary_nodes.append(node_data) + + def create_node_trace(nodes, name, color): + return go.Scatter( + x=[n['x'] for n in nodes], + y=[n['y'] for n in nodes], + mode='markers+text', + text=[n['text'] for n in nodes], + hovertext=[n['hovertext'] for n in nodes], + hoverinfo='text', + marker=dict( + size=[n['size'] for n in nodes], + color=color, + line_width=2 + ), + textposition="top center", + textfont=dict(size=12), + name=name + ) + + primary_trace = create_node_trace(primary_nodes, "Primäre Begriffe", colors['primaryLine']) + secondary_trace = create_node_trace(secondary_nodes, "Sekundäre Begriffe", colors['secondaryLine']) + tertiary_trace = create_node_trace(tertiary_nodes, "Tertiäre Begriffe", colors['brightArea']) + + fig = go.Figure(data=[edge_trace, primary_trace, secondary_trace, tertiary_trace], + layout=go.Layout( + title=f'Suchbegriff-Netzwerk nach Relevanz und Semantik (n={sum(fundzahlen.values())}, Stand: {current_date})', + titlefont_size=16, + showlegend=True, + legend=dict( + bgcolor=colors['background'], + bordercolor=colors['white'], + borderwidth=1, + font=dict(color=colors['white']), + itemsizing='constant' + ), + hovermode='closest', + margin=dict(b=20, l=5, r=5, t=40), + xaxis=dict( + range=[x_scale_min, x_scale_max + 1], + showgrid=True, + zeroline=True, + tickmode='linear', + tick0=x_scale_min, + dtick=(x_scale_max - x_scale_min) / 4, + title='Technologische Dimension' + ), + yaxis=dict( + range=[y_scale_min, y_scale_max + 1], + showgrid=True, + zeroline=True, + tickmode='linear', + tick0=y_scale_min, + dtick=(y_scale_max - y_scale_min) / 4, + title='Pädagogische Dimension' + ), + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']) + )) + + fig.show() + export_visualize_network(fig) + + # Einfache Pfadanalyse nach dem Anzeigen der Figur + if 'e-learning' in G and 'online:lernen' in G: + try: + pfad = nx.shortest_path(G, source='e-learning', target='online:lernen') + print(f"Kürzester Pfad von 'e-learning' zu 'online:lernen': {pfad}") + except nx.NetworkXNoPath: + print("Kein Pfad von 'e-learning' zu 'online:lernen' gefunden.") + + # Visualisierung 2: Häufigkeit spezifischer Tags +def visualize_tags(bib_database): + # Definierte Suchbegriffe + search_terms = { + '0': 'digital:learning', + '1': 'learning:management:system', + '2': 'online:Lernplattform', + '3': 'online:Lernumgebung', + '4': 'MOOC', + '5': 'e-learning', + '6': 'Bildung:Technologie', + '7': 'digital:Medien', + '8': 'blended:learning', + '9': 'digital:lernen', + 'a': 'online:lernen', + 'b': 'online:learning' + } + + # Kombinierte Tags erzeugen + numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b'] + types = [ + 'Zeitschriftenartikel', + 'Buch', + 'Buchteil', + 'Bericht', + 'Konferenz-Paper' + ] + tags_to_search = set( + f"#{number}:{type_}:{search_terms[number]}" + for number, type_ in product(numbers, types) + ) + + # Tag-Zählungen initialisieren + tag_counts = defaultdict(int) + if not bib_database or not bib_database.entries: + print("Fehler: Keine Einträge in der Datenbank gefunden.") + return + + for entry in bib_database.entries: + if 'keywords' in entry: + entry_keywords = map( + str.lower, + map(str.strip, 
entry['keywords'].replace('\\#', '#').split(',')) + ) + for keyword in entry_keywords: + for tag in tags_to_search: + if tag in keyword: + tag_counts[tag] += 1 + + # Daten für Visualisierung aufbereiten + data = [ + {'Tag': tag, 'Count': count, 'Type': tag.split(':')[1].lower()} + for tag, count in tag_counts.items() + if count > 0 + ] + + if not data: + print("Warnung: Keine Tags gefunden, die den Suchkriterien entsprechen.") + return + + # Farbzuordnung + color_map = { + 'zeitschriftenartikel': colors['primaryLine'], + 'konferenz-paper': colors['secondaryLine'], + 'buch': colors['depthArea'], + 'buchteil': colors['brightArea'], + 'bericht': colors['accent'] + } + + # Visualisierung erstellen + total_count = sum(tag_counts.values()) + fig = px.bar( + data, + x='Tag', + y='Count', + title=f'Häufigkeit der Suchbegriffe in der Literaturanalyse (n={total_count}, Stand: {current_date})', + labels={'Tag': 'Tag', 'Count': 'Anzahl der Vorkommen'}, + color='Type', + color_discrete_map=color_map, + text_auto=True + ) + + # Layout anpassen + fig.update_layout( + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']), + margin=dict(l=0, r=0, t=40, b=40), + autosize=True + ) + + fig.update_traces( + marker_line_color=colors['white'], + marker_line_width=1.5 + ) + + fig.show(config={"responsive": True}) + export_visualize_tags(fig) + + # Visualisierung 3: Häufigkeit Index +def visualize_index(bib_database): + index_terms = [ + 'Lernsystemarchitektur', + 'Bildungstheorien', + 'Lehr- und Lerneffektivität', + 'Kollaboratives Lernen', + 'Bewertungsmethoden', + 'Technologieintegration', + 'Datenschutz und IT-Sicherheit', + 'Systemanpassung', + 'Krisenreaktion im Bildungsbereich', + 'Forschungsansätze' + ] + + index_counts = defaultdict(int) + for entry in bib_database.entries: + if 'keywords' in entry: + entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(',')))) + for index_term in index_terms: + if index_term.lower() in entry_keywords: + index_counts[index_term] += 1 + + index_data = [{'Index': index, 'Count': count} for index, count in index_counts.items()] + index_data = sorted(index_data, key=lambda x: x['Count'], reverse=True) + + total_count = sum(index_counts.values()) + print(f"Häufigkeit Indizes (Gesamtanzahl: {total_count}):") + print(tabulate(index_data, headers="keys", tablefmt="grid")) + + fig = px.bar(index_data, x='Index', y='Count', title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})', labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'}, text_auto=True) + + fig.update_layout( + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']), + margin=dict(l=0, r=0, t=40, b=40), + autosize=True + ) + + fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5) + + fig.show(config={"responsive": True}) + export_visualize_index(fig) + + # Visualisierung 4: Häufigkeit Forschungsunterfragen +def visualize_research_questions(bib_database): + research_questions = { + 'promotion:fu1': 'Akzeptanz und Nützlichkeit (FU1)', + 'promotion:fu2a': 'Effekt für Lernende (FU2a)', + 'promotion:fu2b': 'Effekt-Faktoren für Lehrende (FU2b)', + 'promotion:fu3': 'Konzeption und Merkmale (FU3)', + 'promotion:fu4a': 'Bildungswissenschaftliche Mechanismen (FU4a)', + 'promotion:fu4b': 'Technisch-gestalterische Mechanismen (FU4b)', + 'promotion:fu5': 'Möglichkeiten und Grenzen (FU5)', + 
'promotion:fu6': 'Beurteilung als Kompetenzerwerbssystem (FU6)', + 'promotion:fu7': 'Inputs und Strategien (FU7)' + } + + rq_counts = defaultdict(int) + for entry in bib_database.entries: + if 'keywords' in entry: + entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(',')))) + for keyword in entry_keywords: + if keyword in research_questions: + rq_counts[keyword] += 1 + + rq_data = [{'Research_Question': research_questions[keyword], 'Count': count} for keyword, count in rq_counts.items()] + rq_data = sorted(rq_data, key=lambda x: x['Count'], reverse=True) + + rq_data_df = pd.DataFrame(rq_data) + + total_count = rq_data_df['Count'].sum() + print(f"Häufigkeit Forschungsunterfragen (Gesamtanzahl: {total_count}):") + print(tabulate(rq_data, headers="keys", tablefmt="grid")) + + fig = px.bar(rq_data_df, x='Research_Question', y='Count', title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})', labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'}, text_auto=True) + + fig.update_layout( + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']), + margin=dict(l=0, r=0, t=40, b=40), + autosize=True + ) + + fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5) + + fig.show(config={"responsive": True}) + export_visualize_research_questions(fig) + + # Visualisierung 5: Häufigkeit spezifischer Kategorien +def visualize_categories(bib_database): + categories = { + 'promotion:argumentation': 'Argumentation', + 'promotion:kerngedanke': 'Kerngedanke', + 'promotion:weiterführung': 'Weiterführung', + 'promotion:schlussfolgerung': 'Schlussfolgerung' + } + + cat_counts = defaultdict(int) + for entry in bib_database.entries: + if 'keywords' in entry: + entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(',')))) + for keyword in entry_keywords: + if keyword in categories: + cat_counts[keyword] += 1 + + cat_data = [{'Category': categories[keyword], 'Count': count} for keyword, count in cat_counts.items()] + cat_data = sorted(cat_data, key=lambda x: x['Count'], reverse=True) + + cat_data_df = pd.DataFrame(cat_data) + + total_count = cat_data_df['Count'].sum() + print(f"Häufigkeit Kategorien (Gesamtanzahl: {total_count}):") + print(tabulate(cat_data, headers="keys", tablefmt="grid")) + + fig = px.bar(cat_data_df, x='Category', y='Count', title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})', labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'}, text_auto=True) + + fig.update_layout( + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']), + margin=dict(l=0, r=0, t=40, b=40), + autosize=True + ) + + fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5) + + fig.show(config={"responsive": True}) + export_visualize_categories(fig) + + # Zeitreihenanalyse der Veröffentlichungen +def extract_year_from_entry(entry): + year_str = entry.get('year', '').strip() + if not year_str: + return None + try: + matches = re.findall(r'\b(19[0-9]{2}|20[0-9]{2})\b', year_str) + years = [int(y) for y in matches if 1900 <= int(y) <= datetime.now().year + 1] + return min(years) if years else None + except Exception as e: + print(f"⚠️ Fehler bei Jahreswert '{year_str}': {e}") + return None + +def 
visualize_time_series(bib_database): + publication_years = [] + + for entry in bib_database.entries: + year = extract_year_from_entry(entry) + if year is not None: + publication_years.append(year) + + if publication_years: + year_counts = Counter(publication_years) + df = pd.DataFrame(year_counts.items(), columns=['Year', 'Count']).sort_values('Year') + + fig = px.line( + df, + x='Year', + y='Count', + title=f'Jährliche Veröffentlichungen in der Literaturanalyse (n={sum(year_counts.values())}, Stand: {current_date})', + labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'} + ) + + fig.update_layout( + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']), + xaxis=dict( + tickmode='linear', + dtick=2, + tick0=min(publication_years) + ), + margin=dict(l=0, r=0, t=40, b=40), + autosize=True + ) + + fig.update_traces(line=dict(color=colors['secondaryLine'], width=3)) + fig.show(config={"responsive": True}) + export_visualize_time_series(fig) + else: + print("Keine gültigen Veröffentlichungsjahre gefunden.") + + # Top Autoren nach Anzahl der Werke +def visualize_top_authors(bib_database): + top_n = 25 # Anzahl der Top-Autoren, die angezeigt werden sollen + author_counts = defaultdict(int) + for entry in bib_database.entries: + if 'author' in entry and entry['author'].strip(): + authors = [a.strip() for a in entry['author'].split(' and ') if a.strip()] + for author in authors: + author_counts[author] += 1 + + top_authors = Counter(author_counts).most_common(top_n) + if top_authors: + df = pd.DataFrame(top_authors, columns=['Author', 'Count']) + + fig = px.bar(df, x='Author', y='Count', title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={sum(author_counts.values())}, Stand: {current_date})', labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'}, text_auto=True) + fig.update_layout( + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']), + margin=dict(l=0, r=0, t=40, b=40), + autosize=True + ) + fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5) + + fig.show(config={"responsive": True}) + export_visualize_top_authors(fig) + else: + print("Keine Autoren gefunden.") + + + # Top Titel nach Anzahl der Werke +def normalize_title(title): + # Entfernen von Sonderzeichen und Standardisierung auf Kleinbuchstaben + title = title.lower().translate(str.maketrans('', '', ",.!?\"'()[]{}:;")) + # Zusammenführen ähnlicher Titel, die sich nur in geringfügigen Details unterscheiden + title = " ".join(title.split()) + # Entfernen häufiger Füllwörter oder Standardphrasen, die die Unterscheidung nicht unterstützen + common_phrases = ['eine studie', 'untersuchung der', 'analyse von'] + for phrase in common_phrases: + title = title.replace(phrase, '') + return title.strip() + +def visualize_top_publications(bib_database): + top_n = 25 # Anzahl der Top-Publikationen, die angezeigt werden sollen + publication_counts = defaultdict(int) + + for entry in bib_database.entries: + invalid_titles = {"pdf", "no title found", "published entry", "", None} + title = normalize_title(entry.get('title', '')) + if title.lower() not in invalid_titles and len(title) > 5: + publication_counts[title] += 1 + + top_publications = sorted(publication_counts.items(), key=lambda x: x[1], reverse=True)[:top_n] + publication_data = [{'Title': title[:50] + '...' 
if len(title) > 50 else title, 'Count': count} for title, count in top_publications] + + df = pd.DataFrame(publication_data) + + fig = px.bar(df, x='Title', y='Count', title=f'Häufig zitierte Publikationen in der Analyse (Top {top_n}, n={sum(publication_counts.values())}, Stand: {current_date})', labels={'Title': 'Titel', 'Count': 'Anzahl der Nennungen'}) + + fig.update_layout( + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']), + xaxis_tickangle=-45, + margin=dict(l=0, r=0, t=40, b=40), + autosize=True + ) + + fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5) + + fig.show(config={"responsive": True}) + export_visualize_top_publications(fig) + + + +########## + + +# Daten vorbereiten +def prepare_path_data(bib_database): + research_questions = { + 'promotion:fu1': 'Akzeptanz und Nützlichkeit (FU1)', + 'promotion:fu2a': 'Effekt für Lernende (FU2a)', + 'promotion:fu2b': 'Effekt-Faktoren für Lehrende (FU2b)', + 'promotion:fu3': 'Konzeption und Merkmale (FU3)', + 'promotion:fu4a': 'Bildungswissenschaftliche Mechanismen (FU4a)', + 'promotion:fu4b': 'Technisch-gestalterische Mechanismen (FU4b)', + 'promotion:fu5': 'Möglichkeiten und Grenzen (FU5)', + 'promotion:fu6': 'Beurteilung als Kompetenzerwerbssystem (FU6)', + 'promotion:fu7': 'Inputs und Strategien (FU7)' + } + + categories = { + 'promotion:argumentation': 'Argumentation', + 'promotion:kerngedanke': 'Kerngedanke', + 'promotion:weiterführung': 'Weiterführung', + 'promotion:schlussfolgerung': 'Schlussfolgerung' + } + + index_terms = [ + 'Lernsystemarchitektur', + 'Bildungstheorien', + 'Lehr- und Lerneffektivität', + 'Kollaboratives Lernen', + 'Bewertungsmethoden', + 'Technologieintegration', + 'Datenschutz und IT-Sicherheit', + 'Systemanpassung', + 'Krisenreaktion im Bildungsbereich', + 'Forschungsansätze' + ] + + entry_types = [ + 'Zeitschriftenartikel', + 'Buch', + 'Buchteil', + 'Bericht', + 'Konferenz-Paper' + ] + + data = [] + + for entry in bib_database.entries: + entry_data = { + 'FU': None, + 'Category': None, + 'Index': None, + 'Type': entry.get('ENTRYTYPE', '').lower() + } + + if 'keywords' in entry: + entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(',')))) + + for key, value in research_questions.items(): + if key in entry_keywords: + entry_data['FU'] = value + + for key, value in categories.items(): + if key in entry_keywords: + entry_data['Category'] = value + + for index_term in index_terms: + if index_term.lower() in entry_keywords: + entry_data['Index'] = index_term + + if all(value is not None for value in entry_data.values()): + data.append(entry_data) + + return data + + # Pfaddiagramm erstellen +def create_path_diagram(data): + labels = [] + sources = [] + targets = [] + values = [] + color_map = { + 'zeitschriftenartikel': colors['primaryLine'], + 'konferenz-paper': colors['secondaryLine'], + 'buch': colors['depthArea'], + 'buchteil': colors['brightArea'], + 'bericht': colors['accent'] + } + + def add_to_labels(label): + if label not in labels: + labels.append(label) + return labels.index(label) + + for entry in data: + fu_idx = add_to_labels(entry['FU']) + category_idx = add_to_labels(entry['Category']) + index_idx = add_to_labels(entry['Index']) + type_idx = add_to_labels(entry['Type']) + + sources.extend([fu_idx, category_idx, index_idx]) + targets.extend([category_idx, index_idx, type_idx]) + values.extend([1, 1, 1]) + + node_colors = 
[color_map.get(label, colors['primaryLine']) for label in labels] + + fig = go.Figure(data=[go.Sankey( + node=dict( + pad=15, + thickness=20, + line=dict(color=colors['white'], width=0.5), + label=labels, + color=node_colors + ), + link=dict( + source=sources, + target=targets, + value=values + ) + )]) + + fig.update_layout( + title_text=f'Kategorischer Analysepfad der Literatur (n={len(data)}, Stand: {current_date})', + font=dict(size=10, color=colors['white']), + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'] + ) + + fig.show() + export_create_path_diagram(fig) + + +############# + +def create_sankey_diagram(bib_database): + def extract_year(entry): + """ + Extrahiert ein gültiges Jahr aus dem `year`-Feld eines Eintrags. + """ + year_str = entry.get('year', '').strip() + try: + # Suche nach einer 4-stelligen Jahreszahl + year_match = re.search(r'\b\d{4}\b', year_str) + if year_match: + return int(year_match.group()) + else: + raise ValueError(f"Kein gültiges Jahr gefunden: {year_str}") + except ValueError: + print(f"Warnung: Ungültiger Jahreswert in Eintrag übersprungen: {year_str}") + return None + + current_year = datetime.now().year + filtered_entries = [ + entry for entry in bib_database.entries + if 'promotion:literaturanalyse' in entry.get('keywords', '').lower() + ] + + initial_sources = len(filtered_entries) + screened_sources = initial_sources # Da bereits gefiltert + quality_sources = sum( + 1 for entry in filtered_entries + if entry.get('ENTRYTYPE') in ['article', 'phdthesis'] + ) + relevance_sources = sum( + 1 for entry in filtered_entries + if entry.get('ENTRYTYPE') in ['article', 'phdthesis'] + and any(rq in entry.get('keywords', '').lower() for rq in ['promotion:fu3', 'promotion:kerngedanke']) + ) + thematic_sources = sum( + 1 for entry in filtered_entries + if entry.get('ENTRYTYPE') in ['article', 'phdthesis'] + and any(rq in entry.get('keywords', '').lower() for rq in ['promotion:fu3', 'promotion:kerngedanke']) + and any(kw in entry.get('keywords', '').lower() for kw in ['digital', 'learning']) + ) + recent_sources = sum( + 1 for entry in filtered_entries + if entry.get('ENTRYTYPE') in ['article', 'phdthesis'] + and any(rq in entry.get('keywords', '').lower() for rq in ['promotion:fu3', 'promotion:kerngedanke']) + and any(kw in entry.get('keywords', '').lower() for kw in ['digital', 'learning']) + and (year := extract_year(entry)) and year >= current_year - 5 + ) + classic_sources = sum( + 1 for entry in filtered_entries + if entry.get('ENTRYTYPE') in ['article', 'phdthesis'] + and any(rq in entry.get('keywords', '').lower() for rq in ['promotion:fu3', 'promotion:kerngedanke']) + and any(kw in entry.get('keywords', '').lower() for kw in ['digital', 'learning']) + and (year := extract_year(entry)) and year < current_year - 5 + and 'classic' in entry.get('keywords', '').lower() + ) + selected_sources = recent_sources + classic_sources + + # Stichprobengröße berechnen + sample_size = calculate_sample_size(initial_sources) + + # Phasen und Verbindungen definieren + phases = [ + "Identifizierte Quellen", + "Nach Screening (Literaturanalyse-Markierung)", + "Nach Qualitätsprüfung (Artikel und Dissertationen)", + "Nach Relevanzprüfung (FU3 und Kerngedanken)", + "Nach thematischer Prüfung (Digital & Learning)", + "Aktuelle Forschung (letzte 5 Jahre)", + "Klassische Werke", + "Ausgewählte Quellen (Endauswahl)" + ] + + sources = [0, 1, 2, 3, 4, 4, 4] + targets = [1, 2, 3, 4, 5, 6, 7] + values = [ + screened_sources, + quality_sources, + relevance_sources, + 
thematic_sources, + recent_sources, + classic_sources, + selected_sources + ] + + # Prozentsätze berechnen für die Labels + percentages = [ + "100.0%", # Startwert + f"{screened_sources / initial_sources * 100:.1f}%", + f"{quality_sources / screened_sources * 100:.1f}%" if screened_sources > 0 else "0.0%", + f"{relevance_sources / quality_sources * 100:.1f}%" if quality_sources > 0 else "0.0%", + f"{thematic_sources / relevance_sources * 100:.1f}%" if relevance_sources > 0 else "0.0%", + f"{recent_sources / thematic_sources * 100:.1f}%" if thematic_sources > 0 else "0.0%", + f"{classic_sources / thematic_sources * 100:.1f}%" if thematic_sources > 0 else "0.0%", + f"{selected_sources / (recent_sources + classic_sources) * 100:.1f}%" if (recent_sources + classic_sources) > 0 else "0.0%" + ] + + # Labels für Knoten anpassen, um Prozentsätze anzuzeigen + node_labels = [f"{ph} ({pct})" for ph, pct in zip(phases, percentages)] + + # Farben für die einzelnen Phasen + node_colors = [ + colors['primaryLine'], # Identifizierte Quellen + colors['secondaryLine'], # Nach Screening + colors['brightArea'], # Nach Qualitätsprüfung + colors['depthArea'], # Nach Relevanzprüfung + colors['positiveHighlight'], # Nach thematischer Prüfung + colors['negativeHighlight'], # Aktuelle Forschung + colors['accent'], # Klassische Werke + colors['positiveHighlight'] # Ausgewählte Quellen + ] + + # Sankey-Diagramm erstellen + fig = go.Figure(go.Sankey( + node=dict( + pad=15, + thickness=20, + line=dict(color="black", width=0.5), + label=node_labels, + color=node_colors + ), + link=dict( + source=sources, + target=targets, + value=values, + hoverinfo='all', # Zeigt detaillierte Infos bei Mouseover an + color=colors['accent'] + ) + )) + + # Layout anpassen + fig.update_layout( + title_text=f"Flussdiagramm der Literaturselektion (Stichprobe: n={sample_size}, Stand: {current_date})", + font_size=12, # Größere Schriftgröße für bessere Lesbarkeit + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']) + ) + + fig.show() + export_create_sankey_diagram(fig) + +########## + +def calculate_sample_size(N, Z=1.96, p=0.5, e=0.05): + """ + Berechnet die Stichprobengröße basierend auf der Gesamtanzahl der Einträge (N). + """ + if N <= 0: + return 0 + n_0 = (Z**2 * p * (1 - p)) / (e**2) + n = n_0 / (1 + ((n_0 - 1) / N)) + return math.ceil(n) + +def visualize_sources_status(bib_database): + """ + Visualisiert den Status der analysierten und nicht analysierten Quellen pro Suchordner. 
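+    (Per search folder: identified vs. coded sources, compared against the
+    required Cochran sample size.)
+    """
+    # calculate_sample_size above implements Cochran's formula with a finite-
+    # population correction. Worked example for N=500 sources:
+    #     n0 = 1.96**2 * 0.25 / 0.05**2      # = 384.16
+    #     n  = n0 / (1 + (n0 - 1) / 500)     # = 217.5 -> ceil -> 218
+    #     calculate_sample_size(500)         # -> 218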
+ """ + search_folder_tags = [ + "#1:zeitschriftenartikel:learning:management:system", + "#2:zeitschriftenartikel:online:lernplattform", + "#3:zeitschriftenartikel:online:lernumgebung", + "#4:zeitschriftenartikel:mooc", + "#5:zeitschriftenartikel:e-learning", + "#6:zeitschriftenartikel:bildung:technologie", + "#7:zeitschriftenartikel:digital:medien", + "#8:zeitschriftenartikel:blended:learning", + "#9:zeitschriftenartikel:digital:lernen", + "#a:zeitschriftenartikel:online:lernen", + "#b:zeitschriftenartikel:online:learning", + "#0:zeitschriftenartikel:digital:learning", + "#1:konferenz-paper:learning:management:system", + "#2:konferenz-paper:online:lernplattform", + "#3:konferenz-paper:online:lernumgebung", + "#4:konferenz-paper:mooc", + "#5:konferenz-paper:e-learning", + "#6:konferenz-paper:bildung:technologie", + "#7:konferenz-paper:digital:medien", + "#8:konferenz-paper:blended:learning", + "#9:konferenz-paper:digital:lernen", + "#a:konferenz-paper:online:lernen", + "#b:konferenz-paper:online:learning", + "#0:konferenz-paper:digital:learning" + ] + + category_tags = {"promotion:argumentation", "promotion:kerngedanke", "promotion:weiterführung", "promotion:schlussfolgerung"} + source_data = defaultdict(lambda: {'Identifiziert': 0, 'Analysiert': 0}) + + if not bib_database or not bib_database.entries: + print("Fehler: Die Datenbank enthält keine Einträge.") + return + + for entry in bib_database.entries: + keywords = entry.get('keywords', '') + if not keywords: + continue + + entry_keywords = set(map(str.lower, map(str.strip, keywords.replace('\\#', '#').split(',')))) + + for tag in search_folder_tags: + if tag.lower() in entry_keywords: + source_data[tag]['Identifiziert'] += 1 + if entry_keywords & category_tags: + source_data[tag]['Analysiert'] += 1 + + table_data = [] + analysiert_values = [] + nicht_analysiert_values = [] + analysiert_colors = [] + tags = [] + + for tag, counts in sorted(source_data.items(), key=lambda item: item[1]['Identifiziert'], reverse=True): + stichprobe = calculate_sample_size(counts['Identifiziert']) + noch_zu_analysieren = counts['Identifiziert'] - counts['Analysiert'] + noch_benoetigt_fuer_stichprobe = max(0, stichprobe - counts['Analysiert']) + + table_data.append([ + tag, + counts['Identifiziert'], + counts['Analysiert'], + noch_zu_analysieren, + stichprobe, + noch_benoetigt_fuer_stichprobe + ]) + + analysiert_values.append(counts['Analysiert']) + nicht_analysiert_values.append(noch_zu_analysieren) + tags.append(tag) + + analysiert_colors.append(colors['positiveHighlight'] if counts['Analysiert'] >= stichprobe else colors['negativeHighlight']) + + print(tabulate( + table_data, + headers=['Suchordner', 'Identifiziert', 'Analysiert', 'nicht-Analysiert', 'Stichprobe', 'Noch benötigt für Stichprobe'], + tablefmt='grid' + )) + + fig = go.Figure() + + fig.add_trace(go.Bar( + x=tags, + y=analysiert_values, + name='Analysiert', + marker=dict(color=analysiert_colors) + )) + + fig.add_trace(go.Bar( + x=tags, + y=nicht_analysiert_values, + name='Nicht-Analysiert', + marker=dict(color=colors['primaryLine']) + )) + + fig.update_layout( + barmode='stack', + title=f'Analyse- und Stichprobenstatus je Suchordner (n={sum(counts["Identifiziert"] for counts in source_data.values())}, Stand: {current_date})', + xaxis_title='Suchbegriffsordner', + yaxis_title='Anzahl der Quellen', + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']), + xaxis=dict( + categoryorder='array', + categoryarray=search_folder_tags + ) + ) + + 
fig.show()
+    export_visualize_sources_status(fig)
+
+#############
+
+# Build a word cloud from the entry titles
+def create_wordcloud_from_titles(bib_database, stop_words):
+    titles = [entry.get('title', '') for entry in bib_database.entries]
+
+    # Count words
+    word_counts = defaultdict(int)
+    for title in titles:
+        for word in title.split():
+            word = word.lower().strip(",.!?\"'()[]{}:;")
+            if word and word not in stop_words:
+                word_counts[word] += 1
+
+    # Build the word cloud
+    wordcloud = WordCloud(
+        width=800,
+        height=400,
+        background_color=colors['background'],
+        color_func=lambda *args, **kwargs: random.choice(word_colors)
+    ).generate_from_frequencies(word_counts)
+
+    # Display the word cloud
+    plt.figure(figsize=(10, 5))
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis('off')
+    plt.title(f'Häufigkeitsanalyse von Titelwörtern (Stand: {current_date})', color=colors['white'])
+    plt.show()
+
+# Export wrapper for visualize_search_term_distribution (delegates to the shared helper)
+def export_visualize_search_term_distribution(fig):
+    _export_figure(fig, "visualize_search_term_distribution", export_fig_visualize_search_term_distribution)
+
+# Pie chart: distribution of entries across primary, secondary, and tertiary search-term folders
+def visualize_search_term_distribution(bib_database):
+    """
+    Builds a pie chart showing how the entries are distributed across the
+    primary, secondary, and tertiary search-term folders.
+    """
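+    # The three folder sets below mirror the collection list used in
+    # load_zotero_entries above; keep them in sync when collections change.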
+ """ + hierarchy_counts = { + 'Primär': 0, + 'Sekundär': 0, + 'Tertiär': 0 + } + + primary_folders = { + 'S:01 Learning Management System', + 'S:02 Online-Lernplattform', + 'S:03 Online-Lernumgebung', + 'S:05 eLearning', + 'S:04 MOOC', + 'S:06 Bildungstechnologie', + 'S:07 Digitale Medien', + 'S:08 Blended Learning', + 'S:09 Digitales Lernen', + 'S:12 Digital Learning', + 'S:10 Online Lernen', + 'S:11 Online Learning' + } + + secondary_folders = { + 'S:13 Berichte', + 'S:14 Agiles Lernen', + 'S:15 Learning Analytics' + } + + tertiary_folders = { + 'S:16 Dissertationen', + 'S:17 ePortfolio' + } + + conn = sqlite3.connect('/Users/jochen_hanisch-johannsen/Zotero/zotero.sqlite') + cursor = conn.cursor() + + query = """ + SELECT collections.collectionName, COUNT(DISTINCT items.itemID) + FROM items + JOIN collectionItems ON items.itemID = collectionItems.itemID + JOIN collections ON collectionItems.collectionID = collections.collectionID + WHERE collections.collectionName IN ( + 'S:01 Learning Management System', + 'S:02 Online-Lernplattform', + 'S:03 Online-Lernumgebung', + 'S:05 eLearning', + 'S:04 MOOC', + 'S:06 Bildungstechnologie', + 'S:07 Digitale Medien', + 'S:08 Blended Learning', + 'S:09 Digitales Lernen', + 'S:12 Digital Learning', + 'S:10 Online Lernen', + 'S:11 Online Learning', + 'S:13 Berichte', + 'S:14 Agiles Lernen', + 'S:15 Learning Analytics', + 'S:16 Dissertationen', + 'S:17 ePortfolio' + ) + GROUP BY collections.collectionName + """ + + cursor.execute(query) + rows = cursor.fetchall() + conn.close() + + for collection, count in rows: + if collection in primary_folders: + hierarchy_counts['Primär'] += count + elif collection in secondary_folders: + hierarchy_counts['Sekundär'] += count + elif collection in tertiary_folders: + hierarchy_counts['Tertiär'] += count + + labels = list(hierarchy_counts.keys()) + values = list(hierarchy_counts.values()) + colors_pie = [colors['primaryLine'], colors['secondaryLine'], colors['brightArea']] + + fig = go.Figure(data=[go.Pie( + labels=labels, + values=values, + marker=dict(colors=colors_pie), + textinfo='label+percent', + hoverinfo='label+value' + )]) + + fig.update_layout( + title='Verteilung der Suchbegriffsordner (Primär, Sekundär, Tertiär)', + plot_bgcolor=colors['background'], + paper_bgcolor=colors['background'], + font=dict(color=colors['white']) + ) + + fig.show() + export_visualize_search_term_distribution(fig) + +# Aufrufen der Visualisierungsfunktionen +visualize_network(bib_database) +visualize_tags(bib_database) +visualize_index(bib_database) +visualize_research_questions(bib_database) +visualize_categories(bib_database) +visualize_time_series(bib_database) +visualize_top_authors(bib_database) +visualize_top_publications(bib_database) +data = prepare_path_data(bib_database) +create_path_diagram(data) +create_sankey_diagram(bib_database) +visualize_sources_status(bib_database) +create_wordcloud_from_titles(bib_database, stop_words) +visualize_search_term_distribution(bib_database)
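+
+# --- Optional sanity checks (a sketch; not part of the original pipeline) ---
+# Quick assertions documenting the expected behaviour of the two pure helpers.
+assert slugify("Visualize Network!") == "visualize_network"
+assert calculate_sample_size(500) == 218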