diff --git a/Automatisierte Literaturanalyse.py b/Automatisierte Literaturanalyse.py
deleted file mode 100644
index e69de29..0000000
diff --git a/Geovisualization copy.py b/Geovisualization copy.py
deleted file mode 100644
index 6f272dd..0000000
--- a/Geovisualization copy.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import os
-import bibtexparser
-import pandas as pd
-from tqdm import tqdm
-import folium
-from folium.plugins import Fullscreen
-
-# Clear the terminal
-os.system('cls' if os.name == 'nt' else 'clear')
-
-# File paths
-geonames_file = 'allCountries.txt'
-bib_file = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/Literaturverzeichnis.bib'
-
-# Load the GeoNames data
-print("Loading GeoNames data...")
-geonames_columns = [
-    'geonameid', 'name', 'asciiname', 'alternatenames', 'latitude',
-    'longitude', 'feature class', 'feature code', 'country code', 'cc2',
-    'admin1 code', 'admin2 code', 'admin3 code', 'admin4 code', 'population',
-    'elevation', 'dem', 'timezone', 'modification date'
-]
-
-# Collect the chunks in a list and concatenate once at the end; calling
-# pd.concat inside the loop re-copies the accumulated frame on every iteration.
-chunksize = 10**6
-chunks = []
-for chunk in tqdm(pd.read_csv(geonames_file, sep='\t', header=None, names=geonames_columns, chunksize=chunksize, dtype=str, encoding='utf-8')):
-    chunks.append(chunk)
-geonames_data = pd.concat(chunks, ignore_index=True)
-
-# Load the BibTeX data
-print("Loading BibTeX data...")
-with open(bib_file, encoding='utf-8') as bibtex_file:
-    bib_database = bibtexparser.load(bibtex_file)
-
-# Extract and clean place names
-print("Extracting and cleaning place names...")
-locations = set()
-for entry in bib_database.entries:
-    if 'address' in entry:
-        locations.update(entry['address'].replace(';', ',').replace('&', 'and').split(','))
-
-locations = {loc.strip() for loc in locations}
-print(f"Cleaned place names: {locations}")
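-# Optional speed-up (a sketch, not wired in): the substring scan below walks
-# the full GeoNames frame once per place name. For exact-name matches, a
-# one-off lowercase index would make each lookup O(1):
-#
-#     name_index = {}
-#     for name, lat, lon in zip(geonames_data['name'], geonames_data['latitude'], geonames_data['longitude']):
-#         name_index.setdefault(str(name).lower(), (lat, lon))
-#     # name_index.get(location.lower()) then replaces the str.contains scan,
-#     # at the cost of losing partial matches.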
-""" - -import os - -# Clear the terminal -os.system('cls' if os.name == 'nt' else 'clear') - -import bibtexparser -import pandas as pd -import numpy as np -import networkx as nx -import matplotlib.pyplot as plt -from datetime import datetime -from collections import defaultdict, Counter -from itertools import product -from wordcloud import WordCloud -from tabulate import tabulate -import plotly.express as px -import plotly.graph_objects as go -import random -import math -import re -import subprocess - -# Export-Flags für Visualisierungen -export_fig_visualize_network = False -export_fig_visualize_tags = False -export_fig_visualize_index = False -export_fig_visualize_research_questions = False -export_fig_visualize_categories = False -export_fig_visualize_time_series = False -export_fig_visualize_top_authors = False -export_fig_visualize_top_publications = False -export_fig_create_path_diagram = False -export_fig_create_sankey_diagram = False -export_fig_visualize_sources_status = False -export_fig_create_wordcloud_from_titles = False -export_fig_visualize_search_term_distribution = False - -# Optional: slugify-Funktion -def slugify(value): - return re.sub(r'[^a-zA-Z0-9_-]', '', value.replace(' ', '_').lower()) - -# Exportfunktionen für jede Visualisierung -def export_visualize_network(fig): - if export_fig_visualize_network: - safe_filename = slugify("visualize_network") - export_path = f"{safe_filename}.html" - fig.write_html(export_path, full_html=True, include_plotlyjs="cdn") - remote_path = "jochen-hanisch@sternenflottenakademie.local:/mnt/deep-space-nine/public/plot/promotion/" - try: - subprocess.run(["scp", export_path, remote_path], check=True, capture_output=True, text=True) - print(f"✅ Datei '{export_path}' erfolgreich übertragen.") - except subprocess.CalledProcessError as e: - print("❌ Fehler beim Übertragen:") - print(e.stderr) - -def export_visualize_tags(fig): - if export_fig_visualize_tags: - safe_filename = slugify("visualize_tags") - export_path = f"{safe_filename}.html" - fig.write_html(export_path, full_html=True, include_plotlyjs="cdn") - remote_path = "jochen-hanisch@sternenflottenakademie.local:/mnt/deep-space-nine/public/plot/promotion/" - try: - subprocess.run(["scp", export_path, remote_path], check=True, capture_output=True, text=True) - print(f"✅ Datei '{export_path}' erfolgreich übertragen.") - except subprocess.CalledProcessError as e: - print("❌ Fehler beim Übertragen:") - print(e.stderr) - -def export_visualize_index(fig): - if export_fig_visualize_index: - safe_filename = slugify("visualize_index") - export_path = f"{safe_filename}.html" - fig.write_html(export_path, full_html=True, include_plotlyjs="cdn") - remote_path = "jochen-hanisch@sternenflottenakademie.local:/mnt/deep-space-nine/public/plot/promotion/" - try: - subprocess.run(["scp", export_path, remote_path], check=True, capture_output=True, text=True) - print(f"✅ Datei '{export_path}' erfolgreich übertragen.") - except subprocess.CalledProcessError as e: - print("❌ Fehler beim Übertragen:") - print(e.stderr) - -def export_visualize_research_questions(fig): - if export_fig_visualize_research_questions: - safe_filename = slugify("visualize_research_questions") - export_path = f"{safe_filename}.html" - fig.write_html(export_path, full_html=True, include_plotlyjs="cdn") - remote_path = "jochen-hanisch@sternenflottenakademie.local:/mnt/deep-space-nine/public/plot/promotion/" - try: - subprocess.run(["scp", export_path, remote_path], check=True, capture_output=True, text=True) - print(f"✅ Datei 
-
-# Colour definitions
-colors = {
-    "background": "#003366",         # background colour
-    "text": "#333333",               # text colour
-    "accent": "#663300",             # accent colour
-    "primaryLine": "#660066",        # education impact factor (Bildungswirkfaktor)
-    "secondaryLine": "#cc6600",      # education impact indicator (Bildungswirkindikator)
-    "depthArea": "#006666",          # competence measurement uncertainty
-    "brightArea": "#66CCCC",         # competence development uncertainty
-    "positiveHighlight": "#336600",  # positive highlight
-    "negativeHighlight": "#990000",  # negative highlight
-    "white": "#ffffff"               # white
-}
-
-# Colours used for the words in the word cloud
-word_colors = [
-    colors["white"],
-    colors["brightArea"],
-    colors["positiveHighlight"],
-    colors["negativeHighlight"]
-]
-
-# Current date
-current_date = datetime.now().strftime("%Y-%m-%d")
-
-# Load the Zotero SQLite database and build a structure compatible with the
-# bib_database.entries layout produced by bibtexparser.
-ZOTERO_DB_PATH = '/Users/jochen_hanisch-johannsen/Zotero/zotero.sqlite'
-
-def load_zotero_entries(sqlite_path):
-    conn = sqlite3.connect(sqlite_path)
-    cursor = conn.cursor()
-
-    query = """
-    SELECT
-        items.itemID,
-        COALESCE(value_title.value, '') AS title,
-        COALESCE(value_year.value, '') AS year,
-        COALESCE(creators.lastName || ', ' || creators.firstName, '') AS author,
-        GROUP_CONCAT(DISTINCT tags.name) AS keywords,
-        itemTypes.typeName AS type
-    FROM items
-
-    -- Title
-    LEFT JOIN itemData AS title_data ON items.itemID = title_data.itemID
-    LEFT JOIN fields AS title_field ON title_data.fieldID = title_field.fieldID AND title_field.fieldName = 'title'
-    LEFT JOIN itemDataValues AS value_title ON title_data.valueID = value_title.valueID
-
-    -- Year
-    LEFT JOIN itemData AS year_data ON items.itemID = year_data.itemID
-    LEFT JOIN fields AS year_field ON year_data.fieldID = year_field.fieldID AND year_field.fieldName = 'date'
-    LEFT JOIN itemDataValues AS value_year ON year_data.valueID = value_year.valueID
-
-    -- Authors
-    LEFT JOIN itemCreators ON items.itemID = itemCreators.itemID
-    LEFT JOIN creators ON itemCreators.creatorID = creators.creatorID
-
-    -- Tags
-    LEFT JOIN itemTags ON items.itemID = itemTags.itemID
-    LEFT JOIN tags ON itemTags.tagID = tags.tagID
-
-    -- Item type
-    LEFT JOIN itemTypes ON items.itemTypeID = itemTypes.itemTypeID
-
-    -- Collections
-    LEFT JOIN collectionItems ON items.itemID = collectionItems.itemID
-    LEFT JOIN collections ON collectionItems.collectionID = collections.collectionID
-
-    WHERE collections.collectionName IN (
-        'S:01 Learning Management System',
-        'S:02 Online-Lernplattform',
-        'S:03 Online-Lernumgebung',
-        'S:05 eLearning',
-        'S:04 MOOC',
-        'S:06 Bildungstechnologie',
-        'S:07 Digitale Medien',
-        'S:08 Blended Learning',
-        'S:09 Digitales Lernen',
-        'S:12 Digital Learning',
-        'S:10 Online Lernen',
-        'S:11 Online Learning',
-        'S:13 Berichte',
-        'S:14 Agiles Lernen',
-        'S:15 Learning Analytics',
-        'S:16 Dissertationen',
-        'S:17 ePortfolio'
-    )
-    GROUP BY items.itemID
-    """
-
-    cursor.execute(query)
-    rows = cursor.fetchall()
-
-    # Convert the rows into the bib_database.entries-compatible format
-    entries = []
-    for row in rows:
-        item = {
-            'ID': str(row[0]),
-            'title': row[1],
-            'year': row[2],
-            'author': row[3],
-            'keywords': row[4] if row[4] else '',
-            'ENTRYTYPE': row[5]
-        }
-        entries.append(item)
-
-    conn.close()
-    return entries
-
-bib_database = type("BibDatabase", (object,), {})()
-bib_database.entries = load_zotero_entries(ZOTERO_DB_PATH)
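-# Each entry mirrors the dict layout bibtexparser would produce, e.g.
-# (illustrative values only):
-#
-#     {'ID': '42', 'title': 'Learning Analytics im Hochschulkontext',
-#      'year': '2021', 'author': 'Doe, Jane',
-#      'keywords': 'promotion:fu3,#5:Zeitschriftenartikel:e-learning',
-#      'ENTRYTYPE': 'journalArticle'}
-#
-# Note that ENTRYTYPE now carries Zotero type names such as 'journalArticle',
-# not BibTeX types such as 'article'.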
-
-# Load stop word lists
-with open('de_complete.txt', 'r', encoding='utf-8') as file:
-    stop_words_de = set(file.read().split())
-
-with open('en_complete.txt', 'r', encoding='utf-8') as file:
-    stop_words_en = set(file.read().split())
-
-# Combined stop word list
-stop_words = stop_words_de.union(stop_words_en)
-
-def calculate_sample_size(N, Z=1.96, p=0.5, e=0.05):
-    """
-    Calculate the sample size for a population of N entries
-    (Cochran's formula with finite population correction).
-    """
-    if N <= 0:
-        return 0
-    n_0 = (Z**2 * p * (1 - p)) / (e**2)
-    n = n_0 / (1 + ((n_0 - 1) / N))
-    return math.ceil(n)
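-# Worked example: with Z=1.96, p=0.5, e=0.05 the numerator is
-# n_0 = 1.96² · 0.25 / 0.05² = 384.16; for N=1000 the finite population
-# correction gives 384.16 / (1 + 383.16/1000) ≈ 277.74, rounded up to 278.
-assert calculate_sample_size(1000) == 278
-assert calculate_sample_size(100) == 80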
-
-# Visualization 1: network analysis
-def visualize_network(bib_database):
-    search_terms = {
-        '0': 'digital:learning',
-        '1': 'learning:management:system',
-        '2': 'online:Lernplattform',
-        '3': 'online:Lernumgebung',
-        '4': 'MOOC',
-        '5': 'e-learning',
-        '6': 'Bildung:Technologie',
-        '7': 'digital:Medien',
-        '8': 'blended:learning',
-        '9': 'digital:lernen',
-        'a': 'online:lernen',
-        'b': 'online:learning'
-    }
-
-    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b']
-    types = [
-        'Zeitschriftenartikel',
-        'Buch',
-        'Buchteil',
-        'Bericht',
-        'Konferenz-Paper'
-    ]
-    tags_to_search = set()
-    for number, type_ in product(numbers, types):
-        search_term = search_terms[number]
-        tag = f'#{number}:{type_}:{search_term}'
-        tags_to_search.add(tag.lower())
-
-    tag_counts = defaultdict(int)
-    for entry in bib_database.entries:
-        if 'keywords' in entry:
-            entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(','))))
-            for keyword in entry_keywords:
-                for tag in tags_to_search:
-                    if tag in keyword:
-                        tag_counts[tag] += 1
-
-    # Map the tag counts back onto their search terms. A tag has the form
-    # '#<number>:<type>:<search term>' and the search term itself may contain
-    # colons, so everything from the second colon onwards belongs to it (the
-    # previous split(':')[-1] kept only the last fragment). The comparison is
-    # lowercased on both sides because the tags were lowercased above.
-    fundzahlen = defaultdict(int)
-    for tag, count in tag_counts.items():
-        search_term = ':'.join(tag.split(':')[2:])
-        for key, value in search_terms.items():
-            if search_term == value.lower():
-                fundzahlen[value.lower()] += count
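-    # Worked example: '#1:zeitschriftenartikel:learning:management:system'
-    # splits into ['#1', 'zeitschriftenartikel', 'learning', 'management',
-    # 'system']; joining the parts from index 2 onwards restores the full
-    # search term 'learning:management:system'.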
-
-    search_terms_network = {
-        "Primäre Begriffe": {
-            "learning:management:system": [
-                "e-learning",
-                "bildung:technologie",
-                "online:lernplattform",
-                "online:lernumgebung",
-                "digital:learning",
-                "digitales:lernen"
-            ]
-        },
-        "Sekundäre Begriffe": {
-            "e-learning": [
-                "mooc",
-                "online:lernplattform"
-            ],
-            "bildung:technologie": [
-                "digital:learning",
-                "digitales:lernen",
-                "blended:learning"
-            ],
-            "digital:learning": [
-                "digitale:medien",
-                "online:learning"
-            ],
-            "digitales:lernen": [
-                "digitale:medien",
-                "online:lernen"
-            ],
-            "blended:learning": ["mooc"]
-        },
-        "Tertiäre Begriffe": {
-            "online:learning": [],
-            "online:lernen": []
-        }
-    }
-    # NB: 'digitales:lernen' and 'digitale:medien' have no exact counterpart
-    # in search_terms ('digital:lernen', 'digital:medien'), so their
-    # fundzahlen lookups fall back to the default node size.
-
-    G = nx.Graph()
-
-    hierarchy_colors = {
-        "Primäre Begriffe": colors['primaryLine'],
-        "Sekundäre Begriffe": colors['secondaryLine'],
-        "Tertiäre Begriffe": colors['brightArea']
-    }
-
-    def add_terms_to_graph(level, terms):
-        for primary_term, related_terms in terms.items():
-            if primary_term not in G:
-                G.add_node(primary_term, color=hierarchy_colors[level], size=fundzahlen.get(primary_term, 10))
-            elif level == "Tertiäre Begriffe":
-                G.nodes[primary_term]['color'] = hierarchy_colors[level]
-            for related_term in related_terms:
-                if related_term not in G:
-                    G.add_node(related_term, color=hierarchy_colors[level], size=fundzahlen.get(related_term, 10))
-                elif level == "Tertiäre Begriffe":
-                    G.nodes[related_term]['color'] = hierarchy_colors[level]
-                G.add_edge(primary_term, related_term)
-
-    for level, terms in search_terms_network.items():
-        add_terms_to_graph(level, terms)
-
-    # Fixed seed keeps the spring layout reproducible between runs
-    np.random.seed(42)
-    pos = nx.spring_layout(G)
-
-    # Normalize the layout onto a fixed 0..10 coordinate system
-    x_scale_min, x_scale_max = 0, 10
-    y_scale_min, y_scale_max = 0, 10
-
-    min_x = min(pos[node][0] for node in pos)
-    max_x = max(pos[node][0] for node in pos)
-    min_y = min(pos[node][1] for node in pos)
-    max_y = max(pos[node][1] for node in pos)
-
-    scale_x_range = x_scale_max - x_scale_min
-    scale_y_range = y_scale_max - y_scale_min
-
-    for node in pos:
-        x, y = pos[node]
-        norm_x = scale_x_range * (x - min_x) / (max_x - min_x) + x_scale_min
-        norm_y = scale_y_range * (y - min_y) / (max_y - min_y) + y_scale_min
-        pos[node] = (norm_x, norm_y)
-
-    for node in pos:
-        x, y = pos[node]
-        x = max(min(x, x_scale_max), x_scale_min)
-        y = max(min(y, y_scale_max), y_scale_min)
-        pos[node] = (x, y)
-
-    edge_x = []
-    edge_y = []
-    for edge in G.edges():
-        x0, y0 = pos[edge[0]]
-        x1, y1 = pos[edge[1]]
-        edge_x.extend([x0, x1, None])
-        edge_y.extend([y0, y1, None])
-
-    edge_trace = go.Scatter(
-        x=edge_x, y=edge_y,
-        line=dict(width=0.5, color=colors['white']),
-        hoverinfo='none',
-        mode='lines')
-
-    # Split the nodes into three traces: primary, secondary, tertiary
-    primary_nodes = []
-    secondary_nodes = []
-    tertiary_nodes = []
-
-    for node in G.nodes():
-        color = G.nodes[node]['color']
-        size = math.log(G.nodes[node].get('size', 10) + 1) * 10
-        x, y = pos[node]
-        hovertext = f"{node}<br>Anzahl Funde: {fundzahlen.get(node, 0)}"
-        node_data = dict(x=x, y=y, text=node, size=size, hovertext=hovertext)
-        if color == colors['primaryLine']:
-            primary_nodes.append(node_data)
-        elif color == colors['secondaryLine']:
-            secondary_nodes.append(node_data)
-        elif color == colors['brightArea']:
-            tertiary_nodes.append(node_data)
-
-    def create_node_trace(nodes, name, color):
-        return go.Scatter(
-            x=[n['x'] for n in nodes],
-            y=[n['y'] for n in nodes],
-            mode='markers+text',
-            text=[n['text'] for n in nodes],
-            hovertext=[n['hovertext'] for n in nodes],
-            hoverinfo='text',
-            marker=dict(
-                size=[n['size'] for n in nodes],
-                color=color,
-                line_width=2
-            ),
-            textposition="top center",
-            textfont=dict(size=12),
-            name=name
-        )
-
-    primary_trace = create_node_trace(primary_nodes, "Primäre Begriffe", colors['primaryLine'])
-    secondary_trace = create_node_trace(secondary_nodes, "Sekundäre Begriffe", colors['secondaryLine'])
-    tertiary_trace = create_node_trace(tertiary_nodes, "Tertiäre Begriffe", colors['brightArea'])
-
-    fig = go.Figure(data=[edge_trace, primary_trace, secondary_trace, tertiary_trace],
-                    layout=go.Layout(
-                        title=f'Suchbegriff-Netzwerk nach Relevanz und Semantik (n={sum(fundzahlen.values())}, Stand: {current_date})',
-                        title_font_size=16,
-                        showlegend=True,
-                        legend=dict(
-                            bgcolor=colors['background'],
-                            bordercolor=colors['white'],
-                            borderwidth=1,
-                            font=dict(color=colors['white']),
-                            itemsizing='constant'
-                        ),
-                        hovermode='closest',
-                        margin=dict(b=20, l=5, r=5, t=40),
-                        xaxis=dict(
-                            range=[x_scale_min, x_scale_max + 1],
-                            showgrid=True,
-                            zeroline=True,
-                            tickmode='linear',
-                            tick0=x_scale_min,
-                            dtick=(x_scale_max - x_scale_min) / 4,
-                            title='Technologische Dimension'
-                        ),
-                        yaxis=dict(
-                            range=[y_scale_min, y_scale_max + 1],
-                            showgrid=True,
-                            zeroline=True,
-                            tickmode='linear',
-                            tick0=y_scale_min,
-                            dtick=(y_scale_max - y_scale_min) / 4,
-                            title='Pädagogische Dimension'
-                        ),
-                        plot_bgcolor=colors['background'],
-                        paper_bgcolor=colors['background'],
-                        font=dict(color=colors['white'])
-                    ))
-
-    fig.show()
-    export_visualize_network(fig)
-
-    # Simple path analysis after the figure has been shown
-    if 'e-learning' in G and 'online:lernen' in G:
-        try:
-            pfad = nx.shortest_path(G, source='e-learning', target='online:lernen')
-            print(f"Shortest path from 'e-learning' to 'online:lernen': {pfad}")
-        except nx.NetworkXNoPath:
-            print("No path found from 'e-learning' to 'online:lernen'.")
-
-# Visualization 2: frequency of specific tags
-def visualize_tags(bib_database):
-    # Defined search terms
-    search_terms = {
-        '0': 'digital:learning',
-        '1': 'learning:management:system',
-        '2': 'online:Lernplattform',
-        '3': 'online:Lernumgebung',
-        '4': 'MOOC',
-        '5': 'e-learning',
-        '6': 'Bildung:Technologie',
-        '7': 'digital:Medien',
-        '8': 'blended:learning',
-        '9': 'digital:lernen',
-        'a': 'online:lernen',
-        'b': 'online:learning'
-    }
-
-    # Build the combined tags, lowercased so they can match the lowercased
-    # entry keywords below (the original mixed-case tags never matched).
-    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b']
-    types = [
-        'Zeitschriftenartikel',
-        'Buch',
-        'Buchteil',
-        'Bericht',
-        'Konferenz-Paper'
-    ]
-    tags_to_search = set(
-        f"#{number}:{type_}:{search_terms[number]}".lower()
-        for number, type_ in product(numbers, types)
-    )
-
-    # Initialize the tag counts
-    tag_counts = defaultdict(int)
-    if not bib_database or not bib_database.entries:
-        print("Error: no entries found in the database.")
-        return
-
-    for entry in bib_database.entries:
-        if 'keywords' in entry:
-            entry_keywords = map(
-                str.lower,
-                map(str.strip, entry['keywords'].replace('\\#', '#').split(','))
-            )
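-            # Keyword normalization example (illustrative): a Zotero keyword
-            # exported as '\#5:Zeitschriftenartikel:e-learning ' becomes
-            # '#5:zeitschriftenartikel:e-learning' after the
-            # replace/strip/lower pipeline and then substring-matches the
-            # corresponding tag in tags_to_search.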
-            for keyword in entry_keywords:
-                for tag in tags_to_search:
-                    if tag in keyword:
-                        tag_counts[tag] += 1
-
-    # Prepare the data for the visualization
-    data = [
-        {'Tag': tag, 'Count': count, 'Type': tag.split(':')[1].lower()}
-        for tag, count in tag_counts.items()
-        if count > 0
-    ]
-
-    if not data:
-        print("Warning: no tags found that match the search criteria.")
-        return
-
-    # Colour mapping
-    color_map = {
-        'zeitschriftenartikel': colors['primaryLine'],
-        'konferenz-paper': colors['secondaryLine'],
-        'buch': colors['depthArea'],
-        'buchteil': colors['brightArea'],
-        'bericht': colors['accent']
-    }
-
-    # Create the visualization
-    total_count = sum(tag_counts.values())
-    fig = px.bar(
-        data,
-        x='Tag',
-        y='Count',
-        title=f'Häufigkeit der Suchbegriffe in der Literaturanalyse (n={total_count}, Stand: {current_date})',
-        labels={'Tag': 'Tag', 'Count': 'Anzahl der Vorkommen'},
-        color='Type',
-        color_discrete_map=color_map,
-        text_auto=True
-    )
-
-    # Adjust the layout
-    fig.update_layout(
-        plot_bgcolor=colors['background'],
-        paper_bgcolor=colors['background'],
-        font=dict(color=colors['white']),
-        margin=dict(l=0, r=0, t=40, b=40),
-        autosize=True
-    )
-
-    fig.update_traces(
-        marker_line_color=colors['white'],
-        marker_line_width=1.5
-    )
-
-    fig.show(config={"responsive": True})
-    export_visualize_tags(fig)
-
-# Visualization 3: index term frequency
-def visualize_index(bib_database):
-    index_terms = [
-        'Lernsystemarchitektur',
-        'Bildungstheorien',
-        'Lehr- und Lerneffektivität',
-        'Kollaboratives Lernen',
-        'Bewertungsmethoden',
-        'Technologieintegration',
-        'Datenschutz und IT-Sicherheit',
-        'Systemanpassung',
-        'Krisenreaktion im Bildungsbereich',
-        'Forschungsansätze'
-    ]
-
-    index_counts = defaultdict(int)
-    for entry in bib_database.entries:
-        if 'keywords' in entry:
-            entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(','))))
-            for index_term in index_terms:
-                if index_term.lower() in entry_keywords:
-                    index_counts[index_term] += 1
-
-    index_data = [{'Index': index, 'Count': count} for index, count in index_counts.items()]
-    index_data = sorted(index_data, key=lambda x: x['Count'], reverse=True)
-
-    total_count = sum(index_counts.values())
-    print(f"Index term frequency (total: {total_count}):")
-    print(tabulate(index_data, headers="keys", tablefmt="grid"))
-
-    fig = px.bar(index_data, x='Index', y='Count', title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})', labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
-
-    fig.update_layout(
-        plot_bgcolor=colors['background'],
-        paper_bgcolor=colors['background'],
-        font=dict(color=colors['white']),
-        margin=dict(l=0, r=0, t=40, b=40),
-        autosize=True
-    )
-
-    fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5)
-
-    fig.show(config={"responsive": True})
-    export_visualize_index(fig)
-
-# Visualization 4: research sub-question frequency
-def visualize_research_questions(bib_database):
-    research_questions = {
-        'promotion:fu1': 'Akzeptanz und Nützlichkeit (FU1)',
-        'promotion:fu2a': 'Effekt für Lernende (FU2a)',
-        'promotion:fu2b': 'Effekt-Faktoren für Lehrende (FU2b)',
-        'promotion:fu3': 'Konzeption und Merkmale (FU3)',
-        'promotion:fu4a': 'Bildungswissenschaftliche Mechanismen (FU4a)',
-        'promotion:fu4b': 'Technisch-gestalterische Mechanismen (FU4b)',
-        'promotion:fu5': 'Möglichkeiten und Grenzen (FU5)',
-        'promotion:fu6': 'Beurteilung als Kompetenzerwerbssystem (FU6)',
-        'promotion:fu7': 'Inputs und Strategien (FU7)'
-    }
-
-    rq_counts = defaultdict(int)
-    for entry in bib_database.entries:
-        if 'keywords' in entry:
-            entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(','))))
-            for keyword in entry_keywords:
-                if keyword in research_questions:
-                    rq_counts[keyword] += 1
-
-    rq_data = [{'Research_Question': research_questions[keyword], 'Count': count} for keyword, count in rq_counts.items()]
-    rq_data = sorted(rq_data, key=lambda x: x['Count'], reverse=True)
-
-    rq_data_df = pd.DataFrame(rq_data)
-
-    total_count = rq_data_df['Count'].sum()
-    print(f"Research sub-question frequency (total: {total_count}):")
-    print(tabulate(rq_data, headers="keys", tablefmt="grid"))
-
-    fig = px.bar(rq_data_df, x='Research_Question', y='Count', title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})', labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
-
-    fig.update_layout(
-        plot_bgcolor=colors['background'],
-        paper_bgcolor=colors['background'],
-        font=dict(color=colors['white']),
-        margin=dict(l=0, r=0, t=40, b=40),
-        autosize=True
-    )
-
-    fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5)
-
-    fig.show(config={"responsive": True})
-    export_visualize_research_questions(fig)
-
-# Visualization 5: frequency of specific categories
-def visualize_categories(bib_database):
-    categories = {
-        'promotion:argumentation': 'Argumentation',
-        'promotion:kerngedanke': 'Kerngedanke',
-        'promotion:weiterführung': 'Weiterführung',
-        'promotion:schlussfolgerung': 'Schlussfolgerung'
-    }
-
-    cat_counts = defaultdict(int)
-    for entry in bib_database.entries:
-        if 'keywords' in entry:
-            entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(','))))
-            for keyword in entry_keywords:
-                if keyword in categories:
-                    cat_counts[keyword] += 1
-
-    cat_data = [{'Category': categories[keyword], 'Count': count} for keyword, count in cat_counts.items()]
-    cat_data = sorted(cat_data, key=lambda x: x['Count'], reverse=True)
-
-    cat_data_df = pd.DataFrame(cat_data)
-
-    total_count = cat_data_df['Count'].sum()
-    print(f"Category frequency (total: {total_count}):")
-    print(tabulate(cat_data, headers="keys", tablefmt="grid"))
-
-    fig = px.bar(cat_data_df, x='Category', y='Count', title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})', labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
-
-    fig.update_layout(
-        plot_bgcolor=colors['background'],
-        paper_bgcolor=colors['background'],
-        font=dict(color=colors['white']),
-        margin=dict(l=0, r=0, t=40, b=40),
-        autosize=True
-    )
-
-    fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5)
-
-    fig.show(config={"responsive": True})
-    export_visualize_categories(fig)
-
-# Time series analysis of the publications
-def extract_year_from_entry(entry):
-    year_str = entry.get('year', '').strip()
-    if not year_str:
-        return None
-    try:
-        matches = re.findall(r'\b(19[0-9]{2}|20[0-9]{2})\b', year_str)
-        years = [int(y) for y in matches if 1900 <= int(y) <= datetime.now().year + 1]
-        return min(years) if years else None
-    except Exception as e:
-        print(f"⚠️ Error for year value '{year_str}': {e}")
-        return None
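-# Quick sanity checks: the extractor picks the earliest plausible year in the
-# field and returns None when no four-digit year is present.
-assert extract_year_from_entry({'year': '2020-05-01'}) == 2020
-assert extract_year_from_entry({'year': '2021/2022'}) == 2021
-assert extract_year_from_entry({'year': 'n.d.'}) is None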
-
-def visualize_time_series(bib_database):
-    publication_years = []
-
-    for entry in bib_database.entries:
-        year = extract_year_from_entry(entry)
-        if year is not None:
-            publication_years.append(year)
-
-    if publication_years:
-        year_counts = Counter(publication_years)
-        df = pd.DataFrame(year_counts.items(), columns=['Year', 'Count']).sort_values('Year')
-
-        fig = px.line(
-            df,
-            x='Year',
-            y='Count',
-            title=f'Jährliche Veröffentlichungen in der Literaturanalyse (n={sum(year_counts.values())}, Stand: {current_date})',
-            labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'}
-        )
-
-        fig.update_layout(
-            plot_bgcolor=colors['background'],
-            paper_bgcolor=colors['background'],
-            font=dict(color=colors['white']),
-            xaxis=dict(
-                tickmode='linear',
-                dtick=2,
-                tick0=min(publication_years)
-            ),
-            margin=dict(l=0, r=0, t=40, b=40),
-            autosize=True
-        )
-
-        fig.update_traces(line=dict(color=colors['secondaryLine'], width=3))
-        fig.show(config={"responsive": True})
-        export_visualize_time_series(fig)
-    else:
-        print("No valid publication years found.")
-
-# Top authors by number of works
-def visualize_top_authors(bib_database):
-    top_n = 25  # number of top authors to display
-    author_counts = defaultdict(int)
-    for entry in bib_database.entries:
-        if 'author' in entry and entry['author'].strip():
-            authors = [a.strip() for a in entry['author'].split(' and ') if a.strip()]
-            for author in authors:
-                author_counts[author] += 1
-
-    top_authors = Counter(author_counts).most_common(top_n)
-    if top_authors:
-        df = pd.DataFrame(top_authors, columns=['Author', 'Count'])
-
-        fig = px.bar(df, x='Author', y='Count', title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={sum(author_counts.values())}, Stand: {current_date})', labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'}, text_auto=True)
-        fig.update_layout(
-            plot_bgcolor=colors['background'],
-            paper_bgcolor=colors['background'],
-            font=dict(color=colors['white']),
-            margin=dict(l=0, r=0, t=40, b=40),
-            autosize=True
-        )
-        fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5)
-
-        fig.show(config={"responsive": True})
-        export_visualize_top_authors(fig)
-    else:
-        print("No authors found.")
-
-# Top titles by number of works
-def normalize_title(title):
-    # Remove punctuation and standardize to lower case
-    title = title.lower().translate(str.maketrans('', '', ",.!?\"'()[]{}:;"))
-    # Collapse whitespace so titles differing only in spacing merge
-    title = " ".join(title.split())
-    # Remove common filler phrases that do not help distinguish titles
-    common_phrases = ['eine studie', 'untersuchung der', 'analyse von']
-    for phrase in common_phrases:
-        title = title.replace(phrase, '')
-    return title.strip()
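-# Normalization examples (illustrative):
-assert normalize_title('Digital Learning: A Study!') == 'digital learning a study'
-assert normalize_title('"E-Learning" (2nd ed.)') == 'e-learning 2nd ed'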
-
-def visualize_top_publications(bib_database):
-    top_n = 25  # number of top publications to display
-    publication_counts = defaultdict(int)
-
-    # Placeholder titles that should never be counted
-    invalid_titles = {"pdf", "no title found", "published entry", ""}
-
-    for entry in bib_database.entries:
-        title = normalize_title(entry.get('title', ''))
-        if title not in invalid_titles and len(title) > 5:
-            publication_counts[title] += 1
-
-    if not publication_counts:
-        print("No publications found.")
-        return
-
-    top_publications = sorted(publication_counts.items(), key=lambda x: x[1], reverse=True)[:top_n]
-    publication_data = [{'Title': title[:50] + '...' if len(title) > 50 else title, 'Count': count} for title, count in top_publications]
-
-    df = pd.DataFrame(publication_data)
-
-    fig = px.bar(df, x='Title', y='Count', title=f'Häufig zitierte Publikationen in der Analyse (Top {top_n}, n={sum(publication_counts.values())}, Stand: {current_date})', labels={'Title': 'Titel', 'Count': 'Anzahl der Nennungen'})
-
-    fig.update_layout(
-        plot_bgcolor=colors['background'],
-        paper_bgcolor=colors['background'],
-        font=dict(color=colors['white']),
-        xaxis_tickangle=-45,
-        margin=dict(l=0, r=0, t=40, b=40),
-        autosize=True
-    )
-
-    fig.update_traces(marker_color=colors['primaryLine'], marker_line_color=colors['white'], marker_line_width=1.5)
-
-    fig.show(config={"responsive": True})
-    export_visualize_top_publications(fig)
-
-##########
-
-# Prepare the data for the path diagram
-def prepare_path_data(bib_database):
-    research_questions = {
-        'promotion:fu1': 'Akzeptanz und Nützlichkeit (FU1)',
-        'promotion:fu2a': 'Effekt für Lernende (FU2a)',
-        'promotion:fu2b': 'Effekt-Faktoren für Lehrende (FU2b)',
-        'promotion:fu3': 'Konzeption und Merkmale (FU3)',
-        'promotion:fu4a': 'Bildungswissenschaftliche Mechanismen (FU4a)',
-        'promotion:fu4b': 'Technisch-gestalterische Mechanismen (FU4b)',
-        'promotion:fu5': 'Möglichkeiten und Grenzen (FU5)',
-        'promotion:fu6': 'Beurteilung als Kompetenzerwerbssystem (FU6)',
-        'promotion:fu7': 'Inputs und Strategien (FU7)'
-    }
-
-    categories = {
-        'promotion:argumentation': 'Argumentation',
-        'promotion:kerngedanke': 'Kerngedanke',
-        'promotion:weiterführung': 'Weiterführung',
-        'promotion:schlussfolgerung': 'Schlussfolgerung'
-    }
-
-    index_terms = [
-        'Lernsystemarchitektur',
-        'Bildungstheorien',
-        'Lehr- und Lerneffektivität',
-        'Kollaboratives Lernen',
-        'Bewertungsmethoden',
-        'Technologieintegration',
-        'Datenschutz und IT-Sicherheit',
-        'Systemanpassung',
-        'Krisenreaktion im Bildungsbereich',
-        'Forschungsansätze'
-    ]
-
-    # Note: kept from an earlier version; not used below
-    entry_types = [
-        'Zeitschriftenartikel',
-        'Buch',
-        'Buchteil',
-        'Bericht',
-        'Konferenz-Paper'
-    ]
-
-    data = []
-
-    for entry in bib_database.entries:
-        entry_data = {
-            'FU': None,
-            'Category': None,
-            'Index': None,
-            'Type': entry.get('ENTRYTYPE', '').lower()
-        }
-
-        if 'keywords' in entry:
-            entry_keywords = list(map(str.lower, map(str.strip, entry['keywords'].replace('\\#', '#').split(','))))
-
-            for key, value in research_questions.items():
-                if key in entry_keywords:
-                    entry_data['FU'] = value
-
-            for key, value in categories.items():
-                if key in entry_keywords:
-                    entry_data['Category'] = value
-
-            for index_term in index_terms:
-                if index_term.lower() in entry_keywords:
-                    entry_data['Index'] = index_term
-
-        # Keep only entries for which all four dimensions are known
-        if all(value is not None for value in entry_data.values()):
-            data.append(entry_data)
-
-    return data
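-# A row that survives the completeness filter looks like this
-# (illustrative values):
-#
-#     {'FU': 'Konzeption und Merkmale (FU3)', 'Category': 'Kerngedanke',
-#      'Index': 'Technologieintegration', 'Type': 'journalarticle'}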
-
-# Create the path diagram
-def create_path_diagram(data):
-    labels = []
-    sources = []
-    targets = []
-    values = []
-    # German BibTeX type names; Zotero type names such as 'journalarticle'
-    # simply fall back to the default colour below.
-    color_map = {
-        'zeitschriftenartikel': colors['primaryLine'],
-        'konferenz-paper': colors['secondaryLine'],
-        'buch': colors['depthArea'],
-        'buchteil': colors['brightArea'],
-        'bericht': colors['accent']
-    }
-
-    def add_to_labels(label):
-        if label not in labels:
-            labels.append(label)
-        return labels.index(label)
-
-    for entry in data:
-        fu_idx = add_to_labels(entry['FU'])
-        category_idx = add_to_labels(entry['Category'])
-        index_idx = add_to_labels(entry['Index'])
-        type_idx = add_to_labels(entry['Type'])
-
-        # Three links per entry: FU -> category -> index -> type
-        sources.extend([fu_idx, category_idx, index_idx])
-        targets.extend([category_idx, index_idx, type_idx])
-        values.extend([1, 1, 1])
-
-    node_colors = [color_map.get(label, colors['primaryLine']) for label in labels]
-
-    fig = go.Figure(data=[go.Sankey(
-        node=dict(
-            pad=15,
-            thickness=20,
-            line=dict(color=colors['white'], width=0.5),
-            label=labels,
-            color=node_colors
-        ),
-        link=dict(
-            source=sources,
-            target=targets,
-            value=values
-        )
-    )])
-
-    fig.update_layout(
-        title_text=f'Kategorischer Analysepfad der Literatur (n={len(data)}, Stand: {current_date})',
-        font=dict(size=10, color=colors['white']),
-        plot_bgcolor=colors['background'],
-        paper_bgcolor=colors['background']
-    )
-
-    fig.show()
-    export_create_path_diagram(fig)
-
-#############
-
-def create_sankey_diagram(bib_database):
-    def extract_year(entry):
-        """Extract a valid four-digit year from an entry's `year` field."""
-        year_str = entry.get('year', '').strip()
-        year_match = re.search(r'\b\d{4}\b', year_str)
-        if year_match:
-            return int(year_match.group())
-        print(f"Warning: entry with invalid year value skipped: {year_str}")
-        return None
-
-    current_year = datetime.now().year
-    filtered_entries = [
-        entry for entry in bib_database.entries
-        if 'promotion:literaturanalyse' in entry.get('keywords', '').lower()
-    ]
-
-    # The SQLite loader returns Zotero type names ('journalArticle',
-    # 'thesis'); the earlier BibTeX export used 'article'/'phdthesis'.
-    # Accept both spellings so the funnel does not silently drop to zero.
-    quality_types = {'article', 'phdthesis', 'journalarticle', 'thesis'}
-
-    def passes_quality(entry):
-        return entry.get('ENTRYTYPE', '').lower() in quality_types
-
-    def passes_relevance(entry):
-        return passes_quality(entry) and any(
-            rq in entry.get('keywords', '').lower()
-            for rq in ['promotion:fu3', 'promotion:kerngedanke']
-        )
-
-    def passes_theme(entry):
-        return passes_relevance(entry) and any(
-            kw in entry.get('keywords', '').lower()
-            for kw in ['digital', 'learning']
-        )
-
-    initial_sources = len(filtered_entries)
-    screened_sources = initial_sources  # already filtered above
-    quality_sources = sum(1 for entry in filtered_entries if passes_quality(entry))
-    relevance_sources = sum(1 for entry in filtered_entries if passes_relevance(entry))
-    thematic_sources = sum(1 for entry in filtered_entries if passes_theme(entry))
-    recent_sources = sum(
-        1 for entry in filtered_entries
-        if passes_theme(entry)
-        and (year := extract_year(entry)) and year >= current_year - 5
-    )
-    classic_sources = sum(
-        1 for entry in filtered_entries
-        if passes_theme(entry)
-        and (year := extract_year(entry)) and year < current_year - 5
-        and 'classic' in entry.get('keywords', '').lower()
-    )
-    selected_sources = recent_sources + classic_sources
-
-    # Calculate the sample size
-    sample_size = calculate_sample_size(initial_sources)
-
-    # Define the phases and links of the selection funnel
-    phases = [
-        "Identifizierte Quellen",
-        "Nach Screening (Literaturanalyse-Markierung)",
-        "Nach Qualitätsprüfung (Artikel und Dissertationen)",
-        "Nach Relevanzprüfung (FU3 und Kerngedanken)",
-        "Nach thematischer Prüfung (Digital & Learning)",
-        "Aktuelle Forschung (letzte 5 Jahre)",
-        "Klassische Werke",
-        "Ausgewählte Quellen (Endauswahl)"
-    ]
-
-    sources = [0, 1, 2, 3, 4, 4, 4]
-    targets = [1, 2, 3, 4, 5, 6, 7]
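-    # go.Sankey links are parallel index arrays into the node list: the first
-    # link (source=0, target=1) draws a band from 'Identifizierte Quellen' to
-    # 'Nach Screening …' whose width is the first entry of `values` below,
-    # and so on for the remaining pairs.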
-    values = [
-        screened_sources,
-        quality_sources,
-        relevance_sources,
-        thematic_sources,
-        recent_sources,
-        classic_sources,
-        selected_sources
-    ]
-
-    # Percentages for the node labels (each relative to the previous phase)
-    percentages = [
-        "100.0%",  # starting value
-        f"{screened_sources / initial_sources * 100:.1f}%" if initial_sources > 0 else "0.0%",
-        f"{quality_sources / screened_sources * 100:.1f}%" if screened_sources > 0 else "0.0%",
-        f"{relevance_sources / quality_sources * 100:.1f}%" if quality_sources > 0 else "0.0%",
-        f"{thematic_sources / relevance_sources * 100:.1f}%" if relevance_sources > 0 else "0.0%",
-        f"{recent_sources / thematic_sources * 100:.1f}%" if thematic_sources > 0 else "0.0%",
-        f"{classic_sources / thematic_sources * 100:.1f}%" if thematic_sources > 0 else "0.0%",
-        f"{selected_sources / (recent_sources + classic_sources) * 100:.1f}%" if (recent_sources + classic_sources) > 0 else "0.0%"
-    ]
-
-    # Append the percentages to the node labels
-    node_labels = [f"{ph} ({pct})" for ph, pct in zip(phases, percentages)]
-
-    # Colours for the individual phases
-    node_colors = [
-        colors['primaryLine'],        # identified sources
-        colors['secondaryLine'],      # after screening
-        colors['brightArea'],         # after quality check
-        colors['depthArea'],          # after relevance check
-        colors['positiveHighlight'],  # after thematic check
-        colors['negativeHighlight'],  # recent research
-        colors['accent'],             # classic works
-        colors['positiveHighlight']   # selected sources
-    ]
-
-    # Create the Sankey diagram
-    fig = go.Figure(go.Sankey(
-        node=dict(
-            pad=15,
-            thickness=20,
-            line=dict(color="black", width=0.5),
-            label=node_labels,
-            color=node_colors
-        ),
-        link=dict(
-            source=sources,
-            target=targets,
-            value=values,
-            hoverinfo='all',  # show detailed info on mouseover
-            color=colors['accent']
-        )
-    ))
-
-    # Adjust the layout
-    fig.update_layout(
-        title_text=f"Flussdiagramm der Literaturselektion (Stichprobe: n={sample_size}, Stand: {current_date})",
-        font_size=12,  # larger font for better readability
-        plot_bgcolor=colors['background'],
-        paper_bgcolor=colors['background'],
-        font=dict(color=colors['white'])
-    )
-
-    fig.show()
-    export_create_sankey_diagram(fig)
-
-##########
-
-def visualize_sources_status(bib_database):
-    """
-    Visualize the status of analyzed and not-yet-analyzed sources per search folder.
-    """
-    search_folder_tags = [
-        "#1:zeitschriftenartikel:learning:management:system",
-        "#2:zeitschriftenartikel:online:lernplattform",
-        "#3:zeitschriftenartikel:online:lernumgebung",
-        "#4:zeitschriftenartikel:mooc",
-        "#5:zeitschriftenartikel:e-learning",
-        "#6:zeitschriftenartikel:bildung:technologie",
-        "#7:zeitschriftenartikel:digital:medien",
-        "#8:zeitschriftenartikel:blended:learning",
-        "#9:zeitschriftenartikel:digital:lernen",
-        "#a:zeitschriftenartikel:online:lernen",
-        "#b:zeitschriftenartikel:online:learning",
-        "#0:zeitschriftenartikel:digital:learning",
-        "#1:konferenz-paper:learning:management:system",
-        "#2:konferenz-paper:online:lernplattform",
-        "#3:konferenz-paper:online:lernumgebung",
-        "#4:konferenz-paper:mooc",
-        "#5:konferenz-paper:e-learning",
-        "#6:konferenz-paper:bildung:technologie",
-        "#7:konferenz-paper:digital:medien",
-        "#8:konferenz-paper:blended:learning",
-        "#9:konferenz-paper:digital:lernen",
-        "#a:konferenz-paper:online:lernen",
-        "#b:konferenz-paper:online:learning",
-        "#0:konferenz-paper:digital:learning"
-    ]
-
-    category_tags = {"promotion:argumentation", "promotion:kerngedanke", "promotion:weiterführung", "promotion:schlussfolgerung"}
-    source_data = defaultdict(lambda: {'Identifiziert': 0, 'Analysiert': 0})
-
-    if not bib_database or not bib_database.entries:
-        print("Error: the database contains no entries.")
-        return
-
-    for entry in bib_database.entries:
-        keywords = entry.get('keywords', '')
-        if not keywords:
-            continue
-
-        entry_keywords = set(map(str.lower, map(str.strip, keywords.replace('\\#', '#').split(','))))
-
-        for tag in search_folder_tags:
-            if tag.lower() in entry_keywords:
-                source_data[tag]['Identifiziert'] += 1
-                # An entry counts as analyzed once it carries a category tag
-                if entry_keywords & category_tags:
-                    source_data[tag]['Analysiert'] += 1
-
-    table_data = []
-    analysiert_values = []
-    nicht_analysiert_values = []
-    analysiert_colors = []
-    tags = []
-
-    for tag, counts in sorted(source_data.items(), key=lambda item: item[1]['Identifiziert'], reverse=True):
-        stichprobe = calculate_sample_size(counts['Identifiziert'])
-        noch_zu_analysieren = counts['Identifiziert'] - counts['Analysiert']
-        noch_benoetigt_fuer_stichprobe = max(0, stichprobe - counts['Analysiert'])
-
-        table_data.append([
-            tag,
-            counts['Identifiziert'],
-            counts['Analysiert'],
-            noch_zu_analysieren,
-            stichprobe,
-            noch_benoetigt_fuer_stichprobe
-        ])
-
-        analysiert_values.append(counts['Analysiert'])
-        nicht_analysiert_values.append(noch_zu_analysieren)
-        tags.append(tag)
-
-        # Green once the sample size is reached, red otherwise
-        analysiert_colors.append(colors['positiveHighlight'] if counts['Analysiert'] >= stichprobe else colors['negativeHighlight'])
-
-    print(tabulate(
-        table_data,
-        headers=['Search folder', 'Identified', 'Analyzed', 'Not analyzed', 'Sample size', 'Still needed for sample'],
-        tablefmt='grid'
-    ))
-
-    fig = go.Figure()
-
-    fig.add_trace(go.Bar(
-        x=tags,
-        y=analysiert_values,
-        name='Analysiert',
-        marker=dict(color=analysiert_colors)
-    ))
-
-    fig.add_trace(go.Bar(
-        x=tags,
-        y=nicht_analysiert_values,
-        name='Nicht-Analysiert',
-        marker=dict(color=colors['primaryLine'])
-    ))
-
-    fig.update_layout(
-        barmode='stack',
-        title=f'Analyse- und Stichprobenstatus je Suchordner (n={sum(counts["Identifiziert"] for counts in source_data.values())}, Stand: {current_date})',
-        xaxis_title='Suchbegriffsordner',
-        yaxis_title='Anzahl der Quellen',
-        plot_bgcolor=colors['background'],
-        paper_bgcolor=colors['background'],
-        font=dict(color=colors['white']),
-        xaxis=dict(
-            categoryorder='array',
-            categoryarray=search_folder_tags
-        )
-    )
-
-    fig.show()
-    export_visualize_sources_status(fig)
-
-#############
-
-# Build a word cloud from the titles
-def create_wordcloud_from_titles(bib_database, stop_words):
-    titles = [entry.get('title', '') for entry in bib_database.entries]
-
-    # Count the words
-    word_counts = defaultdict(int)
-    for title in titles:
-        for word in title.split():
-            word = word.lower().strip(",.!?\"'()[]{}:;")
-            if word and word not in stop_words:
-                word_counts[word] += 1
-
-    # Create the word cloud
-    wordcloud = WordCloud(
-        width=800,
-        height=400,
-        background_color=colors['background'],
-        color_func=lambda *args, **kwargs: random.choice(word_colors)
-    ).generate_from_frequencies(word_counts)
-
-    # Display the word cloud
-    plt.figure(figsize=(10, 5))
-    plt.imshow(wordcloud, interpolation='bilinear')
-    plt.axis('off')
-    plt.title(f'Häufigkeitsanalyse von Titelwörtern (Stand: {current_date})', color=colors['white'])
-    plt.show()
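-# Note on color_func: WordCloud invokes it once per word with keyword
-# arguments such as word, font_size, position and random_state; the lambda
-# above ignores them all and simply draws a random colour from word_colors.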
-
-# Pie chart: distribution of the entries over primary, secondary and tertiary search term folders
-def visualize_search_term_distribution(bib_database):
-    """
-    Create a pie chart of how the entries are distributed over the primary,
-    secondary and tertiary search term folders.
-    """
-    hierarchy_counts = {
-        'Primär': 0,
-        'Sekundär': 0,
-        'Tertiär': 0
-    }
-
-    primary_folders = {
-        'S:01 Learning Management System',
-        'S:02 Online-Lernplattform',
-        'S:03 Online-Lernumgebung',
-        'S:05 eLearning',
-        'S:04 MOOC',
-        'S:06 Bildungstechnologie',
-        'S:07 Digitale Medien',
-        'S:08 Blended Learning',
-        'S:09 Digitales Lernen',
-        'S:12 Digital Learning',
-        'S:10 Online Lernen',
-        'S:11 Online Learning'
-    }
-
-    secondary_folders = {
-        'S:13 Berichte',
-        'S:14 Agiles Lernen',
-        'S:15 Learning Analytics'
-    }
-
-    tertiary_folders = {
-        'S:16 Dissertationen',
-        'S:17 ePortfolio'
-    }
-
-    conn = sqlite3.connect(ZOTERO_DB_PATH)
-    cursor = conn.cursor()
-
-    query = """
-    SELECT collections.collectionName, COUNT(DISTINCT items.itemID)
-    FROM items
-    JOIN collectionItems ON items.itemID = collectionItems.itemID
-    JOIN collections ON collectionItems.collectionID = collections.collectionID
-    WHERE collections.collectionName IN (
-        'S:01 Learning Management System',
-        'S:02 Online-Lernplattform',
-        'S:03 Online-Lernumgebung',
-        'S:05 eLearning',
-        'S:04 MOOC',
-        'S:06 Bildungstechnologie',
-        'S:07 Digitale Medien',
-        'S:08 Blended Learning',
-        'S:09 Digitales Lernen',
-        'S:12 Digital Learning',
-        'S:10 Online Lernen',
-        'S:11 Online Learning',
-        'S:13 Berichte',
-        'S:14 Agiles Lernen',
-        'S:15 Learning Analytics',
-        'S:16 Dissertationen',
-        'S:17 ePortfolio'
-    )
-    GROUP BY collections.collectionName
-    """
-
-    cursor.execute(query)
-    rows = cursor.fetchall()
-    conn.close()
-
-    for collection, count in rows:
-        if collection in primary_folders:
-            hierarchy_counts['Primär'] += count
-        elif collection in secondary_folders:
-            hierarchy_counts['Sekundär'] += count
-        elif collection in tertiary_folders:
-            hierarchy_counts['Tertiär'] += count
-
-    labels = list(hierarchy_counts.keys())
-    values = list(hierarchy_counts.values())
-    colors_pie = [colors['primaryLine'], colors['secondaryLine'], colors['brightArea']]
-
-    fig = go.Figure(data=[go.Pie(
-        labels=labels,
-        values=values,
-        marker=dict(colors=colors_pie),
-        textinfo='label+percent',
-        hoverinfo='label+value'
-    )])
-
-    fig.update_layout(
-        title='Verteilung der Suchbegriffsordner (Primär, Sekundär, Tertiär)',
-        plot_bgcolor=colors['background'],
-        paper_bgcolor=colors['background'],
-        font=dict(color=colors['white'])
-    )
-
-    fig.show()
-    export_visualize_search_term_distribution(fig)
-
-# Run the visualizations
-visualize_network(bib_database)
-visualize_tags(bib_database)
-visualize_index(bib_database)
-visualize_research_questions(bib_database)
-visualize_categories(bib_database)
-visualize_time_series(bib_database)
-visualize_top_authors(bib_database)
-visualize_top_publications(bib_database)
-data = prepare_path_data(bib_database)
-create_path_diagram(data)
-create_sankey_diagram(bib_database)
-visualize_sources_status(bib_database)
-create_wordcloud_from_titles(bib_database, stop_words)
-visualize_search_term_distribution(bib_database)
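-# The calls above run as import-time side effects; if this module were ever
-# imported rather than executed directly, wrapping them in a main() function
-# behind `if __name__ == '__main__':` would make them opt-in. A minimal
-# sketch:
-#
-#     if __name__ == '__main__':
-#         visualize_network(bib_database)
-#         ...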