Compare commits

..

1 Commits

6 changed files with 3847 additions and 139827 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,4 @@
import os import os
# Neue Exportfunktion: HTML in /tmp speichern, per SCP übertragen, PNG lokal speichern # Neue Exportfunktion: HTML in /tmp speichern, per SCP übertragen, PNG lokal speichern
@ -288,7 +289,6 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
abs_corr = abs(corr) abs_corr = abs(corr)
significance = 'Signifikant' if p_value < 0.05 else 'Nicht signifikant' significance = 'Signifikant' if p_value < 0.05 else 'Nicht signifikant'
hover_color = colors['brightArea'] if p_value < 0.05 else colors['depthArea'] hover_color = colors['brightArea'] if p_value < 0.05 else colors['depthArea']
cooccurrence_count = int(((df[x_term] == 1) & (df[y_term] == 1)).sum())
correlations.append({ correlations.append({
'x_term': x_term, 'x_term': x_term,
'y_term': y_term, 'y_term': y_term,
@ -297,8 +297,6 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
'p_value': p_value, 'p_value': p_value,
'significance': significance, 'significance': significance,
'hover_color': hover_color, 'hover_color': hover_color,
'n_observations': int(len(x_valid)),
'cooccurrence_count': cooccurrence_count,
'interpretation': ( 'interpretation': (
f"Die Korrelation zwischen '{x_term}' und '{y_term}' beträgt {corr:.2f}. " f"Die Korrelation zwischen '{x_term}' und '{y_term}' beträgt {corr:.2f}. "
f"p-Wert: {p_value:.3e} ({significance})" f"p-Wert: {p_value:.3e} ({significance})"
@ -364,25 +362,12 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
line=dict(width=1, color=colors['background']) line=dict(width=1, color=colors['background'])
), ),
hovertemplate=( hovertemplate=(
'<b>%{customdata[0]}</b> ↔ <b>%{customdata[1]}</b><br>' '<b>%{customdata[0]}</b><br>'
'Korrelation: %{marker.color:.2f}<br>' 'Korrelation: %{marker.color:.2f}<br>'
'p-Wert: %{customdata[3]:.3e}<br>' 'p-Wert: %{customdata[1]:.3e}<br>'
'Signifikanz: %{customdata[4]}<br>' 'Signifikanz: %{customdata[2]}'
'Stichprobe (n): %{customdata[5]}<br>'
'Gemeinsame Treffer: %{customdata[6]}<br>'
'%{customdata[7]}'
'<extra></extra>'
), ),
customdata=np.array(list(zip( customdata=correlation_df[['x_term', 'p_value', 'significance']].to_numpy()
correlation_df['x_term'],
correlation_df['y_term'],
correlation_df['correlation'],
correlation_df['p_value'],
correlation_df['significance'],
correlation_df['n_observations'],
correlation_df['cooccurrence_count'],
correlation_df['interpretation']
)), dtype=object)
) )
# Standardlayout verwenden und ggf. ergänzen, Margin dynamisch für Responsivität # Standardlayout verwenden und ggf. ergänzen, Margin dynamisch für Responsivität
@ -392,7 +377,6 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
x_title=x_label, x_title=x_label,
y_title=y_label y_title=y_label
), ),
hovermode='closest',
xaxis=dict( xaxis=dict(
tickangle=-45, tickangle=-45,
automargin=True automargin=True
@ -507,6 +491,11 @@ df['X_Dimension'] = df[[tag for tag in tags_to_search_processed if tag in df.col
df['Y_Dimension'] = df[[cat for cat in categories_processed if cat in df.columns]].sum(axis=1) df['Y_Dimension'] = df[[cat for cat in categories_processed if cat in df.columns]].sum(axis=1)
df['Z_Dimension'] = df[[rq for rq in research_questions_processed if rq in df.columns]].sum(axis=1) df['Z_Dimension'] = df[[rq for rq in research_questions_processed if rq in df.columns]].sum(axis=1)
# Clusteranalyse mit K-Means basierend auf den deduktiven Dimensionen
features = df[['X_Dimension', 'Y_Dimension', 'Z_Dimension']]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Clusteranalyse mit K-Means basierend auf den deduktiven Dimensionen # Clusteranalyse mit K-Means basierend auf den deduktiven Dimensionen
# Prüfung auf konstante deduktive Dimensionen # Prüfung auf konstante deduktive Dimensionen
if df[['X_Dimension', 'Y_Dimension', 'Z_Dimension']].nunique().eq(1).all(): if df[['X_Dimension', 'Y_Dimension', 'Z_Dimension']].nunique().eq(1).all():
@ -579,19 +568,6 @@ for cluster in cluster_means.index:
# Statische Cluster-Beschriftungen in den DataFrame einfügen # Statische Cluster-Beschriftungen in den DataFrame einfügen
df['Cluster_Label'] = df['KMeans_Cluster'].map(cluster_labels) df['Cluster_Label'] = df['KMeans_Cluster'].map(cluster_labels)
df['Cluster_Label'] = df['Cluster_Label'].fillna(df['KMeans_Cluster'])
# Farbzuordnung für die Clusterlabels aus den CI-Farben ableiten
fallback_color = cluster_colors.get("0", colors.get('primaryLine', '#1f77b4'))
color_map = {}
for cluster_key, label in cluster_labels.items():
base_color = cluster_colors.get(str(cluster_key), fallback_color)
color_map[label] = base_color
# Sicherstellen, dass auch eventuelle Restlabels (z.B. "Nicht gültig") erfasst werden
for label in df['Cluster_Label'].dropna().unique():
if label not in color_map:
color_map[label] = cluster_colors.get(str(label), fallback_color)
# Ausgabe der statischen Cluster-Beschriftungen # Ausgabe der statischen Cluster-Beschriftungen
print("Cluster-Beschriftungen (inhaltlich):") print("Cluster-Beschriftungen (inhaltlich):")
@ -608,7 +584,7 @@ fig_cluster = px.scatter_3d(
color='Cluster_Label', color='Cluster_Label',
size='Point_Size', size='Point_Size',
size_max=100, size_max=100,
color_discrete_map=color_map, color_discrete_sequence=list(cluster_colors.values()),
hover_data={ hover_data={
'Cluster_Label': True, 'Cluster_Label': True,
'X_Dimension': True, 'X_Dimension': True,
@ -626,6 +602,7 @@ fig_cluster = px.scatter_3d(
} }
) )
# Layout mit Standardlayout und konsistenten CI-konformen Ergänzungen # Layout mit Standardlayout und konsistenten CI-konformen Ergänzungen
layout_cluster = get_standard_layout( layout_cluster = get_standard_layout(
title=plot_title, title=plot_title,
@ -710,7 +687,6 @@ correlation_quality_results = {
"Forschungsunterfragen & Kategorien": analyze_correlation_quality(df, research_questions_processed, categories_processed), "Forschungsunterfragen & Kategorien": analyze_correlation_quality(df, research_questions_processed, categories_processed),
"Forschungsunterfragen & Suchbegriffe": analyze_correlation_quality(df, research_questions_processed, tags_to_search_processed), "Forschungsunterfragen & Suchbegriffe": analyze_correlation_quality(df, research_questions_processed, tags_to_search_processed),
"Forschungsunterfragen & Indizes": analyze_correlation_quality(df, research_questions_processed, index_terms_processed), "Forschungsunterfragen & Indizes": analyze_correlation_quality(df, research_questions_processed, index_terms_processed),
"Forschungsunterfragen & Forschungsunterfragen": analyze_correlation_quality(df, research_questions_processed, research_questions_processed),
"Indizes & Kategorien": analyze_correlation_quality(df, index_terms_processed, categories_processed), "Indizes & Kategorien": analyze_correlation_quality(df, index_terms_processed, categories_processed),
"Indizes & Suchbegriffe": analyze_correlation_quality(df, index_terms_processed, tags_to_search_processed), "Indizes & Suchbegriffe": analyze_correlation_quality(df, index_terms_processed, tags_to_search_processed),
"Suchbegriffe & Kategorien": analyze_correlation_quality(df, tags_to_search_processed, categories_processed), "Suchbegriffe & Kategorien": analyze_correlation_quality(df, tags_to_search_processed, categories_processed),
@ -777,17 +753,8 @@ def plot_average_correlation_plotly(summary_df):
) )
# PNG-Export ergänzen # PNG-Export ergänzen
png_path = os.path.join(export_path_png, f"{slugify('summary_plot_' + global_bib_filename.replace('.bib', ''))}.png") png_path = os.path.join(export_path_png, f"{slugify('summary_plot_' + global_bib_filename.replace('.bib', ''))}.png")
try:
fig.write_image(png_path, width=1200, height=800, scale=2) fig.write_image(png_path, width=1200, height=800, scale=2)
print(f"✅ PNG-Summary-Datei gespeichert unter: {png_path}") print(f"✅ PNG-Summary-Datei gespeichert unter: {png_path}")
except ValueError as err:
if "kaleido" in str(err).lower():
print("⚠️ PNG-Export übersprungen: Plotly benötigt das Paket 'kaleido'.")
print(" Installation (falls gewünscht): pip install -U kaleido")
else:
print(f"⚠️ PNG-Export fehlgeschlagen: {err}")
except Exception as err:
print(f"⚠️ PNG-Export fehlgeschlagen: {err}")
#============================ #============================
# Aufruf Alle möglichen bivariaten Korrelationen visualisieren # Aufruf Alle möglichen bivariaten Korrelationen visualisieren

View File

@ -1,6 +1,7 @@
from config_netzwerk import theme, export_fig_visual, bib_filename from config_netzwerk import theme, export_fig_visual, bib_filename
import os import os
# Clear the terminal # Clear the terminal
@ -59,14 +60,11 @@ from config_netzwerk import (
export_fig_visualize_sources_status, export_fig_visualize_sources_status,
export_fig_create_wordcloud_from_titles, export_fig_create_wordcloud_from_titles,
export_fig_visualize_languages, export_fig_visualize_languages,
export_fig_visualize_relevance_fu,
export_fig_visualize_relevance_categories,
export_fig_visualize_relevance_search_terms,
) )
from config_netzwerk import export_fig_png from config_netzwerk import export_fig_png
def export_figure_local(fig, name, flag): def export_figure_local(fig, name, flag, bib_filename=None):
from config_netzwerk import export_path_html, export_path_png from config_netzwerk import export_path_html, export_path_png
# Einmalige Definition von safe_filename am Anfang der Funktion # Einmalige Definition von safe_filename am Anfang der Funktion
safe_filename = prepare_figure_export(fig, name).replace(".html", "") safe_filename = prepare_figure_export(fig, name).replace(".html", "")
@ -102,23 +100,6 @@ word_colors = [
colors["negativeHighlight"] colors["negativeHighlight"]
] ]
# Relevanz-Stufen (1 = gering, 5 = sehr hoch)
RELEVANCE_LEVELS = [5, 4, 3, 2, 1]
RELEVANCE_LEVEL_LABELS = {
5: "Relevanz 5",
4: "Relevanz 4",
3: "Relevanz 3",
2: "Relevanz 2",
1: "Relevanz 1",
}
RELEVANCE_COLOR_MAP = {
"Relevanz 5": colors['positiveHighlight'],
"Relevanz 4": colors['accent'],
"Relevanz 3": colors['brightArea'],
"Relevanz 2": colors['depthArea'],
"Relevanz 1": colors['negativeHighlight'],
}
# Aktuelles Datum # Aktuelles Datum
current_date = datetime.now().strftime("%Y-%m-%d") current_date = datetime.now().strftime("%Y-%m-%d")
@ -139,13 +120,6 @@ with open('en_complete.txt', 'r', encoding='utf-8') as file:
# Kombinierte Stoppliste # Kombinierte Stoppliste
stop_words = stop_words_de.union(stop_words_en) stop_words = stop_words_de.union(stop_words_en)
# Hilfsfunktion: Relevanzstufe aus Keywords extrahieren
def extract_relevance_level(entry_keywords):
for level in RELEVANCE_LEVELS:
if f'promotion:relevanz:{level}' in entry_keywords:
return level
return None
# Funktion zur Berechnung der Stichprobengröße # Funktion zur Berechnung der Stichprobengröße
def calculate_sample_size(N, Z=1.96, p=0.5, e=0.05): def calculate_sample_size(N, Z=1.96, p=0.5, e=0.05):
n_0 = (Z**2 * p * (1 - p)) / (e**2) n_0 = (Z**2 * p * (1 - p)) / (e**2)
@ -175,10 +149,8 @@ def visualize_network(bib_database):
'Buch', 'Buch',
'Buchteil', 'Buchteil',
'Bericht', 'Bericht',
'Konferenz-Paper', 'Konferenz-Paper'
'Studienbrief'
] ]
tags_to_search = set() tags_to_search = set()
for number, type_ in product(numbers, types): for number, type_ in product(numbers, types):
search_term = search_terms[number] search_term = search_terms[number]
@ -314,15 +286,11 @@ def visualize_network(bib_database):
secondary_nodes = [] secondary_nodes = []
tertiary_nodes = [] tertiary_nodes = []
total_fundzahlen = sum(fundzahlen.values())
for node in G.nodes(): for node in G.nodes():
color = G.nodes[node]['color'] color = G.nodes[node]['color']
size = math.log(G.nodes[node].get('size', 10) + 1) * 10 size = math.log(G.nodes[node].get('size', 10) + 1) * 10
x, y = pos[node] x, y = pos[node]
count = fundzahlen.get(node, 0) hovertext = f"{node}<br>Anzahl Funde: {fundzahlen.get(node, 0)}"
percentage = (count / total_fundzahlen * 100) if total_fundzahlen else 0
hovertext = f"{node}<br>Anzahl Funde: {count}<br>Anteil: {percentage:.1f}%"
node_data = dict(x=x, y=y, text=node, size=size, hovertext=hovertext) node_data = dict(x=x, y=y, text=node, size=size, hovertext=hovertext)
if color == colors['primaryLine']: if color == colors['primaryLine']:
primary_nodes.append(node_data) primary_nodes.append(node_data)
@ -363,7 +331,7 @@ def visualize_network(bib_database):
fig = go.Figure(data=[edge_trace, primary_trace, secondary_trace, tertiary_trace]) fig = go.Figure(data=[edge_trace, primary_trace, secondary_trace, tertiary_trace])
layout = get_standard_layout( layout = get_standard_layout(
title=f"Suchbegriff-Netzwerk nach Relevanz und Semantik (n={total_fundzahlen}, Stand: {current_date})", title=f"Suchbegriff-Netzwerk nach Relevanz und Semantik (n={sum(fundzahlen.values())}, Stand: {current_date})",
x_title="Technologische Dimension", x_title="Technologische Dimension",
y_title="Pädagogische Dimension" y_title="Pädagogische Dimension"
) )
@ -375,7 +343,7 @@ def visualize_network(bib_database):
fig.update_layout(**layout) fig.update_layout(**layout)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_network", export_fig_visualize_network) export_figure_local(fig, "visualize_network", export_fig_visualize_network, bib_filename)
# Einfache Pfadanalyse nach dem Anzeigen der Figur # Einfache Pfadanalyse nach dem Anzeigen der Figur
if 'e-learning' in G and 'online:lernen' in G: if 'e-learning' in G and 'online:lernen' in G:
@ -410,11 +378,10 @@ def visualize_tags(bib_database):
'Buch', 'Buch',
'Buchteil', 'Buchteil',
'Bericht', 'Bericht',
'Konferenz-Paper', 'Konferenz-Paper'
'Studienbrief'
] ]
tags_to_search = set( tags_to_search = set(
f"#{number}:{type_}:{search_terms[number]}".lower() f"#{number}:{type_}:{search_terms[number]}"
for number, type_ in product(numbers, types) for number, type_ in product(numbers, types)
) )
@ -436,46 +403,36 @@ def visualize_tags(bib_database):
tag_counts[tag] += 1 tag_counts[tag] += 1
# Daten für Visualisierung aufbereiten # Daten für Visualisierung aufbereiten
data_rows = [ data = [
{ {'Tag': tag, 'Count': count, 'Type': tag.split(':')[1].lower()}
'Tag': tag,
'Count': count,
'Type': tag.split(':')[1].lower()
}
for tag, count in tag_counts.items() for tag, count in tag_counts.items()
if count > 0 if count > 0
] ]
if not data_rows: if not data:
print("Warnung: Keine Tags gefunden, die den Suchkriterien entsprechen.") print("Warnung: Keine Tags gefunden, die den Suchkriterien entsprechen.")
return return
df = pd.DataFrame(data_rows)
df['TypeLabel'] = df['Type'].str.replace('-', ' ').str.title()
total_count = df['Count'].sum()
df['Percentage'] = df['Count'] / total_count * 100 if total_count else 0
# Farbzuordnung # Farbzuordnung
color_map = { color_map = {
'zeitschriftenartikel': colors['primaryLine'], 'zeitschriftenartikel': colors['primaryLine'],
'konferenz-paper': colors['secondaryLine'], 'konferenz-paper': colors['secondaryLine'],
'buch': colors['depthArea'], 'buch': colors['depthArea'],
'buchteil': colors['brightArea'], 'buchteil': colors['brightArea'],
'bericht': colors['accent'], 'bericht': colors['accent']
'studienbrief': colors['positiveHighlight']
} }
# Visualisierung erstellen # Visualisierung erstellen
total_count = sum(tag_counts.values())
fig = px.bar( fig = px.bar(
df, data,
x='Tag', x='Tag',
y='Count', y='Count',
title=f'Häufigkeit der Suchbegriffe in der Literaturanalyse (n={total_count}, Stand: {current_date})', title=f'Häufigkeit der Suchbegriffe in der Literaturanalyse (n={total_count}, Stand: {current_date})',
labels={'Tag': 'Tag', 'Count': 'Anzahl der Vorkommen'}, labels={'Tag': 'Tag', 'Count': 'Anzahl der Vorkommen'},
color='Type', color='Type',
color_discrete_map=color_map, color_discrete_map=color_map,
text_auto=True, text_auto=True
custom_data=['TypeLabel', 'Percentage']
) )
layout = get_standard_layout( layout = get_standard_layout(
@ -491,17 +448,9 @@ def visualize_tags(bib_database):
layout["xaxis"]["automargin"] = True layout["xaxis"]["automargin"] = True
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Typ: %{customdata[0]}<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[1]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_tags", export_fig_visualize_tags) export_figure_local(fig, "visualize_tags", export_fig_visualize_tags, bib_filename)
# Visualisierung 3: Häufigkeit Index # Visualisierung 3: Häufigkeit Index
def visualize_index(bib_database): def visualize_index(bib_database):
@ -529,21 +478,11 @@ def visualize_index(bib_database):
index_data = [{'Index': index, 'Count': count} for index, count in index_counts.items()] index_data = [{'Index': index, 'Count': count} for index, count in index_counts.items()]
index_data = sorted(index_data, key=lambda x: x['Count'], reverse=True) index_data = sorted(index_data, key=lambda x: x['Count'], reverse=True)
index_df = pd.DataFrame(index_data) total_count = sum(index_counts.values())
total_count = index_df['Count'].sum()
index_df['Percentage'] = index_df['Count'] / total_count * 100 if total_count else 0
print(f"Häufigkeit Indizes (Gesamtanzahl: {total_count}):") print(f"Häufigkeit Indizes (Gesamtanzahl: {total_count}):")
print(tabulate(index_df.to_dict('records'), headers="keys", tablefmt="grid")) print(tabulate(index_data, headers="keys", tablefmt="grid"))
fig = px.bar( fig = px.bar(index_data, x='Index', y='Count', title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})', labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
index_df,
x='Index',
y='Count',
title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})',
labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'},
text_auto=True,
custom_data=['Percentage']
)
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
x_title='Index', x_title='Index',
@ -558,15 +497,8 @@ def visualize_index(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(marker=plot_styles['balken_primaryLine']) fig.update_traces(marker=plot_styles['balken_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_index", export_fig_visualize_index) export_figure_local(fig, "visualize_index", export_fig_visualize_index, bib_filename)
# Visualisierung 4: Häufigkeit Forschungsunterfragen # Visualisierung 4: Häufigkeit Forschungsunterfragen
def visualize_research_questions(bib_database): def visualize_research_questions(bib_database):
@ -593,22 +525,13 @@ def visualize_research_questions(bib_database):
rq_data = [{'Research_Question': research_questions[keyword], 'Count': count} for keyword, count in rq_counts.items()] rq_data = [{'Research_Question': research_questions[keyword], 'Count': count} for keyword, count in rq_counts.items()]
rq_data = sorted(rq_data, key=lambda x: x['Count'], reverse=True) rq_data = sorted(rq_data, key=lambda x: x['Count'], reverse=True)
rq_data_df = pd.DataFrame(rq_data, columns=['Research_Question', 'Count']) rq_data_df = pd.DataFrame(rq_data)
total_count = rq_data_df['Count'].sum() total_count = rq_data_df['Count'].sum()
rq_data_df['Percentage'] = rq_data_df['Count'] / total_count * 100 if total_count else 0
print(f"Häufigkeit Forschungsunterfragen (Gesamtanzahl: {total_count}):") print(f"Häufigkeit Forschungsunterfragen (Gesamtanzahl: {total_count}):")
print(tabulate(rq_data, headers="keys", tablefmt="grid")) print(tabulate(rq_data, headers="keys", tablefmt="grid"))
fig = px.bar( fig = px.bar(rq_data_df, x='Research_Question', y='Count', title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})', labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
rq_data_df,
x='Research_Question',
y='Count',
title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})',
labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'},
text_auto=True,
custom_data=['Percentage']
)
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
x_title='Forschungsunterfrage', x_title='Forschungsunterfrage',
@ -623,15 +546,8 @@ def visualize_research_questions(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(marker=plot_styles['balken_primaryLine']) fig.update_traces(marker=plot_styles['balken_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_research_questions", export_fig_visualize_research_questions) export_figure_local(fig, "visualize_research_questions", export_fig_visualize_research_questions, bib_filename)
# Visualisierung 5: Häufigkeit spezifischer Kategorien # Visualisierung 5: Häufigkeit spezifischer Kategorien
def visualize_categories(bib_database): def visualize_categories(bib_database):
@ -653,22 +569,13 @@ def visualize_categories(bib_database):
cat_data = [{'Category': categories[keyword], 'Count': count} for keyword, count in cat_counts.items()] cat_data = [{'Category': categories[keyword], 'Count': count} for keyword, count in cat_counts.items()]
cat_data = sorted(cat_data, key=lambda x: x['Count'], reverse=True) cat_data = sorted(cat_data, key=lambda x: x['Count'], reverse=True)
cat_data_df = pd.DataFrame(cat_data, columns=['Category', 'Count']) cat_data_df = pd.DataFrame(cat_data)
total_count = cat_data_df['Count'].sum() total_count = cat_data_df['Count'].sum()
cat_data_df['Percentage'] = cat_data_df['Count'] / total_count * 100 if total_count else 0
print(f"Häufigkeit Kategorien (Gesamtanzahl: {total_count}):") print(f"Häufigkeit Kategorien (Gesamtanzahl: {total_count}):")
print(tabulate(cat_data, headers="keys", tablefmt="grid")) print(tabulate(cat_data, headers="keys", tablefmt="grid"))
fig = px.bar( fig = px.bar(cat_data_df, x='Category', y='Count', title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})', labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
cat_data_df,
x='Category',
y='Count',
title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})',
labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'},
text_auto=True,
custom_data=['Percentage']
)
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
x_title='Kategorie', x_title='Kategorie',
@ -683,179 +590,8 @@ def visualize_categories(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(marker=plot_styles['balken_primaryLine']) fig.update_traces(marker=plot_styles['balken_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_categories", export_fig_visualize_categories) export_figure_local(fig, "visualize_categories", export_fig_visualize_categories, bib_filename)
# Relevanz-Auswertungen
def build_relevance_distribution(bib_database, tag_to_label):
records = []
for entry in bib_database.entries:
keywords_raw = entry.get('keywords', '')
if not keywords_raw:
continue
entry_keywords = set(map(str.lower, map(str.strip, keywords_raw.replace('\\#', '#').split(','))))
relevance_level = extract_relevance_level(entry_keywords)
if relevance_level is None:
continue
for tag, label in tag_to_label.items():
if tag in entry_keywords:
records.append({
'Kategorie': label,
'Relevanzstufe': RELEVANCE_LEVEL_LABELS[relevance_level]
})
if not records:
return pd.DataFrame()
df = pd.DataFrame(records)
df = (
df.groupby(['Kategorie', 'Relevanzstufe'])
.size()
.reset_index(name='Count')
)
df['Relevanzstufe'] = pd.Categorical(
df['Relevanzstufe'],
categories=[RELEVANCE_LEVEL_LABELS[level] for level in RELEVANCE_LEVELS],
ordered=True
)
return df.sort_values(['Kategorie', 'Relevanzstufe'])
def plot_relevance_distribution(df, title, x_title, export_flag, filename):
if df.empty:
print(f"⚠️ Keine Relevanzdaten verfügbar für: {title}")
return
total_count = df['Count'].sum()
df['Percentage'] = df['Count'] / total_count * 100 if total_count else 0
fig = px.bar(
df,
x='Kategorie',
y='Count',
color='Relevanzstufe',
color_discrete_map=RELEVANCE_COLOR_MAP,
category_orders={'Relevanzstufe': [RELEVANCE_LEVEL_LABELS[level] for level in RELEVANCE_LEVELS]},
title=f"{title} (n={total_count}, Stand: {current_date})",
labels={'Kategorie': x_title, 'Count': 'Anzahl', 'Relevanzstufe': 'Relevanzstufe'},
custom_data=['Relevanzstufe', 'Percentage']
)
layout = get_standard_layout(
title=fig.layout.title.text,
x_title=x_title,
y_title='Anzahl'
)
layout['barmode'] = 'stack'
layout['font'] = {"size": 14, "color": colors['text']}
layout['title'] = {"font": {"size": 16}}
layout['margin'] = dict(b=160, t=60, l=40, r=40)
layout['xaxis'] = layout.get('xaxis', {})
layout['xaxis']['tickangle'] = -45
layout['xaxis']['automargin'] = True
layout['autosize'] = True
fig.update_layout(**layout)
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Relevanzstufe: %{customdata[0]}<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[1]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True})
export_figure_local(fig, filename, export_flag)
def visualize_relevance_vs_research_questions(bib_database):
research_questions = {
'promotion:fu1': 'Akzeptanz und Nützlichkeit (FU1)',
'promotion:fu2a': 'Effekt für Lernende (FU2a)',
'promotion:fu2b': 'Effekt-Faktoren für Lehrende (FU2b)',
'promotion:fu3': 'Konzeption und Merkmale (FU3)',
'promotion:fu4a': 'Bildungswissenschaftliche Mechanismen (FU4a)',
'promotion:fu4b': 'Technisch-gestalterische Mechanismen (FU4b)',
'promotion:fu5': 'Möglichkeiten und Grenzen (FU5)',
'promotion:fu6': 'Beurteilung als Kompetenzerwerbssystem (FU6)',
'promotion:fu7': 'Inputs und Strategien (FU7)'
}
tag_to_label = {key.lower(): value for key, value in research_questions.items()}
df = build_relevance_distribution(bib_database, tag_to_label)
plot_relevance_distribution(
df,
"Relevanzverteilung nach Forschungsunterfragen",
"Forschungsunterfragen",
export_fig_visualize_relevance_fu,
"visualize_relevance_fu"
)
def visualize_relevance_vs_categories(bib_database):
categories = {
'promotion:argumentation': 'Argumentation',
'promotion:kerngedanke': 'Kerngedanke',
'promotion:weiterführung': 'Weiterführung',
'promotion:schlussfolgerung': 'Schlussfolgerung'
}
tag_to_label = {key.lower(): value for key, value in categories.items()}
df = build_relevance_distribution(bib_database, tag_to_label)
plot_relevance_distribution(
df,
"Relevanzverteilung nach Kategorien",
"Kategorien",
export_fig_visualize_relevance_categories,
"visualize_relevance_categories"
)
def visualize_relevance_vs_search_terms(bib_database):
search_terms = {
'0': 'digital:learning',
'1': 'learning:management:system',
'2': 'online:lernplattform',
'3': 'online:lernumgebung',
'4': 'mooc',
'5': 'e-learning',
'6': 'bildung:technologie',
'7': 'digital:medien',
'8': 'blended:learning',
'9': 'digital:lernen',
'a': 'online:lernen',
'b': 'online:learning'
}
types = [
'Zeitschriftenartikel',
'Buch',
'Buchteil',
'Bericht',
'Konferenz-Paper',
'Studienbrief'
]
tag_to_label = {}
for number, term in search_terms.items():
for type_ in types:
tag = f'#{number}:{type_}:{term}'.lower()
tag_to_label[tag] = f"#{number}:{term}"
df = build_relevance_distribution(bib_database, tag_to_label)
plot_relevance_distribution(
df,
"Relevanzverteilung nach Suchbegriffen",
"Suchbegriffe",
export_fig_visualize_relevance_search_terms,
"visualize_relevance_search_terms"
)
# Zeitreihenanalyse der Veröffentlichungen # Zeitreihenanalyse der Veröffentlichungen
def visualize_time_series(bib_database): def visualize_time_series(bib_database):
@ -878,16 +614,13 @@ def visualize_time_series(bib_database):
if publication_years: if publication_years:
year_counts = Counter(publication_years) year_counts = Counter(publication_years)
df = pd.DataFrame(year_counts.items(), columns=['Year', 'Count']).sort_values('Year') df = pd.DataFrame(year_counts.items(), columns=['Year', 'Count']).sort_values('Year')
total_publications = df['Count'].sum()
df['Percentage'] = df['Count'] / total_publications * 100 if total_publications else 0
fig = px.line( fig = px.line(
df, df,
x='Year', x='Year',
y='Count', y='Count',
title=f'Jährliche Veröffentlichungen in der Literaturanalyse (n={sum(year_counts.values())}, Stand: {current_date})', title=f'Jährliche Veröffentlichungen in der Literaturanalyse (n={sum(year_counts.values())}, Stand: {current_date})',
labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'}, labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'}
custom_data=['Percentage']
) )
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
@ -904,15 +637,8 @@ def visualize_time_series(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(line=plot_styles['linie_primaryLine']) fig.update_traces(line=plot_styles['linie_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_time_series", export_fig_visualize_time_series) export_figure_local(fig, "visualize_time_series", export_fig_visualize_time_series, bib_filename)
else: else:
print("Keine gültigen Veröffentlichungsjahre gefunden.") print("Keine gültigen Veröffentlichungsjahre gefunden.")
@ -929,18 +655,8 @@ def visualize_top_authors(bib_database):
top_authors = Counter(author_counts).most_common(top_n) top_authors = Counter(author_counts).most_common(top_n)
if top_authors: if top_authors:
df = pd.DataFrame(top_authors, columns=['Author', 'Count']) df = pd.DataFrame(top_authors, columns=['Author', 'Count'])
overall_total = sum(author_counts.values())
df['Percentage'] = df['Count'] / overall_total * 100 if overall_total else 0
fig = px.bar( fig = px.bar(df, x='Author', y='Count', title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={sum(author_counts.values())}, Stand: {current_date})', labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'}, text_auto=True)
df,
x='Author',
y='Count',
title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={overall_total}, Stand: {current_date})',
labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'},
text_auto=True,
custom_data=['Percentage']
)
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
x_title='Autor', x_title='Autor',
@ -955,18 +671,55 @@ def visualize_top_authors(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(marker=plot_styles['balken_primaryLine']) fig.update_traces(marker=plot_styles['balken_primaryLine'])
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[0]:.1f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_top_authors", export_fig_visualize_top_authors) export_figure_local(fig, "visualize_top_authors", export_fig_visualize_top_authors, bib_filename)
else: else:
print("Keine Autoren gefunden.") print("Keine Autoren gefunden.")
# Top Titel nach Anzahl der Werke
def normalize_title(title):
    """Normalize a publication title for duplicate detection.

    Lowercases the title, strips common punctuation, removes frequent
    German filler phrases that carry no distinguishing information, and
    collapses all runs of whitespace.

    Parameters:
        title (str): Raw title string from a BibTeX entry.

    Returns:
        str: The normalized title.
    """
    # Lowercase and drop punctuation in a single C-level pass.
    title = title.lower().translate(str.maketrans('', '', ",.!?\"'()[]{}:;"))
    # Remove common filler phrases that do not help distinguish titles.
    common_phrases = ['eine studie', 'untersuchung der', 'analyse von']
    for phrase in common_phrases:
        title = title.replace(phrase, '')
    # Collapse whitespace AFTER phrase removal: the original collapsed it
    # first, so removing an interior phrase left a double space behind
    # ("x eine studie y" -> "x  y"), splitting otherwise-identical titles.
    return " ".join(title.split())
def visualize_top_publications(bib_database):
    """Visualize the most frequently cited publications as a bar chart.

    Normalizes every entry title (see normalize_title), counts the
    occurrences, and renders the top 25 as a Plotly bar chart with long
    titles truncated for legible x-axis labels. The figure is shown and
    exported via export_figure_local.

    Parameters:
        bib_database: Parsed BibTeX database exposing an ``entries``
            list of dicts (bibtexparser-style).

    Returns:
        None. Prints a notice and skips plotting when no titles exist.
    """
    top_n = 25  # number of top publications to display
    publication_counts = defaultdict(int)
    for entry in bib_database.entries:
        if 'title' in entry:
            title = normalize_title(entry['title'])
            publication_counts[title] += 1
    top_publications = sorted(publication_counts.items(), key=lambda x: x[1], reverse=True)[:top_n]
    # Truncate long titles so the rotated x-axis labels stay readable.
    publication_data = [{'Title': title[:50] + '...' if len(title) > 50 else title, 'Count': count} for title, count in top_publications]
    # Guard against an empty bibliography: px.bar would raise on a
    # DataFrame without 'Title'/'Count' columns. Mirrors the guard style
    # of the sibling visualizations (e.g. "Keine Autoren gefunden.").
    if not publication_data:
        print("Keine Publikationen gefunden.")
        return
    df = pd.DataFrame(publication_data)
    fig = px.bar(df, x='Title', y='Count', title=f'Häufig zitierte Publikationen in der Analyse (Top {top_n}, n={sum(publication_counts.values())}, Stand: {current_date})', labels={'Title': 'Titel', 'Count': 'Anzahl der Nennungen'})
    layout = get_standard_layout(
        title=fig.layout.title.text,
        x_title='Titel',
        y_title='Anzahl der Nennungen'
    )
    layout["font"] = {"size": 14, "color": colors['text']}
    layout["title"] = {"font": {"size": 16}}
    # Large bottom margin leaves room for the -45° rotated tick labels.
    layout["margin"] = dict(b=160, t=60, l=40, r=40)
    layout["xaxis"] = layout.get("xaxis", {})
    layout["xaxis"]["tickangle"] = -45
    layout["xaxis"]["automargin"] = True
    layout["autosize"] = True
    fig.update_layout(**layout)
    fig.update_traces(marker=plot_styles['balken_primaryLine'])
    fig.show(config={"responsive": True})
    export_figure_local(fig, "visualize_top_publications", export_fig_visualize_top_publications, bib_filename)
########## ##########
# Daten vorbereiten # Daten vorbereiten
@ -1008,8 +761,7 @@ def prepare_path_data(bib_database):
'Buch', 'Buch',
'Buchteil', 'Buchteil',
'Bericht', 'Bericht',
'Konferenz-Paper', 'Konferenz-Paper'
'Studienbrief'
] ]
data = [] data = []
@ -1048,14 +800,12 @@ def create_path_diagram(data):
sources = [] sources = []
targets = [] targets = []
values = [] values = []
node_counts = Counter()
color_map = { color_map = {
'zeitschriftenartikel': colors['primaryLine'], 'zeitschriftenartikel': colors['primaryLine'],
'konferenz-paper': colors['secondaryLine'], 'konferenz-paper': colors['secondaryLine'],
'buch': colors['depthArea'], 'buch': colors['depthArea'],
'buchteil': colors['brightArea'], 'buchteil': colors['brightArea'],
'bericht': colors['accent'], 'bericht': colors['accent']
'studienbrief': colors['positiveHighlight']
} }
def add_to_labels(label): def add_to_labels(label):
@ -1072,19 +822,8 @@ def create_path_diagram(data):
sources.extend([fu_idx, category_idx, index_idx]) sources.extend([fu_idx, category_idx, index_idx])
targets.extend([category_idx, index_idx, type_idx]) targets.extend([category_idx, index_idx, type_idx])
values.extend([1, 1, 1]) values.extend([1, 1, 1])
node_counts.update([entry['FU'], entry['Category'], entry['Index'], entry['Type']])
node_colors = [color_map.get(label, colors['primaryLine']) for label in labels] node_colors = [color_map.get(label, colors['primaryLine']) for label in labels]
total_paths = len(data)
total_flows = sum(values)
node_percentages = [
node_counts.get(label, 0) / total_paths * 100 if total_paths else 0
for label in labels
]
link_percentages = [
value / total_flows * 100 if total_flows else 0
for value in values
]
fig = go.Figure(data=[go.Sankey( fig = go.Figure(data=[go.Sankey(
node=dict( node=dict(
@ -1092,24 +831,12 @@ def create_path_diagram(data):
thickness=20, thickness=20,
line=dict(color="black", width=0.5), line=dict(color="black", width=0.5),
label=labels, label=labels,
color=node_colors, color=node_colors
customdata=node_percentages,
hovertemplate=(
"%{label}<br>"
"Anzahl: %{value}<br>"
"Anteil der Pfade: %{customdata:.1f}%<extra></extra>"
)
), ),
link=dict( link=dict(
source=sources, source=sources,
target=targets, target=targets,
value=values, value=values
customdata=link_percentages,
hovertemplate=(
"%{source.label}%{target.label}<br>"
"Anzahl: %{value}<br>"
"Anteil der Verbindungen: %{customdata:.1f}%<extra></extra>"
)
) )
)]) )])
layout = get_standard_layout( layout = get_standard_layout(
@ -1123,7 +850,7 @@ def create_path_diagram(data):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "create_path_diagram", export_fig_create_path_diagram) export_figure_local(fig, "create_path_diagram", export_fig_create_path_diagram, bib_filename)
############# #############
@ -1227,54 +954,22 @@ def create_sankey_diagram(bib_database):
colors['positiveHighlight'] # Ausgewählte Quellen colors['positiveHighlight'] # Ausgewählte Quellen
] ]
node_values = [
initial_sources,
screened_sources,
quality_sources,
relevance_sources,
thematic_sources,
recent_sources,
classic_sources,
selected_sources
]
node_percentages = [
value / initial_sources * 100 if initial_sources else 0
for value in node_values
]
link_percentages = [
value / initial_sources * 100 if initial_sources else 0
for value in values
]
# Sankey-Diagramm erstellen # Sankey-Diagramm erstellen
node_config = { node_config = {
**plot_styles["sankey_node"], **plot_styles["sankey_node"],
"label": node_labels, "label": node_labels,
"color": node_colors, "color": node_colors
"customdata": node_percentages,
"hovertemplate": (
"%{label}<br>"
"Anzahl: %{value}<br>"
"Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>"
)
} }
# Remove any invalid 'font' key if present # Remove any invalid 'font' key if present
node_config.pop("font", None) node_config.pop("font", None)
link_config = {
**plot_styles["sankey_link"],
"source": sources,
"target": targets,
"value": values,
"customdata": link_percentages,
"hovertemplate": (
"%{source.label}%{target.label}<br>"
"Anzahl: %{value}<br>"
"Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>"
)
}
fig = go.Figure(go.Sankey( fig = go.Figure(go.Sankey(
node=node_config, node=node_config,
link=link_config link=dict(
**plot_styles["sankey_link"],
source=sources,
target=targets,
value=values
)
)) ))
# Layout anpassen # Layout anpassen
layout = get_standard_layout( layout = get_standard_layout(
@ -1288,7 +983,7 @@ def create_sankey_diagram(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "create_sankey_diagram", export_fig_create_sankey_diagram) export_figure_local(fig, "create_sankey_diagram", export_fig_create_sankey_diagram, bib_filename)
########## ##########
@ -1306,33 +1001,31 @@ def visualize_sources_status(bib_database):
""" """
Visualisiert den Status der analysierten und nicht analysierten Quellen pro Suchordner. Visualisiert den Status der analysierten und nicht analysierten Quellen pro Suchordner.
""" """
search_terms = {
'0': 'digital:learning',
'1': 'learning:management:system',
'2': 'online:lernplattform',
'3': 'online:lernumgebung',
'4': 'mooc',
'5': 'e-learning',
'6': 'bildung:technologie',
'7': 'digital:medien',
'8': 'blended:learning',
'9': 'digital:lernen',
'a': 'online:lernen',
'b': 'online:learning'
}
numbers_order = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b']
type_order = [
'Zeitschriftenartikel',
'Buch',
'Buchteil',
'Bericht',
'Konferenz-Paper',
'Studienbrief'
]
search_folder_tags = [ search_folder_tags = [
f"#{number}:{type_}:{search_terms[number]}".lower() "#1:zeitschriftenartikel:learning:management:system",
for type_ in type_order "#2:zeitschriftenartikel:online:lernplattform",
for number in numbers_order "#3:zeitschriftenartikel:online:lernumgebung",
"#4:zeitschriftenartikel:mooc",
"#5:zeitschriftenartikel:e-learning",
"#6:zeitschriftenartikel:bildung:technologie",
"#7:zeitschriftenartikel:digital:medien",
"#8:zeitschriftenartikel:blended:learning",
"#9:zeitschriftenartikel:digital:lernen",
"#a:zeitschriftenartikel:online:lernen",
"#b:zeitschriftenartikel:online:learning",
"#0:zeitschriftenartikel:digital:learning",
"#1:konferenz-paper:learning:management:system",
"#2:konferenz-paper:online:lernplattform",
"#3:konferenz-paper:online:lernumgebung",
"#4:konferenz-paper:mooc",
"#5:konferenz-paper:e-learning",
"#6:konferenz-paper:bildung:technologie",
"#7:konferenz-paper:digital:medien",
"#8:konferenz-paper:blended:learning",
"#9:konferenz-paper:digital:lernen",
"#a:konferenz-paper:online:lernen",
"#b:konferenz-paper:online:learning",
"#0:konferenz-paper:digital:learning"
] ]
category_tags = {"promotion:argumentation", "promotion:kerngedanke", "promotion:weiterführung", "promotion:schlussfolgerung"} category_tags = {"promotion:argumentation", "promotion:kerngedanke", "promotion:weiterführung", "promotion:schlussfolgerung"}
@ -1387,45 +1080,21 @@ def visualize_sources_status(bib_database):
tablefmt='grid' tablefmt='grid'
)) ))
total_identifiziert = sum(counts["Identifiziert"] for counts in source_data.values())
analysiert_percentages = [
value / total_identifiziert * 100 if total_identifiziert else 0
for value in analysiert_values
]
nicht_analysiert_percentages = [
value / total_identifiziert * 100 if total_identifiziert else 0
for value in nicht_analysiert_values
]
fig = go.Figure() fig = go.Figure()
fig.add_trace(go.Bar( fig.add_trace(go.Bar(
x=tags, x=tags,
y=analysiert_values, y=analysiert_values,
name='Analysiert', name='Analysiert',
marker=dict(color=analysiert_colors), marker=dict(color=analysiert_colors)
customdata=analysiert_percentages,
hovertemplate=(
"<b>%{x}</b><br>"
"Status: Analysiert<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata:.1f}%<extra></extra>"
)
)) ))
fig.add_trace(go.Bar( fig.add_trace(go.Bar(
x=tags, x=tags,
y=nicht_analysiert_values, y=nicht_analysiert_values,
name='Nicht-Analysiert', name='Nicht-Analysiert',
marker=plot_styles['balken_primaryLine'], marker=plot_styles['balken_primaryLine']
customdata=nicht_analysiert_percentages,
hovertemplate=(
"<b>%{x}</b><br>"
"Status: Nicht-Analysiert<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata:.1f}%<extra></extra>"
)
)) ))
layout = get_standard_layout( layout = get_standard_layout(
title=f'Analyse- und Stichprobenstatus je Suchordner (n={total_identifiziert}, Stand: {current_date})', title=f'Analyse- und Stichprobenstatus je Suchordner (n={sum(counts["Identifiziert"] for counts in source_data.values())}, Stand: {current_date})',
x_title='Suchbegriffsordner', x_title='Suchbegriffsordner',
y_title='Anzahl der Quellen' y_title='Anzahl der Quellen'
) )
@ -1442,7 +1111,7 @@ def visualize_sources_status(bib_database):
layout["autosize"] = True layout["autosize"] = True
fig.update_layout(**layout) fig.update_layout(**layout)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
export_figure_local(fig, "visualize_sources_status", export_fig_visualize_sources_status) export_figure_local(fig, "visualize_sources_status", export_fig_visualize_sources_status, bib_filename)
############# #############
@ -1528,8 +1197,8 @@ def visualize_languages(bib_database):
color='Gruppe', color='Gruppe',
color_discrete_map=color_discrete_map, color_discrete_map=color_discrete_map,
title=f'Sprachverteilung der analysierten Quellen (n={sum(norm_counts.values())}, Stand: {current_date})', title=f'Sprachverteilung der analysierten Quellen (n={sum(norm_counts.values())}, Stand: {current_date})',
barmode="stack", hover_data=["Sprache", "Gruppe", "Anzahl", "Anteil (%)"],
custom_data=['Gruppe', 'Anteil (%)'] barmode="stack"
) )
layout = get_standard_layout( layout = get_standard_layout(
@ -1544,18 +1213,10 @@ def visualize_languages(bib_database):
# Ergänzung: Y-Achse logarithmisch skalieren # Ergänzung: Y-Achse logarithmisch skalieren
layout["yaxis_type"] = "log" layout["yaxis_type"] = "log"
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Sprachgruppe: %{customdata[0]}<br>"
"Anzahl: %{y}<br>"
"Anteil: %{customdata[1]:.2f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
# Tabelle ausgeben # Tabelle ausgeben
print(tabulate(df.sort_values("Anzahl", ascending=False), headers="keys", tablefmt="grid", showindex=False)) print(tabulate(df.sort_values("Anzahl", ascending=False), headers="keys", tablefmt="grid", showindex=False))
export_figure_local(fig, "visualize_languages", export_fig_visualize_languages) export_figure_local(fig, "visualize_languages", export_fig_visualize_languages, bib_filename)
# Visualisierung der Verteilung von ENTRYTYPE innerhalb jeder Sprache # Visualisierung der Verteilung von ENTRYTYPE innerhalb jeder Sprache
def visualize_language_entrytypes(bib_database): def visualize_language_entrytypes(bib_database):
@ -1605,8 +1266,6 @@ def visualize_language_entrytypes(bib_database):
grouped.rename(columns={'ENTRYTYPE': 'Eintragstyp'}, inplace=True) grouped.rename(columns={'ENTRYTYPE': 'Eintragstyp'}, inplace=True)
# Anteil innerhalb Sprache (%) # Anteil innerhalb Sprache (%)
grouped["Anteil innerhalb Sprache (%)"] = grouped.groupby("Sprache")["Anzahl"].transform(lambda x: (x / x.sum() * 100).round(2)) grouped["Anteil innerhalb Sprache (%)"] = grouped.groupby("Sprache")["Anzahl"].transform(lambda x: (x / x.sum() * 100).round(2))
total_entrytypes = grouped['Anzahl'].sum()
grouped["Anteil Gesamt (%)"] = grouped['Anzahl'] / total_entrytypes * 100 if total_entrytypes else 0
# Mapping Eintragstyp zu Typgruppe # Mapping Eintragstyp zu Typgruppe
eintragstyp_gruppen = { eintragstyp_gruppen = {
@ -1643,8 +1302,7 @@ def visualize_language_entrytypes(bib_database):
barmode="group", barmode="group",
title=f'Verteilung der Eintragstypen pro Sprache (n={len(df)}, Stand: {current_date})', title=f'Verteilung der Eintragstypen pro Sprache (n={len(df)}, Stand: {current_date})',
text='Anzahl', text='Anzahl',
labels={'Sprache': 'Sprache', 'Eintragstyp': 'Eintragstyp', 'Anzahl': 'Anzahl', 'Typgruppe': 'Typgruppe'}, labels={'Sprache': 'Sprache', 'Eintragstyp': 'Eintragstyp', 'Anzahl': 'Anzahl', 'Typgruppe': 'Typgruppe'}
custom_data=['Eintragstyp', 'Typgruppe', 'Anteil Gesamt (%)', 'Anteil innerhalb Sprache (%)']
) )
layout = get_standard_layout( layout = get_standard_layout(
title=fig.layout.title.text, title=fig.layout.title.text,
@ -1658,19 +1316,9 @@ def visualize_language_entrytypes(bib_database):
# Ergänzung: Y-Achse logarithmisch skalieren # Ergänzung: Y-Achse logarithmisch skalieren
layout["yaxis_type"] = "log" layout["yaxis_type"] = "log"
fig.update_layout(**layout) fig.update_layout(**layout)
fig.update_traces(
hovertemplate=(
"<b>%{x}</b><br>"
"Eintragstyp: %{customdata[0]}<br>"
"Typgruppe: %{customdata[1]}<br>"
"Anzahl: %{y}<br>"
"Anteil gesamt: %{customdata[2]:.2f}%<br>"
"Anteil innerhalb Sprache: %{customdata[3]:.2f}%<extra></extra>"
)
)
fig.show(config={"responsive": True}) fig.show(config={"responsive": True})
print(tabulate(grouped.sort_values(["Sprache", "Eintragstyp"]), headers=["Sprache", "Eintragstyp", "Anzahl", "Anteil innerhalb Sprache (%)", "Typgruppe"], tablefmt="grid", showindex=False)) print(tabulate(grouped.sort_values(["Sprache", "Eintragstyp"]), headers=["Sprache", "Eintragstyp", "Anzahl", "Anteil innerhalb Sprache (%)", "Typgruppe"], tablefmt="grid", showindex=False))
export_figure_local(fig, "visualize_language_entrytypes", export_fig_visualize_languages) export_figure_local(fig, "visualize_language_entrytypes", export_fig_visualize_languages, bib_filename)
############# #############
@ -1713,11 +1361,9 @@ visualize_tags(bib_database)
visualize_index(bib_database) visualize_index(bib_database)
visualize_research_questions(bib_database) visualize_research_questions(bib_database)
visualize_categories(bib_database) visualize_categories(bib_database)
visualize_relevance_vs_research_questions(bib_database)
visualize_relevance_vs_categories(bib_database)
visualize_relevance_vs_search_terms(bib_database)
visualize_time_series(bib_database) visualize_time_series(bib_database)
visualize_top_authors(bib_database) visualize_top_authors(bib_database)
visualize_top_publications(bib_database)
data = prepare_path_data(bib_database) data = prepare_path_data(bib_database)
create_path_diagram(data) create_path_diagram(data)
create_sankey_diagram(bib_database) create_sankey_diagram(bib_database)

File diff suppressed because it is too large Load Diff

View File

@ -218,13 +218,14 @@ layout["legend"] = dict(
itemdoubleclick="toggle" itemdoubleclick="toggle"
) )
layout["yaxis3"] = dict( layout["yaxis3"] = dict(
title=dict(text="Abweichung (ΔSCₙ)", font=dict(color=colors["text"])), title="Abweichung (ΔSCₙ)",
overlaying="y", overlaying="y",
side="right", side="right",
showgrid=False, showgrid=False,
zeroline=True, zeroline=True,
zerolinewidth=2, zerolinewidth=2,
zerolinecolor='grey', zerolinecolor='grey',
titlefont=dict(color=colors["text"]),
tickfont=dict(color=colors["text"]), tickfont=dict(color=colors["text"]),
anchor="free", anchor="free",
position=1.0 position=1.0