Compare commits
11 Commits: 32639222c3...main

1abaf478cd
02ec176582
127fc78b81
835f58de3f
4e7e479304
7913d842f6
6f6475b7d0
1e9b84bae2
a2e7055e14
3a69f9d3ed
f7b41790b8

Bibliothek/cleaned_Literaturverzeichnis.bib: new file, 67354 lines (file diff suppressed because it is too large)
@@ -288,6 +288,7 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
 abs_corr = abs(corr)
 significance = 'Signifikant' if p_value < 0.05 else 'Nicht signifikant'
 hover_color = colors['brightArea'] if p_value < 0.05 else colors['depthArea']
+cooccurrence_count = int(((df[x_term] == 1) & (df[y_term] == 1)).sum())
 correlations.append({
 'x_term': x_term,
 'y_term': y_term,
@@ -296,6 +297,8 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
 'p_value': p_value,
 'significance': significance,
 'hover_color': hover_color,
+'n_observations': int(len(x_valid)),
+'cooccurrence_count': cooccurrence_count,
 'interpretation': (
 f"Die Korrelation zwischen '{x_term}' und '{y_term}' beträgt {corr:.2f}. "
 f"p-Wert: {p_value:.3e} ({significance})"
@@ -361,12 +364,25 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
 line=dict(width=1, color=colors['background'])
 ),
 hovertemplate=(
-'<b>%{customdata[0]}</b><br>'
+'<b>%{customdata[0]}</b> ↔ <b>%{customdata[1]}</b><br>'
 'Korrelation: %{marker.color:.2f}<br>'
-'p-Wert: %{customdata[1]:.3e}<br>'
-'Signifikanz: %{customdata[2]}'
+'p-Wert: %{customdata[3]:.3e}<br>'
+'Signifikanz: %{customdata[4]}<br>'
+'Stichprobe (n): %{customdata[5]}<br>'
+'Gemeinsame Treffer: %{customdata[6]}<br>'
+'%{customdata[7]}'
+'<extra></extra>'
 ),
-customdata=correlation_df[['x_term', 'p_value', 'significance']].to_numpy()
+customdata=np.array(list(zip(
+correlation_df['x_term'],
+correlation_df['y_term'],
+correlation_df['correlation'],
+correlation_df['p_value'],
+correlation_df['significance'],
+correlation_df['n_observations'],
+correlation_df['cooccurrence_count'],
+correlation_df['interpretation']
+)), dtype=object)
 )
 
 # Standardlayout verwenden und ggf. ergänzen, Margin dynamisch für Responsivität
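The reworked trace feeds eight positional columns into customdata and references them by index in the hovertemplate, so the zip order has to match the %{customdata[i]} placeholders exactly. A minimal, self-contained sketch of that pattern; the DataFrame contents below are invented for illustration, only the column order matters:

```python
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Invented correlation results shaped like correlation_df in the diff above.
correlation_df = pd.DataFrame({
    "x_term": ["e-learning"],
    "y_term": ["mooc"],
    "correlation": [0.42],
    "p_value": [0.003],
    "significance": ["Signifikant"],
    "n_observations": [120],
    "cooccurrence_count": [37],
    "interpretation": ["Die Korrelation zwischen 'e-learning' und 'mooc' beträgt 0.42."],
})

fig = go.Figure(go.Scatter(
    x=correlation_df["x_term"],
    y=correlation_df["y_term"],
    mode="markers",
    marker=dict(color=correlation_df["correlation"]),
    # dtype=object keeps strings and floats side by side in one 2-D array,
    # so %{customdata[i]} resolves to the i-th zipped column per point.
    customdata=np.array(list(zip(
        correlation_df["x_term"],
        correlation_df["y_term"],
        correlation_df["correlation"],
        correlation_df["p_value"],
        correlation_df["significance"],
        correlation_df["n_observations"],
        correlation_df["cooccurrence_count"],
        correlation_df["interpretation"],
    )), dtype=object),
    hovertemplate=(
        "<b>%{customdata[0]}</b> ↔ <b>%{customdata[1]}</b><br>"
        "Korrelation: %{marker.color:.2f}<br>"
        "p-Wert: %{customdata[3]:.3e}<br>"
        "Signifikanz: %{customdata[4]}<br>"
        "Stichprobe (n): %{customdata[5]}<br>"
        "Gemeinsame Treffer: %{customdata[6]}<br>"
        "%{customdata[7]}"
        "<extra></extra>"  # suppresses the secondary hover box
    ),
))
fig.show()
```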
@@ -376,6 +392,7 @@ def visualize_bivariate_correlation(df, x_terms, y_terms, title, x_label, y_labe
 x_title=x_label,
 y_title=y_label
 ),
+hovermode='closest',
 xaxis=dict(
 tickangle=-45,
 automargin=True
@@ -609,7 +626,6 @@ fig_cluster = px.scatter_3d(
 }
 )
 
-
 # Layout mit Standardlayout und konsistenten CI-konformen Ergänzungen
 layout_cluster = get_standard_layout(
 title=plot_title,
@@ -694,6 +710,7 @@ correlation_quality_results = {
 "Forschungsunterfragen & Kategorien": analyze_correlation_quality(df, research_questions_processed, categories_processed),
 "Forschungsunterfragen & Suchbegriffe": analyze_correlation_quality(df, research_questions_processed, tags_to_search_processed),
 "Forschungsunterfragen & Indizes": analyze_correlation_quality(df, research_questions_processed, index_terms_processed),
+"Forschungsunterfragen & Forschungsunterfragen": analyze_correlation_quality(df, research_questions_processed, research_questions_processed),
 "Indizes & Kategorien": analyze_correlation_quality(df, index_terms_processed, categories_processed),
 "Indizes & Suchbegriffe": analyze_correlation_quality(df, index_terms_processed, tags_to_search_processed),
 "Suchbegriffe & Kategorien": analyze_correlation_quality(df, tags_to_search_processed, categories_processed),
@@ -1,7 +1,6 @@
 
 from config_netzwerk import theme, export_fig_visual, bib_filename
 
-
 import os
 
 # Clear the terminal
@@ -60,11 +59,14 @@ from config_netzwerk import (
 export_fig_visualize_sources_status,
 export_fig_create_wordcloud_from_titles,
 export_fig_visualize_languages,
+export_fig_visualize_relevance_fu,
+export_fig_visualize_relevance_categories,
+export_fig_visualize_relevance_search_terms,
 )
 
 from config_netzwerk import export_fig_png
 
-def export_figure_local(fig, name, flag, bib_filename=None):
+def export_figure_local(fig, name, flag):
 from config_netzwerk import export_path_html, export_path_png
 # Einmalige Definition von safe_filename am Anfang der Funktion
 safe_filename = prepare_figure_export(fig, name).replace(".html", "")
@@ -100,6 +102,23 @@ word_colors = [
 colors["negativeHighlight"]
 ]
 
+# Relevanz-Stufen (1 = gering, 5 = sehr hoch)
+RELEVANCE_LEVELS = [5, 4, 3, 2, 1]
+RELEVANCE_LEVEL_LABELS = {
+5: "Relevanz 5",
+4: "Relevanz 4",
+3: "Relevanz 3",
+2: "Relevanz 2",
+1: "Relevanz 1",
+}
+RELEVANCE_COLOR_MAP = {
+"Relevanz 5": colors['positiveHighlight'],
+"Relevanz 4": colors['accent'],
+"Relevanz 3": colors['brightArea'],
+"Relevanz 2": colors['depthArea'],
+"Relevanz 1": colors['negativeHighlight'],
+}
+
 # Aktuelles Datum
 current_date = datetime.now().strftime("%Y-%m-%d")
 
@@ -120,6 +139,13 @@ with open('en_complete.txt', 'r', encoding='utf-8') as file:
 # Kombinierte Stoppliste
 stop_words = stop_words_de.union(stop_words_en)
 
+# Hilfsfunktion: Relevanzstufe aus Keywords extrahieren
+def extract_relevance_level(entry_keywords):
+for level in RELEVANCE_LEVELS:
+if f'promotion:relevanz:{level}' in entry_keywords:
+return level
+return None
+
 # Funktion zur Berechnung der Stichprobengröße
 def calculate_sample_size(N, Z=1.96, p=0.5, e=0.05):
 n_0 = (Z**2 * p * (1 - p)) / (e**2)
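The new extract_relevance_level helper walks RELEVANCE_LEVELS from 5 down to 1 and returns the first promotion:relevanz:<n> keyword it finds, so an entry tagged with several levels resolves to the highest one. A small usage sketch with made-up keyword sets (already lower-cased, as the calling code in this diff does):

```python
RELEVANCE_LEVELS = [5, 4, 3, 2, 1]

def extract_relevance_level(entry_keywords):
    # Checked from 5 down to 1, so the highest tagged level wins.
    for level in RELEVANCE_LEVELS:
        if f'promotion:relevanz:{level}' in entry_keywords:
            return level
    return None

print(extract_relevance_level({'promotion:fu1', 'promotion:relevanz:4'}))         # -> 4
print(extract_relevance_level({'promotion:relevanz:2', 'promotion:relevanz:5'}))  # -> 5
print(extract_relevance_level({'promotion:kerngedanke'}))                         # -> None
```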
@@ -149,8 +175,10 @@ def visualize_network(bib_database):
 'Buch',
 'Buchteil',
 'Bericht',
-'Konferenz-Paper'
+'Konferenz-Paper',
+'Studienbrief'
 ]
+
 tags_to_search = set()
 for number, type_ in product(numbers, types):
 search_term = search_terms[number]
@@ -286,11 +314,15 @@ def visualize_network(bib_database):
 secondary_nodes = []
 tertiary_nodes = []
 
+total_fundzahlen = sum(fundzahlen.values())
+
 for node in G.nodes():
 color = G.nodes[node]['color']
 size = math.log(G.nodes[node].get('size', 10) + 1) * 10
 x, y = pos[node]
-hovertext = f"{node}<br>Anzahl Funde: {fundzahlen.get(node, 0)}"
+count = fundzahlen.get(node, 0)
+percentage = (count / total_fundzahlen * 100) if total_fundzahlen else 0
+hovertext = f"{node}<br>Anzahl Funde: {count}<br>Anteil: {percentage:.1f}%"
 node_data = dict(x=x, y=y, text=node, size=size, hovertext=hovertext)
 if color == colors['primaryLine']:
 primary_nodes.append(node_data)
@@ -331,7 +363,7 @@ def visualize_network(bib_database):
 
 fig = go.Figure(data=[edge_trace, primary_trace, secondary_trace, tertiary_trace])
 layout = get_standard_layout(
-title=f"Suchbegriff-Netzwerk nach Relevanz und Semantik (n={sum(fundzahlen.values())}, Stand: {current_date})",
+title=f"Suchbegriff-Netzwerk nach Relevanz und Semantik (n={total_fundzahlen}, Stand: {current_date})",
 x_title="Technologische Dimension",
 y_title="Pädagogische Dimension"
 )
@@ -343,7 +375,7 @@ def visualize_network(bib_database):
 fig.update_layout(**layout)
 
 fig.show(config={"responsive": True})
-export_figure_local(fig, "visualize_network", export_fig_visualize_network, bib_filename)
+export_figure_local(fig, "visualize_network", export_fig_visualize_network)
 
 # Einfache Pfadanalyse nach dem Anzeigen der Figur
 if 'e-learning' in G and 'online:lernen' in G:
@@ -378,10 +410,11 @@ def visualize_tags(bib_database):
 'Buch',
 'Buchteil',
 'Bericht',
-'Konferenz-Paper'
+'Konferenz-Paper',
+'Studienbrief'
 ]
 tags_to_search = set(
-f"#{number}:{type_}:{search_terms[number]}"
+f"#{number}:{type_}:{search_terms[number]}".lower()
 for number, type_ in product(numbers, types)
 )
 
@@ -403,36 +436,46 @@ def visualize_tags(bib_database):
 tag_counts[tag] += 1
 
 # Daten für Visualisierung aufbereiten
-data = [
-{'Tag': tag, 'Count': count, 'Type': tag.split(':')[1].lower()}
+data_rows = [
+{
+'Tag': tag,
+'Count': count,
+'Type': tag.split(':')[1].lower()
+}
 for tag, count in tag_counts.items()
 if count > 0
 ]
 
-if not data:
+if not data_rows:
 print("Warnung: Keine Tags gefunden, die den Suchkriterien entsprechen.")
 return
 
+df = pd.DataFrame(data_rows)
+df['TypeLabel'] = df['Type'].str.replace('-', ' ').str.title()
+total_count = df['Count'].sum()
+df['Percentage'] = df['Count'] / total_count * 100 if total_count else 0
+
 # Farbzuordnung
 color_map = {
 'zeitschriftenartikel': colors['primaryLine'],
 'konferenz-paper': colors['secondaryLine'],
 'buch': colors['depthArea'],
 'buchteil': colors['brightArea'],
-'bericht': colors['accent']
+'bericht': colors['accent'],
+'studienbrief': colors['positiveHighlight']
 }
 
 # Visualisierung erstellen
-total_count = sum(tag_counts.values())
 fig = px.bar(
-data,
+df,
 x='Tag',
 y='Count',
 title=f'Häufigkeit der Suchbegriffe in der Literaturanalyse (n={total_count}, Stand: {current_date})',
 labels={'Tag': 'Tag', 'Count': 'Anzahl der Vorkommen'},
 color='Type',
 color_discrete_map=color_map,
-text_auto=True
+text_auto=True,
+custom_data=['TypeLabel', 'Percentage']
 )
 
 layout = get_standard_layout(
@@ -448,9 +491,17 @@ def visualize_tags(bib_database):
 layout["xaxis"]["automargin"] = True
 layout["autosize"] = True
 fig.update_layout(**layout)
+fig.update_traces(
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Typ: %{customdata[0]}<br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata[1]:.1f}%<extra></extra>"
+)
+)
 
 fig.show(config={"responsive": True})
-export_figure_local(fig, "visualize_tags", export_fig_visualize_tags, bib_filename)
+export_figure_local(fig, "visualize_tags", export_fig_visualize_tags)
 
 # Visualisierung 3: Häufigkeit Index
 def visualize_index(bib_database):
@@ -478,11 +529,21 @@ def visualize_index(bib_database):
 index_data = [{'Index': index, 'Count': count} for index, count in index_counts.items()]
 index_data = sorted(index_data, key=lambda x: x['Count'], reverse=True)
 
-total_count = sum(index_counts.values())
+index_df = pd.DataFrame(index_data)
+total_count = index_df['Count'].sum()
+index_df['Percentage'] = index_df['Count'] / total_count * 100 if total_count else 0
 print(f"Häufigkeit Indizes (Gesamtanzahl: {total_count}):")
-print(tabulate(index_data, headers="keys", tablefmt="grid"))
+print(tabulate(index_df.to_dict('records'), headers="keys", tablefmt="grid"))
 
-fig = px.bar(index_data, x='Index', y='Count', title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})', labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
+fig = px.bar(
+index_df,
+x='Index',
+y='Count',
+title=f'Relevanzschlüssel nach Indexkategorien (n={total_count}, Stand: {current_date})',
+labels={'Index': 'Index', 'Count': 'Anzahl der Vorkommen'},
+text_auto=True,
+custom_data=['Percentage']
+)
 layout = get_standard_layout(
 title=fig.layout.title.text,
 x_title='Index',
@@ -497,8 +558,15 @@ def visualize_index(bib_database):
 layout["autosize"] = True
 fig.update_layout(**layout)
 fig.update_traces(marker=plot_styles['balken_primaryLine'])
+fig.update_traces(
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata[0]:.1f}%<extra></extra>"
+)
+)
 fig.show(config={"responsive": True})
-export_figure_local(fig, "visualize_index", export_fig_visualize_index, bib_filename)
+export_figure_local(fig, "visualize_index", export_fig_visualize_index)
 
 # Visualisierung 4: Häufigkeit Forschungsunterfragen
 def visualize_research_questions(bib_database):
@@ -525,13 +593,22 @@ def visualize_research_questions(bib_database):
 rq_data = [{'Research_Question': research_questions[keyword], 'Count': count} for keyword, count in rq_counts.items()]
 rq_data = sorted(rq_data, key=lambda x: x['Count'], reverse=True)
 
-rq_data_df = pd.DataFrame(rq_data)
+rq_data_df = pd.DataFrame(rq_data, columns=['Research_Question', 'Count'])
 
 total_count = rq_data_df['Count'].sum()
+rq_data_df['Percentage'] = rq_data_df['Count'] / total_count * 100 if total_count else 0
 print(f"Häufigkeit Forschungsunterfragen (Gesamtanzahl: {total_count}):")
 print(tabulate(rq_data, headers="keys", tablefmt="grid"))
 
-fig = px.bar(rq_data_df, x='Research_Question', y='Count', title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})', labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
+fig = px.bar(
+rq_data_df,
+x='Research_Question',
+y='Count',
+title=f'Zuordnung der Literatur zu Forschungsunterfragen (n={total_count}, Stand: {current_date})',
+labels={'Research_Question': 'Forschungsunterfrage', 'Count': 'Anzahl der Vorkommen'},
+text_auto=True,
+custom_data=['Percentage']
+)
 layout = get_standard_layout(
 title=fig.layout.title.text,
 x_title='Forschungsunterfrage',
@@ -546,8 +623,15 @@ def visualize_research_questions(bib_database):
 layout["autosize"] = True
 fig.update_layout(**layout)
 fig.update_traces(marker=plot_styles['balken_primaryLine'])
+fig.update_traces(
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata[0]:.1f}%<extra></extra>"
+)
+)
 fig.show(config={"responsive": True})
-export_figure_local(fig, "visualize_research_questions", export_fig_visualize_research_questions, bib_filename)
+export_figure_local(fig, "visualize_research_questions", export_fig_visualize_research_questions)
 
 # Visualisierung 5: Häufigkeit spezifischer Kategorien
 def visualize_categories(bib_database):
@@ -569,13 +653,22 @@ def visualize_categories(bib_database):
 cat_data = [{'Category': categories[keyword], 'Count': count} for keyword, count in cat_counts.items()]
 cat_data = sorted(cat_data, key=lambda x: x['Count'], reverse=True)
 
-cat_data_df = pd.DataFrame(cat_data)
+cat_data_df = pd.DataFrame(cat_data, columns=['Category', 'Count'])
 
 total_count = cat_data_df['Count'].sum()
+cat_data_df['Percentage'] = cat_data_df['Count'] / total_count * 100 if total_count else 0
 print(f"Häufigkeit Kategorien (Gesamtanzahl: {total_count}):")
 print(tabulate(cat_data, headers="keys", tablefmt="grid"))
 
-fig = px.bar(cat_data_df, x='Category', y='Count', title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})', labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'}, text_auto=True)
+fig = px.bar(
+cat_data_df,
+x='Category',
+y='Count',
+title=f'Textsortenzuordnung der analysierten Quellen (n={total_count}, Stand: {current_date})',
+labels={'Category': 'Kategorie', 'Count': 'Anzahl der Vorkommen'},
+text_auto=True,
+custom_data=['Percentage']
+)
 layout = get_standard_layout(
 title=fig.layout.title.text,
 x_title='Kategorie',
@@ -590,8 +683,179 @@
 layout["autosize"] = True
 fig.update_layout(**layout)
 fig.update_traces(marker=plot_styles['balken_primaryLine'])
+fig.update_traces(
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata[0]:.1f}%<extra></extra>"
+)
+)
 fig.show(config={"responsive": True})
-export_figure_local(fig, "visualize_categories", export_fig_visualize_categories, bib_filename)
+export_figure_local(fig, "visualize_categories", export_fig_visualize_categories)
 
+# Relevanz-Auswertungen
+def build_relevance_distribution(bib_database, tag_to_label):
+records = []
+
+for entry in bib_database.entries:
+keywords_raw = entry.get('keywords', '')
+if not keywords_raw:
+continue
+
+entry_keywords = set(map(str.lower, map(str.strip, keywords_raw.replace('\\#', '#').split(','))))
+relevance_level = extract_relevance_level(entry_keywords)
+if relevance_level is None:
+continue
+
+for tag, label in tag_to_label.items():
+if tag in entry_keywords:
+records.append({
+'Kategorie': label,
+'Relevanzstufe': RELEVANCE_LEVEL_LABELS[relevance_level]
+})
+
+if not records:
+return pd.DataFrame()
+
+df = pd.DataFrame(records)
+df = (
+df.groupby(['Kategorie', 'Relevanzstufe'])
+.size()
+.reset_index(name='Count')
+)
+df['Relevanzstufe'] = pd.Categorical(
+df['Relevanzstufe'],
+categories=[RELEVANCE_LEVEL_LABELS[level] for level in RELEVANCE_LEVELS],
+ordered=True
+)
+return df.sort_values(['Kategorie', 'Relevanzstufe'])
+
+
+def plot_relevance_distribution(df, title, x_title, export_flag, filename):
+if df.empty:
+print(f"⚠️ Keine Relevanzdaten verfügbar für: {title}")
+return
+
+total_count = df['Count'].sum()
+df['Percentage'] = df['Count'] / total_count * 100 if total_count else 0
+fig = px.bar(
+df,
+x='Kategorie',
+y='Count',
+color='Relevanzstufe',
+color_discrete_map=RELEVANCE_COLOR_MAP,
+category_orders={'Relevanzstufe': [RELEVANCE_LEVEL_LABELS[level] for level in RELEVANCE_LEVELS]},
+title=f"{title} (n={total_count}, Stand: {current_date})",
+labels={'Kategorie': x_title, 'Count': 'Anzahl', 'Relevanzstufe': 'Relevanzstufe'},
+custom_data=['Relevanzstufe', 'Percentage']
+)
+
+layout = get_standard_layout(
+title=fig.layout.title.text,
+x_title=x_title,
+y_title='Anzahl'
+)
+layout['barmode'] = 'stack'
+layout['font'] = {"size": 14, "color": colors['text']}
+layout['title'] = {"font": {"size": 16}}
+layout['margin'] = dict(b=160, t=60, l=40, r=40)
+layout['xaxis'] = layout.get('xaxis', {})
+layout['xaxis']['tickangle'] = -45
+layout['xaxis']['automargin'] = True
+layout['autosize'] = True
+fig.update_layout(**layout)
+fig.update_traces(
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Relevanzstufe: %{customdata[0]}<br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata[1]:.1f}%<extra></extra>"
+)
+)
+
+fig.show(config={"responsive": True})
+export_figure_local(fig, filename, export_flag)
+
+
+def visualize_relevance_vs_research_questions(bib_database):
+research_questions = {
+'promotion:fu1': 'Akzeptanz und Nützlichkeit (FU1)',
+'promotion:fu2a': 'Effekt für Lernende (FU2a)',
+'promotion:fu2b': 'Effekt-Faktoren für Lehrende (FU2b)',
+'promotion:fu3': 'Konzeption und Merkmale (FU3)',
+'promotion:fu4a': 'Bildungswissenschaftliche Mechanismen (FU4a)',
+'promotion:fu4b': 'Technisch-gestalterische Mechanismen (FU4b)',
+'promotion:fu5': 'Möglichkeiten und Grenzen (FU5)',
+'promotion:fu6': 'Beurteilung als Kompetenzerwerbssystem (FU6)',
+'promotion:fu7': 'Inputs und Strategien (FU7)'
+}
+tag_to_label = {key.lower(): value for key, value in research_questions.items()}
+df = build_relevance_distribution(bib_database, tag_to_label)
+plot_relevance_distribution(
+df,
+"Relevanzverteilung nach Forschungsunterfragen",
+"Forschungsunterfragen",
+export_fig_visualize_relevance_fu,
+"visualize_relevance_fu"
+)
+
+
+def visualize_relevance_vs_categories(bib_database):
+categories = {
+'promotion:argumentation': 'Argumentation',
+'promotion:kerngedanke': 'Kerngedanke',
+'promotion:weiterführung': 'Weiterführung',
+'promotion:schlussfolgerung': 'Schlussfolgerung'
+}
+tag_to_label = {key.lower(): value for key, value in categories.items()}
+df = build_relevance_distribution(bib_database, tag_to_label)
+plot_relevance_distribution(
+df,
+"Relevanzverteilung nach Kategorien",
+"Kategorien",
+export_fig_visualize_relevance_categories,
+"visualize_relevance_categories"
+)
+
+
+def visualize_relevance_vs_search_terms(bib_database):
+search_terms = {
+'0': 'digital:learning',
+'1': 'learning:management:system',
+'2': 'online:lernplattform',
+'3': 'online:lernumgebung',
+'4': 'mooc',
+'5': 'e-learning',
+'6': 'bildung:technologie',
+'7': 'digital:medien',
+'8': 'blended:learning',
+'9': 'digital:lernen',
+'a': 'online:lernen',
+'b': 'online:learning'
+}
+types = [
+'Zeitschriftenartikel',
+'Buch',
+'Buchteil',
+'Bericht',
+'Konferenz-Paper',
+'Studienbrief'
+]
+
+tag_to_label = {}
+for number, term in search_terms.items():
+for type_ in types:
+tag = f'#{number}:{type_}:{term}'.lower()
+tag_to_label[tag] = f"#{number}:{term}"
+
+df = build_relevance_distribution(bib_database, tag_to_label)
+plot_relevance_distribution(
+df,
+"Relevanzverteilung nach Suchbegriffen",
+"Suchbegriffe",
+export_fig_visualize_relevance_search_terms,
+"visualize_relevance_search_terms"
+)
+
 # Zeitreihenanalyse der Veröffentlichungen
 def visualize_time_series(bib_database):
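build_relevance_distribution reduces per-entry records to counts per (Kategorie, Relevanzstufe) and then casts the level labels to an ordered pd.Categorical, which keeps the stacked bars and legend in the fixed 5-to-1 order even when some levels never occur. A compact sketch of that aggregation step on invented records:

```python
import pandas as pd

RELEVANCE_LEVELS = [5, 4, 3, 2, 1]
RELEVANCE_LEVEL_LABELS = {level: f"Relevanz {level}" for level in RELEVANCE_LEVELS}

# Invented per-entry records, shaped like those build_relevance_distribution collects.
records = [
    {"Kategorie": "Argumentation", "Relevanzstufe": "Relevanz 4"},
    {"Kategorie": "Argumentation", "Relevanzstufe": "Relevanz 4"},
    {"Kategorie": "Kerngedanke", "Relevanzstufe": "Relevanz 1"},
]

df = (
    pd.DataFrame(records)
    .groupby(["Kategorie", "Relevanzstufe"])
    .size()
    .reset_index(name="Count")
)
# The ordered categorical pins the stacking/legend order to 5 -> 1,
# even when some levels are missing from the data.
df["Relevanzstufe"] = pd.Categorical(
    df["Relevanzstufe"],
    categories=[RELEVANCE_LEVEL_LABELS[level] for level in RELEVANCE_LEVELS],
    ordered=True,
)
print(df.sort_values(["Kategorie", "Relevanzstufe"]))
```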
@@ -614,13 +878,16 @@ def visualize_time_series(bib_database):
 if publication_years:
 year_counts = Counter(publication_years)
 df = pd.DataFrame(year_counts.items(), columns=['Year', 'Count']).sort_values('Year')
+total_publications = df['Count'].sum()
+df['Percentage'] = df['Count'] / total_publications * 100 if total_publications else 0
 
 fig = px.line(
 df,
 x='Year',
 y='Count',
 title=f'Jährliche Veröffentlichungen in der Literaturanalyse (n={sum(year_counts.values())}, Stand: {current_date})',
-labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'}
+labels={'Year': 'Jahr', 'Count': 'Anzahl der Veröffentlichungen'},
+custom_data=['Percentage']
 )
 layout = get_standard_layout(
 title=fig.layout.title.text,
@@ -637,8 +904,15 @@ def visualize_time_series(bib_database):
 layout["autosize"] = True
 fig.update_layout(**layout)
 fig.update_traces(line=plot_styles['linie_primaryLine'])
+fig.update_traces(
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata[0]:.1f}%<extra></extra>"
+)
+)
 fig.show(config={"responsive": True})
-export_figure_local(fig, "visualize_time_series", export_fig_visualize_time_series, bib_filename)
+export_figure_local(fig, "visualize_time_series", export_fig_visualize_time_series)
 else:
 print("Keine gültigen Veröffentlichungsjahre gefunden.")
 
@@ -655,8 +929,18 @@ def visualize_top_authors(bib_database):
 top_authors = Counter(author_counts).most_common(top_n)
 if top_authors:
 df = pd.DataFrame(top_authors, columns=['Author', 'Count'])
+overall_total = sum(author_counts.values())
+df['Percentage'] = df['Count'] / overall_total * 100 if overall_total else 0
 
-fig = px.bar(df, x='Author', y='Count', title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={sum(author_counts.values())}, Stand: {current_date})', labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'}, text_auto=True)
+fig = px.bar(
+df,
+x='Author',
+y='Count',
+title=f'Meistgenannte Autor:innen in der Literaturanalyse (Top {top_n}, n={overall_total}, Stand: {current_date})',
+labels={'Author': 'Autor', 'Count': 'Anzahl der Werke'},
+text_auto=True,
+custom_data=['Percentage']
+)
 layout = get_standard_layout(
 title=fig.layout.title.text,
 x_title='Autor',
@@ -671,55 +955,18 @@ def visualize_top_authors(bib_database):
 layout["autosize"] = True
 fig.update_layout(**layout)
 fig.update_traces(marker=plot_styles['balken_primaryLine'])
+fig.update_traces(
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata[0]:.1f}%<extra></extra>"
+)
+)
 fig.show(config={"responsive": True})
-export_figure_local(fig, "visualize_top_authors", export_fig_visualize_top_authors, bib_filename)
+export_figure_local(fig, "visualize_top_authors", export_fig_visualize_top_authors)
 else:
 print("Keine Autoren gefunden.")
 
-# Top Titel nach Anzahl der Werke
-def normalize_title(title):
-# Entfernen von Sonderzeichen und Standardisierung auf Kleinbuchstaben
-title = title.lower().translate(str.maketrans('', '', ",.!?\"'()[]{}:;"))
-# Zusammenführen ähnlicher Titel, die sich nur in geringfügigen Details unterscheiden
-title = " ".join(title.split())
-# Entfernen häufiger Füllwörter oder Standardphrasen, die die Unterscheidung nicht unterstützen
-common_phrases = ['eine studie', 'untersuchung der', 'analyse von']
-for phrase in common_phrases:
-title = title.replace(phrase, '')
-return title.strip()
-
-def visualize_top_publications(bib_database):
-top_n = 25 # Anzahl der Top-Publikationen, die angezeigt werden sollen
-publication_counts = defaultdict(int)
-
-for entry in bib_database.entries:
-if 'title' in entry:
-title = normalize_title(entry['title'])
-publication_counts[title] += 1
-
-top_publications = sorted(publication_counts.items(), key=lambda x: x[1], reverse=True)[:top_n]
-publication_data = [{'Title': title[:50] + '...' if len(title) > 50 else title, 'Count': count} for title, count in top_publications]
-
-df = pd.DataFrame(publication_data)
-
-fig = px.bar(df, x='Title', y='Count', title=f'Häufig zitierte Publikationen in der Analyse (Top {top_n}, n={sum(publication_counts.values())}, Stand: {current_date})', labels={'Title': 'Titel', 'Count': 'Anzahl der Nennungen'})
-layout = get_standard_layout(
-title=fig.layout.title.text,
-x_title='Titel',
-y_title='Anzahl der Nennungen'
-)
-layout["font"] = {"size": 14, "color": colors['text']}
-layout["title"] = {"font": {"size": 16}}
-layout["margin"] = dict(b=160, t=60, l=40, r=40)
-layout["xaxis"] = layout.get("xaxis", {})
-layout["xaxis"]["tickangle"] = -45
-layout["xaxis"]["automargin"] = True
-layout["autosize"] = True
-fig.update_layout(**layout)
-fig.update_traces(marker=plot_styles['balken_primaryLine'])
-fig.show(config={"responsive": True})
-export_figure_local(fig, "visualize_top_publications", export_fig_visualize_top_publications, bib_filename)
-
 ##########
 
 # Daten vorbereiten
@@ -761,7 +1008,8 @@ def prepare_path_data(bib_database):
 'Buch',
 'Buchteil',
 'Bericht',
-'Konferenz-Paper'
+'Konferenz-Paper',
+'Studienbrief'
 ]
 
 data = []
@@ -800,12 +1048,14 @@ def create_path_diagram(data):
 sources = []
 targets = []
 values = []
+node_counts = Counter()
 color_map = {
 'zeitschriftenartikel': colors['primaryLine'],
 'konferenz-paper': colors['secondaryLine'],
 'buch': colors['depthArea'],
 'buchteil': colors['brightArea'],
-'bericht': colors['accent']
+'bericht': colors['accent'],
+'studienbrief': colors['positiveHighlight']
 }
 
 def add_to_labels(label):
@@ -822,8 +1072,19 @@ def create_path_diagram(data):
 sources.extend([fu_idx, category_idx, index_idx])
 targets.extend([category_idx, index_idx, type_idx])
 values.extend([1, 1, 1])
+node_counts.update([entry['FU'], entry['Category'], entry['Index'], entry['Type']])
 
 node_colors = [color_map.get(label, colors['primaryLine']) for label in labels]
+total_paths = len(data)
+total_flows = sum(values)
+node_percentages = [
+node_counts.get(label, 0) / total_paths * 100 if total_paths else 0
+for label in labels
+]
+link_percentages = [
+value / total_flows * 100 if total_flows else 0
+for value in values
+]
 
 fig = go.Figure(data=[go.Sankey(
 node=dict(
@@ -831,12 +1092,24 @@ def create_path_diagram(data):
 thickness=20,
 line=dict(color="black", width=0.5),
 label=labels,
-color=node_colors
+color=node_colors,
+customdata=node_percentages,
+hovertemplate=(
+"%{label}<br>"
+"Anzahl: %{value}<br>"
+"Anteil der Pfade: %{customdata:.1f}%<extra></extra>"
+)
 ),
 link=dict(
 source=sources,
 target=targets,
-value=values
+value=values,
+customdata=link_percentages,
+hovertemplate=(
+"%{source.label} → %{target.label}<br>"
+"Anzahl: %{value}<br>"
+"Anteil der Verbindungen: %{customdata:.1f}%<extra></extra>"
+)
 )
 )])
 layout = get_standard_layout(
@@ -850,7 +1123,7 @@ def create_path_diagram(data):
 layout["autosize"] = True
 fig.update_layout(**layout)
 fig.show(config={"responsive": True})
-export_figure_local(fig, "create_path_diagram", export_fig_create_path_diagram, bib_filename)
+export_figure_local(fig, "create_path_diagram", export_fig_create_path_diagram)
 
 #############
 
@@ -954,22 +1227,54 @@ def create_sankey_diagram(bib_database):
 colors['positiveHighlight'] # Ausgewählte Quellen
 ]
 
+node_values = [
+initial_sources,
+screened_sources,
+quality_sources,
+relevance_sources,
+thematic_sources,
+recent_sources,
+classic_sources,
+selected_sources
+]
+node_percentages = [
+value / initial_sources * 100 if initial_sources else 0
+for value in node_values
+]
+link_percentages = [
+value / initial_sources * 100 if initial_sources else 0
+for value in values
+]
+
 # Sankey-Diagramm erstellen
 node_config = {
 **plot_styles["sankey_node"],
 "label": node_labels,
-"color": node_colors
+"color": node_colors,
+"customdata": node_percentages,
+"hovertemplate": (
+"%{label}<br>"
+"Anzahl: %{value}<br>"
+"Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>"
+)
 }
 # Remove any invalid 'font' key if present
 node_config.pop("font", None)
+link_config = {
+**plot_styles["sankey_link"],
+"source": sources,
+"target": targets,
+"value": values,
+"customdata": link_percentages,
+"hovertemplate": (
+"%{source.label} → %{target.label}<br>"
+"Anzahl: %{value}<br>"
+"Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>"
+)
+}
 fig = go.Figure(go.Sankey(
 node=node_config,
-link=dict(
-**plot_styles["sankey_link"],
-source=sources,
-target=targets,
-value=values
-)
+link=link_config
 ))
 # Layout anpassen
 layout = get_standard_layout(
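Both Sankey variants now attach one percentage per node and per link via customdata and render it with a hovertemplate; %{source.label} and %{target.label} are available inside link hover templates. A stripped-down, runnable sketch of that go.Sankey pattern with an invented three-stage funnel:

```python
import plotly.graph_objects as go

# Invented funnel: 120 identified sources, 80 screened, 55 selected.
labels = ["Identifiziert", "Gescreent", "Ausgewählt"]
values = [80, 55]                 # link flows: stage 0 -> 1 and stage 1 -> 2
node_values = [120, 80, 55]
node_pct = [v / node_values[0] * 100 for v in node_values]
link_pct = [v / node_values[0] * 100 for v in values]

fig = go.Figure(go.Sankey(
    node=dict(
        label=labels,
        customdata=node_pct,  # one value per node, referenced as plain %{customdata}
        hovertemplate="%{label}<br>Anzahl: %{value}<br>"
                      "Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>",
    ),
    link=dict(
        source=[0, 1],
        target=[1, 2],
        value=values,
        customdata=link_pct,
        hovertemplate="%{source.label} → %{target.label}<br>Anzahl: %{value}<br>"
                      "Anteil an Ausgangsmenge: %{customdata:.1f}%<extra></extra>",
    ),
))
fig.show()
```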
@@ -983,7 +1288,7 @@ def create_sankey_diagram(bib_database):
 layout["autosize"] = True
 fig.update_layout(**layout)
 fig.show(config={"responsive": True})
-export_figure_local(fig, "create_sankey_diagram", export_fig_create_sankey_diagram, bib_filename)
+export_figure_local(fig, "create_sankey_diagram", export_fig_create_sankey_diagram)
 
 ##########
 
@@ -1001,31 +1306,33 @@ def visualize_sources_status(bib_database):
 """
 Visualisiert den Status der analysierten und nicht analysierten Quellen pro Suchordner.
 """
+search_terms = {
+'0': 'digital:learning',
+'1': 'learning:management:system',
+'2': 'online:lernplattform',
+'3': 'online:lernumgebung',
+'4': 'mooc',
+'5': 'e-learning',
+'6': 'bildung:technologie',
+'7': 'digital:medien',
+'8': 'blended:learning',
+'9': 'digital:lernen',
+'a': 'online:lernen',
+'b': 'online:learning'
+}
+numbers_order = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b']
+type_order = [
+'Zeitschriftenartikel',
+'Buch',
+'Buchteil',
+'Bericht',
+'Konferenz-Paper',
+'Studienbrief'
+]
 search_folder_tags = [
-"#1:zeitschriftenartikel:learning:management:system",
-"#2:zeitschriftenartikel:online:lernplattform",
-"#3:zeitschriftenartikel:online:lernumgebung",
-"#4:zeitschriftenartikel:mooc",
-"#5:zeitschriftenartikel:e-learning",
-"#6:zeitschriftenartikel:bildung:technologie",
-"#7:zeitschriftenartikel:digital:medien",
-"#8:zeitschriftenartikel:blended:learning",
-"#9:zeitschriftenartikel:digital:lernen",
-"#a:zeitschriftenartikel:online:lernen",
-"#b:zeitschriftenartikel:online:learning",
-"#0:zeitschriftenartikel:digital:learning",
-"#1:konferenz-paper:learning:management:system",
-"#2:konferenz-paper:online:lernplattform",
-"#3:konferenz-paper:online:lernumgebung",
-"#4:konferenz-paper:mooc",
-"#5:konferenz-paper:e-learning",
-"#6:konferenz-paper:bildung:technologie",
-"#7:konferenz-paper:digital:medien",
-"#8:konferenz-paper:blended:learning",
-"#9:konferenz-paper:digital:lernen",
-"#a:konferenz-paper:online:lernen",
-"#b:konferenz-paper:online:learning",
-"#0:konferenz-paper:digital:learning"
+f"#{number}:{type_}:{search_terms[number]}".lower()
+for type_ in type_order
+for number in numbers_order
 ]
 
 category_tags = {"promotion:argumentation", "promotion:kerngedanke", "promotion:weiterführung", "promotion:schlussfolgerung"}
@@ -1080,21 +1387,45 @@ def visualize_sources_status(bib_database):
 tablefmt='grid'
 ))
 
+total_identifiziert = sum(counts["Identifiziert"] for counts in source_data.values())
+analysiert_percentages = [
+value / total_identifiziert * 100 if total_identifiziert else 0
+for value in analysiert_values
+]
+nicht_analysiert_percentages = [
+value / total_identifiziert * 100 if total_identifiziert else 0
+for value in nicht_analysiert_values
+]
+
 fig = go.Figure()
 fig.add_trace(go.Bar(
 x=tags,
 y=analysiert_values,
 name='Analysiert',
-marker=dict(color=analysiert_colors)
+marker=dict(color=analysiert_colors),
+customdata=analysiert_percentages,
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Status: Analysiert<br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata:.1f}%<extra></extra>"
+)
 ))
 fig.add_trace(go.Bar(
 x=tags,
 y=nicht_analysiert_values,
 name='Nicht-Analysiert',
-marker=plot_styles['balken_primaryLine']
+marker=plot_styles['balken_primaryLine'],
+customdata=nicht_analysiert_percentages,
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Status: Nicht-Analysiert<br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata:.1f}%<extra></extra>"
+)
 ))
 layout = get_standard_layout(
-title=f'Analyse- und Stichprobenstatus je Suchordner (n={sum(counts["Identifiziert"] for counts in source_data.values())}, Stand: {current_date})',
+title=f'Analyse- und Stichprobenstatus je Suchordner (n={total_identifiziert}, Stand: {current_date})',
 x_title='Suchbegriffsordner',
 y_title='Anzahl der Quellen'
 )
@@ -1111,7 +1442,7 @@ def visualize_sources_status(bib_database):
 layout["autosize"] = True
 fig.update_layout(**layout)
 fig.show(config={"responsive": True})
-export_figure_local(fig, "visualize_sources_status", export_fig_visualize_sources_status, bib_filename)
+export_figure_local(fig, "visualize_sources_status", export_fig_visualize_sources_status)
 
 #############
 
@@ -1197,8 +1528,8 @@ def visualize_languages(bib_database):
 color='Gruppe',
 color_discrete_map=color_discrete_map,
 title=f'Sprachverteilung der analysierten Quellen (n={sum(norm_counts.values())}, Stand: {current_date})',
-hover_data=["Sprache", "Gruppe", "Anzahl", "Anteil (%)"],
-barmode="stack"
+barmode="stack",
+custom_data=['Gruppe', 'Anteil (%)']
 )
 
 layout = get_standard_layout(
@@ -1213,10 +1544,18 @@ def visualize_languages(bib_database):
 # Ergänzung: Y-Achse logarithmisch skalieren
 layout["yaxis_type"] = "log"
 fig.update_layout(**layout)
+fig.update_traces(
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Sprachgruppe: %{customdata[0]}<br>"
+"Anzahl: %{y}<br>"
+"Anteil: %{customdata[1]:.2f}%<extra></extra>"
+)
+)
 fig.show(config={"responsive": True})
 # Tabelle ausgeben
 print(tabulate(df.sort_values("Anzahl", ascending=False), headers="keys", tablefmt="grid", showindex=False))
-export_figure_local(fig, "visualize_languages", export_fig_visualize_languages, bib_filename)
+export_figure_local(fig, "visualize_languages", export_fig_visualize_languages)
 
 # Visualisierung der Verteilung von ENTRYTYPE innerhalb jeder Sprache
 def visualize_language_entrytypes(bib_database):
@@ -1266,6 +1605,8 @@ def visualize_language_entrytypes(bib_database):
 grouped.rename(columns={'ENTRYTYPE': 'Eintragstyp'}, inplace=True)
 # Anteil innerhalb Sprache (%)
 grouped["Anteil innerhalb Sprache (%)"] = grouped.groupby("Sprache")["Anzahl"].transform(lambda x: (x / x.sum() * 100).round(2))
+total_entrytypes = grouped['Anzahl'].sum()
+grouped["Anteil Gesamt (%)"] = grouped['Anzahl'] / total_entrytypes * 100 if total_entrytypes else 0
 
 # Mapping Eintragstyp zu Typgruppe
 eintragstyp_gruppen = {
@@ -1302,7 +1643,8 @@ def visualize_language_entrytypes(bib_database):
 barmode="group",
 title=f'Verteilung der Eintragstypen pro Sprache (n={len(df)}, Stand: {current_date})',
 text='Anzahl',
-labels={'Sprache': 'Sprache', 'Eintragstyp': 'Eintragstyp', 'Anzahl': 'Anzahl', 'Typgruppe': 'Typgruppe'}
+labels={'Sprache': 'Sprache', 'Eintragstyp': 'Eintragstyp', 'Anzahl': 'Anzahl', 'Typgruppe': 'Typgruppe'},
+custom_data=['Eintragstyp', 'Typgruppe', 'Anteil Gesamt (%)', 'Anteil innerhalb Sprache (%)']
 )
 layout = get_standard_layout(
 title=fig.layout.title.text,
@@ -1316,9 +1658,19 @@ def visualize_language_entrytypes(bib_database):
 # Ergänzung: Y-Achse logarithmisch skalieren
 layout["yaxis_type"] = "log"
 fig.update_layout(**layout)
+fig.update_traces(
+hovertemplate=(
+"<b>%{x}</b><br>"
+"Eintragstyp: %{customdata[0]}<br>"
+"Typgruppe: %{customdata[1]}<br>"
+"Anzahl: %{y}<br>"
+"Anteil gesamt: %{customdata[2]:.2f}%<br>"
+"Anteil innerhalb Sprache: %{customdata[3]:.2f}%<extra></extra>"
+)
+)
 fig.show(config={"responsive": True})
 print(tabulate(grouped.sort_values(["Sprache", "Eintragstyp"]), headers=["Sprache", "Eintragstyp", "Anzahl", "Anteil innerhalb Sprache (%)", "Typgruppe"], tablefmt="grid", showindex=False))
-export_figure_local(fig, "visualize_language_entrytypes", export_fig_visualize_languages, bib_filename)
+export_figure_local(fig, "visualize_language_entrytypes", export_fig_visualize_languages)
 
 #############
 
@@ -1361,9 +1713,11 @@ visualize_tags(bib_database)
 visualize_index(bib_database)
 visualize_research_questions(bib_database)
 visualize_categories(bib_database)
+visualize_relevance_vs_research_questions(bib_database)
+visualize_relevance_vs_categories(bib_database)
+visualize_relevance_vs_search_terms(bib_database)
 visualize_time_series(bib_database)
 visualize_top_authors(bib_database)
-visualize_top_publications(bib_database)
 data = prepare_path_data(bib_database)
 create_path_diagram(data)
 create_sankey_diagram(bib_database)
cleaned_Literaturverzeichnis.bib: new file, 67354 lines (file diff suppressed because it is too large)
@@ -218,14 +218,13 @@ layout["legend"] = dict(
 itemdoubleclick="toggle"
 )
 layout["yaxis3"] = dict(
-title="Abweichung (ΔSCₙ)",
+title=dict(text="Abweichung (ΔSCₙ)", font=dict(color=colors["text"])),
 overlaying="y",
 side="right",
 showgrid=False,
 zeroline=True,
 zerolinewidth=2,
 zerolinecolor='grey',
-titlefont=dict(color=colors["text"]),
 tickfont=dict(color=colors["text"]),
 anchor="free",
 position=1.0
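This yaxis3 change tracks a Plotly API change: the flat titlefont axis attribute is deprecated in current Plotly versions, and the axis title font moves into the nested title object, which is what the new line does. A minimal sketch of the new form; the colour value here is a placeholder, not the project's colors["text"]:

```python
import plotly.graph_objects as go

fig = go.Figure()
fig.update_layout(
    yaxis3=dict(
        # Nested title object replaces the deprecated flat `titlefont` attribute.
        title=dict(text="Abweichung (ΔSCₙ)", font=dict(color="#333333")),  # placeholder colour
        overlaying="y",
        side="right",
        tickfont=dict(color="#333333"),  # placeholder colour
    )
)
```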