Testpush
This commit is contained in:
0
Automatisierte Literaturanalyse.py
Normal file
0
Automatisierte Literaturanalyse.py
Normal file
80
Geovisualization
Normal file
80
Geovisualization
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import bibtexparser
|
||||||
|
import folium
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
# Reset the terminal window before emitting progress output.
os.system('cls' if os.name == 'nt' else 'clear')

# Input/output file locations.
geonames_file_path = 'allCountries.txt'
bib_file_path = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/Literaturverzeichnis.bib'
cleaned_bib_file_path = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/cleaned_Literaturverzeichnis.bib'

# Column names as documented in the GeoNames README.
columns = [
    'geonameid', 'name', 'asciiname', 'alternatenames', 'latitude',
    'longitude', 'feature class', 'feature code', 'country code', 'cc2',
    'admin1 code', 'admin2 code', 'admin3 code', 'admin4 code',
    'population', 'elevation', 'dem', 'timezone', 'modification date',
]
|
||||||
|
|
||||||
|
# Load the GeoNames gazetteer; only the columns needed for name matching
# and plotting are materialized.
print("Lade GeoNames-Daten...")
geo_df = pd.read_csv(
    geonames_file_path,
    sep='\t',
    header=None,
    names=columns,
    usecols=['name', 'asciiname', 'latitude', 'longitude'],
)
# Rows without coordinates cannot be plotted, so drop them up front.
geo_df = geo_df.dropna(subset=['latitude', 'longitude'])
print("GeoNames-Daten geladen.")
|
||||||
|
|
||||||
|
# Parse the BibTeX bibliography.
print("Lade BibTeX-Datei...")
with open(bib_file_path, encoding='utf-8') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)
print("BibTeX-Datei geladen.")

# Collect candidate place names from every entry's 'address' field;
# an entry may list several places separated by ';' and/or ','.
print("Extrahiere und bereinige Ortsnamen...")
locations = set()
for entry in bib_database.entries:
    if 'address' in entry:
        for part in entry['address'].split(';'):
            locations.update(part.split(','))

# Strip surrounding whitespace from every extracted name.
cleaned_locations = {name.strip() for name in locations}
print("Ortsnamen extrahiert und bereinigt.")
|
||||||
|
|
||||||
|
# Geo-Koordinaten zuordnen
def find_coordinates(location, df=None):
    """Look up geographic coordinates for a place name.

    Matches *location* case-insensitively against the GeoNames ``name``
    and ``asciiname`` columns and returns the coordinates of the first
    matching row.

    Parameters:
        location: Place name to look up.
        df: Optional GeoNames DataFrame with 'name', 'asciiname',
            'latitude' and 'longitude' columns. Defaults to the
            module-level ``geo_df`` (backward compatible with the
            previous zero-config behavior).

    Returns:
        ``(latitude, longitude)`` of the first match, or
        ``(None, None)`` when the name is not found.
    """
    if df is None:
        df = geo_df
    # Lowercase the query once instead of on every comparison.
    needle = location.lower()
    match = df[(df['name'].str.lower() == needle) | (df['asciiname'].str.lower() == needle)]
    if not match.empty:
        return match.iloc[0]['latitude'], match.iloc[0]['longitude']
    return None, None
|
||||||
|
|
||||||
|
print("Suche Geo-Koordinaten...")
location_coords = {}

# Resolve each cleaned place name; names without a GeoNames match are
# silently skipped.
for place in tqdm(cleaned_locations, desc="Bearbeitung der Ortsnamen"):
    lat, lon = find_coordinates(place)
    if lat is not None and lon is not None:
        location_coords[place] = (lat, lon)

print("Geo-Koordinaten gefunden.")
|
||||||
|
|
||||||
|
# Build the interactive map.
# NOTE: the map object was previously bound to the name 'map', which
# shadowed the builtin of the same name; renamed to 'literature_map'
# (all uses are local to this section).
print("Erstelle Karte...")
map_center = [geo_df['latitude'].mean(), geo_df['longitude'].mean()]
literature_map = folium.Map(location=map_center, zoom_start=2)

# One marker per resolved place name.
for location, coords in location_coords.items():
    folium.Marker(location=coords, popup=location).add_to(literature_map)

map_file_path = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/literature_map.html'
literature_map.save(map_file_path)
print(f"Karte gespeichert unter {map_file_path}")
|
||||||
|
|
||||||
|
# Re-serialize the bibliography and write it back out.
print("Speichere bereinigte BibTeX-Datei...")
with open(cleaned_bib_file_path, 'w', encoding='utf-8') as out_file:
    bibtexparser.dump(bib_database, out_file)
print(f"Bereinigte BibTeX-Datei gespeichert unter {cleaned_bib_file_path}")
|
||||||
78
Geovisualization copy.py
Normal file
78
Geovisualization copy.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
import os
|
||||||
|
import bibtexparser
|
||||||
|
import pandas as pd
|
||||||
|
from tqdm import tqdm
|
||||||
|
import folium
|
||||||
|
from folium.plugins import Fullscreen
|
||||||
|
|
||||||
|
# Clear the terminal before emitting progress messages.
os.system('cls' if os.name == 'nt' else 'clear')

# Input file locations.
geonames_file = 'allCountries.txt'
bib_file = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/Literaturverzeichnis.bib'
|
||||||
|
|
||||||
|
# Load the GeoNames dump in chunks so the parser never holds the whole
# raw file in memory at once.
print("Laden der GeoNames-Daten...")
geonames_columns = [
    'geonameid', 'name', 'asciiname', 'alternatenames', 'latitude',
    'longitude', 'feature class', 'feature code', 'country code', 'cc2',
    'admin1 code', 'admin2 code', 'admin3 code', 'admin4 code', 'population',
    'elevation', 'dem', 'timezone', 'modification date'
]

chunksize = 10**6
# Collect all chunks first and concatenate exactly once at the end.
# The previous version ran pd.concat inside the loop, which copies the
# accumulated frame on every iteration — quadratic in total size.
_chunks = []
for chunk in tqdm(pd.read_csv(geonames_file, sep='\t', header=None, names=geonames_columns, chunksize=chunksize, dtype=str, encoding='utf-8')):
    _chunks.append(chunk)
geonames_data = pd.concat(_chunks, ignore_index=True) if _chunks else pd.DataFrame(columns=geonames_columns)
|
||||||
|
|
||||||
|
# Read the bibliography.
print("Laden der BibTeX-Daten...")
with open(bib_file, encoding='utf-8') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)

# Pull place names out of the 'address' fields. Both ';' and ',' act as
# separators; '&' is rewritten to 'and' before splitting.
print("Extrahieren und Bereinigen der Ortsnamen...")
locations = set()
for entry in bib_database.entries:
    if 'address' in entry:
        normalised = entry['address'].replace(';', ',').replace('&', 'and')
        locations.update(normalised.split(','))

# Strip surrounding whitespace from every name.
locations = {name.strip() for name in locations}
print(f"Bereinigte Ortsnamen: {locations}")
|
||||||
|
|
||||||
|
# Attach coordinates to every extracted place name via substring match
# against the GeoNames 'name' column.
print("Zuordnen der Geo-Koordinaten...")
geo_data = []
for location in tqdm(locations):
    # regex=False: place names are literal strings. Without it,
    # str.contains treats the name as a regular expression, so names
    # containing metacharacters (e.g. '(', '.', '+') could raise
    # re.error or match spuriously.
    matching_rows = geonames_data[geonames_data['name'].str.contains(location, case=False, na=False, regex=False)]
    if not matching_rows.empty:
        # Take the first hit as the best match (GeoNames file order).
        best_match = matching_rows.iloc[0]
        geo_data.append({
            'name': location,
            'latitude': best_match['latitude'],
            'longitude': best_match['longitude'],
        })
||||||
|
if not geo_data:
    # Nothing to plot — bail out of the map section entirely. As scraped,
    # the map-building code ran unconditionally and referenced 'df', which
    # is only bound in the else branch, so an empty result crashed with a
    # NameError. (Original indentation was lost in extraction — if the
    # code was already nested under 'else', this is a faithful rebuild.)
    print("Keine gültigen Koordinaten gefunden.")
else:
    df = pd.DataFrame(geo_data)

    # Build a world map with one marker per resolved location.
    print("Erstellen der Karte...")
    m = folium.Map(location=[0, 0], zoom_start=2)

    for _, row in df.iterrows():
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            popup=row['name']
        ).add_to(m)

    # Add a fullscreen toggle and a layer control widget.
    Fullscreen(position='topright').add_to(m)
    folium.LayerControl().add_to(m)

    # Persist the map to disk.
    m.save('literature_map_with_zoom.html')
    print("Karte wurde gespeichert als 'literature_map_with_zoom.html'.")
|
||||||
1482
Systematsiche Literaturanalyse db | Netzwerk- und Pfadanalyse.py
Normal file
1482
Systematsiche Literaturanalyse db | Netzwerk- und Pfadanalyse.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user