"""Plot the places named in a BibTeX bibliography on a world map.

The script reads the GeoNames dump, collects every place name found in the
``address`` fields of the BibTeX entries, looks up coordinates for each name,
writes a folium map with one marker per resolved place, and finally writes the
BibTeX database back out to a second file.
"""

import os
from concurrent.futures import ThreadPoolExecutor

import bibtexparser
import folium
import pandas as pd
from tqdm import tqdm

# Clear the terminal
os.system('cls' if os.name == 'nt' else 'clear')

# Paths to the input and output files
geonames_file_path = 'allCountries.txt'
bib_file_path = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/Literaturverzeichnis.bib'
cleaned_bib_file_path = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/cleaned_Literaturverzeichnis.bib'
map_file_path = 'Research/Charité - Universitätsmedizin Berlin/Systematische Literaturrecherche/literature_map.html'

# Column names as listed in the GeoNames README
columns = [
    'geonameid', 'name', 'asciiname', 'alternatenames', 'latitude',
    'longitude', 'feature class', 'feature code', 'country code', 'cc2',
    'admin1 code', 'admin2 code', 'admin3 code', 'admin4 code', 'population',
    'elevation', 'dem', 'timezone', 'modification date',
]

# Load the GeoNames data (only the columns needed for the lookup)
print("Lade GeoNames-Daten...")
geo_df = pd.read_csv(
    geonames_file_path,
    sep='\t',
    header=None,
    names=columns,
    usecols=['name', 'asciiname', 'latitude', 'longitude'],
)
geo_df.dropna(subset=['latitude', 'longitude'], inplace=True)
print("GeoNames-Daten geladen.")

# Load the BibTeX file
print("Lade BibTeX-Datei...")
with open(bib_file_path, encoding='utf-8') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)
print("BibTeX-Datei geladen.")

# Extract and clean the place names: an address may hold several places
# separated by ';', and each place may be split further by ','.
print("Extrahiere und bereinige Ortsnamen...")
cleaned_locations = set()
for entry in bib_database.entries:
    if 'address' not in entry:
        continue
    for part in entry['address'].split(';'):
        for fragment in part.split(','):
            name = fragment.strip()
            # Empty fragments (e.g. from a trailing separator) can never
            # match a GeoNames row, so skip them up front.
            if name:
                cleaned_locations.add(name)
print("Ortsnamen extrahiert und bereinigt.")


def _build_first_match_index(df):
    """Map each lower-cased name/asciiname to the first row label using it.

    The row labels of ``df`` increase in file order, so the smallest label per
    key is the first row whose ``name`` or ``asciiname`` equals that key when
    compared case-insensitively.
    """
    lowered = pd.concat(
        [df['name'].str.lower(), df['asciiname'].str.lower()]
    ).dropna()
    row_labels = pd.Series(lowered.index.to_numpy())
    return row_labels.groupby(lowered.to_numpy()).min()


# Built once so that every lookup is an index access instead of a scan that
# lower-cases both name columns of the whole table again.
first_match_index = _build_first_match_index(geo_df)


def find_coordinates(location):
    """Return ``(latitude, longitude)`` for a place name.

    The name is compared case-insensitively against the ``name`` and
    ``asciiname`` columns; the first matching row in file order wins.

    Args:
        location: Place name to look up.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when no row
        matches.
    """
    row_label = first_match_index.get(location.lower())
    if row_label is None:
        return None, None
    return geo_df.at[row_label, 'latitude'], geo_df.at[row_label, 'longitude']


# Resolve coordinates for every extracted place name
print("Suche Geo-Koordinaten...")
location_coords = {}
for location in tqdm(cleaned_locations, desc="Bearbeitung der Ortsnamen"):
    latitude, longitude = find_coordinates(location)
    if latitude is not None and longitude is not None:
        location_coords[location] = (latitude, longitude)
print("Geo-Koordinaten gefunden.")

# Create the map, centred on the mean coordinate of all GeoNames rows
print("Erstelle Karte...")
map_center = [geo_df['latitude'].mean(), geo_df['longitude'].mean()]
# Named literature_map so the builtin ``map`` is not shadowed.
literature_map = folium.Map(location=map_center, zoom_start=2)

for location, coords in location_coords.items():
    folium.Marker(location=coords, popup=location).add_to(literature_map)

literature_map.save(map_file_path)
print(f"Karte gespeichert unter {map_file_path}")

# Write the BibTeX database to the "cleaned" output file.
# NOTE(review): the entries are written back exactly as loaded; no cleaning
# step is applied to bib_database in this script. Confirm whether one is
# missing.
print("Speichere bereinigte BibTeX-Datei...")
with open(cleaned_bib_file_path, 'w', encoding='utf-8') as bibtex_file:
    bibtexparser.dump(bib_database, bibtex_file)
print(f"Bereinigte BibTeX-Datei gespeichert unter {cleaned_bib_file_path}")