diff options
| author | Friedrich Beckmann <friedrich.beckmann@tha.de> | 2026-03-19 16:23:55 +0100 |
|---|---|---|
| committer | Friedrich Beckmann <friedrich.beckmann@tha.de> | 2026-03-19 16:23:55 +0100 |
| commit | ba2d143a8f0328d61294e84d46c4ed55ff10421e (patch) | |
| tree | df7d68dcba756d86351ec0287897b8a14dee79b9 /crawl/wp.py | |
| parent | 12a87277041c82edcec46b4f05e35abac632d271 (diff) | |
update crawler to retrieve data from webuntis
Diffstat (limited to 'crawl/wp.py')
| -rw-r--r-- | crawl/wp.py | 171 |
1 files changed, 171 insertions, 0 deletions
"""Generate ``kurse.json`` from the elective course list and timetable data.

The script (crawl/wp.py) reads ``timetables.json`` (a list of course objects,
each with a ``shortname`` and a ``timetable``; presumably produced by the
WebUntis crawler -- confirm against the crawler), matches the entries against
the hand-maintained ``PDF_COURSES`` list and writes one record per course,
with up to ``MAX_BLOCKS`` weekly time blocks, to ``kurse.json``.
"""

import json
from datetime import datetime

# PDF courses: (SP, Name, Dozent, SWS, CP, Turnus, shortname in WebUntis)
# SP is written to the output as "Vertiefung"; a shortname of None means
# there is no WebUntis entry to look up for that course.
# Only courses with SoSe or WiSe/SoSe turnus are relevant
PDF_COURSES = [
    # Aufbau
    ("Aufbau", "Praktikum Messtechnik", "Großmann", 2, 2, "SoSe", "E-MT.PR"),
    # Automat./Robotik
    ("Autom./Robotik", "Antriebstechnik", "Meyer", 4, 5, "SoSe", "E-ANT"),
    ("Autom./Robotik", "Automatisierungstechnik 1", "Zeller / Danzer", 4, 5, "WiSe/SoSe", "E-AUT.1"),
    ("Autom./Robotik", "Praktikum Automatisierungstechnik", "Zeller / Danzer", 2, 2, "WiSe/SoSe", "E-AUT.PR"),
    ("Autom./Robotik", "Robot Systems Engineering", "Dietrich", 4, 5, "SoSe", "E-RSE"),
    # Elektronik
    ("Elektronik", "Automobilelektronik", "Schurk", 2, 2, "WiSe/SoSe", "E-%AUTOM"),
    ("Elektronik", "Formula Student Electric", "Markgraf", 4, 5, "WiSe/SoSe", None),
    ("Elektronik", "Fortgeschrittene Messtechnik", "Großmann", 4, 5, "SoSe", "E-FMT"),
    ("Elektronik", "Schaltungstechnik", "Zedler", 4, 5, "WiSe/SoSe", "E-SCHT"),
    ("Elektronik", "Elektrische Maschinen", "Meyer", 4, 5, "SoSe", "E-ELMA"),
    # Energietechnik
    ("Energietechnik", "Erneuerbare Energien", "Schwägerl", 4, 5, "SoSe", "E-EREN"),
    ("Energietechnik", "Praktikum Erneuerbare Energien", "Schwägerl", 2, 2, "WiSe/SoSe", "E-EREN.PR"),
    ("Energietechnik", "Hochspannungstechnik", "Finkel", 4, 5, "SoSe", "E-HST"),
    ("Energietechnik", "Leistungselektronik", "Ritter", 4, 5, "SoSe", "E-LE"),
    # Information
    ("Information", "Eingebettete Echtzeitsysteme mit Praktikum", "Werthschulte", 4, 5, "SoSe", "E-EES"),
    ("Information", "Künstliche Intelligenz: Grundlagen und Anwendungen", "Legat", 4, 5, "SoSe", None),
    ("Information", "Digitale Zwillinge: Grundkonzepte und Anwendungen", "Legat", 4, 5, "SoSe", "E-DIGTWIN"),
    ("Information", "Industrial Security Basics", "Hollmann", 4, 5, "SoSe", "E-IS1C5"),
    ("Information", "Matlab/Simulink", "Werthschulte", 2, 2, "WiSe/SoSe", "E-MATLAB"),
    # Kommunikation
    ("Kommunikation", "Digitale Kommunikation mit Praktikum", "Kamuf", 4, 5, "SoSe", "E-DIGK"),
    ("Kommunikation", "Hochfrequenz-Schaltungstechnik mit Praktikum", "Stolle", 4, 5, "SoSe", "E-HFSCH"),
    ("Kommunikation", "Hochfrequenzsysteme mit Praktikum", "Stolle", 4, 5, "SoSe", "E-HFSYS"),
    ("Kommunikation", "Funktechnik in der Praxis", "Bögl", 2, 2, "SoSe", None),
    # Übergreifend
    ("Übergreifend", "Fertigungstechnik", "Dietrich", 4, 5, "SoSe", "E-FT"),
    ("Übergreifend", "Systems Engineering", "Frommelt", 4, 5, "WiSe/SoSe", None),
    ("Übergreifend", "Elektrokonstruktion mit E-Plan", "Danzer / Voicau-Ottlik", 2, 2, "SoSe", "E-%EPLAN"),
    ("Übergreifend", "Elektronikproduktion", "Dietrich / Baur", 2, 2, "SoSe", "E-EP"),
    ("Übergreifend", "Systemdenken im Produktentstehungsprozess", "Königbauer", 4, 5, "SoSe", "E-THINK"),
    ("Übergreifend", "Advanced Topics in Electrical Engineering", "Gastdozierende", 2, 2, "WiSe/SoSe", None),
    ("Übergreifend", "Amateurfunk", "Stolle", 2, 2, "WiSe/SoSe", None),
]

# Maps datetime.weekday() (0 = Monday) to the German day name used in the
# output. Sunday is included so that a stray Sunday entry in the timetable
# data does not abort the whole run with a KeyError.
DAY_NAMES = {
    0: "Montag",
    1: "Dienstag",
    2: "Mittwoch",
    3: "Donnerstag",
    4: "Freitag",
    5: "Samstag",
    6: "Sonntag",
}

# Number of Tag/Startzeit/Ende/Raum column groups in every output record.
MAX_BLOCKS = 4

# Per-block field prefixes, in the order they appear in each record.
SLOT_FIELDS = ("Tag", "Startzeit", "Ende", "Raum")


def get_slots(course_data):
    """Extract (day_name, start_time, end_time, room) tuples from timetable data.

    Args:
        course_data: One course object; ``course_data["timetable"]`` may hold
            a ``days`` list whose items may hold ``gridEntries``. Each grid
            entry needs ISO-format ``duration.start`` / ``duration.end``
            values and may carry rooms in ``position2``.

    Returns:
        A list of ``(day_name, "HH:MM", "HH:MM", room)`` tuples in input
        order. ``room`` joins all current room display names with "/" and is
        "" when the entry has none.

    Raises:
        KeyError: If ``timetable`` or a ``duration`` field is missing.
        ValueError: If a start/end value is not a valid ISO datetime string.
    """
    slots = []
    for day in course_data["timetable"].get("days", []):
        for grid in day.get("gridEntries", []):
            start = datetime.fromisoformat(grid["duration"]["start"])
            end = datetime.fromisoformat(grid["duration"]["end"])
            # "position2" can be absent or null; entries without a "current"
            # value contribute no room name.
            rooms = [
                room["current"]["displayName"]
                for room in (grid.get("position2") or [])
                if room.get("current")
            ]
            slots.append((
                DAY_NAMES[start.weekday()],
                start.strftime("%H:%M"),
                end.strftime("%H:%M"),
                "/".join(rooms),
            ))
    return slots


def merge_slots(slots):
    """Merge touching or overlapping slots on the same day into single blocks.

    A slot is folded into the current block when its start time is not later
    than the block's end, so back-to-back lessons, overlapping lessons and
    exact duplicates all collapse into one block. Times are compared as
    zero-padded "HH:MM" strings, which order the same way as the times.

    Args:
        slots: Iterable of ``(day_name, start, end, room)`` tuples as
            returned by ``get_slots``.

    Returns:
        A list of ``(day_name, start, end, rooms)`` tuples ordered by weekday
        (in ``DAY_NAMES`` order) and then by start time. ``rooms`` is the
        sorted set of non-empty room strings of the merged slots joined with
        "/". Slots whose day name is not in ``DAY_NAMES`` are dropped.
    """
    if not slots:
        return []

    # Group by day
    by_day = {}
    for day, start, end, room in slots:
        by_day.setdefault(day, []).append((start, end, room))

    merged = []
    for day in DAY_NAMES.values():
        if day not in by_day:
            continue
        entries = sorted(by_day[day])
        current_start, current_end, first_room = entries[0]
        rooms = {first_room} if first_room else set()
        for start, end, room in entries[1:]:
            if start <= current_end:
                # Touches or overlaps the running block: extend it.
                current_end = max(current_end, end)
                if room:
                    rooms.add(room)
            else:
                # Gap before this slot: close the running block, start anew.
                merged.append((day, current_start, current_end, "/".join(sorted(rooms))))
                current_start, current_end = start, end
                rooms = {room} if room else set()
        merged.append((day, current_start, current_end, "/".join(sorted(rooms))))
    return merged


def build_entry(sp, name, dozent, sws, cp, turnus, merged):
    """Build one output record for a course.

    Args:
        sp, name, dozent, sws, cp, turnus: Fields of a ``PDF_COURSES`` tuple.
        merged: Time blocks as returned by ``merge_slots``; only the first
            ``MAX_BLOCKS`` are written, the remaining column groups stay "".

    Returns:
        A dict with the keys Vertiefung, Name, Dozent, Semester, SWS, CP and
        Tag<i>/Startzeit<i>/Ende<i>/Raum<i> for i in 1..MAX_BLOCKS. SWS and
        CP are stored as strings.
    """
    entry = {
        "Vertiefung": sp,
        "Name": name,
        "Dozent": dozent,
        "Semester": "WS/SS" if "WiSe/SoSe" in turnus else "SS",
        "SWS": str(sws),
        "CP": str(cp),
    }
    # Every record carries all column groups, even when empty.
    for i in range(1, MAX_BLOCKS + 1):
        for field in SLOT_FIELDS:
            entry[f"{field}{i}"] = ""
    for i, slot in enumerate(merged[:MAX_BLOCKS], 1):
        for field, value in zip(SLOT_FIELDS, slot):
            entry[f"{field}{i}"] = value
    return entry


def build_kurse(timetables, courses=PDF_COURSES):
    """Combine the course list with timetable data.

    Prints a warning for every course that has more than ``MAX_BLOCKS``
    merged time blocks.

    Args:
        timetables: List of course objects, each with a ``shortname`` key and
            the structure expected by ``get_slots``.
        courses: Course tuples in the ``PDF_COURSES`` layout.

    Returns:
        ``(kurse, no_data)``: the list of output records (one per course, in
        input order) and the names of courses for which no time block was
        found.
    """
    # Build lookup by shortname; a later duplicate overrides an earlier one.
    by_shortname = {course["shortname"]: course for course in timetables}

    kurse = []
    no_data = []
    for sp, name, dozent, sws, cp, turnus, shortname in courses:
        course_data = by_shortname.get(shortname) if shortname else None
        merged = merge_slots(get_slots(course_data)) if course_data else []

        if not merged:
            no_data.append(name)
        if len(merged) > MAX_BLOCKS:
            print(f" WARNING: {name} has {len(merged)} time blocks, only first {MAX_BLOCKS} used")

        kurse.append(build_entry(sp, name, dozent, sws, cp, turnus, merged))
    return kurse, no_data


def main(timetables_path="timetables.json", output_path="kurse.json"):
    """Read the timetable JSON, write the course JSON and print a summary.

    Both files are opened explicitly as UTF-8: the output is written with
    ``ensure_ascii=False`` and contains umlauts, so relying on the platform
    default encoding could fail or produce a non-UTF-8 file.

    Args:
        timetables_path: Path of the input file, relative to the working
            directory by default.
        output_path: Path of the generated file.
    """
    with open(timetables_path, encoding="utf-8") as f:
        timetables = json.load(f)

    kurse, no_data = build_kurse(timetables)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(kurse, f, ensure_ascii=False, indent=2)

    print(f"Generated {output_path} with {len(kurse)} courses")
    if no_data:
        print(f"\nNo timetable data for {len(no_data)} courses:")
        for n in no_data:
            print(f" - {n}")


if __name__ == "__main__":
    main()
