aboutsummaryrefslogtreecommitdiff
path: root/crawl/wp.py
diff options
context:
space:
mode:
authorFriedrich Beckmann <friedrich.beckmann@tha.de>2026-03-19 16:23:55 +0100
committerFriedrich Beckmann <friedrich.beckmann@tha.de>2026-03-19 16:23:55 +0100
commitba2d143a8f0328d61294e84d46c4ed55ff10421e (patch)
treedf7d68dcba756d86351ec0287897b8a14dee79b9 /crawl/wp.py
parent12a87277041c82edcec46b4f05e35abac632d271 (diff)
update crawler to retrieve data from webuntis
Diffstat (limited to 'crawl/wp.py')
-rw-r--r--crawl/wp.py171
1 files changed, 171 insertions, 0 deletions
diff --git a/crawl/wp.py b/crawl/wp.py
new file mode 100644
index 0000000..55d33e0
--- /dev/null
+++ b/crawl/wp.py
@@ -0,0 +1,171 @@
+import json
+from datetime import datetime
+
# Elective courses transcribed from the course-catalogue PDF.
#
# Each row is a 7-tuple:
#   (SP, Name, Dozent, SWS, CP, Turnus, shortname in WebUntis)
#
# * SP is written out under the "Vertiefung" key by the export further down.
# * Turnus is either "SoSe" or "WiSe/SoSe"; only courses with one of these
#   two values are relevant, so nothing else is listed here.
# * shortname is the key used to look the course up in the crawled WebUntis
#   timetables; None means no WebUntis shortname is recorded for the course.
PDF_COURSES = [
    # Aufbau
    ("Aufbau", "Praktikum Messtechnik", "Großmann", 2, 2, "SoSe", "E-MT.PR"),
    # Autom./Robotik
    ("Autom./Robotik", "Antriebstechnik", "Meyer", 4, 5, "SoSe", "E-ANT"),
    ("Autom./Robotik", "Automatisierungstechnik 1", "Zeller / Danzer", 4, 5, "WiSe/SoSe", "E-AUT.1"),
    ("Autom./Robotik", "Praktikum Automatisierungstechnik", "Zeller / Danzer", 2, 2, "WiSe/SoSe", "E-AUT.PR"),
    ("Autom./Robotik", "Robot Systems Engineering", "Dietrich", 4, 5, "SoSe", "E-RSE"),
    # Elektronik
    ("Elektronik", "Automobilelektronik", "Schurk", 2, 2, "WiSe/SoSe", "E-%AUTOM"),
    ("Elektronik", "Formula Student Electric", "Markgraf", 4, 5, "WiSe/SoSe", None),
    ("Elektronik", "Fortgeschrittene Messtechnik", "Großmann", 4, 5, "SoSe", "E-FMT"),
    ("Elektronik", "Schaltungstechnik", "Zedler", 4, 5, "WiSe/SoSe", "E-SCHT"),
    ("Elektronik", "Elektrische Maschinen", "Meyer", 4, 5, "SoSe", "E-ELMA"),
    # Energietechnik
    ("Energietechnik", "Erneuerbare Energien", "Schwägerl", 4, 5, "SoSe", "E-EREN"),
    ("Energietechnik", "Praktikum Erneuerbare Energien", "Schwägerl", 2, 2, "WiSe/SoSe", "E-EREN.PR"),
    ("Energietechnik", "Hochspannungstechnik", "Finkel", 4, 5, "SoSe", "E-HST"),
    ("Energietechnik", "Leistungselektronik", "Ritter", 4, 5, "SoSe", "E-LE"),
    # Information
    ("Information", "Eingebettete Echtzeitsysteme mit Praktikum", "Werthschulte", 4, 5, "SoSe", "E-EES"),
    ("Information", "Künstliche Intelligenz: Grundlagen und Anwendungen", "Legat", 4, 5, "SoSe", None),
    ("Information", "Digitale Zwillinge: Grundkonzepte und Anwendungen", "Legat", 4, 5, "SoSe", "E-DIGTWIN"),
    ("Information", "Industrial Security Basics", "Hollmann", 4, 5, "SoSe", "E-IS1C5"),
    ("Information", "Matlab/Simulink", "Werthschulte", 2, 2, "WiSe/SoSe", "E-MATLAB"),
    # Kommunikation
    ("Kommunikation", "Digitale Kommunikation mit Praktikum", "Kamuf", 4, 5, "SoSe", "E-DIGK"),
    ("Kommunikation", "Hochfrequenz-Schaltungstechnik mit Praktikum", "Stolle", 4, 5, "SoSe", "E-HFSCH"),
    ("Kommunikation", "Hochfrequenzsysteme mit Praktikum", "Stolle", 4, 5, "SoSe", "E-HFSYS"),
    ("Kommunikation", "Funktechnik in der Praxis", "Bögl", 2, 2, "SoSe", None),
    # Übergreifend
    ("Übergreifend", "Fertigungstechnik", "Dietrich", 4, 5, "SoSe", "E-FT"),
    ("Übergreifend", "Systems Engineering", "Frommelt", 4, 5, "WiSe/SoSe", None),
    ("Übergreifend", "Elektrokonstruktion mit E-Plan", "Danzer / Voicau-Ottlik", 2, 2, "SoSe", "E-%EPLAN"),
    ("Übergreifend", "Elektronikproduktion", "Dietrich / Baur", 2, 2, "SoSe", "E-EP"),
    ("Übergreifend", "Systemdenken im Produktentstehungsprozess", "Königbauer", 4, 5, "SoSe", "E-THINK"),
    ("Übergreifend", "Advanced Topics in Electrical Engineering", "Gastdozierende", 2, 2, "WiSe/SoSe", None),
    ("Übergreifend", "Amateurfunk", "Stolle", 2, 2, "WiSe/SoSe", None),
]
+
# German day labels keyed by datetime.weekday() (0 = Monday ... 6 = Sunday).
# All seven weekdays are listed so that a lookup by weekday() can never raise
# KeyError; the insertion order is also the order in which merge_slots()
# emits the days.
DAY_NAMES = {
    0: "Montag",
    1: "Dienstag",
    2: "Mittwoch",
    3: "Donnerstag",
    4: "Freitag",
    5: "Samstag",
    6: "Sonntag",
}
+
# Load the timetable dump. The encoding is given explicitly so that reading
# does not depend on the platform's locale default.
with open("timetables.json", encoding="utf-8") as f:
    timetables = json.load(f)

# Build lookup by shortname. If a shortname occurs more than once, the last
# entry in the file wins.
by_shortname = {course["shortname"]: course for course in timetables}
+
def get_slots(course_data, day_names=None):
    """Extract the raw time slots of one course from its timetable data.

    Args:
        course_data: Mapping with a "timetable" entry. The timetable may hold
            a "days" list, and each day may hold a "gridEntries" list. Every
            grid entry must provide ``duration["start"]`` and
            ``duration["end"]`` as ISO 8601 strings and may list rooms under
            "position2".
        day_names: Optional mapping from ``datetime.weekday()`` numbers to
            day labels. Defaults to the module-level ``DAY_NAMES``.

    Returns:
        A list of ``(day_name, start_time, end_time, room)`` tuples in the
        order the grid entries appear. Times are formatted as "HH:MM";
        ``room`` is the "/"-joined display names of all rooms that have a
        "current" value, or "" when there are none.

    Raises:
        KeyError: If a required key is missing, or the weekday of a start
            time has no label in ``day_names``.
        ValueError: If a timestamp cannot be parsed by
            ``datetime.fromisoformat``.
    """
    if day_names is None:
        day_names = DAY_NAMES

    slots = []
    # "or []" (rather than a .get default) also covers keys that are present
    # but null, the same way "position2" is treated below.
    for day in course_data["timetable"].get("days") or []:
        for grid in day.get("gridEntries") or []:
            duration = grid["duration"]
            start = datetime.fromisoformat(duration["start"])
            end = datetime.fromisoformat(duration["end"])
            day_name = day_names[start.weekday()]

            # Only rooms with a truthy "current" value contribute a name.
            rooms = [
                room["current"]["displayName"]
                for room in grid.get("position2") or []
                if room.get("current")
            ]

            slots.append((
                day_name,
                start.strftime("%H:%M"),
                end.strftime("%H:%M"),
                "/".join(rooms),  # join of an empty list is already ""
            ))
    return slots
+
def _room_names(room):
    """Split a "/"-joined room string into a set of non-empty room names."""
    return {part for part in room.split("/") if part}


def merge_slots(slots, day_order=None):
    """Merge touching or overlapping slots on the same day into single blocks.

    Args:
        slots: Iterable of ``(day, start, end, room)`` tuples. ``start`` and
            ``end`` are compared as strings, so they must be zero-padded
            "HH:MM" values for the ordering to be chronological. ``room`` is
            a "/"-joined list of room names or "".
        day_order: Optional iterable of day labels giving the output order.
            Defaults to the values of the module-level ``DAY_NAMES``. Slots
            whose day is not listed are left out of the result.

    Returns:
        A list of ``(day, start, end, rooms)`` tuples, ordered by
        ``day_order`` and then by start time. Two slots are merged when the
        later one starts at or before the end of the current block.
        ``rooms`` is the sorted, de-duplicated, "/"-joined set of all room
        names seen in the block.
    """
    if not slots:
        return []
    if day_order is None:
        day_order = DAY_NAMES.values()

    # Group by day
    by_day = {}
    for day, start, end, room in slots:
        by_day.setdefault(day, []).append((start, end, room))

    merged = []
    for day in day_order:
        if day not in by_day:
            continue
        entries = sorted(by_day[day])
        current_start, current_end, first_room = entries[0]
        # Room strings may already contain several names; split them so that
        # "A1/B2" merged with "A1" yields "A1/B2" instead of "A1/A1/B2".
        rooms = _room_names(first_room)
        for start, end, room in entries[1:]:
            if start <= current_end:
                # Touching or overlapping: extend the current block.
                current_end = max(current_end, end)
                rooms |= _room_names(room)
            else:
                # Gap: close the current block and open a new one.
                merged.append((day, current_start, current_end, "/".join(sorted(rooms))))
                current_start, current_end = start, end
                rooms = _room_names(room)
        merged.append((day, current_start, current_end, "/".join(sorted(rooms))))
    return merged
+
# Build one flat record per course in PDF_COURSES and write them all to
# kurse.json. Each record has room for up to MAX_BLOCKS merged time blocks
# (TagN / StartzeitN / EndeN / RaumN); unused positions stay "".
MAX_BLOCKS = 4

kurse = []
no_data = []  # names of courses for which no time block was found

for sp, name, dozent, sws, cp, turnus, shortname in PDF_COURSES:
    semester = "WS/SS" if "WiSe/SoSe" in turnus else "SS"

    # Courses without a shortname, or whose shortname is not in the
    # timetable dump, get no time blocks.
    course_data = by_shortname.get(shortname) if shortname else None

    if course_data:
        slots = get_slots(course_data)
        merged = merge_slots(slots)
    else:
        merged = []

    if not merged:
        no_data.append(name)

    entry = {
        "Vertiefung": sp,
        "Name": name,
        "Dozent": dozent,
        "Semester": semester,
        "SWS": str(sws),
        "CP": str(cp),
    }
    # Placeholder keys are created in the fixed order Tag, Startzeit, Ende,
    # Raum for positions 1..MAX_BLOCKS so every record has the same layout.
    for i in range(1, MAX_BLOCKS + 1):
        entry[f"Tag{i}"] = ""
        entry[f"Startzeit{i}"] = ""
        entry[f"Ende{i}"] = ""
        entry[f"Raum{i}"] = ""

    for i, slot in enumerate(merged[:MAX_BLOCKS], 1):
        entry[f"Tag{i}"] = slot[0]
        entry[f"Startzeit{i}"] = slot[1]
        entry[f"Ende{i}"] = slot[2]
        entry[f"Raum{i}"] = slot[3]
    if len(merged) > MAX_BLOCKS:
        print(f" WARNING: {name} has {len(merged)} time blocks, only first {MAX_BLOCKS} used")

    kurse.append(entry)

# ensure_ascii=False writes umlauts verbatim, so the file encoding must be
# fixed to UTF-8 instead of being left to the platform's locale default.
with open("kurse.json", "w", encoding="utf-8") as f:
    json.dump(kurse, f, ensure_ascii=False, indent=2)

print(f"Generated kurse.json with {len(kurse)} courses")
if no_data:
    print(f"\nNo timetable data for {len(no_data)} courses:")
    for n in no_data:
        print(f" - {n}")