From ba2d143a8f0328d61294e84d46c4ed55ff10421e Mon Sep 17 00:00:00 2001
From: Friedrich Beckmann
Date: Thu, 19 Mar 2026 16:23:55 +0100
Subject: update crawler to retrieve data from webuntis

---
Review note: the file contents below were amended during review; the blob ids
on the "index" lines are those of the original commit and were not recomputed.

 crawl/analyze.py | 135 +++++++++++++++++++++++++++++++++++++++
 crawl/crawl.py   | 108 +++++++++++++++++++++++++++++++
 crawl/get.py     |  80 ++++++++++++++++++++++++
 crawl/wp.py      | 187 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 510 insertions(+)
 create mode 100644 crawl/analyze.py
 create mode 100644 crawl/crawl.py
 create mode 100644 crawl/get.py
 create mode 100644 crawl/wp.py

(limited to 'crawl')

diff --git a/crawl/analyze.py b/crawl/analyze.py
new file mode 100644
index 0000000..add1484
--- /dev/null
+++ b/crawl/analyze.py
@@ -0,0 +1,135 @@
+# Post-process the JSON dumps written by crawl.py (subjects.json,
+# teachers.json, timetables.json) into flattened my*.json files.
+import time
+import json
+
+with open("subjects.json", 'r') as f:
+    subjects = json.load(f);
+
+with open("teachers.json", 'r') as f:
+    teachers=json.load(f);
+
+with open("timetables.json", 'r') as f:
+    timetables=json.load(f);
+
+
+# Flatten the teacher list. displayName is split on whitespace; a name that
+# does not consist of exactly two parts keeps empty surname/firstname.
+allteachers = teachers["teachers"];
+myteachers = [];
+for teacher in allteachers:
+    fullname = teacher["teacher"]["displayName"];
+    try:
+        [surname, firstname] = fullname.split();
+    except ValueError:
+        surname = "";
+        firstname = "";
+    faculty = teacher["departments"][0]["shortName"] if teacher["departments"] else "";
+    myteachers.append({"fullname": fullname,
+                       "surname": surname,
+                       "firstname": firstname,
+                       "faculty": faculty});
+
+with open("myteachers.json", 'w') as f:
+    json.dump(myteachers, f);
+
+# Departments reduced to shortname/longname pairs.
+myfaculties=[];
+allfaculties = teachers["departments"];
+for faculty in allfaculties:
+    shortname = faculty["shortName"];
+    longname = faculty["displayName"];
+    myfaculties.append({"shortname": shortname, "longname": longname});
+with open("myfaculties.json", 'w') as f:
+    json.dump(myfaculties, f);
+
+
+# One entry per course; every grid entry of its timetable becomes a slot.
+# position1/2/3 are read as classes, rooms and teachers, matching the
+# format=2 request made by crawl.py.
+myrealteachers=set();
+mytimetable={};
+myrooms={};
+mycourses=[];
+for course in timetables:
+    melpomeid = course.get("id", course.get("melpomeid"));
+    shortname = course["shortname"];
+    longname = course["longname"];
+    timetable = course["timetable"];
+    days=timetable["days"];
+    # NOTE(review): these sets are created once per course, so each slot below
+    # receives everything collected up to that grid entry, not only its own
+    # teachers/rooms/classes - confirm that this is intended.
+    courseteachers=set();
+    courserooms=set();
+    courseklassen=set();
+    courseslots=[];
+    for day in days:
+        grids=day["gridEntries"];
+        for grid in grids:
+            if grid["position1"]:
+                for klasse in grid["position1"]:
+                    if not klasse.get("current"):
+                        continue;
+                    klassenname=klasse["current"]["displayName"];
+                    courseklassen.add(klassenname)
+            if grid["position2"]:
+                for room in grid["position2"]:
+                    if not room.get("current"):
+                        continue;
+                    roomname=room["current"]["displayName"];
+                    roomlongname=room["current"]["longName"];
+                    myrooms[roomname] = roomlongname;
+                    courserooms.add(roomname);
+            cteachers=grid["position3"];
+            if cteachers :
+                for teacher in cteachers:
+                    if not teacher.get("current"):
+                        continue;
+                    teachername=teacher["current"]["displayName"];
+                    print(teachername);
+                    myrealteachers.add(teachername);
+                    courseteachers.add(teachername);
+            duration=grid["duration"];
+            courseslots.append({"teachers": list(courseteachers),
+                                "rooms": list(courserooms),
+                                "klassen": list(courseklassen),
+                                "info": grid["lessonInfo"],
+                                "time": duration});
+    mycourses.append({"melpomeid": melpomeid,
+                      "shortname": shortname,
+                      "longname": longname,
+#                      "timetable": timetable,
+                      "slots": courseslots});
+
+#print(mycourses);
+
+with open("mycourses.json", 'w') as f:
+    json.dump(mycourses, f);
+
+# Courses that have at least one slot.
+myrealcourses=[];
+for c in mycourses:
+    if len(c["slots"]) > 0 :
+        myrealcourses.append(c);
+
+with open("myrealcourses.json", 'w') as f:
+    json.dump(myrealcourses, f);
+
+# Teachers whose display name occurs (case-insensitively) in a timetable.
+myrealteachers_upper = {name.upper() for name in myrealteachers};
+myteachersincourses=[];
+for teacher in myteachers:
+    fullname = teacher["fullname"];
+    if fullname.upper() in myrealteachers_upper :
+        myteachersincourses.append(teacher);
+
+with open("myrealteachers.json", 'w') as f:
+    json.dump(myteachersincourses, f);
+
+print("Total number of courses: " + str(len(mycourses)));
+print("Courses in timetable: " + str(len(myrealcourses)));
+print("Total number of teachers: " + str(len(myteachers)));
+print("Teachers in timetable: " + str(len(myteachersincourses)));
+
+
diff --git a/crawl/crawl.py b/crawl/crawl.py
new file mode 100644
index 0000000..49cce83
--- /dev/null
+++ b/crawl/crawl.py
@@ -0,0 +1,108 @@
+# Log in to WebUntis with Selenium, take over the browser session (cookies and
+# bearer token) into requests and dump teachers, subjects and per-subject
+# timetables as JSON files.
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from dotenv import load_dotenv
+import requests
+import json
+import os
+
+load_dotenv()
+
+username = os.environ["WEBUNTIS_USER"]
+password = os.environ["WEBUNTIS_PASS"]
+
+driver = webdriver.Chrome()
+# Always close the browser, even when the login or the token request fails.
+try:
+    driver.get("https://tha.webuntis.com/WebUntis/#/basic/login")
+
+    wait = WebDriverWait(driver, 15)
+
+    user_input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.un-input-group__input[type='text']")))
+    pass_input = driver.find_element(By.CSS_SELECTOR, "input.un-input-group__input[type='password']")
+
+    user_input.send_keys(username)
+    pass_input.send_keys(password)
+
+    login_btn = driver.find_element(By.XPATH, "//button[contains(@class, 'redesigned-button__primary') and text()='Login']")
+    login_btn.click()
+
+    # Wait until login completes
+    wait.until(EC.url_changes("https://tha.webuntis.com/WebUntis/#/basic/login"))
+
+    # Get Bearer token in the browser before closing
+    driver.set_script_timeout(15)
+    bearer_token = driver.execute_async_script("""
+        var callback = arguments[arguments.length - 1];
+        var xhr = new XMLHttpRequest();
+        xhr.open('GET', '/WebUntis/api/token/new');
+        xhr.onload = function() { callback(xhr.responseText); };
+        xhr.send();
+    """)
+    # Do not echo any part of the token; it is a credential.
+    print(f"Token received ({len(bearer_token)} characters)")
+
+    cookies = driver.get_cookies()
+finally:
+    driver.quit()
+
+s = requests.Session()
+for cookie in cookies:
+    s.cookies.set(cookie['name'], cookie['value'], domain=cookie.get('domain', ''))
+
+BASE = "https://tha.webuntis.com/WebUntis"
+
+headers = {
+    "Authorization": f"Bearer {bearer_token}",
+}
+
+print("Teachers")
+result = s.get(f"{BASE}/api/rest/view/v1/timetable/filter?resourceType=TEACHER&timetableType=STANDARD", headers=headers)
+# Fail on HTTP errors instead of storing an error payload as data.
+result.raise_for_status()
+teachers = result.json()
+with open("teachers.json", 'w') as f:
+    json.dump(teachers, f)
+
+print("Subjects")
+result = s.get(f"{BASE}/api/rest/view/v1/timetable/filter?resourceType=SUBJECT&timetableType=STANDARD", headers=headers)
+result.raise_for_status()
+subjects = result.json()
+with open("subjects.json", 'w') as f:
+    json.dump(subjects, f)
+
+# Date range requested for every subject.
+DATE_START = "2026-03-16"
+DATE_END = "2026-03-21"
+
+print("Timetables by subject (format=2: class, room, teacher)")
+timetables = []
+for sj in subjects["subjects"]:
+    subject = sj["subject"]
+    sid = subject["id"]
+    shortname = subject["shortName"]
+    longname = subject["longName"]
+    print(longname)
+    result = s.get(
+        f"{BASE}/api/rest/view/v1/timetable/entries"
+        f"?start={DATE_START}&end={DATE_END}&format=2"
+        f"&resourceType=SUBJECT&resources={sid}"
+        f"&periodTypes=&timetableType=STANDARD",
+        headers=headers)
+    result.raise_for_status()
+    timetable = result.json()
+    timetables.append({
+        "id": sid,
+        "shortname": shortname,
+        "longname": longname,
+        "timetable": timetable,
+    })
+
+with open("timetables.json", 'w') as f:
+    json.dump(timetables, f)
+
+print("Done")
diff --git a/crawl/get.py b/crawl/get.py
new file mode 100644
index 0000000..2f72ef1
--- /dev/null
+++ b/crawl/get.py
@@ -0,0 +1,80 @@
+# Fetch the public timetable of one hard-coded subject id and print the
+# extracted slots.
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+import requests
+import time
+import json
+
+driver = webdriver.Chrome()
+driver.get("https://melpomene.webuntis.com/WebUntis/?school=HS-Augsburg#/basic/timetablePublic")
+
+time.sleep(5)
+
+cookies = driver.get_cookies()
+# The browser is only needed for its cookies; close it so it does not linger.
+driver.quit()
+
+s = requests.Session()
+for cookie in cookies:
+    s.cookies.set(cookie['name'], cookie['value'])
+
+myheaders = {
+    "Accept": "application/json, text/plain, */*",
+    "anonymous-school": "HS-Augsburg"}
+
+myrealteachers=set();
+myrooms={};
+mycourses=[];
+
+melpomeid = 14880;
+shortname="FredK";
+longname="FredL";
+
+result = s.get("https://melpomene.webuntis.com/WebUntis/api/rest/view/v1/timetable/entries?start=2025-04-07&end=2025-04-12&format=3&resourceType=SUBJECT&resources=" + str(melpomeid) + "&periodTypes=&timetableType=STANDARD",
+               headers=myheaders);
+timetable = json.loads(result.text);
+print(timetable);
+days=timetable["days"];
+courseteachers=set();
+courserooms=set();
+courseklassen=set();
+courseslots=[];
+for day in days:
+    grids=day["gridEntries"];
+    for grid in grids:
+        print(grid);
+        teachers=grid["position2"];
+        for teacher in teachers:
+            teachername=teacher["current"]["displayName"];
+            print(teachername);
+            courseteachers.add(teachername);
+        # A TypeError here (e.g. position3 is None) is reported and the rooms are skipped.
+        try:
+            for room in grid["position3"]:
+                roomname=room["current"]["displayName"];
+                roomlongname=room["current"]["longName"];
+                print(roomname);
+                myrooms[roomname] = roomlongname;
+                courserooms.add(roomname);
+        except TypeError:
+            print("Cannot handle: ");
+            print(grid);
+            pass;
+        for klasse in grid["position1"]:
+            klassenname=klasse["current"]["displayName"];
+            print(klassenname);
+            courseklassen.add(klassenname)
+        duration=grid["duration"];
+        print(duration);
+        courseslots.append({"teachers": list(courseteachers),
+                            "rooms": list(courserooms),
+                            "klassen": list(courseklassen),
+                            "time": duration});
+mycourses.append({"melpomeid": melpomeid,
+                  "shortname": shortname,
+                  "longname": longname,
+                  "slots": courseslots});
+
+print(mycourses);
+
diff --git a/crawl/wp.py b/crawl/wp.py
new file mode 100644
index 0000000..55d33e0
--- /dev/null
+++ b/crawl/wp.py
@@ -0,0 +1,187 @@
+# Build kurse.json: combine the hand-maintained PDF_COURSES list with the
+# timetable data from timetables.json (matched via the WebUntis shortname).
+import json
+from datetime import datetime
+
+# PDF courses: (SP, Name, Dozent, SWS, CP, Turnus, shortname in WebUntis)
+# Only courses with SoSe or WiSe/SoSe turnus are relevant
+PDF_COURSES = [
+    # Aufbau
+    ("Aufbau", "Praktikum Messtechnik", "Großmann", 2, 2, "SoSe", "E-MT.PR"),
+    # Automat./Robotik
+    ("Autom./Robotik", "Antriebstechnik", "Meyer", 4, 5, "SoSe", "E-ANT"),
+    ("Autom./Robotik", "Automatisierungstechnik 1", "Zeller / Danzer", 4, 5, "WiSe/SoSe", "E-AUT.1"),
+    ("Autom./Robotik", "Praktikum Automatisierungstechnik", "Zeller / Danzer", 2, 2, "WiSe/SoSe", "E-AUT.PR"),
+    ("Autom./Robotik", "Robot Systems Engineering", "Dietrich", 4, 5, "SoSe", "E-RSE"),
+    # Elektronik
+    ("Elektronik", "Automobilelektronik", "Schurk", 2, 2, "WiSe/SoSe", "E-%AUTOM"),
+    ("Elektronik", "Formula Student Electric", "Markgraf", 4, 5, "WiSe/SoSe", None),
+    ("Elektronik", "Fortgeschrittene Messtechnik", "Großmann", 4, 5, "SoSe", "E-FMT"),
+    ("Elektronik", "Schaltungstechnik", "Zedler", 4, 5, "WiSe/SoSe", "E-SCHT"),
+    ("Elektronik", "Elektrische Maschinen", "Meyer", 4, 5, "SoSe", "E-ELMA"),
+    # Energietechnik
+    ("Energietechnik", "Erneuerbare Energien", "Schwägerl", 4, 5, "SoSe", "E-EREN"),
+    ("Energietechnik", "Praktikum Erneuerbare Energien", "Schwägerl", 2, 2, "WiSe/SoSe", "E-EREN.PR"),
+    ("Energietechnik", "Hochspannungstechnik", "Finkel", 4, 5, "SoSe", "E-HST"),
+    ("Energietechnik", "Leistungselektronik", "Ritter", 4, 5, "SoSe", "E-LE"),
+    # Information
+    ("Information", "Eingebettete Echtzeitsysteme mit Praktikum", "Werthschulte", 4, 5, "SoSe", "E-EES"),
+    ("Information", "Künstliche Intelligenz: Grundlagen und Anwendungen", "Legat", 4, 5, "SoSe", None),
+    ("Information", "Digitale Zwillinge: Grundkonzepte und Anwendungen", "Legat", 4, 5, "SoSe", "E-DIGTWIN"),
+    ("Information", "Industrial Security Basics", "Hollmann", 4, 5, "SoSe", "E-IS1C5"),
+    ("Information", "Matlab/Simulink", "Werthschulte", 2, 2, "WiSe/SoSe", "E-MATLAB"),
+    # Kommunikation
+    ("Kommunikation", "Digitale Kommunikation mit Praktikum", "Kamuf", 4, 5, "SoSe", "E-DIGK"),
+    ("Kommunikation", "Hochfrequenz-Schaltungstechnik mit Praktikum", "Stolle", 4, 5, "SoSe", "E-HFSCH"),
+    ("Kommunikation", "Hochfrequenzsysteme mit Praktikum", "Stolle", 4, 5, "SoSe", "E-HFSYS"),
+    ("Kommunikation", "Funktechnik in der Praxis", "Bögl", 2, 2, "SoSe", None),
+    # Übergreifend
+    ("Übergreifend", "Fertigungstechnik", "Dietrich", 4, 5, "SoSe", "E-FT"),
+    ("Übergreifend", "Systems Engineering", "Frommelt", 4, 5, "WiSe/SoSe", None),
+    ("Übergreifend", "Elektrokonstruktion mit E-Plan", "Danzer / Voicau-Ottlik", 2, 2, "SoSe", "E-%EPLAN"),
+    ("Übergreifend", "Elektronikproduktion", "Dietrich / Baur", 2, 2, "SoSe", "E-EP"),
+    ("Übergreifend", "Systemdenken im Produktentstehungsprozess", "Königbauer", 4, 5, "SoSe", "E-THINK"),
+    ("Übergreifend", "Advanced Topics in Electrical Engineering", "Gastdozierende", 2, 2, "WiSe/SoSe", None),
+    ("Übergreifend", "Amateurfunk", "Stolle", 2, 2, "WiSe/SoSe", None),
+]
+
+# Keys are datetime.weekday() values (Monday == 0); all seven days are listed
+DAY_NAMES = {
+    0: "Montag",
+    1: "Dienstag",
+    2: "Mittwoch",
+    3: "Donnerstag",
+    4: "Freitag",
+    5: "Samstag",
+    6: "Sonntag",
+}
+
+with open("timetables.json", 'r') as f:
+    timetables = json.load(f)
+
+# Build lookup by shortname
+by_shortname = {}
+for course in timetables:
+    by_shortname[course["shortname"]] = course
+
+def get_slots(course_data):
+    """Extract (day_name, start_time, end_time, room) tuples from timetable data.
+
+    Times are formatted as "HH:MM"; the names of all rooms in position2 that
+    have a "current" entry are joined with "/".
+    """
+    slots = []
+    for day in course_data["timetable"].get("days", []):
+        for grid in day.get("gridEntries", []):
+            start = datetime.fromisoformat(grid["duration"]["start"])
+            end = datetime.fromisoformat(grid["duration"]["end"])
+            day_name = DAY_NAMES[start.weekday()]
+            start_time = start.strftime("%H:%M")
+            end_time = end.strftime("%H:%M")
+
+            rooms = []
+            for room in (grid.get("position2") or []):
+                if room.get("current"):
+                    rooms.append(room["current"]["displayName"])
+            room_str = "/".join(rooms) if rooms else ""
+
+            slots.append((day_name, start_time, end_time, room_str))
+    return slots
+
+def merge_slots(slots):
+    """Merge consecutive slots on the same day into single blocks.
+
+    Slots whose start is not later than the end of the running block are
+    merged; the rooms of a block are joined with "/" in sorted order. The
+    "HH:MM" strings are compared as text, which works because they are
+    zero-padded.
+    """
+    if not slots:
+        return []
+    # Group by day
+    by_day = {}
+    for day, start, end, room in slots:
+        by_day.setdefault(day, []).append((start, end, room))
+
+    merged = []
+    for day in DAY_NAMES.values():
+        if day not in by_day:
+            continue
+        entries = sorted(by_day[day])
+        # Merge entries that touch or overlap (next start <= current end, same day)
+        current_start, current_end, first_room = entries[0]
+        rooms = {first_room} if first_room else set()
+        for start, end, room in entries[1:]:
+            if start <= current_end:
+                current_end = max(current_end, end)
+                if room:
+                    rooms.add(room)
+            else:
+                merged.append((day, current_start, current_end, "/".join(sorted(rooms))))
+                current_start, current_end = start, end
+                rooms = {room} if room else set()
+        merged.append((day, current_start, current_end, "/".join(sorted(rooms))))
+    return merged
+
+kurse = []
+no_data = []
+
+# One output row per PDF course with up to four merged time blocks.
+for sp, name, dozent, sws, cp, turnus, shortname in PDF_COURSES:
+    semester = "WS/SS" if "WiSe/SoSe" in turnus else "SS"
+
+    course_data = by_shortname.get(shortname) if shortname else None
+
+    if course_data:
+        slots = get_slots(course_data)
+        merged = merge_slots(slots)
+    else:
+        merged = []
+
+    if not merged:
+        no_data.append(name)
+
+    entry = {
+        "Vertiefung": sp,
+        "Name": name,
+        "Dozent": dozent,
+        "Semester": semester,
+        "SWS": str(sws),
+        "CP": str(cp),
+        "Tag1": "",
+        "Startzeit1": "",
+        "Ende1": "",
+        "Raum1": "",
+        "Tag2": "",
+        "Startzeit2": "",
+        "Ende2": "",
+        "Raum2": "",
+        "Tag3": "",
+        "Startzeit3": "",
+        "Ende3": "",
+        "Raum3": "",
+        "Tag4": "",
+        "Startzeit4": "",
+        "Ende4": "",
+        "Raum4": "",
+    }
+
+    for i, slot in enumerate(merged[:4], 1):
+        entry[f"Tag{i}"] = slot[0]
+        entry[f"Startzeit{i}"] = slot[1]
+        entry[f"Ende{i}"] = slot[2]
+        entry[f"Raum{i}"] = slot[3]
+    if len(merged) > 4:
+        print(f" WARNING: {name} has {len(merged)} time blocks, only first 4 used")
+
+    kurse.append(entry)
+
+# ensure_ascii=False writes umlauts verbatim, so pin the file encoding.
+with open("kurse.json", 'w', encoding="utf-8") as f:
+    json.dump(kurse, f, ensure_ascii=False, indent=2)
+
+print(f"Generated kurse.json with {len(kurse)} courses")
+if no_data:
+    print(f"\nNo timetable data for {len(no_data)} courses:")
+    for n in no_data:
+        print(f" - {n}")
-- 
cgit v1.2.3