aboutsummaryrefslogtreecommitdiff
path: root/crawl
diff options
context:
space:
mode:
Diffstat (limited to 'crawl')
-rw-r--r--crawl/analyze.py122
-rw-r--r--crawl/crawl.py96
-rw-r--r--crawl/get.py75
-rw-r--r--crawl/wp.py171
4 files changed, 464 insertions, 0 deletions
diff --git a/crawl/analyze.py b/crawl/analyze.py
new file mode 100644
index 0000000..add1484
--- /dev/null
+++ b/crawl/analyze.py
@@ -0,0 +1,122 @@
+import time
+import json
+
def _load_json(path):
    """Read a JSON document from *path* and return the parsed object."""
    with open(path, 'r') as fh:
        return json.load(fh)


# Raw crawl output produced by crawl.py.
subjects = _load_json("subjects.json")
teachers = _load_json("teachers.json")
timetables = _load_json("timetables.json")
+
+
# Flatten the raw WebUntis teacher records into simple dicts and persist
# them as myteachers.json.
#
# Fix: the original did ``surname, firstname = fullname.split()`` and on
# ValueError (names with one token, or more than two tokens) discarded
# BOTH parts.  Now the first token is the surname and everything after it
# is the first name, so multi-part names survive; two-token names behave
# exactly as before.
allteachers = teachers["teachers"]
myteachers = []
for teacher in allteachers:
    fullname = teacher["teacher"]["displayName"]
    parts = fullname.split()
    surname = parts[0] if parts else ""
    firstname = " ".join(parts[1:])
    # Short name of the first department, if the teacher has any.
    faculty = teacher["departments"][0]["shortName"] if teacher["departments"] else ""
    myteachers.append({"fullname": fullname,
                       "surname": surname,
                       "firstname": firstname,
                       "faculty": faculty})

with open("myteachers.json", 'w') as f:
    json.dump(myteachers, f)
+
# Reduce each department record to its short and long display names.
allfaculties = teachers["departments"]
myfaculties = [{"shortname": dept["shortName"], "longname": dept["displayName"]}
               for dept in allfaculties]
with open("myfaculties.json", 'w') as f:
    json.dump(myfaculties, f)
+
+
# Walk every crawled course timetable and flatten it into per-course slot
# lists.  Side products: the set of teacher names that actually appear in
# any timetable, and a room short-name -> long-name map.
# NOTE(review): mytimetable is assigned but never used below.
myrealteachers=set();
mytimetable={};
myrooms={};
mycourses=[];
for course in timetables:
    # Older dumps used "melpomeid" instead of "id"; accept both.
    melpomeid = course.get("id", course.get("melpomeid"));
    shortname = course["shortname"];
    longname = course["longname"];
    timetable = course["timetable"];
    days=timetable["days"];
    # NOTE(review): these three sets accumulate over ALL grid entries of
    # the course, so every slot appended below carries the union of
    # teachers/rooms/klassen seen so far in the course, not only this
    # grid's own entries -- confirm this aggregation is intended.
    courseteachers=set();
    courserooms=set();
    courseklassen=set();
    courseslots=[];
    for day in days:
        grids=day["gridEntries"];
        for grid in grids:
            # Per crawl.py (format=2): position1 = classes,
            # position2 = rooms, position3 = teachers.
            if grid["position1"]:
                for klasse in grid["position1"]:
                    # Entries without a "current" payload are cancelled
                    # or substituted -- skip them.
                    if not klasse.get("current"):
                        continue;
                    klassenname=klasse["current"]["displayName"];
                    courseklassen.add(klassenname)
            if grid["position2"]:
                for room in grid["position2"]:
                    if not room.get("current"):
                        continue;
                    roomname=room["current"]["displayName"];
                    roomlongname=room["current"]["longName"];
                    myrooms[roomname] = roomlongname;
                    courserooms.add(roomname);
            cteachers=grid["position3"];
            if cteachers :
                for teacher in cteachers:
                    if not teacher.get("current"):
                        continue;
                    teachername=teacher["current"]["displayName"];
                    print(teachername);
                    myrealteachers.add(teachername);
                    courseteachers.add(teachername);
            # "duration" holds the slot's start/end timestamps (see
            # wp.py, which parses duration["start"]/["end"]).
            duration=grid["duration"];
            courseslots.append({"teachers": list(courseteachers),
                                "rooms": list(courserooms),
                                "klassen": list(courseklassen),
                                "info": grid["lessonInfo"],
                                "time": duration});
    mycourses.append({"melpomeid": melpomeid,
                      "shortname": shortname,
                      "longname": longname,
#                     "timetable": timetable,
                      "slots": courseslots});
+
+#print(mycourses);
+
with open("mycourses.json", 'w') as f:
    json.dump(mycourses, f)

# A course that never shows up in the timetable has an empty slot list.
myrealcourses = [course for course in mycourses if course["slots"]]

with open("myrealcourses.json", 'w') as f:
    json.dump(myrealcourses, f)
+
# Keep only teachers whose display name occurs in some timetable;
# the comparison is case-insensitive via a pre-uppercased lookup set.
myrealteachers_upper = {name.upper() for name in myrealteachers}
myteachersincourses = [t for t in myteachers
                       if t["fullname"].upper() in myrealteachers_upper]

with open("myrealteachers.json", 'w') as f:
    json.dump(myteachersincourses, f)

# Small summary of how much of the crawl was actually usable.
print(f"Total number of courses: {len(mycourses)}")
print(f"Courses in timetable: {len(myrealcourses)}")
print(f"Total number of teachers: {len(myteachers)}")
print(f"Teachers in timetable: {len(myteachersincourses)}")
+
+
diff --git a/crawl/crawl.py b/crawl/crawl.py
new file mode 100644
index 0000000..49cce83
--- /dev/null
+++ b/crawl/crawl.py
@@ -0,0 +1,96 @@
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from dotenv import load_dotenv
+import requests
+import json
+import os
+
load_dotenv()

# Credentials come from the environment (.env via python-dotenv); a
# missing variable fails fast with KeyError rather than logging in blind.
username = os.environ["WEBUNTIS_USER"]
password = os.environ["WEBUNTIS_PASS"]

driver = webdriver.Chrome()
driver.get("https://tha.webuntis.com/WebUntis/#/basic/login")

wait = WebDriverWait(driver, 15)

# Fill in the login form once the username field has rendered; the
# password field is assumed to exist by then.
user_input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.un-input-group__input[type='text']")))
pass_input = driver.find_element(By.CSS_SELECTOR, "input.un-input-group__input[type='password']")

user_input.send_keys(username)
pass_input.send_keys(password)

login_btn = driver.find_element(By.XPATH, "//button[contains(@class, 'redesigned-button__primary') and text()='Login']")
login_btn.click()

# Wait until login completes
wait.until(EC.url_changes("https://tha.webuntis.com/WebUntis/#/basic/login"))

# Get Bearer token in the browser before closing.  The XHR runs inside
# the page itself -- presumably so the authenticated session cookies
# apply to the token request; confirm against the WebUntis API.
driver.set_script_timeout(15)
bearer_token = driver.execute_async_script("""
    var callback = arguments[arguments.length - 1];
    var xhr = new XMLHttpRequest();
    xhr.open('GET', '/WebUntis/api/token/new');
    xhr.onload = function() { callback(xhr.responseText); };
    xhr.send();
""")
print(f"Token: {bearer_token[:50]}...")

# Carry the browser's cookies over to requests, then drop the browser.
cookies = driver.get_cookies()
driver.quit()
+
# Replay the browser session in requests and download the filter lists.
s = requests.Session()
for c in cookies:
    s.cookies.set(c['name'], c['value'], domain=c.get('domain', ''))

BASE = "https://tha.webuntis.com/WebUntis"

headers = {
    "Authorization": f"Bearer {bearer_token}",
}


def _fetch_filter(resource_type):
    """GET the timetable filter list for one resource type as parsed JSON."""
    url = f"{BASE}/api/rest/view/v1/timetable/filter?resourceType={resource_type}&timetableType=STANDARD"
    return s.get(url, headers=headers).json()


print("Teachers")
teachers = _fetch_filter("TEACHER")
with open("teachers.json", 'w') as f:
    json.dump(teachers, f)

print("Subjects")
subjects = _fetch_filter("SUBJECT")
with open("subjects.json", 'w') as f:
    json.dump(subjects, f)
+
# One representative week; format=2 puts classes/rooms/teachers into
# position1/2/3 of each grid entry.
DATE_START = "2026-03-16"
DATE_END = "2026-03-21"

print("Timetables by subject (format=2: class, room, teacher)")
timetables = []
for entry in subjects["subjects"]:
    subj = entry["subject"]
    sid = subj["id"]
    shortname = subj["shortName"]
    longname = subj["longName"]
    print(longname)
    url = (f"{BASE}/api/rest/view/v1/timetable/entries"
           f"?start={DATE_START}&end={DATE_END}&format=2"
           f"&resourceType=SUBJECT&resources={sid}"
           f"&periodTypes=&timetableType=STANDARD")
    timetables.append({
        "id": sid,
        "shortname": shortname,
        "longname": longname,
        "timetable": s.get(url, headers=headers).json(),
    })

with open("timetables.json", 'w') as f:
    json.dump(timetables, f)

print("Done")
diff --git a/crawl/get.py b/crawl/get.py
new file mode 100644
index 0000000..2f72ef1
--- /dev/null
+++ b/crawl/get.py
@@ -0,0 +1,75 @@
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+import requests
+import time
+import json
+
# Open the public timetable page once so the server hands out the
# anonymous session cookies, then reuse them in a requests session.
driver = webdriver.Chrome()
driver.get("https://melpomene.webuntis.com/WebUntis/?school=HS-Augsburg#/basic/timetablePublic")

time.sleep(5)

cookies = driver.get_cookies()

s = requests.Session()
for c in cookies:
    s.cookies.set(c['name'], c['value'])

myheaders = {
    "Accept": "application/json, text/plain, */*",
    "anonymous-school": "HS-Augsburg"}
+
# Prototype: fetch and flatten the timetable of one hard-coded subject.
myrealteachers=set();
myrooms={};
mycourses=[];

# Hard-coded sample subject id and placeholder names used for probing.
melpomeid = 14880;
shortname="FredK";
longname="FredL";

result = s.get("https://melpomene.webuntis.com/WebUntis/api/rest/view/v1/timetable/entries?start=2025-04-07&end=2025-04-12&format=3&resourceType=SUBJECT&resources=" + str(melpomeid) + "&periodTypes=&timetableType=STANDARD",
               headers=myheaders);
timetable = json.loads(result.text);
print(timetable);
days=timetable["days"];
courseteachers=set();
courserooms=set();
courseklassen=set();
courseslots=[];
for day in days:
    grids=day["gridEntries"];
    for grid in grids:
        print(grid);
        # NOTE(review): with format=3 this reads teachers from position2
        # and rooms from position3 -- the opposite of analyze.py (which
        # uses format=2).  Confirm the per-format field layout.
        teachers=grid["position2"];
        for teacher in teachers:
            teachername=teacher["current"]["displayName"];
            print(teachername);
            courseteachers.add(teachername);
        try:
            # position3 may be None / non-iterable; log and carry on.
            for room in grid["position3"]:
                roomname=room["current"]["displayName"];
                roomlongname=room["current"]["longName"];
                print(roomname);
                myrooms[roomname] = roomlongname;
                courserooms.add(roomname);
        except TypeError:
            print("Cannot handle: ");
            print(grid);
            pass;
        for klasse in grid["position1"]:
            klassenname=klasse["current"]["displayName"];
            print(klassenname);
            courseklassen.add(klassenname)
        duration=grid["duration"];
        print(duration);
        # NOTE(review): the sets accumulate across grids, so each slot
        # carries everything seen so far -- same caveat as analyze.py.
        courseslots.append({"teachers": list(courseteachers),
                            "rooms": list(courserooms),
                            "klassen": list(courseklassen),
                            "time": duration});
mycourses.append({"melpomeid": melpomeid,
                  "shortname": shortname,
                  "longname": longname,
                  "slots": courseslots});

print(mycourses);
+
diff --git a/crawl/wp.py b/crawl/wp.py
new file mode 100644
index 0000000..55d33e0
--- /dev/null
+++ b/crawl/wp.py
@@ -0,0 +1,171 @@
+import json
+from datetime import datetime
+
# PDF courses: (SP, Name, Dozent, SWS, CP, Turnus, shortname in WebUntis)
# Only courses with SoSe or WiSe/SoSe turnus are relevant
# A shortname of None means no WebUntis subject was identified for the
# course, so it will always end up in the "no timetable data" list.
# NOTE(review): shortnames containing '%' (e.g. "E-%AUTOM") look like
# wildcard patterns but are matched literally against by_shortname below,
# so they can never match -- confirm whether these keys are correct.
PDF_COURSES = [
    # Aufbau
    ("Aufbau", "Praktikum Messtechnik", "Großmann", 2, 2, "SoSe", "E-MT.PR"),
    # Automat./Robotik
    ("Autom./Robotik", "Antriebstechnik", "Meyer", 4, 5, "SoSe", "E-ANT"),
    ("Autom./Robotik", "Automatisierungstechnik 1", "Zeller / Danzer", 4, 5, "WiSe/SoSe", "E-AUT.1"),
    ("Autom./Robotik", "Praktikum Automatisierungstechnik", "Zeller / Danzer", 2, 2, "WiSe/SoSe", "E-AUT.PR"),
    ("Autom./Robotik", "Robot Systems Engineering", "Dietrich", 4, 5, "SoSe", "E-RSE"),
    # Elektronik
    ("Elektronik", "Automobilelektronik", "Schurk", 2, 2, "WiSe/SoSe", "E-%AUTOM"),
    ("Elektronik", "Formula Student Electric", "Markgraf", 4, 5, "WiSe/SoSe", None),
    ("Elektronik", "Fortgeschrittene Messtechnik", "Großmann", 4, 5, "SoSe", "E-FMT"),
    ("Elektronik", "Schaltungstechnik", "Zedler", 4, 5, "WiSe/SoSe", "E-SCHT"),
    ("Elektronik", "Elektrische Maschinen", "Meyer", 4, 5, "SoSe", "E-ELMA"),
    # Energietechnik
    ("Energietechnik", "Erneuerbare Energien", "Schwägerl", 4, 5, "SoSe", "E-EREN"),
    ("Energietechnik", "Praktikum Erneuerbare Energien", "Schwägerl", 2, 2, "WiSe/SoSe", "E-EREN.PR"),
    ("Energietechnik", "Hochspannungstechnik", "Finkel", 4, 5, "SoSe", "E-HST"),
    ("Energietechnik", "Leistungselektronik", "Ritter", 4, 5, "SoSe", "E-LE"),
    # Information
    ("Information", "Eingebettete Echtzeitsysteme mit Praktikum", "Werthschulte", 4, 5, "SoSe", "E-EES"),
    ("Information", "Künstliche Intelligenz: Grundlagen und Anwendungen", "Legat", 4, 5, "SoSe", None),
    ("Information", "Digitale Zwillinge: Grundkonzepte und Anwendungen", "Legat", 4, 5, "SoSe", "E-DIGTWIN"),
    ("Information", "Industrial Security Basics", "Hollmann", 4, 5, "SoSe", "E-IS1C5"),
    ("Information", "Matlab/Simulink", "Werthschulte", 2, 2, "WiSe/SoSe", "E-MATLAB"),
    # Kommunikation
    ("Kommunikation", "Digitale Kommunikation mit Praktikum", "Kamuf", 4, 5, "SoSe", "E-DIGK"),
    ("Kommunikation", "Hochfrequenz-Schaltungstechnik mit Praktikum", "Stolle", 4, 5, "SoSe", "E-HFSCH"),
    ("Kommunikation", "Hochfrequenzsysteme mit Praktikum", "Stolle", 4, 5, "SoSe", "E-HFSYS"),
    ("Kommunikation", "Funktechnik in der Praxis", "Bögl", 2, 2, "SoSe", None),
    # Übergreifend
    ("Übergreifend", "Fertigungstechnik", "Dietrich", 4, 5, "SoSe", "E-FT"),
    ("Übergreifend", "Systems Engineering", "Frommelt", 4, 5, "WiSe/SoSe", None),
    ("Übergreifend", "Elektrokonstruktion mit E-Plan", "Danzer / Voicau-Ottlik", 2, 2, "SoSe", "E-%EPLAN"),
    ("Übergreifend", "Elektronikproduktion", "Dietrich / Baur", 2, 2, "SoSe", "E-EP"),
    ("Übergreifend", "Systemdenken im Produktentstehungsprozess", "Königbauer", 4, 5, "SoSe", "E-THINK"),
    ("Übergreifend", "Advanced Topics in Electrical Engineering", "Gastdozierende", 2, 2, "WiSe/SoSe", None),
    ("Übergreifend", "Amateurfunk", "Stolle", 2, 2, "WiSe/SoSe", None),
]

# Map datetime.weekday() (Monday == 0) to German day names.  Sunday (6)
# has no entry, so a Sunday grid entry would raise KeyError in get_slots.
DAY_NAMES = {
    0: "Montag",
    1: "Dienstag",
    2: "Mittwoch",
    3: "Donnerstag",
    4: "Freitag",
    5: "Samstag",
}
+
with open("timetables.json", 'r') as f:
    timetables = json.load(f)

# Index the crawled courses by their WebUntis short name (later entries
# with the same short name win, exactly as in a manual insert loop).
by_shortname = {entry["shortname"]: entry for entry in timetables}
+
def get_slots(course_data):
    """Extract (day_name, start_time, end_time, room) tuples from timetable data."""
    extracted = []
    for day in course_data["timetable"].get("days", []):
        for entry in day.get("gridEntries", []):
            begin = datetime.fromisoformat(entry["duration"]["start"])
            finish = datetime.fromisoformat(entry["duration"]["end"])
            # position2 holds the rooms; skip entries without a
            # "current" payload (cancelled/substituted).
            room_names = [r["current"]["displayName"]
                          for r in (entry.get("position2") or [])
                          if r.get("current")]
            extracted.append((
                DAY_NAMES[begin.weekday()],
                begin.strftime("%H:%M"),
                finish.strftime("%H:%M"),
                "/".join(room_names),
            ))
    return extracted
+
def merge_slots(slots):
    """Merge consecutive slots on the same day into single blocks.

    Args:
        slots: list of ``(day_name, start, end, room)`` tuples with
            zero-padded "HH:MM" time strings (so lexicographic order is
            chronological order).

    Returns:
        One ``(day_name, start, end, rooms)`` tuple per contiguous block
        (touching or overlapping slots are merged), ordered
        Montag..Samstag; ``rooms`` is the "/"-joined sorted set of rooms
        covered by the block.

    Fix: the original iterated ``DAY_NAMES.values()`` only, silently
    dropping any slot whose day name is not in that dict.  Unknown days
    are now appended after the canonical weekday order instead of
    vanishing; output for Montag..Samstag input is unchanged.
    """
    if not slots:
        return []

    # Canonical weekday order (mirrors DAY_NAMES in this module).
    day_order = ["Montag", "Dienstag", "Mittwoch", "Donnerstag",
                 "Freitag", "Samstag"]

    # Group the raw slots per day.
    by_day = {}
    for day, start, end, room in slots:
        by_day.setdefault(day, []).append((start, end, room))

    ordered_days = [d for d in day_order if d in by_day]
    ordered_days += sorted(d for d in by_day if d not in day_order)

    merged = []
    for day in ordered_days:
        entries = sorted(by_day[day])
        current_start, current_end, first_room = entries[0]
        rooms = {first_room} if first_room else set()
        for start, end, room in entries[1:]:
            if start <= current_end:
                # Touching or overlapping: extend the current block.
                current_end = max(current_end, end)
                if room:
                    rooms.add(room)
            else:
                merged.append((day, current_start, current_end,
                               "/".join(sorted(rooms))))
                current_start, current_end = start, end
                rooms = {room} if room else set()
        merged.append((day, current_start, current_end,
                       "/".join(sorted(rooms))))
    return merged
+
# Up to four merged time blocks are exported per course, each as a
# (Tag, Startzeit, Ende, Raum) column group.
SLOT_FIELDS = ("Tag", "Startzeit", "Ende", "Raum")
MAX_BLOCKS = 4

kurse = []
no_data = []

for sp, name, dozent, sws, cp, turnus, shortname in PDF_COURSES:
    # Courses offered in both semesters are tagged "WS/SS".
    semester = "WS/SS" if "WiSe/SoSe" in turnus else "SS"

    blocks = []
    if shortname and shortname in by_shortname:
        blocks = merge_slots(get_slots(by_shortname[shortname]))
    if not blocks:
        no_data.append(name)

    entry = {
        "Vertiefung": sp,
        "Name": name,
        "Dozent": dozent,
        "Semester": semester,
        "SWS": str(sws),
        "CP": str(cp),
    }
    # Create the fixed slot columns in order, empty by default ...
    for i in range(1, MAX_BLOCKS + 1):
        for field in SLOT_FIELDS:
            entry[f"{field}{i}"] = ""
    # ... then fill them from the merged blocks.
    for i, (day, start, end, room) in enumerate(blocks[:MAX_BLOCKS], 1):
        entry[f"Tag{i}"] = day
        entry[f"Startzeit{i}"] = start
        entry[f"Ende{i}"] = end
        entry[f"Raum{i}"] = room
    if len(blocks) > MAX_BLOCKS:
        print(f"  WARNING: {name} has {len(blocks)} time blocks, only first 4 used")

    kurse.append(entry)

with open("kurse.json", 'w') as f:
    json.dump(kurse, f, ensure_ascii=False, indent=2)

print(f"Generated kurse.json with {len(kurse)} courses")
if no_data:
    print(f"\nNo timetable data for {len(no_data)} courses:")
    for missing in no_data:
        print(f"  - {missing}")