aboutsummaryrefslogtreecommitdiff
path: root/crawl/crawl.py
diff options
context:
space:
mode:
authorFriedrich Beckmann <friedrich.beckmann@tha.de>2026-03-19 16:23:55 +0100
committerFriedrich Beckmann <friedrich.beckmann@tha.de>2026-03-19 16:23:55 +0100
commitba2d143a8f0328d61294e84d46c4ed55ff10421e (patch)
treedf7d68dcba756d86351ec0287897b8a14dee79b9 /crawl/crawl.py
parent12a87277041c82edcec46b4f05e35abac632d271 (diff)
update crawler to retrieve data from webuntis
Diffstat (limited to 'crawl/crawl.py')
-rw-r--r--crawl/crawl.py96
1 files changed, 96 insertions, 0 deletions
diff --git a/crawl/crawl.py b/crawl/crawl.py
new file mode 100644
index 0000000..49cce83
--- /dev/null
+++ b/crawl/crawl.py
@@ -0,0 +1,96 @@
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from dotenv import load_dotenv
+import requests
+import json
+import os
+
+load_dotenv()
+
+# Credentials are read from the environment (load_dotenv() above may have
+# populated it); a missing variable raises KeyError before a browser starts.
+username = os.environ["WEBUNTIS_USER"]
+password = os.environ["WEBUNTIS_PASS"]
+
+LOGIN_URL = "https://tha.webuntis.com/WebUntis/#/basic/login"
+
+driver = webdriver.Chrome()
+try:
+    driver.get(LOGIN_URL)
+
+    wait = WebDriverWait(driver, 15)
+
+    # The password field is looked up without a wait once the user field is
+    # present, exactly as before.
+    user_input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.un-input-group__input[type='text']")))
+    pass_input = driver.find_element(By.CSS_SELECTOR, "input.un-input-group__input[type='password']")
+
+    user_input.send_keys(username)
+    pass_input.send_keys(password)
+
+    login_btn = driver.find_element(By.XPATH, "//button[contains(@class, 'redesigned-button__primary') and text()='Login']")
+    login_btn.click()
+
+    # Wait until login completes (the URL moves away from the login route).
+    wait.until(EC.url_changes(LOGIN_URL))
+
+    # Get Bearer token in the browser before closing. The request is issued
+    # from the page itself so it runs inside the logged-in browser session.
+    # Status and body are both reported back so a failed request can be told
+    # apart from a real token; onerror keeps a network failure from stalling
+    # until the script timeout.
+    driver.set_script_timeout(15)
+    token_response = driver.execute_async_script("""
+        var callback = arguments[arguments.length - 1];
+        var xhr = new XMLHttpRequest();
+        xhr.open('GET', '/WebUntis/api/token/new');
+        xhr.onload = function() { callback({status: xhr.status, body: xhr.responseText}); };
+        xhr.onerror = function() { callback({status: 0, body: ''}); };
+        xhr.send();
+    """)
+
+    cookies = driver.get_cookies()
+finally:
+    # Always shut the browser down, even when login or the token request
+    # fails, so no Chrome process is left behind.
+    driver.quit()
+
+if token_response["status"] != 200 or not token_response["body"]:
+    raise RuntimeError(
+        f"Could not obtain bearer token (HTTP status {token_response['status']})"
+    )
+
+bearer_token = token_response["body"]
+print(f"Token: {bearer_token[:50]}...")
+
+# Root of the WebUntis web application; API paths below are appended to it.
+BASE = "https://tha.webuntis.com/WebUntis"
+
+# Every API request carries the bearer token fetched in the browser.
+headers = dict(Authorization=f"Bearer {bearer_token}")
+
+# Copy the browser cookies into a requests session so later HTTP calls
+# send the same cookies the browser held.
+s = requests.Session()
+for cookie in cookies:
+    cookie_domain = cookie.get('domain', '')
+    s.cookies.set(cookie['name'], cookie['value'], domain=cookie_domain)
+
+def _fetch_filter(resource_type):
+    """Return the decoded JSON of the timetable filter for *resource_type*.
+
+    Sends a GET to the ``timetable/filter`` endpoint with the session cookies
+    and bearer header set up earlier in this script.
+
+    Raises:
+        requests.HTTPError: if the server answers with an error status, so an
+            error page is never mistaken for data.
+        requests.Timeout: if the server does not respond within 30 seconds.
+    """
+    response = s.get(
+        f"{BASE}/api/rest/view/v1/timetable/filter"
+        f"?resourceType={resource_type}&timetableType=STANDARD",
+        headers=headers,
+        timeout=30,
+    )
+    response.raise_for_status()
+    return response.json()
+
+
+print("Teachers")
+teachers = _fetch_filter("TEACHER")
+with open("teachers.json", 'w') as f:
+    json.dump(teachers, f)
+
+print("Subjects")
+subjects = _fetch_filter("SUBJECT")
+with open("subjects.json", 'w') as f:
+    json.dump(subjects, f)
+
+# Date range (ISO dates) requested for every subject timetable.
+DATE_START = "2026-03-16"
+DATE_END = "2026-03-21"
+
+print("Timetables by subject (format=2: class, room, teacher)")
+timetables = []
+for sj in subjects["subjects"]:
+    subject = sj["subject"]
+    sid = subject["id"]
+    shortname = subject["shortName"]
+    longname = subject["longName"]
+    print(longname)
+    result = s.get(
+        f"{BASE}/api/rest/view/v1/timetable/entries"
+        f"?start={DATE_START}&end={DATE_END}&format=2"
+        f"&resourceType=SUBJECT&resources={sid}"
+        f"&periodTypes=&timetableType=STANDARD",
+        headers=headers,
+        # Bound each request so one stalled connection cannot hang the crawl.
+        timeout=30,
+    )
+    # Fail loudly on an HTTP error instead of storing an error payload as if
+    # it were a timetable.
+    result.raise_for_status()
+    timetables.append({
+        "id": sid,
+        "shortname": shortname,
+        "longname": longname,
+        "timetable": result.json(),
+    })
+
+# One JSON file holding the collected per-subject records.
+with open("timetables.json", 'w') as f:
+    json.dump(timetables, f)
+
+print("Done")