modular parser framework
This commit is contained in:
@@ -0,0 +1,190 @@
|
||||
import logging
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import re
|
||||
import time
|
||||
import json
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WebspeiseplanData:
    """Immutable bundle of the data downloaded from a Webspeiseplan server.

    Every mapping is keyed by outlet name, so the entries belonging to one
    outlet can be looked up with the same key in all four fields. The
    dataclass is frozen: fields cannot be rebound after construction (the
    dictionaries themselves remain ordinary mutable dicts).
    """

    # Outlet name -> outlet record as returned by the server.
    outlets: dict[str, dict]
    # Outlet name -> location record the outlet refers to.
    locations: dict[str, dict]
    # Outlet name -> menu data downloaded for the outlet's location.
    menus: dict[str, dict]
    # Outlet name -> meal categories of the outlet's location.
    meal_categories: dict[str, dict]
|
||||
|
||||
|
||||
class WebspeiseplanAPI:
    """Client for Webspeiseplan installations.

    The client first extracts the proxy token from the installation's
    JavaScript bundle (``parse_token``) and then uses it to query individual
    data models through ``index.php`` (``parse_model`` and the
    ``parse_*`` convenience wrappers). ``fetch_all`` combines these steps.
    """

    logger = logging.getLogger(__name__)

    def __init__(self, base_url: str):
        """Initialize the web service client.

        Args:
            base_url: Root URL of the installation. Trailing slashes are
                stripped before the URL is stored.

        Raises:
            ValueError: If ``base_url`` lacks a scheme or a network location.
        """
        # Installs a default handler on the root logger if none is configured.
        logging.basicConfig()
        self.base_url = base_url.rstrip("/")
        parsed_url = urllib.parse.urlparse(self.base_url)
        if not (parsed_url.scheme and parsed_url.netloc):
            raise ValueError(f"Invalid Webspeiseplan base URL: {base_url!r}")
        # Sent as the ``Host`` header of the spoofed browser requests.
        self.host = parsed_url.netloc

    def fetch_all(self) -> "WebspeiseplanData":
        """Download all data required to render OpenMensa feeds.

        Fetches the proxy token, the outlets and the locations once, then
        the menu and the meal categories for every outlet's location.

        Returns:
            A ``WebspeiseplanData`` whose mappings are all keyed by outlet
            name. Meal categories are indexed by ``gerichtkategorieID``.

        Raises:
            KeyError: If a server record lacks an expected key or an outlet
                references a location id that was not downloaded.
        """
        proxy_token = self.parse_token()
        outlets = self.parse_outlets(proxy_token)
        locations = {
            item["id"]: item for item in self.parse_location(proxy_token)
        }

        menus: dict[str, dict] = {}
        meal_categories: dict[str, dict] = {}
        outlet_locations: dict[str, dict] = {}
        for outlet in outlets.values():
            name = outlet["name"]
            location = outlet["standortID"]
            menu = self.parse_menu(proxy_token, location)
            categories = self.parse_meal_category(proxy_token, location)

            menus[name] = menu
            meal_categories[name] = {
                item["gerichtkategorieID"]: item for item in categories
            }
            outlet_locations[name] = locations[location]

        return WebspeiseplanData(
            outlets=outlets,
            locations=outlet_locations,
            menus=menus,
            meal_categories=meal_categories,
        )

    def __spoof_req_headers(self, req: urllib.request.Request) -> None:
        """Add browser-like headers to a request.

        The headers imitate an XMLHttpRequest issued by desktop Chrome 115
        from the installation's own ``InitialConfig`` page.

        Args:
            req: Request to modify in place.
        """
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
            "Host": self.host,
            "Referer": f"{self.base_url}/InitialConfig",
            "Sec-Ch-Ua": (
                '"Not/A)Brand";v="99", '
                '"Google Chrome";v="115", '
                '"Chromium";v="115"'
            ),
            "Sec-Ch-Ua-Mobile": "?0",
            "Sec-Ch-Ua-Platform": "Linux",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "User-Agent": (
                "Mozilla/5.0 (X11; Linux x86_64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/115.0.0.0 Safari/537.36"
            ),
            "X-Requested-With": "XMLHttpRequest",
        }
        for header, value in headers.items():
            req.add_header(header, value)

    def parse_model(self, params: dict):
        """Retrieve data from the host.

        Args:
            params: Query parameters; they are URL-encoded and appended to
                ``<base_url>/index.php``.

        Returns:
            The value stored under the ``"content"`` key of the decoded
            JSON response.
        """
        query = urllib.parse.urlencode(params)
        url = f"{self.base_url}/index.php?{query}"
        WebspeiseplanAPI.logger.debug("__parse_model: %s", url)

        req = urllib.request.Request(url)
        self.__spoof_req_headers(req)
        with urllib.request.urlopen(req) as resp:
            data = resp.read()
        return json.loads(data)["content"]

    def parse_token(self) -> str:
        """Get the token from the proxy server.

        Downloads the start page, locates the main JavaScript bundle
        referenced there and extracts the ``PROXY_TOKEN`` value from it.

        Returns:
            The hexadecimal proxy token.

        Raises:
            IndexError: If the script contains no ``PROXY_TOKEN`` entry.
        """
        with urllib.request.urlopen(
            urllib.request.Request(self.base_url)
        ) as resp:
            page = resp.read().decode("utf-8")

        bundles = re.findall(r"/main.[0-9a-f]+.js", page)
        # Development build made it to production, which does not produce
        # JS chunks with cache-busting filenames
        script_path = bundles[0] if bundles else "/index.js"

        WebspeiseplanAPI.logger.debug(
            "__parse_token: downloading script %s", script_path
        )
        # NOTE: script_path starts with "/", so urljoin resolves it against
        # the host root rather than against the path part of base_url.
        script_url = urllib.parse.urljoin(f"{self.base_url}/", script_path)
        with urllib.request.urlopen(
            urllib.request.Request(script_url)
        ) as resp:
            script = resp.read().decode("utf-8")

        found = re.search(r"PROXY_TOKEN:\s*[\"']([0-9a-f]+)[\"']", script)
        if found is None:
            # Same exception type as indexing an empty match list, but
            # with a message that tells the operator what went wrong.
            raise IndexError(f"PROXY_TOKEN not found in {script_url}")
        proxy_token = found.group(1)
        WebspeiseplanAPI.logger.debug(
            "__parse_token: PROXY_TOKEN %s", proxy_token
        )
        return proxy_token

    @staticmethod
    def _model_params(proxy_token: str, model: str, location, languagetype) -> dict:
        """Build the query parameters shared by all model requests."""
        return {
            "token": proxy_token,
            "model": model,
            "location": location,
            "languagetype": languagetype,
            # Current time in milliseconds; presumably a cache-busting
            # value like the one browsers' AJAX libraries send - confirm.
            "_": int(time.time() * 1000),
        }

    def parse_outlets(self, proxy_token: str) -> dict[str, dict]:
        """Get the outlets from the server.

        Args:
            proxy_token: Token obtained from ``parse_token``.

        Returns:
            The outlet records indexed by their ``"name"`` value.
        """
        params = self._model_params(proxy_token, "outlet", "", "")
        return {outlet["name"]: outlet for outlet in self.parse_model(params)}

    def parse_menu(self, proxy_token: str, location: int) -> dict:
        """Get the menu for a specific location.

        Args:
            proxy_token: Token obtained from ``parse_token``.
            location: Location id to request the menu for.

        Returns:
            The menu content exactly as returned by ``parse_model``.
        """
        return self.parse_model(
            self._model_params(proxy_token, "menu", location, 1)
        )

    def parse_meal_category(
        self, proxy_token: str, location: int
    ) -> list[dict]:
        """Get the meal categories for a specific location.

        Args:
            proxy_token: Token obtained from ``parse_token``.
            location: Location id to request the categories for.

        Returns:
            The category content exactly as returned by ``parse_model``.
        """
        return self.parse_model(
            self._model_params(proxy_token, "mealCategory", location, 1)
        )

    def parse_location(self, proxy_token: str) -> list[dict]:
        """Get the locations from the server.

        Args:
            proxy_token: Token obtained from ``parse_token``.

        Returns:
            The location content exactly as returned by ``parse_model``.
        """
        return self.parse_model(
            self._model_params(proxy_token, "location", "", 1)
        )
|
||||
Reference in New Issue
Block a user