modular parser framework

This commit is contained in:
Hadrian Burkhardt
2026-05-21 08:21:49 +00:00
parent 1223791074
commit cf5348a0c8
35 changed files with 452 additions and 166 deletions
+1
View File
@@ -0,0 +1 @@
# -*- encoding: utf-8 -*-
+48
View File
@@ -0,0 +1,48 @@
[neues_palais]
name = Mensa Am Neuen Palais
street = Am Neuen Palais 10, Haus 12
city = 14469 Potsdam
id = 350
cHash = be49adafdafeb65ce711334299e68ecd
[golm]
name = Mensa Golm
street = Karl-Liebknecht-Straße 24/25
city = 14476 Potsdam / OT Golm
id = 351
cHash = e7ff95c986a6d06b88500e35385e4723
[griebnitzsee]
name = Mensa Griebnitzsee
street = August-Bebel-Straße 69, Haus 6
city = 14482 Potsdam
id = 353
cHash = 2baa819c66f7bd7c19a21693a97b5f34
[kiepenheuerallee]
name = Mensa Kiepenheuerallee
street = Kiepenheuerallee 5
city = 14469 Potsdam
id = 355
cHash = fe903f02b273069efc1af471a47fac25
[wildau]
name = Mensa Wildau
street = Hochschulring 1
city = 15745 Wildau
id = 356
cHash = 58cfcf13b92d8045c0810bcca34c37e7
[brandenburg]
name = Mensa Brandenburg
street = Magdeburger Straße 50
city = 14770 Brandenburg an der Havel
id = 357
cHash = 451a59089c91e5ed42112c294297a640
[filmuniversitaet]
name = Mensa Filmuniversität
street = Marlene-Dietrich-Allee 11, Haus 6
city = 14482 Potsdam
id = 354
cHash = 3c06da751a1943da0b6afb3e1f043b2f
+47
View File
@@ -0,0 +1,47 @@
# -*- encoding: utf-8 -*-
import configparser
import io
import os
from dataclasses import dataclass
from functools import partial
@dataclass(frozen=True)
class Canteen:
    """Immutable record describing one canteen entry of the configuration.

    Every value is stored as a string.
    """

    key: str  # short name identifying the canteen
    name: str
    street: str
    city: str
    id: str
    chash: str
def _get_config(filename):
    """Load an INI file located next to this module.

    :param filename: file name, joined onto this module's directory.
    :return: the populated :class:`configparser.ConfigParser`.
    """
    module_dir = os.path.dirname(__file__)
    parser = configparser.ConfigParser()
    with io.open(os.path.join(module_dir, filename), encoding='utf-8') as handle:
        parser.read_file(handle)
    return parser
def _parse_canteen(config, canteen_name):
    """Build the :class:`Canteen` record for one configuration section.

    :param config: parsed configuration offering ``get(section, option)``.
    :param canteen_name: section to read; also stored as the record's key.
    :return: :class:`Canteen` filled with the section's option values.
    """
    def option(option_name):
        return config.get(canteen_name, option_name)

    return Canteen(
        key=canteen_name,
        name=option('name'),
        street=option('street'),
        city=option('city'),
        id=option('id'),
        chash=option('cHash'),
    )
def read_canteen_config():
    """Load every canteen defined in ``canteens.ini``.

    :return: dictionary mapping each section name (the canteen short name)
        to its :class:`Canteen` record.
    """
    parsed = _get_config('canteens.ini')
    canteens = {}
    for section in parsed.sections():
        canteens[section] = _parse_canteen(parsed, section)
    return canteens
+5
View File
@@ -0,0 +1,5 @@
"""Parser/provider implementations for OpenMensa feed sources."""
from openmensa_parsers.parsers.registry import create_parser, get_parser_class
__all__ = ["create_parser", "get_parser_class"]
+61
View File
@@ -0,0 +1,61 @@
"""Shared parser contract for city-specific OpenMensa parsers."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Protocol
from openmensa_parsers.config import Canteen
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
from openmensa_parsers.xml_types.feed_xml import FeedXML, ScheduleXML
def _default_schedule() -> dict[str, Any]:
    """Return a fresh copy of the default feed schedule settings."""
    return {"hour": "8-14", "retry": "30 1"}


@dataclass(frozen=True)
class FeedDefinition:
    """Feed metadata a parser publishes alongside its result."""

    source: str
    name: str = "full"
    priority: int = 0
    # A factory is used so instances never share one mutable dict.
    schedule: dict[str, Any] = field(default_factory=_default_schedule)
class OpenMensaParser(Protocol):
    """Structural interface every city/source parser has to provide."""

    id: str
    feed: FeedDefinition

    def fetch(self) -> Any:
        """Download or load the raw, source-specific data."""
        ...

    def parse(
        self, config: dict[str, Canteen], raw_data: Any
    ) -> dict[str, CanteenXML]:
        """Turn the raw source data into OpenMensa canteen structures."""
        ...

    def create_feed(self, canteen: Canteen, url: str) -> FeedXML:
        """Build the OpenMensa feed metadata of one configured canteen."""
        ...
class BaseOpenMensaParser:  # pylint: disable=too-few-public-methods
    """Shared base for parsers that publish the standard feed block."""

    id = "base"
    feed: FeedDefinition

    def create_feed(self, _canteen: Canteen, url: str) -> FeedXML:
        """Build the feed tag for a canteen from this parser's ``feed``.

        The canteen argument is accepted but not used; only ``url`` varies
        between canteens.
        """
        # Copy the mapping before unpacking it into the schedule object.
        schedule = ScheduleXML(**dict(self.feed.schedule))
        definition = self.feed
        return FeedXML(
            name=definition.name,
            priority=definition.priority,
            source=definition.source,
            url=url,
            schedule=schedule,
        )
+61
View File
@@ -0,0 +1,61 @@
"""Potsdam parser/provider implementation."""
from __future__ import annotations
import logging
from openmensa_parsers.config import Canteen
from openmensa_parsers.parsers.base import BaseOpenMensaParser, FeedDefinition
from openmensa_parsers.webspeiseplan_api import (
WebspeiseplanAPI,
WebspeiseplanData,
)
from openmensa_parsers.webspeiseplan_parser import WebspeiseplanParser
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
class PotsdamParser(BaseOpenMensaParser):
    """Parser backed by the Webspeiseplan instance at ``BASE_URL``."""

    id = "potsdam"
    BASE_URL = "https://swp.webspeiseplan.de"
    feed = FeedDefinition(source=BASE_URL)

    def __init__(
        self,
        api: WebspeiseplanAPI | None = None,
        parser: WebspeiseplanParser | None = None,
    ) -> None:
        """Store the fetch and parse helpers, creating defaults if omitted."""
        if api is None:
            api = WebspeiseplanAPI(self.BASE_URL)
        self.api = api
        if parser is None:
            parser = WebspeiseplanParser()
        self.parser = parser
        self.logger = logging.getLogger(__name__)

    def fetch(self) -> WebspeiseplanData:
        """Download everything the parser needs via the API client."""
        return self.api.fetch_all()

    def parse(
        self,
        config: dict[str, Canteen],
        raw_data: WebspeiseplanData,
    ) -> dict[str, CanteenXML]:
        """Convert downloaded Webspeiseplan data into canteen structures.

        Configured canteens whose ``name`` is missing from the downloaded
        outlets are skipped with a warning.
        """
        result: dict[str, CanteenXML] = {}
        for key, entry in config.items():
            outlet_name = entry.name
            if outlet_name not in raw_data.outlets:
                self.logger.warning("%s not found in keys", outlet_name)
                continue

            # Work on a copy so the downloaded outlet record stays untouched.
            outlet = dict(raw_data.outlets[outlet_name])
            menus = raw_data.menus[outlet_name]
            categories = raw_data.meal_categories[outlet_name]
            location = raw_data.locations[outlet_name]
            outlet["isPublic"] = location["isPublic"]

            canteen = self.parser.parse_canteen_meta_times(outlet)
            for meal_kwargs in self.parser.parse_meals(menus, categories):
                canteen.add_meal(**meal_kwargs)
            result[key] = canteen
        return result
+25
View File
@@ -0,0 +1,25 @@
"""Registry for city/source parser implementations."""
from __future__ import annotations
from openmensa_parsers.parsers.base import OpenMensaParser
from openmensa_parsers.parsers.potsdam import PotsdamParser
# Registry of available parsers, keyed by each parser class's ``id``.
PARSER_CLASSES: dict[str, type[OpenMensaParser]] = {
    PotsdamParser.id: PotsdamParser,
}
def get_parser_class(parser_id: str) -> type[OpenMensaParser]:
    """Look up the parser class registered under ``parser_id``.

    Raises:
        KeyError: If no parser is registered under ``parser_id``; the
            message lists the registered ids in sorted order.
    """
    try:
        parser_class = PARSER_CLASSES[parser_id]
    except KeyError as exc:
        configured = ", ".join(sorted(PARSER_CLASSES))
        message = f"Unknown parser {parser_id!r}; configured parsers: {configured}"
        raise KeyError(message) from exc
    else:
        return parser_class
def create_parser(parser_id: str) -> OpenMensaParser:
    """Instantiate the parser registered under ``parser_id`` without arguments."""
    parser_class = get_parser_class(parser_id)
    return parser_class()
+79
View File
@@ -0,0 +1,79 @@
# -*- encoding: utf-8 -*-
import os
import urllib.parse
import cachetools as ct
from flask import Flask, jsonify, make_response, url_for
from flask.logging import create_logger
from openmensa_parsers.config import read_canteen_config
from openmensa_parsers.parsers import create_parser
from openmensa_parsers.xml_types.builder import Builder
# Time-to-live of the builder cache below (45 * 60, i.e. 45 minutes
# when interpreted as seconds).
CACHE_TIMEOUT = 45 * 60
# Id of the parser to use; the OM_PARSER_ID environment variable
# overrides the "potsdam" default.
PARSER_ID = os.environ.get("OM_PARSER_ID", "potsdam")

# pragma pylint: disable=invalid-name
app = Flask(__name__)
app.url_map.strict_slashes = False
cache = ct.TTLCache(maxsize=30, ttl=CACHE_TIMEOUT)
# The canteen configuration is read once, at import time.
config = read_canteen_config()
log = create_logger(app)

# Optional override: copy the scheme, host and path of BASE_URL into the
# corresponding Flask configuration keys (each only when non-empty).
if "BASE_URL" in os.environ:  # pragma: no cover
    base_url = urllib.parse.urlparse(os.environ.get("BASE_URL"))
    if base_url.scheme:
        app.config["PREFERRED_URL_SCHEME"] = base_url.scheme
    if base_url.netloc:
        app.config["SERVER_NAME"] = base_url.netloc
    if base_url.path:
        app.config["APPLICATION_ROOT"] = base_url.path
def canteen_not_found(canteen_name):
    """Log the miss and return a 404 response listing the configured canteens."""
    log.warning("Canteen %s not found", canteen_name)
    configured = ", ".join([f"'{c}'" for c in config])
    return make_response(
        f"Canteen '{canteen_name}' not found, available: {configured}",
        404,
    )
@ct.cached(cache=cache)
def update_builder():
    """Create a Builder using the configured parser.

    The return value is memoized in ``cache`` by the decorator.
    """
    log.debug("Downloading menu using parser %s", PARSER_ID)
    parser = create_parser(PARSER_ID)
    return Builder(config, parser=parser)
@app.route("/canteens/<canteen_name>")
@app.route("/canteens/<canteen_name>/xml")
def canteen_xml_feed(canteen_name):
    """Serve the XML document of one configured canteen.

    Unknown canteen names are answered via ``canteen_not_found``.
    """
    if canteen_name not in config:
        return canteen_not_found(canteen_name)

    response = make_response(update_builder().get_xml(canteen_name))
    response.mimetype = "text/xml"
    return response
@app.route("/")
@app.route("/canteens")
def canteen_index():
    """Return a JSON object mapping each canteen key to its external feed URL."""
    feed_urls = {}
    for key in config:
        feed_urls[key] = url_for(
            "canteen_xml_feed", canteen_name=key, _external=True
        )
    return jsonify(feed_urls)
@app.route("/health_check")
def health_check():
    """Answer with the body ``OK`` and status 200, unconditionally."""
    body, status = "OK", 200
    return make_response(body, status)
# Run the application through ``app.run()`` when executed as a script.
if __name__ == '__main__':
    app.run()
+190
View File
@@ -0,0 +1,190 @@
import logging
import urllib.request
import urllib.parse
import re
import time
import json
from dataclasses import dataclass
@dataclass(frozen=True)
class WebspeiseplanData:
    """Immutable bundle of downloaded Webspeiseplan data.

    Each mapping is keyed by outlet name.
    """

    outlets: dict[str, dict]  # outlet records
    locations: dict[str, dict]  # location record per outlet
    menus: dict[str, dict]  # menu payload per outlet
    meal_categories: dict[str, dict]  # meal category lookup per outlet
class WebspeiseplanAPI:
    """Client for Webspeiseplan installations.

    The client extracts the ``PROXY_TOKEN`` from the site's JavaScript and
    then queries ``index.php`` once per data model (outlet, location, menu,
    meal category).
    """

    logger = logging.getLogger(__name__)

    # Seconds to wait for each HTTP response unless a caller overrides it.
    DEFAULT_TIMEOUT = 30.0

    def __init__(self, base_url: str, timeout: float = DEFAULT_TIMEOUT):
        """Initialize the web service client.

        Args:
            base_url (str): Root URL of the installation; trailing slashes
                are stripped.
            timeout (float): Seconds to wait on each HTTP request. Passing
                ``None`` waits indefinitely.

        Raises:
            ValueError: If ``base_url`` has no scheme or no host part.
        """
        logging.basicConfig()
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        parsed_url = urllib.parse.urlparse(self.base_url)
        if not parsed_url.scheme or not parsed_url.netloc:
            raise ValueError(f"Invalid Webspeiseplan base URL: {base_url!r}")
        self.host = parsed_url.netloc

    def fetch_all(self) -> "WebspeiseplanData":
        """Download all data required to render OpenMensa feeds.

        Returns:
            WebspeiseplanData: outlets, locations, menus and meal categories,
            each keyed by outlet name.

        Raises:
            KeyError: If an outlet refers to a ``standortID`` that is not
                among the downloaded locations.
        """
        proxy_token = self.parse_token()
        outlets = self.parse_outlets(proxy_token)
        locations = {
            item["id"]: item
            for item in self.parse_location(proxy_token)
        }

        menus: dict[str, dict] = {}
        meal_categories: dict[str, dict] = {}
        outlet_locations: dict[str, dict] = {}

        for outlet in outlets.values():
            location = outlet["standortID"]
            menu = self.parse_menu(proxy_token, location)
            categories = self.parse_meal_category(proxy_token, location)
            # Index categories by id so meals can look up their category.
            id2cat = {item["gerichtkategorieID"]: item for item in categories}

            menus[outlet["name"]] = menu
            meal_categories[outlet["name"]] = id2cat
            outlet_locations[outlet["name"]] = locations[location]

        return WebspeiseplanData(
            outlets=outlets,
            locations=outlet_locations,
            menus=menus,
            meal_categories=meal_categories,
        )

    def __spoof_req_headers(self, req: urllib.request.Request):
        """Add browser-like headers to a request.

        Args:
            req (urllib.request.Request): Request that is modified in place.
        """
        req.add_header(
            "Accept", "application/json, text/javascript, */*; q=0.01"
        )
        req.add_header("Accept-Language", "en-US,en;q=0.9")
        req.add_header("Connection", "keep-alive")
        req.add_header("Host", self.host)
        req.add_header("Referer", f"{self.base_url}/InitialConfig")
        req.add_header(
            "Sec-Ch-Ua",
            '"Not/A)Brand";v="99", '
            + '"Google Chrome";v="115", '
            + '"Chromium";v="115"',
        )
        req.add_header("Sec-Ch-Ua-Mobile", "?0")
        req.add_header("Sec-Ch-Ua-Platform", "Linux")
        req.add_header("Sec-Fetch-Dest", "empty")
        req.add_header("Sec-Fetch-Mode", "cors")
        req.add_header("Sec-Fetch-Site", "same-origin")
        req.add_header(
            "User-Agent",
            "Mozilla/5.0 (X11; Linux x86_64) "
            + "AppleWebKit/537.36 (KHTML, like Gecko) "
            + "Chrome/115.0.0.0 Safari/537.36",
        )
        req.add_header("X-Requested-With", "XMLHttpRequest")

    def parse_model(self, params: dict):
        """Query ``index.php`` and return the ``content`` of the JSON reply.

        Args:
            params (dict): Query parameters, URL-encoded into the request.

        Returns:
            The value stored under ``"content"`` in the decoded response.
        """
        query = urllib.parse.urlencode(params)
        url = f"{self.base_url}/index.php?{query}"
        WebspeiseplanAPI.logger.debug("__parse_model: %s", url)
        req = urllib.request.Request(url)
        self.__spoof_req_headers(req)
        # The timeout keeps a stalled server from blocking the caller forever.
        with urllib.request.urlopen(req, timeout=self.timeout) as resp:
            data = resp.read()
        return json.loads(data)["content"]

    def parse_token(self) -> str:
        """Get the token from the proxy server.

        Downloads the start page, locates the main script referenced there
        (falling back to ``/index.js``) and extracts ``PROXY_TOKEN`` from it.

        Raises:
            IndexError: If the script does not contain a ``PROXY_TOKEN``.
        """
        req = urllib.request.Request(self.base_url)
        with urllib.request.urlopen(req, timeout=self.timeout) as resp:
            txt = resp.read().decode("utf-8")
        # NOTE(review): the dots are unescaped and therefore match any
        # character; left unchanged so no currently accepted name is rejected.
        script_candidates = re.findall(r"/main.[0-9a-f]+.js", txt)
        if script_candidates:
            script_path = script_candidates[0]
        else:
            # Development build made it to production, which does not produce
            # JS chunks with cache-busting filenames
            script_path = "/index.js"
        WebspeiseplanAPI.logger.debug(
            "__parse_token: downloading script %s", script_path
        )

        script_url = urllib.parse.urljoin(f"{self.base_url}/", script_path)
        req = urllib.request.Request(script_url)
        with urllib.request.urlopen(req, timeout=self.timeout) as resp:
            txt = resp.read().decode("utf-8")
        token_match = re.search(
            r"PROXY_TOKEN:\s*[\"']([0-9a-f]+)[\"']", txt
        )
        if token_match is None:
            # Still an IndexError (as before), but with a usable message.
            raise IndexError(f"PROXY_TOKEN not found in {script_url}")
        proxy_token = token_match.group(1)
        WebspeiseplanAPI.logger.debug(
            "__parse_token: PROXY_TOKEN %s", proxy_token
        )
        return proxy_token

    def parse_outlets(self, proxy_token: str) -> dict[str, dict]:
        """Get the outlets from the server, keyed by outlet name."""
        params = {
            "token": proxy_token,
            "model": "outlet",
            "location": "",
            "languagetype": "",
            "_": int(time.time() * 1000),
        }
        return {
            outlet["name"]: outlet for outlet in self.parse_model(params)
        }

    def parse_menu(self, proxy_token: str, location: int) -> dict:
        """Get the menu for a specific location."""
        params = {
            "token": proxy_token,
            "model": "menu",
            "location": location,
            "languagetype": 1,
            "_": int(time.time() * 1000),
        }
        return self.parse_model(params)

    def parse_meal_category(
        self, proxy_token: str, location: int
    ) -> list[dict]:
        """Get the meal categories for a specific location."""
        params = {
            "token": proxy_token,
            "model": "mealCategory",
            "location": location,
            "languagetype": 1,
            "_": int(time.time() * 1000),
        }
        return self.parse_model(params)

    def parse_location(self, proxy_token: str) -> list[dict]:
        """Get the locations from the server."""
        params = {
            "token": proxy_token,
            "model": "location",
            "location": "",
            "languagetype": 1,
            "_": int(time.time() * 1000),
        }
        return self.parse_model(params)
+129
View File
@@ -0,0 +1,129 @@
import logging
import re
from datetime import datetime, date
from openmensa_parsers.xml_types.canteen_xml import CanteenMeta, CanteenXML
from openmensa_parsers.xml_types.times_xml import CanteenOpenTimespec, TimesXML
from openmensa_parsers.xml_types.meal_xml import MealXML
# Matches a euro amount such as "3,50 €" or "4 €": digits, an optional
# one- or two-digit fraction after "," or ".", optional whitespace and the
# euro sign. Group 1 captures the number without the currency symbol.
EURO_PRICE_PATTERN = re.compile(r"(\d+(?:[,.]\d{1,2})?)\s*€")
class WebspeiseplanParser:
"""Parser for Webspeiseplan menu and outlet data."""
def __init__(self) -> None:
"""Init WebspeiseplanParser object."""
logging.basicConfig()
self.logger = logging.getLogger(__name__)
def parse_canteen_meta_times(self, outlet: dict):
"""Parse the outlet data from outlet.
Args:
outlet (dict): [description]
"""
self.logger.debug("parse_canteen_meta_times(): %s", outlet["name"])
addr_info = outlet["addressInfo"]
meta = {
"name": outlet["name"],
"address": f'{addr_info["street"]}, {addr_info["postalCode"]} '
+ f'{addr_info["city"]}',
"city": addr_info["city"],
"phone": outlet["contactInfo"][0]["phone"],
"email": outlet["contactInfo"][0]["email"],
"availability": outlet["isPublic"]
}
if outlet["positionInfo"]:
meta["location"] = (
outlet["positionInfo"]["longitude"],
outlet["positionInfo"]["latitude"],
)
canteen_meta = CanteenMeta(**meta)
weekday_dict = {
# this approach only lists the first (valid) opening time,
# since OpenMensa does not support multiple time specs
# (yet).
"monday": outlet['moZeit1'] or outlet['moZeit2'],
"tuesday": outlet['diZeit1'] or outlet['diZeit2'],
"wednesday": outlet['miZeit1'] or outlet['miZeit2'],
"thursday": outlet['doZeit1'] or outlet['doZeit2'],
"friday": outlet['frZeit1'] or outlet['frZeit2'],
"saturday": outlet['saZeit1'] or outlet['saZeit2'],
"sunday": outlet['soZeit1'] or outlet['soZeit2'],
}
canteen_times = TimesXML({
k: CanteenOpenTimespec(v) for k, v in weekday_dict.items()
})
canteen = CanteenXML(canteen_meta, canteen_times)
return canteen
def _parse_price(self, value):
if value in (None, "", {}):
return None
return float(str(value).replace(",", "."))
def _parse_embedded_prices(
self, name: str, price: dict[str, float | None]
) -> tuple[str, dict[str, float | None]]:
if any(price.values()):
return name, price
matches = EURO_PRICE_PATTERN.findall(name)
if len(matches) < 2:
return name, price
parsed = [self._parse_price(match) for match in matches]
if len(parsed) >= 3:
price = {
"student": parsed[0],
"employee": parsed[1],
"other": parsed[2],
}
elif "Stud" in name and ("Gäste" in name or "Gaeste" in name):
price = {
"student": parsed[0],
"employee": price["employee"],
"other": parsed[1],
}
else:
return name, price
name = EURO_PRICE_PATTERN.sub("", name)
name = re.sub(r"\s*/\s*", " ", name)
name = re.sub(r"\s+", " ", name).strip()
return name, price
def parse_meals(
self, menu_data, meal_categories
) -> list[tuple[date, str, MealXML]]:
"""Parse the menu and adds it to the builder."""
meals = []
for menu in menu_data:
for meal_data in menu["speiseplanGerichtData"]:
info = meal_data["speiseplanAdvancedGericht"]
additional_info = meal_data["zusatzinformationen"]
price = {
"student": self._parse_price(
additional_info["mitarbeiterpreisDecimal2"]
),
"employee": self._parse_price(
additional_info["price3Decimal2"]
),
"other": self._parse_price(
additional_info["gaestepreisDecimal2"]
),
}
name, price = self._parse_embedded_prices(
info["gerichtname"], price
)
meal = MealXML(name=name, price=price)
day = datetime.fromisoformat(info["datum"]).date()
category = meal_categories[info["gerichtkategorieID"]]["name"]
meals.append(
{"day": day, "category": category, "meal": meal}
)
self.logger.debug("parse_meals(): %s meals parsed", len(meals))
return meals
+54
View File
@@ -0,0 +1,54 @@
from xml.dom import minidom
from dataclasses import dataclass
import logging
from typing import Any
from flask import url_for
from openmensa_parsers.xml_types.openmensa_xml import OpenMensaXML
from openmensa_parsers.config import Canteen
from openmensa_parsers.parsers.base import OpenMensaParser
from openmensa_parsers.parsers.potsdam import PotsdamParser
@dataclass
class Builder:
    """Parses the configured canteens and renders them as XML documents."""

    VERSION = "2.0.1"

    def __init__(
        self,
        config: dict[str, Canteen],
        source_data: Any | None = None,
        parser: OpenMensaParser | None = None,
    ):
        """Fetch (unless given) and parse the data for all canteens.

        Args:
            config: Configured canteens, keyed by canteen short name.
            source_data: Raw data to parse; fetched through the parser
                when ``None``.
            parser: Parser to use; a ``PotsdamParser`` when ``None``.
        """
        logging.basicConfig()
        self.logger = logging.getLogger(__name__)
        self._xml_data = {}
        if parser is None:
            parser = PotsdamParser()
        self.parser = parser

        raw_data = source_data
        if raw_data is None:
            raw_data = self.parser.fetch()

        parsed = self.parser.parse(config, raw_data)
        for cname, canteen in parsed.items():
            canteen.add_feed(self.__create_feed(config[cname]))
            self._xml_data[cname] = OpenMensaXML(self.VERSION, canteen)

    def __create_feed(self, canteen: Canteen):
        """Create the feed pointing at the canteen's own XML endpoint."""
        feed_url = url_for(
            "canteen_xml_feed",
            canteen_name=canteen.key,
            _external=True,
        )
        return self.parser.create_feed(canteen, feed_url)

    def get_xml(self, canteen_name: str):
        """Render one parsed canteen as a pretty-printed XML document.

        Returns:
            bytes: The UTF-8 encoded document.
        """
        doc = minidom.Document()
        doc.appendChild(self._xml_data[canteen_name].xml_element(doc))
        return doc.toprettyxml(encoding="UTF-8")
+159
View File
@@ -0,0 +1,159 @@
from dataclasses import dataclass
from xml.dom import minidom
from datetime import date
from openmensa_parsers.xml_types.times_xml import TimesXML
from openmensa_parsers.xml_types.meal_xml import MealXML
from openmensa_parsers.xml_types.feed_xml import FeedXML
@dataclass
class CanteenMeta:
    """Descriptive canteen data rendered by CanteenXML."""

    # pylint: disable=too-many-instance-attributes
    def __init__(self, **kwargs):
        """Init CanteenMeta object from keyword arguments.

        Required: ``name``, ``address``, ``city``, ``phone``, ``email``.
        Optional: ``location`` (default ``None``) and ``availability``
        (default ``"public"``).
        """
        for required in ("name", "address", "city", "phone", "email"):
            setattr(self, required, kwargs[required])
        self.location = kwargs.get("location")
        self.availability = kwargs.get("availability", "public")

    @property
    def availability(self) -> str:
        """Whether the canteen is public or restricted.

        Returns:
            str: 'public' | 'restricted'
        """
        return self._availability

    @availability.setter
    def availability(self, value: str):
        # Booleans are translated: True -> public, False -> restricted.
        if isinstance(value, bool):
            self._availability = "public" if value else "restricted"
            return
        if value not in ("public", "restricted"):
            raise ValueError("only 'public' or 'restricted' are allowed.")
        self._availability = value

    @availability.deleter
    def availability(self):
        del self._availability
@dataclass
class CanteenXML:
    """Represents the canteen tag in openMensaFeedv2."""

    def __init__(
        self,
        canteen_meta: CanteenMeta,
        times: TimesXML,
        feeds: dict[str, FeedXML] = None,
        days: dict[date, dict[str, list[MealXML]]] = None,
    ):
        """Init CanteenXML Object.

        Args:
            canteen_meta (CanteenMeta): Descriptive data of the canteen.
            times (TimesXML): Opening times.
            feeds (dict[str, FeedXML]): Feeds by name; empty when omitted.
            days (dict): Meals grouped by day, then by category name;
                empty when omitted.
        """
        self.canteen_meta = canteen_meta
        self.times = times
        self.feeds = feeds if feeds is not None else {}
        self.days = days if days is not None else {}

    def __create_node(self, doc: minidom.Document, tag: str, value: str):
        """Return a ``tag`` element whose only child is the text ``value``."""
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(value))
        return node

    def __append_meta(self, doc: minidom.Document, canteen: minidom.Element):
        """Append the metadata, location, availability and times children."""
        meta = self.canteen_meta
        for tag in ("name", "address", "city", "phone", "email"):
            canteen.appendChild(
                self.__create_node(doc, tag, getattr(meta, tag))
            )

        if meta.location:
            longitude, latitude = meta.location[0], meta.location[1]
            location = doc.createElement("location")
            location.setAttribute("longitude", str(longitude))
            location.setAttribute("latitude", str(latitude))
            canteen.appendChild(location)

        canteen.appendChild(
            self.__create_node(doc, "availability", meta.availability)
        )
        canteen.appendChild(self.times.xml_element(doc))

    def add_feed(self, feed: FeedXML):
        """Add a feed to the canteen.

        Args:
            feed (FeedXML): Feed to store; replaces a feed of the same name.
        """
        self.feeds[feed.name] = feed

    def add_meal(self, day: date, category: str, meal: MealXML):
        """Add a meal to the canteen.

        Args:
            day (date): Offered date of meal.
            category (str): Meal's category.
            meal (MealXML): The meal item.
        """
        # Create the day and category containers on first use.
        self.days.setdefault(day, {}).setdefault(category, []).append(meal)

    def xml_element(self, doc: minidom.Document):
        """Return the XML representation.

        Args:
            doc (minidom.Document): Working XML document

        Returns:
            The ``canteen`` element holding metadata, feeds and days.
        """
        canteen = doc.createElement("canteen")
        self.__append_meta(doc, canteen)

        for feed_item in self.feeds.values():
            canteen.appendChild(feed_item.xml_element(doc))

        for day_date, categories in self.days.items():
            day_node = doc.createElement("day")
            day_node.setAttribute("date", str(day_date))
            for category_name, meals in categories.items():
                category_node = doc.createElement("category")
                category_node.setAttribute("name", category_name)
                for meal in meals:
                    category_node.appendChild(meal.xml_element(doc))
                day_node.appendChild(category_node)
            canteen.appendChild(day_node)
        return canteen
+82
View File
@@ -0,0 +1,82 @@
from dataclasses import dataclass
from xml.dom import minidom
@dataclass
class ScheduleXML:
    """Represents the schedule inside the feed tag in openMensaFeedv2."""

    def __init__(self, hour: str, **kwargs):
        """Init ScheduleXML object.

        Args:
            hour (str): Value of the ``hour`` attribute.
            day_of_month (str, optional): Defaults to "*".
            day_of_week (str, optional): Defaults to "*".
            month (str, optional): Defaults to "*".
            minute (int, optional): Defaults to 0.
            retry (str, optional): Defaults to None.

        Other keyword arguments are ignored.
        """
        self.hour = hour
        optional_defaults = {
            "day_of_month": "*",
            "day_of_week": "*",
            "month": "*",
            "minute": 0,
            "retry": None,
        }
        for attribute, default in optional_defaults.items():
            setattr(self, attribute, kwargs.get(attribute, default))

    def xml_element(self, doc: minidom.Document):
        """Return the XML representation.

        Args:
            doc (minidom.Document): Working XML document.

        Returns:
            The ``schedule`` element; ``retry`` is only set when truthy.
        """
        schedule = doc.createElement("schedule")
        attributes = (
            ("dayOfMonth", self.day_of_month),
            ("dayOfWeek", self.day_of_week),
            ("month", self.month),
            ("hour", self.hour),
            ("minute", str(self.minute)),
        )
        for attribute, value in attributes:
            schedule.setAttribute(attribute, value)
        if self.retry:
            schedule.setAttribute("retry", self.retry)
        return schedule
@dataclass
class FeedXML:
    """Represents the feed tag in openMensaFeedv2."""

    name: str
    source: str
    url: str
    schedule: ScheduleXML
    priority: int = 0

    def xml_element(self, doc: minidom.Document):
        """Return the XML representation.

        Args:
            doc (minidom.Document): Working XML document.

        Returns:
            The ``feed`` element containing the schedule, then the ``url``
            and ``source`` text elements.
        """
        feed = doc.createElement("feed")
        feed.setAttribute("name", self.name)
        feed.setAttribute("priority", str(self.priority))
        feed.appendChild(self.schedule.xml_element(doc))

        for tag, text in (("url", self.url), ("source", self.source)):
            child = doc.createElement(tag)
            child.appendChild(doc.createTextNode(text))
            feed.appendChild(child)
        return feed
+48
View File
@@ -0,0 +1,48 @@
from xml.dom import minidom
from dataclasses import dataclass
@dataclass
class MealXML:
    """Represents the meal tag in openMensaFeedv2."""

    def __init__(self, name: str, price: dict[str, float], note: str = None):
        """Init MealXML object.

        Args:
            name (str): name of the meal
            price (dict[str, float]): prices for student, employee and other
            note (str): additional information
        """
        self.name, self.note, self.price = name, note, price

    def xml_element(self, doc: minidom.Document):
        """Return the xml tag.

        Roles other than student, employee and other are skipped; a price
        of ``None`` is written as ``0.00``.

        Args:
            doc (minidom.Document): Working XML document.
        """
        meal = doc.createElement("meal")

        text_children = [("name", self.name)]
        if self.note is not None:
            text_children.append(("note", self.note))
        for tag, text in text_children:
            child = doc.createElement(tag)
            child.appendChild(doc.createTextNode(text))
            meal.appendChild(child)

        for role, val in self.price.items():
            if role in ("student", "employee", "other"):
                val = 0.0 if val is None else val
                price = doc.createElement("price")
                price.setAttribute("role", role)
                price.appendChild(doc.createTextNode(f"{val:.2f}"))
                meal.appendChild(price)
        return meal
@@ -0,0 +1,51 @@
from xml.dom import minidom
from dataclasses import dataclass
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
@dataclass
class OpenMensaXML:
    """Represents the openmensa tag in openMensaFeedv2."""

    def __init__(self, version: str, canteen: CanteenXML):
        """Init OpenMensaXML.

        Args:
            version (str): Parser version
            canteen (CanteenXML): Canteen rendered inside the root tag
        """
        self.version, self.canteen = version, canteen

    def __create_version_node(self, doc: minidom.Document):
        """Return a ``version`` element holding the parser version."""
        node = doc.createElement("version")
        node.appendChild(doc.createTextNode(self.version))
        return node

    def xml_element(self, doc: minidom.Document):
        """Create openmensa XML tag.

        Args:
            doc (minidom.Document): Working XML document

        Returns:
            The ``openmensa`` root element with its ``version`` and
            ``canteen`` children.
        """
        root_attributes = (
            ("version", "2.1"),
            ("xmlns", "http://openmensa.org/open-mensa-v2"),
            ("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"),
            (
                "xsi:schemaLocation",
                "http://openmensa.org/open-mensa-v2 "
                + "http://openmensa.org/open-mensa-v2.xsd",
            ),
        )
        open_mensa = doc.createElement("openmensa")
        for attribute, value in root_attributes:
            open_mensa.setAttribute(attribute, value)

        open_mensa.appendChild(self.__create_version_node(doc))
        open_mensa.appendChild(self.canteen.xml_element(doc))
        return open_mensa
+111
View File
@@ -0,0 +1,111 @@
import re
from dataclasses import dataclass
from xml.dom import minidom
class CanteenOpenTimespec(str):
    """Represents valid daily opening times in openMensaFeedv2.

    An instance is a plain string: either ``CLOSED`` or a zero-padded
    ``HH:MM-HH:MM`` range.
    """

    CLOSED = "geschlossen"
    CLOSED_VALID_VALUES = {
        CLOSED,
        None,
        False,
        "",
    }
    # The leading ``.*?`` has to be non-greedy: a greedy ``.*`` consumes the
    # first digit of a two-digit opening hour, turning "11:00" into "1:00".
    PATTERN = (r'.*?(?P<hour1>\d{1,2}):(?P<min1>\d{1,2})'
               r'\D*(?P<hour2>\d{1,2}):(?P<min2>\d{1,2}).*')
    MATCHER = re.compile(PATTERN)

    def __new__(cls, spec):
        """Create CanteenOpenTimespec object.

        Args:
            spec (str | bool | None): time specification. Closed markers
                (see ``CLOSED_VALID_VALUES``) yield ``CLOSED``; anything else
                must contain two ``H:MM`` times, taken as opening and
                closing time.

        Raises:
            ValueError: If ``spec`` is neither a closed marker nor contains
                two times.
        """
        if isinstance(spec, str):
            spec = spec.strip()
            normalized = spec.lower()
            # "geschllossen" is a misspelling that is accepted as closed.
            if normalized in (cls.CLOSED, "geschllossen"):
                spec = cls.CLOSED

        if spec in cls.CLOSED_VALID_VALUES:
            return super().__new__(cls, cls.CLOSED)

        match = cls.MATCHER.match(str(spec))
        if not match:
            raise ValueError(f'Invalid time specification: {spec!r} does'
                             f' not conform to regex {cls.PATTERN!r}')

        # parse to int for format zerofill
        int_spec = {k: int(v) for k, v in match.groupdict().items()}
        clean_spec = (
            f'{int_spec["hour1"]:02}:{int_spec["min1"]:02}-'
            f'{int_spec["hour2"]:02}:{int_spec["min2"]:02}'
        )
        return super().__new__(cls, clean_spec)
@dataclass
class TimesXML:
    """Represents the times tag in openMensaFeedv2."""

    VALID_DAYS = (
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    )
    monday: str
    tuesday: str
    wednesday: str
    thursday: str
    friday: str
    saturday: str
    sunday: str

    def __init__(self, weekday_dict: dict[str, CanteenOpenTimespec] = None):
        """Init TimesXML object.

        Args:
            weekday_dict (dict[str, CanteenOpenTimespec]): Opening times by
                weekday name (see ``VALID_DAYS``). Weekdays that are left
                out, or all of them when the argument is omitted, are
                treated as closed.

        Raises:
            KeyError: If a key is not one of ``VALID_DAYS``.
        """
        specs = {} if weekday_dict is None else weekday_dict
        for key in specs:
            if key not in self.VALID_DAYS:
                raise KeyError(
                    f"Invalid weekday {key!r}; expected one of "
                    f"{', '.join(self.VALID_DAYS)}"
                )
        # Every weekday gets a value so rendering never hits a missing
        # attribute.
        for day in self.VALID_DAYS:
            setattr(self, day, specs.get(day, CanteenOpenTimespec.CLOSED))

    def __create_node(self,
                      doc: minidom.Document,
                      tag: str,
                      value: CanteenOpenTimespec):
        """Return the element of one weekday, marked closed or open."""
        elem = doc.createElement(tag)
        if value in CanteenOpenTimespec.CLOSED_VALID_VALUES:
            elem.setAttribute("closed", "true")
        else:
            elem.setAttribute("open", value)
        return elem

    def xml_element(self, doc: minidom.Document):
        """Return the XML representation.

        Args:
            doc (minidom.Document): Working XML document

        Returns:
            The ``times`` element with one child per weekday, in
            ``VALID_DAYS`` order.
        """
        times = doc.createElement("times")
        times.setAttribute("type", "opening")
        for day in self.VALID_DAYS:
            day_node = self.__create_node(doc, day, getattr(self, day))
            times.appendChild(day_node)
        return times