modular parser framework
This commit is contained in:
@@ -0,0 +1 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
@@ -0,0 +1,48 @@
|
||||
[neues_palais]
|
||||
name = Mensa Am Neuen Palais
|
||||
street = Am Neuen Palais 10, Haus 12
|
||||
city = 14469 Potsdam
|
||||
id = 350
|
||||
cHash = be49adafdafeb65ce711334299e68ecd
|
||||
|
||||
[golm]
|
||||
name = Mensa Golm
|
||||
street = Karl-Liebknecht-Straße 24/25
|
||||
city = 14476 Potsdam / OT Golm
|
||||
id = 351
|
||||
cHash = e7ff95c986a6d06b88500e35385e4723
|
||||
|
||||
[griebnitzsee]
|
||||
name = Mensa Griebnitzsee
|
||||
street = August-Bebel-Straße 69, Haus 6
|
||||
city = 14482 Potsdam
|
||||
id = 353
|
||||
cHash = 2baa819c66f7bd7c19a21693a97b5f34
|
||||
|
||||
[kiepenheuerallee]
|
||||
name = Mensa Kiepenheuerallee
|
||||
street = Kiepenheuerallee 5
|
||||
city = 14469 Potsdam
|
||||
id = 355
|
||||
cHash = fe903f02b273069efc1af471a47fac25
|
||||
|
||||
[wildau]
|
||||
name = Mensa Wildau
|
||||
street = Hochschulring 1
|
||||
city = 15745 Wildau
|
||||
id = 356
|
||||
cHash = 58cfcf13b92d8045c0810bcca34c37e7
|
||||
|
||||
[brandenburg]
|
||||
name = Mensa Brandenburg
|
||||
street = Magdeburger Straße 50
|
||||
city = 14770 Brandenburg an der Havel
|
||||
id = 357
|
||||
cHash = 451a59089c91e5ed42112c294297a640
|
||||
|
||||
[filmuniversitaet]
|
||||
name = Mensa Filmuniversität
|
||||
street = Marlene-Dietrich-Allee 11, Haus 6
|
||||
city = 14482 Potsdam
|
||||
id = 354
|
||||
cHash = 3c06da751a1943da0b6afb3e1f043b2f
|
||||
@@ -0,0 +1,47 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
import configparser
|
||||
import io
|
||||
import os
|
||||
|
||||
from dataclasses import dataclass
|
||||
from functools import partial
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Canteen:
    """Immutable record describing one configured canteen."""

    # Short name of the canteen (its section name in the configuration).
    key: str
    # Display name, e.g. "Mensa Golm".
    name: str
    # Street address.
    street: str
    # Postal code and town.
    city: str
    # Value of the ``id`` option.
    id: str
    # Value of the ``cHash`` option.
    chash: str
|
||||
|
||||
|
||||
def _get_config(filename):
|
||||
config = configparser.ConfigParser()
|
||||
path = os.path.join(os.path.dirname(__file__), filename)
|
||||
with io.open(path, encoding='utf-8') as config_file:
|
||||
config.read_file(config_file)
|
||||
return config
|
||||
|
||||
|
||||
def _parse_canteen(config, canteen_name):
    """Build the :class:`Canteen` for one section of the configuration.

    :param config: parsed configuration to read from.
    :param canteen_name: section name; also stored as the canteen's key.
    :return: :class:`Canteen` holding the option values of that section.
    """
    # Bind the section once so each lookup only has to name the option.
    option = partial(config.get, canteen_name)
    values = {
        'name': option('name'),
        'street': option('street'),
        'city': option('city'),
        'id': option('id'),
        'chash': option('cHash'),
    }
    return Canteen(key=canteen_name, **values)
|
||||
|
||||
|
||||
def read_canteen_config():
    """Load every canteen defined in ``canteens.ini``.

    :return: dictionary which maps from canteen short name (the section
        name) to its :class:`Canteen`.
    """
    config = _get_config('canteens.ini')
    canteens = {}
    for section in config.sections():
        canteens[section] = _parse_canteen(config, section)
    return canteens
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Parser/provider implementations for OpenMensa feed sources."""
|
||||
|
||||
from openmensa_parsers.parsers.registry import create_parser, get_parser_class
|
||||
|
||||
__all__ = ["create_parser", "get_parser_class"]
|
||||
@@ -0,0 +1,61 @@
|
||||
"""Shared parser contract for city-specific OpenMensa parsers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Protocol
|
||||
|
||||
from openmensa_parsers.config import Canteen
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
|
||||
from openmensa_parsers.xml_types.feed_xml import FeedXML, ScheduleXML
|
||||
|
||||
|
||||
def _default_schedule() -> dict[str, Any]:
    """Return a new dict holding the default feed schedule."""
    return {"hour": "8-14", "retry": "30 1"}


@dataclass(frozen=True)
class FeedDefinition:
    """Feed metadata a parser publishes unless it overrides the defaults."""

    # Address of the upstream data source.
    source: str
    # Feed name.
    name: str = "full"
    # Feed priority.
    priority: int = 0
    # Schedule settings; a factory gives every instance its own dict.
    schedule: dict[str, Any] = field(default_factory=_default_schedule)
|
||||
|
||||
|
||||
class OpenMensaParser(Protocol):
    """Structural interface every city/source parser has to provide."""

    # Identifier of the parser.
    id: str
    # Feed metadata of the parser.
    feed: FeedDefinition

    def fetch(self) -> Any:
        """Download or load the raw, source-specific data."""

    def parse(
        self, config: dict[str, Canteen], raw_data: Any
    ) -> dict[str, CanteenXML]:
        """Turn raw source data into OpenMensa canteen structures."""

    def create_feed(self, canteen: Canteen, url: str) -> FeedXML:
        """Build the OpenMensa feed metadata of one configured canteen."""
|
||||
|
||||
|
||||
class BaseOpenMensaParser:  # pylint: disable=too-few-public-methods
    """Shared base for parsers that publish the standard feed block.

    Subclasses supply ``feed``; ``create_feed`` renders it unchanged for
    every canteen.
    """

    id = "base"
    feed: FeedDefinition

    def create_feed(self, _canteen: Canteen, url: str) -> FeedXML:
        """Create a standard feed tag for a configured canteen.

        The canteen argument is accepted but not used: every canteen gets
        the same feed definition, only ``url`` differs.
        """
        # Expand a copy so the keyword expansion never works on the dict
        # stored in the feed definition.
        schedule = ScheduleXML(**dict(self.feed.schedule))
        feed_kwargs = {
            "name": self.feed.name,
            "priority": self.feed.priority,
            "source": self.feed.source,
            "url": url,
            "schedule": schedule,
        }
        return FeedXML(**feed_kwargs)
|
||||
@@ -0,0 +1,61 @@
|
||||
"""Potsdam parser/provider implementation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from openmensa_parsers.config import Canteen
|
||||
from openmensa_parsers.parsers.base import BaseOpenMensaParser, FeedDefinition
|
||||
from openmensa_parsers.webspeiseplan_api import (
|
||||
WebspeiseplanAPI,
|
||||
WebspeiseplanData,
|
||||
)
|
||||
from openmensa_parsers.webspeiseplan_parser import WebspeiseplanParser
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
|
||||
|
||||
|
||||
class PotsdamParser(BaseOpenMensaParser):
    """Parser for Studentenwerk Potsdam's Webspeiseplan source."""

    id = "potsdam"
    BASE_URL = "https://swp.webspeiseplan.de"
    feed = FeedDefinition(source=BASE_URL)

    def __init__(
        self,
        api: WebspeiseplanAPI | None = None,
        parser: WebspeiseplanParser | None = None,
    ) -> None:
        """Initialize the Potsdam parser with fetch and parse helpers.

        Both collaborators can be injected; each falls back to a default
        instance when omitted.
        """
        if api is None:
            api = WebspeiseplanAPI(self.BASE_URL)
        self.api = api
        if parser is None:
            parser = WebspeiseplanParser()
        self.parser = parser
        self.logger = logging.getLogger(__name__)

    def fetch(self) -> WebspeiseplanData:
        """Download all data required by the Potsdam parser."""
        return self.api.fetch_all()

    def parse(
        self,
        config: dict[str, Canteen],
        raw_data: WebspeiseplanData,
    ) -> dict[str, CanteenXML]:
        """Convert Potsdam Webspeiseplan data into canteen structures.

        Configured canteens are matched to outlets by their display name.
        A canteen without a matching outlet is logged and left out of the
        result.
        """
        results: dict[str, CanteenXML] = {}
        for key, entry in config.items():
            outlet_name = entry.name
            if outlet_name not in raw_data.outlets:
                self.logger.warning("%s not found in keys", outlet_name)
                continue

            # Work on a copy: the outlet record gets an extra key below.
            outlet = dict(raw_data.outlets[outlet_name])
            menus = raw_data.menus[outlet_name]
            categories = raw_data.meal_categories[outlet_name]
            location = raw_data.locations[outlet_name]
            outlet["isPublic"] = location["isPublic"]

            canteen = self.parser.parse_canteen_meta_times(outlet)
            for meal_kwargs in self.parser.parse_meals(menus, categories):
                canteen.add_meal(**meal_kwargs)
            results[key] = canteen
        return results
|
||||
@@ -0,0 +1,25 @@
|
||||
"""Registry for city/source parser implementations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from openmensa_parsers.parsers.base import OpenMensaParser
|
||||
from openmensa_parsers.parsers.potsdam import PotsdamParser
|
||||
|
||||
|
||||
# Registry of parser implementations, keyed by each class's ``id``.
PARSER_CLASSES: dict[str, type[OpenMensaParser]] = {
    parser_class.id: parser_class for parser_class in (PotsdamParser,)
}
|
||||
|
||||
|
||||
def get_parser_class(parser_id: str) -> type[OpenMensaParser]:
    """Look up the parser class registered under ``parser_id``.

    Raises:
        KeyError: If no parser is registered under that id; the message
            lists the registered ids in sorted order.
    """
    try:
        parser_class = PARSER_CLASSES[parser_id]
    except KeyError as exc:
        known_ids = sorted(PARSER_CLASSES)
        configured = ", ".join(known_ids)
        message = (
            f"Unknown parser {parser_id!r}; configured parsers: {configured}"
        )
        raise KeyError(message) from exc
    return parser_class
|
||||
|
||||
|
||||
def create_parser(parser_id: str) -> OpenMensaParser:
    """Instantiate the parser registered under ``parser_id``.

    The class is called without arguments. An unknown id raises the
    ``KeyError`` produced by :func:`get_parser_class`.
    """
    parser_class = get_parser_class(parser_id)
    return parser_class()
|
||||
@@ -0,0 +1,79 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import urllib.parse
|
||||
|
||||
import cachetools as ct
|
||||
|
||||
from flask import Flask, jsonify, make_response, url_for
|
||||
from flask.logging import create_logger
|
||||
|
||||
from openmensa_parsers.config import read_canteen_config
|
||||
from openmensa_parsers.parsers import create_parser
|
||||
from openmensa_parsers.xml_types.builder import Builder
|
||||
|
||||
# Lifetime of cached entries, handed to the TTL cache below.
CACHE_TIMEOUT = 45 * 60
# Parser to use; taken from the environment, "potsdam" when unset.
PARSER_ID = os.environ.get("OM_PARSER_ID", "potsdam")

# pragma pylint: disable=invalid-name

app = Flask(__name__)
app.url_map.strict_slashes = False
cache = ct.TTLCache(maxsize=30, ttl=CACHE_TIMEOUT)
config = read_canteen_config()
log = create_logger(app)

if "BASE_URL" in os.environ:  # pragma: no cover
    base_url = urllib.parse.urlparse(os.environ.get("BASE_URL"))
    # Override only the Flask settings whose URL part is present.
    app.config.update(
        {
            setting: part
            for setting, part in (
                ("PREFERRED_URL_SCHEME", base_url.scheme),
                ("SERVER_NAME", base_url.netloc),
                ("APPLICATION_ROOT", base_url.path),
            )
            if part
        }
    )
|
||||
|
||||
|
||||
def canteen_not_found(canteen_name):
    """Build the 404 response for a canteen that is not configured.

    Args:
        canteen_name: Name taken from the request URL (untrusted input).

    Returns:
        A plain-text 404 response that names the requested canteen and
        lists the configured ones.
    """
    log.warning("Canteen %s not found", canteen_name)
    configured = ", ".join(f"'{c}'" for c in config)
    message = f"Canteen '{canteen_name}' not found, available: {configured}"
    response = make_response(message, 404)
    # The message echoes the URL segment verbatim. A plain string body is
    # served as text/html by default, so force text/plain to keep a crafted
    # name from being rendered as markup by a browser.
    response.mimetype = "text/plain"
    return response
|
||||
|
||||
|
||||
@ct.cached(cache=cache)
def update_builder():
    """Create the feed builder for the configured parser.

    The returned builder is memoised in ``cache``, so the data is only
    rebuilt once the cached entry is gone.
    """
    log.debug("Downloading menu using parser %s", PARSER_ID)
    parser = create_parser(PARSER_ID)
    return Builder(config, parser=parser)
|
||||
|
||||
|
||||
@app.route("/canteens/<canteen_name>")
@app.route("/canteens/<canteen_name>/xml")
def canteen_xml_feed(canteen_name):
    """Serve the OpenMensa XML feed of one configured canteen.

    Args:
        canteen_name: Canteen key taken from the request URL.

    Returns:
        The XML document as ``text/xml``, or a 404 response when the
        canteen is not configured or no feed data exists for it.
    """
    if canteen_name not in config:
        return canteen_not_found(canteen_name)

    builder = update_builder()
    try:
        xml = builder.get_xml(canteen_name)
    except KeyError:
        # The parser leaves out configured canteens the source does not
        # list, so the builder may hold no document for a valid key.
        # Answer with a clear 404 instead of an internal server error.
        log.warning("No feed data available for canteen %s", canteen_name)
        missing = make_response(
            f"No feed data available for canteen '{canteen_name}'", 404
        )
        missing.mimetype = "text/plain"
        return missing

    response = make_response(xml)
    response.mimetype = "text/xml"
    return response
|
||||
|
||||
|
||||
@app.route("/")
@app.route("/canteens")
def canteen_index():
    """Return a JSON object mapping each canteen key to its feed URL."""
    feed_urls = {}
    for key in config:
        feed_urls[key] = url_for(
            "canteen_xml_feed", canteen_name=key, _external=True
        )
    return jsonify(feed_urls)
|
||||
|
||||
|
||||
@app.route("/health_check")
def health_check():
    """Answer with a constant ``OK`` body and status 200."""
    response = make_response("OK", 200)
    return response
|
||||
|
||||
|
||||
# Start the application directly when the module is run as a script.
if __name__ == "__main__":
    app.run()
|
||||
@@ -0,0 +1,190 @@
|
||||
import logging
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import re
|
||||
import time
|
||||
import json
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WebspeiseplanData:
    """Immutable bundle of downloaded Webspeiseplan data.

    All four mappings are keyed by outlet name.
    """

    # Outlet records.
    outlets: dict[str, dict]
    # Location record belonging to each outlet.
    locations: dict[str, dict]
    # Menu data of each outlet.
    menus: dict[str, dict]
    # Meal categories of each outlet, indexed by category id.
    meal_categories: dict[str, dict]
|
||||
|
||||
|
||||
class WebspeiseplanAPI:
    """Client for Webspeiseplan installations.

    Model data is read from ``<base_url>/index.php``. The token sent with
    those requests is scraped from the installation's JavaScript bundle by
    :meth:`parse_token`.
    """

    logger = logging.getLogger(__name__)

    # Seconds to wait on one HTTP request unless the caller picks a value.
    DEFAULT_TIMEOUT = 30.0
    # Class-level fallback so instances built without ``__init__`` still
    # have a timeout.
    timeout = DEFAULT_TIMEOUT

    def __init__(self, base_url: str, timeout: float = DEFAULT_TIMEOUT):
        """Initialize the web service client.

        Args:
            base_url (str): Root URL of the installation; trailing slashes
                are stripped.
            timeout (float): Seconds to wait on each HTTP request.

        Raises:
            ValueError: If ``base_url`` has no scheme or no host.
        """
        logging.basicConfig()
        self.base_url = base_url.rstrip("/")
        parsed_url = urllib.parse.urlparse(self.base_url)
        if not parsed_url.scheme or not parsed_url.netloc:
            raise ValueError(f"Invalid Webspeiseplan base URL: {base_url!r}")
        self.host = parsed_url.netloc
        self.timeout = timeout

    def fetch_all(self) -> WebspeiseplanData:
        """Download all data required to render OpenMensa feeds.

        Returns:
            WebspeiseplanData: Outlets, locations, menus and meal
            categories, each keyed by outlet name.

        Raises:
            KeyError: If an outlet refers to a location id that the
                location model did not return.
        """
        proxy_token = self.parse_token()
        outlets = self.parse_outlets(proxy_token)
        locations = {
            item["id"]: item for item in self.parse_location(proxy_token)
        }
        menus: dict[str, dict] = {}
        meal_categories: dict[str, dict] = {}
        outlet_locations: dict[str, dict] = {}
        for outlet in outlets.values():
            location = outlet["standortID"]
            menu = self.parse_menu(proxy_token, location)
            categories = self.parse_meal_category(proxy_token, location)
            name = outlet["name"]
            menus[name] = menu
            # Index the categories by id so meals can look theirs up.
            meal_categories[name] = {
                item["gerichtkategorieID"]: item for item in categories
            }
            outlet_locations[name] = locations[location]
        return WebspeiseplanData(
            outlets=outlets,
            locations=outlet_locations,
            menus=menus,
            meal_categories=meal_categories,
        )

    def __read(self, req: urllib.request.Request) -> bytes:
        """Send ``req`` and return the raw response body."""
        # Without a timeout a stalled server would block the caller
        # indefinitely.
        with urllib.request.urlopen(req, timeout=self.timeout) as resp:
            return resp.read()

    def __spoof_req_headers(self, req: urllib.request.Request):
        """Add browser-like headers to a request.

        Args:
            req (urllib.request.Request): Request to modify in place.
        """
        req.add_header(
            "Accept", "application/json, text/javascript, */*; q=0.01"
        )
        req.add_header("Accept-Language", "en-US,en;q=0.9")
        req.add_header("Connection", "keep-alive")
        req.add_header("Host", self.host)
        req.add_header("Referer", f"{self.base_url}/InitialConfig")
        req.add_header(
            "Sec-Ch-Ua",
            '"Not/A)Brand";v="99", '
            + '"Google Chrome";v="115", '
            + '"Chromium";v="115"',
        )
        req.add_header("Sec-Ch-Ua-Mobile", "?0")
        req.add_header("Sec-Ch-Ua-Platform", "Linux")
        req.add_header("Sec-Fetch-Dest", "empty")
        req.add_header("Sec-Fetch-Mode", "cors")
        req.add_header("Sec-Fetch-Site", "same-origin")
        req.add_header(
            "User-Agent",
            "Mozilla/5.0 (X11; Linux x86_64) "
            + "AppleWebKit/537.36 (KHTML, like Gecko) "
            + "Chrome/115.0.0.0 Safari/537.36",
        )
        req.add_header("X-Requested-With", "XMLHttpRequest")

    def __model_params(self, proxy_token, model, location, languagetype):
        """Assemble the query parameters shared by all model requests."""
        return {
            "token": proxy_token,
            "model": model,
            "location": location,
            "languagetype": languagetype,
            # Current time in milliseconds; presumably a cache-buster.
            "_": int(time.time() * 1000),
        }

    def parse_model(self, params: dict):
        """Retrieve data from host.

        Args:
            params (dict): Query parameters appended to ``index.php``.

        Returns:
            The ``content`` member of the JSON document the server sent.
        """
        query = urllib.parse.urlencode(params)
        url = f"{self.base_url}/index.php?{query}"
        WebspeiseplanAPI.logger.debug("__parse_model: %s", url)
        req = urllib.request.Request(url)
        self.__spoof_req_headers(req)
        data = self.__read(req)
        return json.loads(data)["content"]

    def parse_token(self) -> str:
        """Get the token from the proxy server.

        Downloads the start page, locates the main script it references
        and extracts the ``PROXY_TOKEN`` value from that script.

        Raises:
            IndexError: If the script contains no ``PROXY_TOKEN``.
        """
        txt = self.__read(urllib.request.Request(self.base_url))
        txt = txt.decode("utf-8")
        match = re.findall(r"/main.[0-9a-f]+.js", txt)

        if match:
            match = match[0]
        else:
            # Development build made it to production, which does not produce
            # JS chunks with cache-busting filenames
            match = "/index.js"

        WebspeiseplanAPI.logger.debug(
            "__parse_token: downloading script %s", match
        )
        script_url = urllib.parse.urljoin(f"{self.base_url}/", match)
        txt = self.__read(urllib.request.Request(script_url)).decode("utf-8")
        tokens = re.findall(r"PROXY_TOKEN:\s*[\"']([0-9a-f]+)[\"']", txt)
        if not tokens:
            # Same exception type the bare ``[0]`` lookup raised, but with
            # a message that says what is missing and where.
            raise IndexError(f"PROXY_TOKEN not found in {script_url}")
        proxy_token = tokens[0]
        WebspeiseplanAPI.logger.debug(
            "__parse_token: PROXY_TOKEN %s", proxy_token
        )
        return proxy_token

    def parse_outlets(self, proxy_token: str) -> dict[str, dict]:
        """Get the outlets from the server, keyed by outlet name."""
        params = self.__model_params(proxy_token, "outlet", "", "")
        return {outlet["name"]: outlet for outlet in self.parse_model(params)}

    def parse_menu(self, proxy_token: str, location: int) -> dict:
        """Get the menu for a specific location."""
        params = self.__model_params(proxy_token, "menu", location, 1)
        return self.parse_model(params)

    def parse_meal_category(
        self, proxy_token: str, location: int
    ) -> list[dict]:
        """Get the meal categories for a specific location."""
        params = self.__model_params(
            proxy_token, "mealCategory", location, 1
        )
        return self.parse_model(params)

    def parse_location(self, proxy_token: str) -> list[dict]:
        """Get the locations from the server."""
        params = self.__model_params(proxy_token, "location", "", 1)
        return self.parse_model(params)
|
||||
@@ -0,0 +1,129 @@
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, date
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenMeta, CanteenXML
|
||||
from openmensa_parsers.xml_types.times_xml import CanteenOpenTimespec, TimesXML
|
||||
from openmensa_parsers.xml_types.meal_xml import MealXML
|
||||
|
||||
|
||||
EURO_PRICE_PATTERN = re.compile(r"(\d+(?:[,.]\d{1,2})?)\s*€")
|
||||
|
||||
|
||||
class WebspeiseplanParser:
|
||||
"""Parser for Webspeiseplan menu and outlet data."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Init WebspeiseplanParser object."""
|
||||
logging.basicConfig()
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
def parse_canteen_meta_times(self, outlet: dict):
|
||||
"""Parse the outlet data from outlet.
|
||||
|
||||
Args:
|
||||
outlet (dict): [description]
|
||||
"""
|
||||
self.logger.debug("parse_canteen_meta_times(): %s", outlet["name"])
|
||||
addr_info = outlet["addressInfo"]
|
||||
meta = {
|
||||
"name": outlet["name"],
|
||||
"address": f'{addr_info["street"]}, {addr_info["postalCode"]} '
|
||||
+ f'{addr_info["city"]}',
|
||||
"city": addr_info["city"],
|
||||
"phone": outlet["contactInfo"][0]["phone"],
|
||||
"email": outlet["contactInfo"][0]["email"],
|
||||
"availability": outlet["isPublic"]
|
||||
}
|
||||
|
||||
if outlet["positionInfo"]:
|
||||
meta["location"] = (
|
||||
outlet["positionInfo"]["longitude"],
|
||||
outlet["positionInfo"]["latitude"],
|
||||
)
|
||||
canteen_meta = CanteenMeta(**meta)
|
||||
weekday_dict = {
|
||||
# this approach only lists the first (valid) opening time,
|
||||
# since OpenMensa does not support multiple time specs
|
||||
# (yet).
|
||||
"monday": outlet['moZeit1'] or outlet['moZeit2'],
|
||||
"tuesday": outlet['diZeit1'] or outlet['diZeit2'],
|
||||
"wednesday": outlet['miZeit1'] or outlet['miZeit2'],
|
||||
"thursday": outlet['doZeit1'] or outlet['doZeit2'],
|
||||
"friday": outlet['frZeit1'] or outlet['frZeit2'],
|
||||
"saturday": outlet['saZeit1'] or outlet['saZeit2'],
|
||||
"sunday": outlet['soZeit1'] or outlet['soZeit2'],
|
||||
}
|
||||
|
||||
canteen_times = TimesXML({
|
||||
k: CanteenOpenTimespec(v) for k, v in weekday_dict.items()
|
||||
})
|
||||
canteen = CanteenXML(canteen_meta, canteen_times)
|
||||
return canteen
|
||||
|
||||
def _parse_price(self, value):
|
||||
if value in (None, "", {}):
|
||||
return None
|
||||
return float(str(value).replace(",", "."))
|
||||
|
||||
def _parse_embedded_prices(
|
||||
self, name: str, price: dict[str, float | None]
|
||||
) -> tuple[str, dict[str, float | None]]:
|
||||
if any(price.values()):
|
||||
return name, price
|
||||
|
||||
matches = EURO_PRICE_PATTERN.findall(name)
|
||||
if len(matches) < 2:
|
||||
return name, price
|
||||
|
||||
parsed = [self._parse_price(match) for match in matches]
|
||||
if len(parsed) >= 3:
|
||||
price = {
|
||||
"student": parsed[0],
|
||||
"employee": parsed[1],
|
||||
"other": parsed[2],
|
||||
}
|
||||
elif "Stud" in name and ("Gäste" in name or "Gaeste" in name):
|
||||
price = {
|
||||
"student": parsed[0],
|
||||
"employee": price["employee"],
|
||||
"other": parsed[1],
|
||||
}
|
||||
else:
|
||||
return name, price
|
||||
|
||||
name = EURO_PRICE_PATTERN.sub("", name)
|
||||
name = re.sub(r"\s*/\s*", " ", name)
|
||||
name = re.sub(r"\s+", " ", name).strip()
|
||||
return name, price
|
||||
|
||||
def parse_meals(
|
||||
self, menu_data, meal_categories
|
||||
) -> list[tuple[date, str, MealXML]]:
|
||||
"""Parse the menu and adds it to the builder."""
|
||||
meals = []
|
||||
for menu in menu_data:
|
||||
for meal_data in menu["speiseplanGerichtData"]:
|
||||
info = meal_data["speiseplanAdvancedGericht"]
|
||||
additional_info = meal_data["zusatzinformationen"]
|
||||
price = {
|
||||
"student": self._parse_price(
|
||||
additional_info["mitarbeiterpreisDecimal2"]
|
||||
),
|
||||
"employee": self._parse_price(
|
||||
additional_info["price3Decimal2"]
|
||||
),
|
||||
"other": self._parse_price(
|
||||
additional_info["gaestepreisDecimal2"]
|
||||
),
|
||||
}
|
||||
name, price = self._parse_embedded_prices(
|
||||
info["gerichtname"], price
|
||||
)
|
||||
meal = MealXML(name=name, price=price)
|
||||
day = datetime.fromisoformat(info["datum"]).date()
|
||||
category = meal_categories[info["gerichtkategorieID"]]["name"]
|
||||
meals.append(
|
||||
{"day": day, "category": category, "meal": meal}
|
||||
)
|
||||
self.logger.debug("parse_meals(): %s meals parsed", len(meals))
|
||||
return meals
|
||||
@@ -0,0 +1,54 @@
|
||||
from xml.dom import minidom
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
from typing import Any
|
||||
from flask import url_for
|
||||
from openmensa_parsers.xml_types.openmensa_xml import OpenMensaXML
|
||||
from openmensa_parsers.config import Canteen
|
||||
from openmensa_parsers.parsers.base import OpenMensaParser
|
||||
from openmensa_parsers.parsers.potsdam import PotsdamParser
|
||||
|
||||
|
||||
@dataclass
class Builder:
    """Collects the OpenMensa XML documents of all parsed canteens.

    The constructor fetches (unless data is supplied) and parses the
    source; ``get_xml`` then serializes the document of one canteen.
    """

    VERSION = "2.0.1"

    def __init__(
        self,
        config: dict[str, Canteen],
        source_data: Any | None = None,
        parser: OpenMensaParser | None = None,
    ):
        """Initialize the object for the OpenMensa Feed Doc XML.

        Args:
            config: Configured canteens by key.
            source_data: Raw data to parse; fetched through the parser
                when omitted.
            parser: Parser to use; a ``PotsdamParser`` when omitted.
        """
        logging.basicConfig()
        self.logger = logging.getLogger(__name__)
        self._xml_data = {}

        if parser is None:
            parser = PotsdamParser()
        self.parser = parser

        if source_data is None:
            raw_data = self.parser.fetch()
        else:
            raw_data = source_data

        parsed = self.parser.parse(config, raw_data)
        for cname, canteen in parsed.items():
            canteen.add_feed(self.__create_feed(config[cname]))
            self._xml_data[cname] = OpenMensaXML(self.VERSION, canteen)

    def __create_feed(self, ntup: Canteen):
        """Create the feed entry pointing at the canteen's own feed URL."""
        feed_url = url_for(
            "canteen_xml_feed",
            canteen_name=ntup.key,
            _external=True,
        )
        return self.parser.create_feed(ntup, feed_url)

    def get_xml(self, canteen_name: str):
        """Return the XML document of one canteen.

        Returns:
            The pretty-printed document, encoded as UTF-8.

        Raises:
            KeyError: If no document was built for ``canteen_name``.
        """
        document = minidom.Document()
        root = self._xml_data[canteen_name].xml_element(document)
        document.appendChild(root)
        return document.toprettyxml(encoding="UTF-8")
|
||||
@@ -0,0 +1,159 @@
|
||||
from dataclasses import dataclass
|
||||
from xml.dom import minidom
|
||||
from datetime import date
|
||||
from openmensa_parsers.xml_types.times_xml import TimesXML
|
||||
from openmensa_parsers.xml_types.meal_xml import MealXML
|
||||
from openmensa_parsers.xml_types.feed_xml import FeedXML
|
||||
|
||||
|
||||
@dataclass
class CanteenMeta:
    """Descriptive data of a canteen, as consumed by CanteenXML."""

    # pylint: disable=too-many-instance-attributes
    def __init__(self, **kwargs):
        """Init CanteenMeta object.

        Required keywords: ``name``, ``address``, ``city``, ``phone`` and
        ``email``. Optional: ``location`` (defaults to ``None``) and
        ``availability`` (defaults to ``"public"``).

        Raises:
            KeyError: If a required keyword is missing.
            ValueError: If ``availability`` is not an accepted value.
        """
        self.name: str = kwargs["name"]
        self.address: str = kwargs["address"]
        self.city: str = kwargs["city"]
        self.phone: str = kwargs["phone"]
        self.email = kwargs["email"]
        self.location: tuple[float, float] = kwargs.get("location", None)
        self.availability: str = kwargs.get("availability", "public")

    @property
    def availability(self) -> str:
        """Availability of the canteen.

        Returns:
            str: 'public' | 'restricted'
        """
        return self._availability

    @availability.setter
    def availability(self, value: str):
        # Booleans are translated first; identity checks keep 1 and 0 from
        # being accepted in their place.
        if value is True:
            value = "public"
        elif value is False:
            value = "restricted"
        if value not in ("public", "restricted"):
            raise ValueError("only 'public' or 'restricted' are allowed.")
        self._availability = value

    @availability.deleter
    def availability(self):
        del self._availability
|
||||
|
||||
|
||||
@dataclass
class CanteenXML:
    """Represents the canteen tag in openMensaFeedv2."""

    def __init__(
        self,
        canteen_meta: CanteenMeta,
        times: TimesXML,
        feeds: dict[str, FeedXML] = None,
        days: dict[date, dict[str, list[MealXML]]] = None,
    ):
        """Init CanteenXML object.

        Args:
            canteen_meta (CanteenMeta): Name, address and contact data.
            times (TimesXML): Opening times of the canteen.
            feeds (dict[str, FeedXML]): Feeds by feed name. Defaults to a
                new empty dict.
            days (dict): Meals grouped by day, then by category name.
                Defaults to a new empty dict.
        """
        self.canteen_meta = canteen_meta
        self.times = times
        self.feeds = feeds if feeds is not None else {}
        self.days = days if days is not None else {}

    def __create_node(self, doc: minidom.Document, tag: str, value: str):
        # Element ``tag`` with ``value`` as its only text child.
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(value))
        return node

    def __append_meta(self, doc: minidom.Document, canteen: minidom.Element):
        # Appends the metadata children in the order the feed lists them.
        meta = self.canteen_meta
        for tag in ("name", "address", "city", "phone", "email"):
            canteen.appendChild(
                self.__create_node(doc, tag, getattr(meta, tag))
            )
        if meta.location:
            longitude, latitude = meta.location[0], meta.location[1]
            location = doc.createElement("location")
            location.setAttribute("longitude", str(longitude))
            location.setAttribute("latitude", str(latitude))
            canteen.appendChild(location)
        canteen.appendChild(
            self.__create_node(doc, "availability", meta.availability)
        )
        canteen.appendChild(self.times.xml_element(doc))

    def add_feed(self, feed: FeedXML):
        """Add a feed to the canteen.

        A feed that has the same name as an earlier one replaces it.

        Args:
            feed (FeedXML): Feed to register under its ``name``.
        """
        self.feeds[feed.name] = feed

    def add_meal(self, day: date, category: str, meal: MealXML):
        """Add a meal to the canteen.

        Args:
            day (date): Offered date of meal.
            category (str): Meal's category.
            meal (MealXML): The meal item.
        """
        # Create the day and category containers on first use.
        self.days.setdefault(day, {}).setdefault(category, []).append(meal)

    def xml_element(self, doc: minidom.Document):
        """Return the XML representation.

        Args:
            doc (minidom.Document): Working XML document

        Returns:
            The ``canteen`` element holding metadata, feeds and all days
            with their categories and meals.
        """
        canteen = doc.createElement("canteen")
        self.__append_meta(doc, canteen)
        for feed in self.feeds.values():
            canteen.appendChild(feed.xml_element(doc))

        for day_date, categories in self.days.items():
            day_node = doc.createElement("day")
            day_node.setAttribute("date", str(day_date))
            for category_name, meals in categories.items():
                category_node = doc.createElement("category")
                category_node.setAttribute("name", category_name)
                for meal in meals:
                    category_node.appendChild(meal.xml_element(doc))
                day_node.appendChild(category_node)
            canteen.appendChild(day_node)
        return canteen
|
||||
@@ -0,0 +1,82 @@
|
||||
from dataclasses import dataclass
|
||||
from xml.dom import minidom
|
||||
|
||||
|
||||
@dataclass
class ScheduleXML:
    """Represents the schedule inside the feed tag in openMensaFeedv2."""

    def __init__(self, hour: str, **kwargs):
        """Init ScheduleXML object.

        Keyword arguments other than the ones listed are ignored.

        Args:
            hour (str): Value of the ``hour`` attribute.
            day_of_month (str, optional): Defaults to "*".
            day_of_week (str, optional): Defaults to "*".
            month (str, optional): Defaults to "*".
            minute (int, optional): Defaults to 0.
            retry (str, optional): Defaults to None, which omits the
                attribute.
        """
        option = kwargs.get
        self.hour = hour
        self.day_of_month = option("day_of_month", "*")
        self.day_of_week = option("day_of_week", "*")
        self.month = option("month", "*")
        self.minute = option("minute", 0)
        self.retry = option("retry", None)

    def xml_element(self, doc: minidom.Document):
        """Return the XML representation.

        Args:
            doc (minidom.Document): Working XML document.

        Returns:
            The ``schedule`` element; ``retry`` is only set when it has a
            truthy value.
        """
        schedule = doc.createElement("schedule")
        attributes = (
            ("dayOfMonth", self.day_of_month),
            ("dayOfWeek", self.day_of_week),
            ("month", self.month),
            ("hour", self.hour),
            ("minute", str(self.minute)),
        )
        for attribute, value in attributes:
            schedule.setAttribute(attribute, value)
        if self.retry:
            schedule.setAttribute("retry", self.retry)
        return schedule
|
||||
|
||||
|
||||
@dataclass
class FeedXML:
    """Represents the feed tag in openMensaFeedv2."""

    # Written to the ``name`` attribute.
    name: str
    # Text of the ``source`` child element.
    source: str
    # Text of the ``url`` child element.
    url: str
    # Rendered as the ``schedule`` child element.
    schedule: ScheduleXML
    # Written to the ``priority`` attribute.
    priority: int = 0

    def xml_element(self, doc: minidom.Document):
        """Return the XML representation.

        Args:
            doc (minidom.Document): Working XML document.

        Returns:
            The ``feed`` element with its ``schedule``, ``url`` and
            ``source`` children, in that order.
        """
        feed = doc.createElement("feed")
        feed.setAttribute("name", self.name)
        feed.setAttribute("priority", str(self.priority))
        feed.appendChild(self.schedule.xml_element(doc))

        for tag, text in (("url", self.url), ("source", self.source)):
            child = doc.createElement(tag)
            child.appendChild(doc.createTextNode(text))
            feed.appendChild(child)
        return feed
|
||||
@@ -0,0 +1,48 @@
|
||||
from xml.dom import minidom
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class MealXML:
    """Represents the meal tag in openMensaFeedv2."""

    def __init__(self, name: str, price: dict[str, float], note: str = None):
        """Init MealXML object.

        Args:
            name (str): name of the meal
            price (dict[str, float]): prices by role; only the roles
                student, employee and other are rendered
            note (str): additional information, omitted when None
        """
        self.name = name
        self.note = note
        self.price = price

    def xml_element(self, doc: minidom.Document):
        """Return the xml tag.

        Args:
            doc (minidom.Document): Working XML document.

        Returns:
            The ``meal`` element with its name, the optional note and one
            ``price`` child per rendered role.
        """
        meal = doc.createElement("meal")

        name_node = doc.createElement("name")
        name_node.appendChild(doc.createTextNode(self.name))
        meal.appendChild(name_node)

        if self.note is not None:
            note_node = doc.createElement("note")
            note_node.appendChild(doc.createTextNode(self.note))
            meal.appendChild(note_node)

        for role, amount in self.price.items():
            if role in ("student", "employee", "other"):
                # A missing amount is written as 0.00.
                shown = 0.0 if amount is None else amount
                price_node = doc.createElement("price")
                price_node.setAttribute("role", role)
                price_node.appendChild(doc.createTextNode(f"{shown:.2f}"))
                meal.appendChild(price_node)
        return meal
|
||||
@@ -0,0 +1,51 @@
|
||||
from xml.dom import minidom
|
||||
from dataclasses import dataclass
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
|
||||
|
||||
|
||||
@dataclass
class OpenMensaXML:
    """Represents the openmensa tag in openMensaFeedv2.

    The fields are declared so that ``@dataclass`` generates the
    constructor together with a meaningful ``__repr__`` and ``__eq__``.
    (With no declared fields the generated ``__eq__`` would consider
    every pair of instances equal.) The generated constructor keeps the
    signature ``OpenMensaXML(version, canteen)``.

    Attributes:
        version: Parser version, written as the text of the ``version``
            child element.
        canteen: Object whose ``xml_element`` provides the canteen
            child element.
    """

    version: str
    canteen: "CanteenXML"

    def __create_version_node(self, doc: minidom.Document):
        """Return a ``version`` element holding the parser version."""
        elem = doc.createElement("version")
        elem.appendChild(doc.createTextNode(self.version))
        return elem

    def xml_element(self, doc: minidom.Document):
        """Create the openmensa XML tag.

        Args:
            doc (minidom.Document): Working XML document used as the
                factory for all created nodes.

        Returns:
            The new ``openmensa`` root element carrying the schema
            attributes, followed by the ``version`` element and the
            canteen element as children.
        """
        open_mensa = doc.createElement("openmensa")
        # This "version" attribute is the fixed value 2.1; the parser
        # version goes into the separate <version> child element below.
        open_mensa.setAttribute("version", "2.1")
        open_mensa.setAttribute("xmlns", "http://openmensa.org/open-mensa-v2")
        open_mensa.setAttribute(
            "xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"
        )
        open_mensa.setAttribute(
            "xsi:schemaLocation",
            "http://openmensa.org/open-mensa-v2 "
            + "http://openmensa.org/open-mensa-v2.xsd",
        )

        open_mensa.appendChild(self.__create_version_node(doc))
        open_mensa.appendChild(self.canteen.xml_element(doc))
        return open_mensa
|
||||
@@ -0,0 +1,111 @@
|
||||
import re
|
||||
|
||||
from dataclasses import dataclass
|
||||
from xml.dom import minidom
|
||||
|
||||
|
||||
class CanteenOpenTimespec(str):
    """Represents valid daily opening times in openMensaFeedv2.

    An instance is a plain string that is either ``CLOSED`` or a
    zero-padded ``HH:MM-HH:MM`` range extracted from free-form text.
    """

    CLOSED = "geschlossen"
    # Every input in this set is treated as "closed".
    CLOSED_VALID_VALUES = {
        CLOSED,
        None,
        False,
        "",
    }

    # The leading wildcard is lazy on purpose: a greedy ``.*`` takes as
    # much as it can and leaves only the last digit for ``hour1``, which
    # turned "11:00-14:30" into "01:00-14:30".
    PATTERN = (r'.*?(?P<hour1>\d{1,2}):(?P<min1>\d{1,2})'
               r'\D*(?P<hour2>\d{1,2}):(?P<min2>\d{1,2}).*')

    MATCHER = re.compile(PATTERN)

    def __new__(cls, spec):
        """Create CanteenOpenTimespec object.

        Args:
            spec (str | bool | None): time specification. Strings are
                stripped first; "geschlossen" (any letter case, and the
                known misspelling "geschllossen") as well as every value
                in ``CLOSED_VALID_VALUES`` yields ``CLOSED``. Anything
                else must contain two ``H:MM`` style times.

        Returns:
            CanteenOpenTimespec: ``CLOSED`` or the normalized
            ``HH:MM-HH:MM`` string.

        Raises:
            ValueError: If ``spec`` is not a closed marker and does not
                match ``PATTERN``.
        """
        if isinstance(spec, str):
            spec = spec.strip()
            normalized = spec.lower()
            # Also accept the misspelling seen in source data.
            if normalized in (cls.CLOSED, "geschllossen"):
                spec = cls.CLOSED
        if spec in cls.CLOSED_VALID_VALUES:
            return super().__new__(cls, cls.CLOSED)

        match = cls.MATCHER.match(str(spec))
        if not match:
            raise ValueError(f'Invalid time specification: {spec!r} does'
                             f' not conform to regex {cls.PATTERN!r}')
        # parse to int for format zerofill
        int_spec = {k: int(v) for k, v in match.groupdict().items()}
        clean_spec = (
            f'{int_spec["hour1"]:02}:{int_spec["min1"]:02}-'
            f'{int_spec["hour2"]:02}:{int_spec["min2"]:02}'
        )
        return super().__new__(cls, clean_spec)
|
||||
|
||||
|
||||
@dataclass
class TimesXML:
    """Represents the times tag in openMensaFeedv2.

    Holds one opening-time value per weekday. All seven attributes are
    always set after construction, so ``xml_element`` and the generated
    ``__repr__``/``__eq__`` never hit a missing attribute.
    """

    VALID_DAYS = (
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    )

    monday: str
    tuesday: str
    wednesday: str
    thursday: str
    friday: str
    saturday: str
    sunday: str

    def __init__(self, weekday_dict: dict[str, CanteenOpenTimespec] = None):
        """Init TimesXML object.

        Args:
            weekday_dict (dict[str, CanteenOpenTimespec]): Opening time
                per weekday, keyed by the names in ``VALID_DAYS``.
                ``None`` (the default) is treated as an empty mapping.
                Days missing from the mapping are stored as closed.

        Raises:
            KeyError: If the mapping contains a key that is not one of
                ``VALID_DAYS``.
        """
        if weekday_dict is None:
            weekday_dict = {}

        # Validate first so a bad key never leaves a half-filled object.
        unknown_days = [key for key in weekday_dict
                        if key not in self.VALID_DAYS]
        if unknown_days:
            raise KeyError(f"Unknown weekday key(s): {unknown_days!r}")

        for day in self.VALID_DAYS:
            setattr(self, day,
                    weekday_dict.get(day, CanteenOpenTimespec.CLOSED))

    def __create_node(self,
                      doc: minidom.Document,
                      tag: str,
                      value: CanteenOpenTimespec):
        """Return the element for one weekday.

        A value contained in ``CanteenOpenTimespec.CLOSED_VALID_VALUES``
        yields ``closed="true"``; any other value is written verbatim
        into the ``open`` attribute.
        """
        elem = doc.createElement(tag)
        if value in CanteenOpenTimespec.CLOSED_VALID_VALUES:
            elem.setAttribute("closed", "true")
        else:
            elem.setAttribute("open", value)
        return elem

    def xml_element(self, doc: minidom.Document):
        """Return the XML representation.

        Args:
            doc (minidom.Document): Working XML document used as the
                factory for all created nodes.

        Returns:
            The new ``times`` element (``type="opening"``) with one
            child element per weekday in ``VALID_DAYS`` order.
        """
        times = doc.createElement("times")
        times.setAttribute("type", "opening")

        for day in self.VALID_DAYS:
            day_node = self.__create_node(doc, day, getattr(self, day))
            times.appendChild(day_node)

        return times
|
||||
Reference in New Issue
Block a user