modular parser framework
This commit is contained in:
+2
-2
@@ -1,3 +1,3 @@
|
||||
PUBLIC_HOST=menus.example.org
|
||||
BASE_URL=https://menus.example.org
|
||||
PARSER_HOST=menus.example.org
|
||||
PARSER_URL=https://menus.example.org
|
||||
GITEA_HOST=gitea.example.org
|
||||
|
||||
+3
-3
@@ -1,4 +1,4 @@
|
||||
ARG DEPLOY_DIR=/opt/om-parser-stw-potsdam-v2
|
||||
ARG DEPLOY_DIR=/opt/openmensa-parsers
|
||||
ARG USERNAME=flaskd
|
||||
ARG LISTEN_PORT=3080
|
||||
|
||||
@@ -24,7 +24,7 @@ ENV PIPENV_VENV_IN_PROJECT=1
|
||||
WORKDIR ${DEPLOY_DIR}
|
||||
|
||||
# Copy app folder contents
|
||||
COPY stw_potsdam/ ./stw_potsdam
|
||||
COPY openmensa_parsers/ ./openmensa_parsers
|
||||
COPY tests ./tests
|
||||
COPY Makefile .
|
||||
COPY README.md .
|
||||
@@ -83,6 +83,6 @@ ENV LISTEN_PORT=${LISTEN_PORT}
|
||||
ENV LISTEN=0.0.0.0:${LISTEN_PORT}
|
||||
|
||||
EXPOSE ${LISTEN_PORT}
|
||||
CMD ["sh", "-c", "uv run uwsgi --master --http11-socket \"$LISTEN\" --plugins python --protocol uwsgi --wsgi stw_potsdam.views:app --virtualenv ./.venv"]
|
||||
CMD ["sh", "-c", "uv run uwsgi --master --http11-socket \"$LISTEN\" --plugins python --protocol uwsgi --wsgi openmensa_parsers.views:app --virtualenv ./.venv"]
|
||||
|
||||
HEALTHCHECK --interval=15s --timeout=3s CMD curl -f http://127.0.0.1:$LISTEN_PORT/health_check || exit 1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
RUN=uv run flask --app stw_potsdam.views run
|
||||
RUN=uv run flask --app openmensa_parsers.views run
|
||||
|
||||
dependencies:
|
||||
uv sync --extra dev
|
||||
@@ -7,10 +7,10 @@ run:
|
||||
$(RUN)
|
||||
|
||||
debug:
|
||||
uv run flask --app stw_potsdam.views --debug run
|
||||
uv run flask --app openmensa_parsers.views --debug run
|
||||
|
||||
test:
|
||||
uv run --extra dev python -m pytest -vv --cov-branch --cov stw_potsdam --cov-report term --cov-report html
|
||||
uv run --extra dev python -m pytest -vv --cov-branch --cov openmensa_parsers --cov-report term --cov-report html
|
||||
|
||||
test_debug:
|
||||
uv run --extra dev python -m pytest -v --trace
|
||||
@@ -22,9 +22,9 @@ coverage_report:
|
||||
uv run --extra dev python -m coverage report --fail-under 90
|
||||
|
||||
lint:
|
||||
uv run --extra dev pycodestyle stw_potsdam tests
|
||||
uv run --extra dev pydocstyle stw_potsdam tests
|
||||
uv run --extra dev pylint stw_potsdam tests
|
||||
uv run --extra dev pycodestyle openmensa_parsers tests
|
||||
uv run --extra dev pydocstyle openmensa_parsers tests
|
||||
uv run --extra dev pylint openmensa_parsers tests
|
||||
|
||||
clean:
|
||||
uv run --extra dev python -m coverage erase
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# OpenMensa Parser STW Potsdam
|
||||
# OpenMensa Parsers
|
||||
|
||||
[](https://dl.circleci.com/status-badge/redirect/gh/f4lco/om-parser-stw-potsdam-v2/tree/master)
|
||||
[](https://coveralls.io/github/f4lco/om-parser-stw-potsdam-v2?branch=master)
|
||||
[](https://om-parser-stw-potsdam-v2.readthedocs.io/en/latest/)
|
||||
|
||||
[OpenMensa][om] parser components query canteen websites for menus and transform them into OpenMensa's data format.
|
||||
This project came to life after the website of the canteens of the Studentenwerk Potsdam changed, and is therefore the successor to [kaifabian/om-parser-potsdam][prev-parser] (hence the "-v2").
|
||||
The default parser currently supports Studentenwerk Potsdam. The package is structured so additional city or provider parsers can be added behind the same OpenMensa XML renderer.
|
||||
|
||||
Among others, OpenMensa powers the popular [Mensa Uni Potsdam][steppschuh] Android app.
|
||||
|
||||
@@ -19,14 +19,14 @@ Recommended: Python 3.12+.
|
||||
|
||||
$ uv venv
|
||||
$ uv pip install -e ".[dev]"
|
||||
$ uv run flask --app stw_potsdam.views run
|
||||
$ uv run flask --app openmensa_parsers.views run
|
||||
|
||||
**Option B (venv + pip)** ::
|
||||
|
||||
$ python -m venv .venv
|
||||
$ . .venv/bin/activate
|
||||
$ pip install -e ".[dev]"
|
||||
$ FLASK_APP=stw_potsdam.views flask run
|
||||
$ FLASK_APP=openmensa_parsers.views flask run
|
||||
|
||||
**Contributions** are always welcome, in particular if the response format of the canteens changes. Feel free to file a PR with improvements.
|
||||
|
||||
|
||||
+2
-2
@@ -3,7 +3,7 @@ services:
|
||||
build: .
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
BASE_URL: ${BASE_URL}
|
||||
BASE_URL: ${PARSER_URL}
|
||||
LISTEN_PORT: 3080
|
||||
expose:
|
||||
- "3080"
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
- "443:443"
|
||||
- "443:443/udp"
|
||||
environment:
|
||||
PUBLIC_HOST: ${PUBLIC_HOST}
|
||||
PARSER_HOST: ${PARSER_HOST}
|
||||
GITEA_HOST: ${GITEA_HOST}
|
||||
volumes:
|
||||
- ./Caddyfile:/etc/caddy/Caddyfile:ro
|
||||
|
||||
+6
-6
@@ -20,7 +20,7 @@ sys.path.insert(0, os.path.abspath('..'))
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = u'OpenMensa Potsdam'
|
||||
project = u'OpenMensa Parsers'
|
||||
copyright = u'2019, f4lco'
|
||||
author = u'f4lco'
|
||||
|
||||
@@ -123,7 +123,7 @@ html_sidebars = {
|
||||
# -- Options for HTMLHelp output ---------------------------------------------
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'OpenMensaPotsdamdoc'
|
||||
htmlhelp_basename = 'OpenMensaParsersdoc'
|
||||
|
||||
|
||||
# -- Options for LaTeX output ------------------------------------------------
|
||||
@@ -150,7 +150,7 @@ latex_elements = {
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
latex_documents = [
|
||||
(master_doc, 'OpenMensaPotsdam.tex', u'OpenMensa Potsdam Documentation',
|
||||
(master_doc, 'OpenMensaParsers.tex', u'OpenMensa Parsers Documentation',
|
||||
u'f4lco', 'manual'),
|
||||
]
|
||||
|
||||
@@ -160,7 +160,7 @@ latex_documents = [
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
(master_doc, 'openmensapotsdam', u'OpenMensa Potsdam Documentation',
|
||||
(master_doc, 'openmensaparsers', u'OpenMensa Parsers Documentation',
|
||||
[author], 1)
|
||||
]
|
||||
|
||||
@@ -171,8 +171,8 @@ man_pages = [
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
(master_doc, 'OpenMensaPotsdam', u'OpenMensa Potsdam Documentation',
|
||||
author, 'OpenMensaPotsdam', 'One line description of project.',
|
||||
(master_doc, 'OpenMensaParsers', u'OpenMensa Parsers Documentation',
|
||||
author, 'OpenMensaParsers', 'One line description of project.',
|
||||
'Miscellaneous'),
|
||||
]
|
||||
|
||||
|
||||
+39
-7
@@ -25,19 +25,53 @@ Main Module Entry Points
|
||||
In the following, the main workflow of this parser is explained.
|
||||
Generating a new `OpenMensa` feed starts by reading the configured canteens. Some canteen data, such as ID, name, and location, are currently not scraped. Doing so would be very brittle and involve a multistep process. Refer to the :ref:`cache_hash` for deeper insight into the obstacles.
|
||||
|
||||
.. autofunction:: stw_potsdam.config.read_canteen_config
|
||||
.. autofunction:: openmensa_parsers.config.read_canteen_config
|
||||
|
||||
.. autoclass:: stw_potsdam.config.Canteen
|
||||
.. autoclass:: openmensa_parsers.config.Canteen
|
||||
|
||||
Use the canteen data to select matching upstream outlets, download the required menu JSON, and render the OpenMensa XML.
|
||||
|
||||
.. autoclass:: stw_potsdam.swp_webspeiseplan_api.SWPWebspeiseplanAPI
|
||||
Parser Providers
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: stw_potsdam.swp_webspeiseplan_parser.SWPWebspeiseplanParser
|
||||
The application is structured around parser providers. A provider owns the
|
||||
source-specific work: fetching raw upstream data and converting it into the
|
||||
shared OpenMensa XML structures. The ``Builder`` only asks a provider for
|
||||
canteens, attaches feed metadata, and renders XML.
|
||||
|
||||
New cities or data sources should add a parser under ``openmensa_parsers.parsers``.
|
||||
The parser should implement three methods:
|
||||
|
||||
``fetch()``
|
||||
Download or load the raw source data.
|
||||
|
||||
``parse(config, raw_data)``
|
||||
Convert raw data into a ``dict[str, CanteenXML]`` keyed by the configured
|
||||
canteen key.
|
||||
|
||||
``create_feed(canteen, url)``
|
||||
Return the feed metadata for one canteen. In most cases, subclass
|
||||
``BaseOpenMensaParser`` and configure ``feed`` instead of overriding this
|
||||
method.
|
||||
|
||||
Register the parser in ``openmensa_parsers.parsers.registry``. At runtime, select a
|
||||
parser with ``OM_PARSER_ID``. The default is ``potsdam``.
|
||||
|
||||
Parser tests should keep network access separate from parsing. Store raw
|
||||
fixtures in the test suite, pass them directly into ``parse()``, and reserve
|
||||
live source checks for opt-in tests.
|
||||
|
||||
.. autoclass:: openmensa_parsers.webspeiseplan_api.WebspeiseplanAPI
|
||||
|
||||
.. autoclass:: openmensa_parsers.webspeiseplan_parser.WebspeiseplanParser
|
||||
|
||||
.. autoclass:: openmensa_parsers.parsers.base.BaseOpenMensaParser
|
||||
|
||||
.. autoclass:: openmensa_parsers.parsers.potsdam.PotsdamParser
|
||||
|
||||
The XML type modules contain the OpenMensa rendering objects:
|
||||
|
||||
.. autoclass:: stw_potsdam.xml_types.builder.Builder
|
||||
.. autoclass:: openmensa_parsers.xml_types.builder.Builder
|
||||
|
||||
Tests
|
||||
~~~~~
|
||||
@@ -55,5 +89,3 @@ Test execution works as follows: ::
|
||||
|
||||
The first invocation runs tests whose outcome can solely be determined by the test suite, which makes them suitable for frequent execution and CI systems.
|
||||
Setting the environment variable ``ENABLE_API_QUERY`` enables tests which require querying the canteen API. Because third-party services are queried, those are more suited to manual execution. Developers can quickly check if their change is applicable to today's menu.
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
Welcome to OpenMensa Parser Potsdam's documentation!
|
||||
Welcome to OpenMensa Parsers' documentation!
|
||||
====================================================
|
||||
|
||||
An OpenMensa parser retrieves canteen menus and renders them in a commonly understood format.
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Parser/provider implementations for OpenMensa feed sources."""
|
||||
|
||||
from openmensa_parsers.parsers.registry import create_parser, get_parser_class
|
||||
|
||||
__all__ = ["create_parser", "get_parser_class"]
|
||||
@@ -0,0 +1,61 @@
|
||||
"""Shared parser contract for city-specific OpenMensa parsers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Protocol
|
||||
|
||||
from openmensa_parsers.config import Canteen
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
|
||||
from openmensa_parsers.xml_types.feed_xml import FeedXML, ScheduleXML
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class FeedDefinition:
    """Default feed metadata used when publishing a parser result.

    Instances are immutable and hashable. ``schedule`` takes part in equality
    comparisons but is excluded from the hash: a ``dict`` is unhashable, so
    including it would make ``hash()`` raise ``TypeError`` on every instance.
    """

    # Value published as the feed's source.
    source: str
    # Name of the feed.
    name: str = "full"
    # Priority of the feed.
    priority: int = 0
    # Keyword arguments describing the feed schedule. The factory gives every
    # instance its own dict, so the default is never shared between instances.
    schedule: dict[str, Any] = field(
        default_factory=lambda: {"hour": "8-14", "retry": "30 1"},
        hash=False,
    )
|
||||
|
||||
|
||||
class OpenMensaParser(Protocol):
    """Structural interface that every city/source parser provides.

    A parser exposes an ``id``, a ``feed`` definition, and the three methods
    below.
    """

    id: str
    feed: FeedDefinition

    def fetch(self) -> Any:
        """Download or load the raw, source-specific data."""
        ...

    def parse(
        self,
        config: dict[str, Canteen],
        raw_data: Any,
    ) -> dict[str, CanteenXML]:
        """Turn raw source data into OpenMensa canteen structures."""
        ...

    def create_feed(self, canteen: Canteen, url: str) -> FeedXML:
        """Return the OpenMensa feed metadata for one configured canteen."""
        ...
|
||||
|
||||
|
||||
class BaseOpenMensaParser:  # pylint: disable=too-few-public-methods
    """Shared behaviour for parsers that publish the standard feed block.

    Subclasses are expected to set ``feed``; ``id`` defaults to ``"base"``.
    """

    id = "base"
    feed: FeedDefinition

    def create_feed(self, _canteen: Canteen, url: str) -> FeedXML:
        """Build the feed entry for a configured canteen from ``self.feed``.

        The canteen argument is accepted but not used by this implementation;
        only ``url`` and the values of ``self.feed`` end up in the result.
        """
        definition = self.feed
        # Copy the schedule mapping before expanding it into keyword arguments.
        schedule = ScheduleXML(**dict(definition.schedule))
        return FeedXML(
            name=definition.name,
            priority=definition.priority,
            source=definition.source,
            url=url,
            schedule=schedule,
        )
|
||||
@@ -0,0 +1,61 @@
|
||||
"""Potsdam parser/provider implementation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from openmensa_parsers.config import Canteen
|
||||
from openmensa_parsers.parsers.base import BaseOpenMensaParser, FeedDefinition
|
||||
from openmensa_parsers.webspeiseplan_api import (
|
||||
WebspeiseplanAPI,
|
||||
WebspeiseplanData,
|
||||
)
|
||||
from openmensa_parsers.webspeiseplan_parser import WebspeiseplanParser
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
|
||||
|
||||
|
||||
class PotsdamParser(BaseOpenMensaParser):
    """Parser/provider for Studentenwerk Potsdam's Webspeiseplan source.

    ``fetch`` delegates the download to a ``WebspeiseplanAPI`` client and
    ``parse`` converts the downloaded data into one ``CanteenXML`` per
    configured canteen with the help of a ``WebspeiseplanParser``.
    """

    id = "potsdam"
    BASE_URL = "https://swp.webspeiseplan.de"
    feed = FeedDefinition(source=BASE_URL)

    def __init__(
        self,
        api: WebspeiseplanAPI | None = None,
        parser: WebspeiseplanParser | None = None,
    ) -> None:
        """Store the fetch and parse helpers, creating defaults when omitted.

        Args:
            api: Client used by ``fetch``; defaults to a ``WebspeiseplanAPI``
                created for ``BASE_URL``.
            parser: Helper used by ``parse``; defaults to a new
                ``WebspeiseplanParser``.
        """
        if api is None:
            api = WebspeiseplanAPI(self.BASE_URL)
        if parser is None:
            parser = WebspeiseplanParser()
        self.api = api
        self.parser = parser
        self.logger = logging.getLogger(__name__)

    def fetch(self) -> WebspeiseplanData:
        """Return everything the API client downloads via ``fetch_all``."""
        downloaded = self.api.fetch_all()
        return downloaded

    def parse(
        self,
        config: dict[str, Canteen],
        raw_data: WebspeiseplanData,
    ) -> dict[str, CanteenXML]:
        """Convert Potsdam Webspeiseplan data into canteen structures.

        Configured canteens whose name is not among ``raw_data.outlets`` are
        skipped with a warning. The result is keyed by the configuration key.
        """
        result: dict[str, CanteenXML] = {}
        for key, entry in config.items():
            outlet_name = entry.name
            if outlet_name in raw_data.outlets:
                result[key] = self._build_canteen(raw_data, outlet_name)
            else:
                self.logger.warning("%s not found in keys", outlet_name)
        return result

    def _build_canteen(
        self,
        raw_data: WebspeiseplanData,
        outlet_name: str,
    ) -> CanteenXML:
        """Assemble the canteen structure, including meals, for one outlet."""
        # Work on a copy so adding "isPublic" leaves raw_data's outlet as is.
        outlet_info = dict(raw_data.outlets[outlet_name])
        outlet_menus = raw_data.menus[outlet_name]
        outlet_categories = raw_data.meal_categories[outlet_name]
        outlet_location = raw_data.locations[outlet_name]
        outlet_info["isPublic"] = outlet_location["isPublic"]

        canteen_xml = self.parser.parse_canteen_meta_times(outlet_info)
        meals = self.parser.parse_meals(outlet_menus, outlet_categories)
        for meal_kwargs in meals:
            canteen_xml.add_meal(**meal_kwargs)
        return canteen_xml
|
||||
@@ -0,0 +1,25 @@
|
||||
"""Registry for city/source parser implementations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from openmensa_parsers.parsers.base import OpenMensaParser
|
||||
from openmensa_parsers.parsers.potsdam import PotsdamParser
|
||||
|
||||
|
||||
PARSER_CLASSES: dict[str, type[OpenMensaParser]] = {
|
||||
PotsdamParser.id: PotsdamParser,
|
||||
}
|
||||
|
||||
|
||||
def get_parser_class(parser_id: str) -> type[OpenMensaParser]:
    """Return the parser class registered under ``parser_id``.

    Raises:
        KeyError: If ``parser_id`` is not a key of ``PARSER_CLASSES``. The
            message lists the registered parser ids in sorted order.
    """
    try:
        parser_class = PARSER_CLASSES[parser_id]
    except KeyError as missing:
        known_ids = ", ".join(sorted(PARSER_CLASSES))
        message = (
            f"Unknown parser {parser_id!r}; configured parsers: {known_ids}"
        )
        raise KeyError(message) from missing
    return parser_class
|
||||
|
||||
|
||||
def create_parser(parser_id: str) -> OpenMensaParser:
    """Instantiate the parser registered under ``parser_id``.

    The class is resolved with ``get_parser_class`` and constructed without
    arguments; a lookup failure from ``get_parser_class`` is not caught here.
    """
    parser_class = get_parser_class(parser_id)
    return parser_class()
|
||||
@@ -8,10 +8,12 @@ import cachetools as ct
|
||||
from flask import Flask, jsonify, make_response, url_for
|
||||
from flask.logging import create_logger
|
||||
|
||||
from stw_potsdam.config import read_canteen_config
|
||||
from stw_potsdam.xml_types.builder import Builder
|
||||
from openmensa_parsers.config import read_canteen_config
|
||||
from openmensa_parsers.parsers import create_parser
|
||||
from openmensa_parsers.xml_types.builder import Builder
|
||||
|
||||
CACHE_TIMEOUT = 45 * 60
|
||||
PARSER_ID = os.environ.get("OM_PARSER_ID", "potsdam")
|
||||
|
||||
# pragma pylint: disable=invalid-name
|
||||
|
||||
@@ -40,8 +42,8 @@ def canteen_not_found(canteen_name):
|
||||
|
||||
@ct.cached(cache=cache)
|
||||
def update_builder():
|
||||
log.debug("Downloading menu for SWP")
|
||||
return Builder(config)
|
||||
log.debug("Downloading menu using parser %s", PARSER_ID)
|
||||
return Builder(config, parser=create_parser(PARSER_ID))
|
||||
|
||||
|
||||
@app.route("/canteens/<canteen_name>")
|
||||
@@ -9,8 +9,8 @@ from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SWPWebspeiseplanData:
|
||||
"""Downloaded SWP Webspeiseplan data grouped by outlet name."""
|
||||
class WebspeiseplanData:
|
||||
"""Downloaded Webspeiseplan data grouped by outlet name."""
|
||||
|
||||
outlets: dict[str, dict]
|
||||
locations: dict[str, dict]
|
||||
@@ -18,21 +18,21 @@ class SWPWebspeiseplanData:
|
||||
meal_categories: dict[str, dict]
|
||||
|
||||
|
||||
class SWPWebspeiseplanAPI:
|
||||
"""This class is used download content from SWP_Webspeiseplan.
|
||||
class WebspeiseplanAPI:
|
||||
"""Client for Webspeiseplan installations."""
|
||||
|
||||
Returns:
|
||||
[type]: [description]
|
||||
"""
|
||||
|
||||
URL_BASE = "https://swp.webspeiseplan.de"
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, base_url: str):
|
||||
"""Initialize the web service client."""
|
||||
logging.basicConfig()
|
||||
self.base_url = base_url.rstrip("/")
|
||||
parsed_url = urllib.parse.urlparse(self.base_url)
|
||||
if not parsed_url.scheme or not parsed_url.netloc:
|
||||
raise ValueError(f"Invalid Webspeiseplan base URL: {base_url!r}")
|
||||
self.host = parsed_url.netloc
|
||||
|
||||
def fetch_all(self) -> SWPWebspeiseplanData:
|
||||
def fetch_all(self) -> WebspeiseplanData:
|
||||
"""Download all data required to render OpenMensa feeds."""
|
||||
proxy_token = self.parse_token()
|
||||
outlets = self.parse_outlets(proxy_token)
|
||||
@@ -51,7 +51,7 @@ class SWPWebspeiseplanAPI:
|
||||
menus[outlet["name"]] = menu
|
||||
meal_categories[outlet["name"]] = id2cat
|
||||
outlet_locations[outlet["name"]] = locations[location]
|
||||
return SWPWebspeiseplanData(
|
||||
return WebspeiseplanData(
|
||||
outlets=outlets,
|
||||
locations=outlet_locations,
|
||||
menus=menus,
|
||||
@@ -69,8 +69,8 @@ class SWPWebspeiseplanAPI:
|
||||
)
|
||||
req.add_header("Accept-Language", "en-US,en;q=0.9")
|
||||
req.add_header("Connection", "keep-alive")
|
||||
req.add_header("Host", "swp.webspeiseplan.de")
|
||||
req.add_header("Referer", "https://swp.webspeiseplan.de/InitialConfig")
|
||||
req.add_header("Host", self.host)
|
||||
req.add_header("Referer", f"{self.base_url}/InitialConfig")
|
||||
req.add_header(
|
||||
"Sec-Ch-Ua",
|
||||
'"Not/A)Brand";v="99", '
|
||||
@@ -100,8 +100,8 @@ class SWPWebspeiseplanAPI:
|
||||
[type]: [description]
|
||||
"""
|
||||
query = urllib.parse.urlencode(params)
|
||||
url = f"{SWPWebspeiseplanAPI.URL_BASE}/index.php?{query}"
|
||||
SWPWebspeiseplanAPI.logger.debug("__parse_model: %s", url)
|
||||
url = f"{self.base_url}/index.php?{query}"
|
||||
WebspeiseplanAPI.logger.debug("__parse_model: %s", url)
|
||||
req = urllib.request.Request(url)
|
||||
self.__spoof_req_headers(req)
|
||||
with urllib.request.urlopen(req) as resp:
|
||||
@@ -110,7 +110,7 @@ class SWPWebspeiseplanAPI:
|
||||
|
||||
def parse_token(self) -> str:
|
||||
"""Get the token from the proxy server."""
|
||||
req = urllib.request.Request(SWPWebspeiseplanAPI.URL_BASE)
|
||||
req = urllib.request.Request(self.base_url)
|
||||
with urllib.request.urlopen(req) as resp:
|
||||
txt = resp.read().decode("utf-8")
|
||||
match = re.findall(r"/main.[0-9a-f]+.js", txt)
|
||||
@@ -122,15 +122,16 @@ class SWPWebspeiseplanAPI:
|
||||
# JS chunks with cache-busting filenames
|
||||
match = "/index.js"
|
||||
|
||||
SWPWebspeiseplanAPI.logger.debug(
|
||||
WebspeiseplanAPI.logger.debug(
|
||||
"__parse_token: downloading script %s", match
|
||||
)
|
||||
req = urllib.request.Request(f"{SWPWebspeiseplanAPI.URL_BASE}{match}")
|
||||
script_url = urllib.parse.urljoin(f"{self.base_url}/", match)
|
||||
req = urllib.request.Request(script_url)
|
||||
with urllib.request.urlopen(req) as resp:
|
||||
txt = resp.read().decode("utf-8")
|
||||
proxy_token =\
|
||||
re.findall(r"PROXY_TOKEN:\s*[\"']([0-9a-f]+)[\"']", txt)[0]
|
||||
SWPWebspeiseplanAPI.logger.debug(
|
||||
WebspeiseplanAPI.logger.debug(
|
||||
"__parse_token: PROXY_TOKEN %s", proxy_token
|
||||
)
|
||||
return proxy_token
|
||||
@@ -1,19 +1,19 @@
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, date
|
||||
from stw_potsdam.xml_types.canteen_xml import CanteenMeta, CanteenXML
|
||||
from stw_potsdam.xml_types.times_xml import CanteenOpenTimespec, TimesXML
|
||||
from stw_potsdam.xml_types.meal_xml import MealXML
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenMeta, CanteenXML
|
||||
from openmensa_parsers.xml_types.times_xml import CanteenOpenTimespec, TimesXML
|
||||
from openmensa_parsers.xml_types.meal_xml import MealXML
|
||||
|
||||
|
||||
EURO_PRICE_PATTERN = re.compile(r"(\d+(?:[,.]\d{1,2})?)\s*€")
|
||||
|
||||
|
||||
class SWPWebspeiseplanParser:
|
||||
"""Class method to parse SWP_Webspeiseplan."""
|
||||
class WebspeiseplanParser:
|
||||
"""Parser for Webspeiseplan menu and outlet data."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Init SWPWebspeiseplanParser object."""
|
||||
"""Init WebspeiseplanParser object."""
|
||||
logging.basicConfig()
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
from xml.dom import minidom
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
from typing import Any
|
||||
from flask import url_for
|
||||
from openmensa_parsers.xml_types.openmensa_xml import OpenMensaXML
|
||||
from openmensa_parsers.config import Canteen
|
||||
from openmensa_parsers.parsers.base import OpenMensaParser
|
||||
from openmensa_parsers.parsers.potsdam import PotsdamParser
|
||||
|
||||
|
||||
@dataclass
class Builder:
    """Build the OpenMensa XML documents for the configured canteens.

    The source-specific work is delegated to a parser: it supplies the raw
    data, the parsed canteens, and the feed metadata. The builder attaches the
    feed to each parsed canteen and renders XML on request.
    """

    VERSION = "2.0.1"

    def __init__(
        self,
        config: dict[str, Canteen],
        source_data: Any | None = None,
        parser: OpenMensaParser | None = None,
    ):
        """Parse the source data and prepare one XML object per canteen.

        Args:
            config: Configured canteens keyed by canteen key.
            source_data: Raw data handed to the parser; when ``None`` the
                parser's ``fetch()`` is called to obtain it.
            parser: Parser to use; defaults to a ``PotsdamParser``.
        """
        logging.basicConfig()
        self.logger = logging.getLogger(__name__)
        self._xml_data = {}

        if parser is None:
            parser = PotsdamParser()
        self.parser = parser

        if source_data is None:
            source_data = self.parser.fetch()

        parsed_canteens = self.parser.parse(config, source_data)
        for cname, canteen in parsed_canteens.items():
            canteen.add_feed(self.__create_feed(config[cname]))
            self._xml_data[cname] = OpenMensaXML(self.VERSION, canteen)

    def __create_feed(self, ntup: Canteen):
        """Ask the parser for the feed of ``ntup``, passing its external URL."""
        feed_url = url_for(
            "canteen_xml_feed",
            canteen_name=ntup.key,
            _external=True,
        )
        return self.parser.create_feed(ntup, feed_url)

    def get_xml(self, canteen_name: str):
        """Return the pretty-printed, UTF-8 encoded XML for one canteen.

        Args:
            canteen_name: Key under which the canteen was stored.

        Raises:
            KeyError: If no XML data is stored for ``canteen_name``.
        """
        document = minidom.Document()
        root = self._xml_data[canteen_name].xml_element(document)
        document.appendChild(root)
        return document.toprettyxml(encoding="UTF-8")
|
||||
@@ -1,9 +1,9 @@
|
||||
from dataclasses import dataclass
|
||||
from xml.dom import minidom
|
||||
from datetime import date
|
||||
from stw_potsdam.xml_types.times_xml import TimesXML
|
||||
from stw_potsdam.xml_types.meal_xml import MealXML
|
||||
from stw_potsdam.xml_types.feed_xml import FeedXML
|
||||
from openmensa_parsers.xml_types.times_xml import TimesXML
|
||||
from openmensa_parsers.xml_types.meal_xml import MealXML
|
||||
from openmensa_parsers.xml_types.feed_xml import FeedXML
|
||||
|
||||
|
||||
@dataclass
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
from xml.dom import minidom
|
||||
from dataclasses import dataclass
|
||||
from stw_potsdam.xml_types.canteen_xml import CanteenXML
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
|
||||
|
||||
|
||||
@dataclass
|
||||
+4
-4
@@ -3,9 +3,9 @@ requires = ["setuptools>=69"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "om-parser-stw-potsdam-v2"
|
||||
name = "openmensa-parsers"
|
||||
version = "2.0.1"
|
||||
description = "OpenMensa parser components for Studentenwerk Potsdam."
|
||||
description = "OpenMensa parser components for multiple canteen data sources."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
|
||||
@@ -32,7 +32,7 @@ dev = [
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["stw_potsdam*"]
|
||||
include = ["openmensa_parsers*"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
stw_potsdam = ["canteens.ini"]
|
||||
openmensa_parsers = ["canteens.ini"]
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
from xml.dom import minidom
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
from flask import url_for
|
||||
from stw_potsdam.xml_types.openmensa_xml import OpenMensaXML
|
||||
from stw_potsdam.swp_webspeiseplan_api import (
|
||||
SWPWebspeiseplanAPI,
|
||||
SWPWebspeiseplanData,
|
||||
)
|
||||
from stw_potsdam.swp_webspeiseplan_parser import SWPWebspeiseplanParser
|
||||
from stw_potsdam.config import Canteen
|
||||
from stw_potsdam.xml_types.feed_xml import FeedXML, ScheduleXML
|
||||
|
||||
|
||||
@dataclass
|
||||
class Builder:
|
||||
"""A class method for creating a new OpenMensa Feed."""
|
||||
|
||||
VERSION = "2.0.1"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: dict[str, Canteen],
|
||||
swp_data: SWPWebspeiseplanData | None = None,
|
||||
):
|
||||
"""Initialize the object for the OpenMensa Feed Doc XML."""
|
||||
logging.basicConfig()
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._xml_data = {}
|
||||
if swp_data is None:
|
||||
swp_data = SWPWebspeiseplanAPI().fetch_all()
|
||||
swp_parser = SWPWebspeiseplanParser()
|
||||
for cname, ntup in config.items():
|
||||
if ntup.name not in swp_data.outlets.keys():
|
||||
self.logger.warning("%s not found in keys", ntup.name)
|
||||
continue
|
||||
outlet = swp_data.outlets[ntup.name]
|
||||
menus = swp_data.menus[ntup.name]
|
||||
categories = swp_data.meal_categories[ntup.name]
|
||||
locations = swp_data.locations[ntup.name]
|
||||
outlet["isPublic"] = locations["isPublic"]
|
||||
canteen = swp_parser.parse_canteen_meta_times(outlet)
|
||||
meals = swp_parser.parse_meals(menus, categories)
|
||||
for kwargs in meals:
|
||||
canteen.add_meal(**kwargs)
|
||||
feed = self.__create_feed(ntup)
|
||||
canteen.add_feed(feed)
|
||||
self._xml_data[cname] = OpenMensaXML(self.VERSION, canteen)
|
||||
|
||||
def __create_feed(self, ntup: Canteen):
|
||||
schedule = ScheduleXML(
|
||||
hour="8-14",
|
||||
retry="30 1",
|
||||
)
|
||||
feed = FeedXML(
|
||||
name="full",
|
||||
priority=0,
|
||||
source=SWPWebspeiseplanAPI.URL_BASE,
|
||||
url=url_for(
|
||||
"canteen_xml_feed",
|
||||
canteen_name=ntup.key,
|
||||
_external=True,
|
||||
),
|
||||
schedule=schedule,
|
||||
)
|
||||
return feed
|
||||
|
||||
def get_xml(self, canteen_name: str):
|
||||
"""Return a XML string representing the canteen.
|
||||
|
||||
Returns:
|
||||
[type]: [description]
|
||||
"""
|
||||
doc = minidom.Document()
|
||||
xml_element = self._xml_data[canteen_name].xml_element(doc)
|
||||
doc.appendChild(xml_element)
|
||||
return doc.toprettyxml(encoding="UTF-8")
|
||||
@@ -5,7 +5,7 @@ import json
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from stw_potsdam.config import read_canteen_config
|
||||
from openmensa_parsers.config import read_canteen_config
|
||||
|
||||
|
||||
def _resource_path(filename):
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
from openmensa_parsers.config import Canteen
|
||||
from openmensa_parsers.parsers.base import BaseOpenMensaParser, FeedDefinition
|
||||
from openmensa_parsers.parsers.registry import create_parser, get_parser_class
|
||||
from openmensa_parsers.parsers.potsdam import PotsdamParser
|
||||
from openmensa_parsers.views import app
|
||||
from openmensa_parsers.xml_types.builder import Builder
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenMeta, CanteenXML
|
||||
from openmensa_parsers.xml_types.times_xml import CanteenOpenTimespec, TimesXML
|
||||
|
||||
|
||||
def _configured_canteen():
    """Return the demo canteen configuration entry used by these tests."""
    demo_fields = {
        "key": "demo",
        "name": "Demo Canteen",
        "street": "Demo Street",
        "city": "Demo City",
        "id": "1",
        "chash": "demo",
    }
    return Canteen(**demo_fields)
|
||||
|
||||
|
||||
def _canteen_xml():
    """Return a CanteenXML for the demo canteen, closed on every valid day."""
    demo_meta = CanteenMeta(
        name="Demo Canteen",
        address="Demo Street, 12345 Demo City",
        city="Demo City",
        phone="+49 331 123456",
        email="demo@example.test",
        availability="public",
    )
    closed_every_day = {}
    for weekday in TimesXML.VALID_DAYS:
        closed_every_day[weekday] = CanteenOpenTimespec("geschlossen")
    return CanteenXML(demo_meta, TimesXML(closed_every_day))
|
||||
|
||||
|
||||
class DemoParser(BaseOpenMensaParser):
    """Parser double that records how the builder calls it."""

    id = "demo"
    feed = FeedDefinition(source="https://example.test")

    def __init__(self):
        """Start with no fetch and no parse call recorded."""
        self.parsed_raw_data = None
        self.fetched = False

    def fetch(self):
        """Record the call and return a fixed raw-data fixture."""
        self.fetched = True
        fixture = {"source": "fixture"}
        return fixture

    def parse(self, _config, raw_data):
        """Record the raw data received and return one demo canteen."""
        self.parsed_raw_data = raw_data
        canteens = {"demo": _canteen_xml()}
        return canteens
|
||||
|
||||
|
||||
def test_builder_uses_supplied_parser():
    """Builder fetches through the given parser and renders its feed source."""
    demo_parser = DemoParser()
    canteens = {"demo": _configured_canteen()}

    with app.test_request_context():
        builder = Builder(canteens, parser=demo_parser)

    assert demo_parser.fetched
    assert demo_parser.parsed_raw_data == {"source": "fixture"}
    rendered = builder.get_xml("demo")
    assert b"https://example.test" in rendered
|
||||
|
||||
|
||||
def test_builder_accepts_source_data_fixture():
    """Builder parses supplied source data without calling fetch()."""
    demo_parser = DemoParser()
    canteens = {"demo": _configured_canteen()}
    fixture = {"source": "test-fixture"}

    with app.test_request_context():
        Builder(canteens, source_data=fixture, parser=demo_parser)

    assert not demo_parser.fetched
    assert demo_parser.parsed_raw_data == {"source": "test-fixture"}
|
||||
|
||||
|
||||
def test_parser_registry_loads_default_parser():
    """The registry resolves "potsdam" to PotsdamParser and instantiates it."""
    registered_class = get_parser_class("potsdam")
    assert registered_class is PotsdamParser

    instance = create_parser("potsdam")
    assert isinstance(instance, PotsdamParser)
|
||||
|
||||
|
||||
def test_parser_registry_rejects_unknown_parser():
    """Creating a parser for an unregistered id raises KeyError."""
    unknown_id = "unknown"
    with pytest.raises(KeyError, match="Unknown parser"):
        create_parser(unknown_id)
|
||||
@@ -4,8 +4,8 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import pytest
|
||||
from stw_potsdam.config import read_canteen_config
|
||||
from stw_potsdam.views import canteen_xml_feed, app
|
||||
from openmensa_parsers.config import read_canteen_config
|
||||
from openmensa_parsers.views import canteen_xml_feed, app
|
||||
|
||||
# pragma pylint: disable=invalid-name,redefined-outer-name
|
||||
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
from stw_potsdam.swp_webspeiseplan_api import SWPWebspeiseplanAPI
|
||||
|
||||
# pytest fixtures are linked via parameter names of test methods
|
||||
# pragma pylint: disable=unused-import,unused-argument,redefined-outer-name
|
||||
from tests.stub_api import api_offline
|
||||
|
||||
|
||||
def test_api_init_does_not_fetch(api_offline):
|
||||
"""Creating the API client does not perform network requests."""
|
||||
SWPWebspeiseplanAPI()
|
||||
+2
-2
@@ -1,7 +1,7 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
import pytest
|
||||
|
||||
from stw_potsdam import views
|
||||
from openmensa_parsers import views
|
||||
|
||||
# pytest fixtures are linked via parameter names of test methods
|
||||
# pragma pylint: disable=unused-import,redefined-outer-name,unused-argument
|
||||
@@ -21,7 +21,7 @@ def test_health_check(client):
|
||||
assert response.data == b"OK"
|
||||
|
||||
|
||||
def test_index(client):
|
||||
def test_index(client, api_online_one_shot):
|
||||
response = client.get("/").json
|
||||
canteen_url = response.get("griebnitzsee", None)
|
||||
assert canteen_url, "Known canteen in index response"
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
import httpretty
|
||||
import pytest
|
||||
|
||||
from openmensa_parsers.webspeiseplan_api import WebspeiseplanAPI
|
||||
|
||||
# pytest fixtures are linked via parameter names of test methods
|
||||
# pragma pylint: disable=unused-import,unused-argument,redefined-outer-name
|
||||
from tests.stub_api import api_offline
|
||||
|
||||
|
||||
def test_api_init_does_not_fetch(api_offline):
    """Constructing the API client must not trigger any network request."""
    base_url = "https://menus.example.test"
    WebspeiseplanAPI(base_url)
|
||||
|
||||
|
||||
def test_api_init_requires_valid_base_url():
    """A base URL without a scheme is rejected with ValueError."""
    url_without_scheme = "menus.example.test"
    with pytest.raises(ValueError):
        WebspeiseplanAPI(url_without_scheme)
|
||||
|
||||
|
||||
def test_parse_token_uses_configured_base_url():
    """parse_token() requests the page and script from the configured host."""
    base_url = "https://menus.example.test"
    stubbed_responses = (
        (base_url, '<script src="/main.abc123.js"></script>'),
        (f"{base_url}/main.abc123.js", 'PROXY_TOKEN: "0123456789abcdef"'),
    )

    httpretty.enable(allow_net_connect=False)
    try:
        for uri, body in stubbed_responses:
            httpretty.register_uri(httpretty.GET, uri, body=body)

        # The trailing slash is part of the input under test.
        token = WebspeiseplanAPI(f"{base_url}/").parse_token()
    finally:
        httpretty.disable()
        httpretty.reset()

    assert token == "0123456789abcdef"
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
from stw_potsdam.swp_webspeiseplan_parser import SWPWebspeiseplanParser
|
||||
from openmensa_parsers.webspeiseplan_parser import WebspeiseplanParser
|
||||
|
||||
|
||||
def _menu_item(name):
|
||||
@@ -25,7 +25,7 @@ def _menu_item(name):
|
||||
|
||||
|
||||
def _parse_meal(name):
|
||||
parser = SWPWebspeiseplanParser()
|
||||
parser = WebspeiseplanParser()
|
||||
meals = parser.parse_meals(_menu_item(name), {1: {"name": "Salattheke"}})
|
||||
return meals[0]["meal"]
|
||||
|
||||
Reference in New Issue
Block a user