modular parser framework

This commit is contained in:
Hadrian Burkhardt
2026-05-21 08:21:49 +00:00
parent 1223791074
commit cf5348a0c8
35 changed files with 452 additions and 166 deletions
+5
View File
@@ -0,0 +1,5 @@
"""Parser/provider implementations for OpenMensa feed sources."""
from openmensa_parsers.parsers.registry import create_parser, get_parser_class
# Public API of this package: both names are re-exported from the
# ``openmensa_parsers.parsers.registry`` module imported above.
__all__ = ["create_parser", "get_parser_class"]
+61
View File
@@ -0,0 +1,61 @@
"""Shared parser contract for city-specific OpenMensa parsers."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Protocol
from openmensa_parsers.config import Canteen
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
from openmensa_parsers.xml_types.feed_xml import FeedXML, ScheduleXML
@dataclass(frozen=True)
class FeedDefinition:
    """Default feed metadata used when publishing a parser result.

    ``BaseOpenMensaParser.create_feed`` forwards ``name``, ``priority`` and
    ``source`` to ``FeedXML`` and unpacks ``schedule`` as keyword arguments
    for ``ScheduleXML``.

    Instances are immutable and hashable. ``schedule`` takes part in equality
    comparisons but not in the hash, because a ``dict`` cannot be hashed.
    """

    source: str
    name: str = "full"
    priority: int = 0
    # ``frozen=True`` together with the default ``eq=True`` makes the
    # dataclass generate ``__hash__`` from its fields. Including the dict
    # would make ``hash(instance)`` raise ``TypeError``, so it is excluded
    # from hashing only; equal instances still hash equally.
    schedule: dict[str, Any] = field(
        default_factory=lambda: {"hour": "8-14", "retry": "30 1"},
        hash=False,
    )
class OpenMensaParser(Protocol):
    """Structural interface every city/source parser has to satisfy.

    ``id`` is the key under which the parser registry stores the class and
    ``feed`` holds the default feed metadata of the parser.
    """

    id: str
    feed: FeedDefinition

    def fetch(self) -> Any:
        """Download or load the raw, source-specific data."""

    def parse(
        self, config: dict[str, Canteen], raw_data: Any
    ) -> dict[str, CanteenXML]:
        """Turn raw source data into OpenMensa canteen structures.

        ``config`` maps canteen keys to their configuration and ``raw_data``
        is whatever ``fetch`` produced.
        """

    def create_feed(self, canteen: Canteen, url: str) -> FeedXML:
        """Build the OpenMensa feed metadata for one configured canteen."""
class BaseOpenMensaParser:  # pylint: disable=too-few-public-methods
    """Base class for parsers that emit the standard OpenMensa feed block.

    Subclasses are expected to assign ``feed``; this class only declares it.
    """

    id = "base"
    feed: FeedDefinition

    def create_feed(self, _canteen: Canteen, url: str) -> FeedXML:
        """Return the standard feed tag for a configured canteen.

        The canteen argument is not used here; all values come from
        ``self.feed`` and the given ``url``.
        """
        definition = self.feed
        # ``**`` unpacking builds fresh keyword arguments, so the schedule
        # mapping stored on the feed definition is never modified.
        feed_schedule = ScheduleXML(**definition.schedule)
        return FeedXML(
            name=definition.name,
            priority=definition.priority,
            source=definition.source,
            url=url,
            schedule=feed_schedule,
        )
+61
View File
@@ -0,0 +1,61 @@
"""Potsdam parser/provider implementation."""
from __future__ import annotations
import logging
from openmensa_parsers.config import Canteen
from openmensa_parsers.parsers.base import BaseOpenMensaParser, FeedDefinition
from openmensa_parsers.webspeiseplan_api import (
WebspeiseplanAPI,
WebspeiseplanData,
)
from openmensa_parsers.webspeiseplan_parser import WebspeiseplanParser
from openmensa_parsers.xml_types.canteen_xml import CanteenXML
class PotsdamParser(BaseOpenMensaParser):
    """Studentenwerk Potsdam parser backed by the Webspeiseplan source."""

    id = "potsdam"
    BASE_URL = "https://swp.webspeiseplan.de"
    feed = FeedDefinition(source=BASE_URL)

    def __init__(
        self,
        api: WebspeiseplanAPI | None = None,
        parser: WebspeiseplanParser | None = None,
    ) -> None:
        """Set up the fetch and parse helpers.

        Either helper may be supplied by the caller; when omitted, a default
        instance is created (the API client is pointed at ``BASE_URL``).
        """
        self.api = api if api is not None else WebspeiseplanAPI(self.BASE_URL)
        self.parser = parser if parser is not None else WebspeiseplanParser()
        self.logger = logging.getLogger(__name__)

    def fetch(self) -> WebspeiseplanData:
        """Return everything the API client delivers via ``fetch_all``."""
        return self.api.fetch_all()

    def parse(
        self,
        config: dict[str, Canteen],
        raw_data: WebspeiseplanData,
    ) -> dict[str, CanteenXML]:
        """Build one canteen structure per configured canteen.

        Canteens whose configured name is missing from ``raw_data.outlets``
        are logged with a warning and left out of the result.
        """
        result: dict[str, CanteenXML] = {}
        for key, canteen_config in config.items():
            name = canteen_config.name
            if name not in raw_data.outlets:
                self.logger.warning("%s not found in keys", name)
                continue
            result[key] = self._build_canteen(name, raw_data)
        return result

    def _build_canteen(
        self,
        source_name: str,
        raw_data: WebspeiseplanData,
    ) -> CanteenXML:
        """Assemble the canteen for one outlet name present in the raw data."""
        # Work on a copy so the raw outlet record is not modified.
        outlet_record = dict(raw_data.outlets[source_name])
        outlet_menus = raw_data.menus[source_name]
        outlet_categories = raw_data.meal_categories[source_name]
        outlet_location = raw_data.locations[source_name]
        outlet_record["isPublic"] = outlet_location["isPublic"]

        canteen = self.parser.parse_canteen_meta_times(outlet_record)
        meals = self.parser.parse_meals(outlet_menus, outlet_categories)
        for meal_kwargs in meals:
            canteen.add_meal(**meal_kwargs)
        return canteen
+25
View File
@@ -0,0 +1,25 @@
"""Registry for city/source parser implementations."""
from __future__ import annotations
from openmensa_parsers.parsers.base import OpenMensaParser
from openmensa_parsers.parsers.potsdam import PotsdamParser
# Maps each parser's ``id`` to its class. ``get_parser_class`` reads this
# table, so a parser becomes available by adding an entry here.
PARSER_CLASSES: dict[str, type[OpenMensaParser]] = {
PotsdamParser.id: PotsdamParser,
}
def get_parser_class(parser_id: str) -> type[OpenMensaParser]:
    """Return the parser class registered under ``parser_id``.

    Raises:
        KeyError: If no parser is registered under ``parser_id``. The
            message lists the registered ids in sorted order, and the
            original lookup error is attached as the cause.
    """
    try:
        parser_class = PARSER_CLASSES[parser_id]
    except KeyError as lookup_error:
        known_ids = ", ".join(sorted(PARSER_CLASSES))
        message = (
            f"Unknown parser {parser_id!r}; configured parsers: {known_ids}"
        )
        raise KeyError(message) from lookup_error
    return parser_class
def create_parser(parser_id: str) -> OpenMensaParser:
    """Instantiate the parser registered under ``parser_id``.

    The class is looked up with ``get_parser_class`` and called without
    arguments; a ``KeyError`` from the lookup propagates to the caller.
    """
    parser_class = get_parser_class(parser_id)
    return parser_class()