modular parser framework
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, date
|
||||
from openmensa_parsers.xml_types.canteen_xml import CanteenMeta, CanteenXML
|
||||
from openmensa_parsers.xml_types.times_xml import CanteenOpenTimespec, TimesXML
|
||||
from openmensa_parsers.xml_types.meal_xml import MealXML
|
||||
|
||||
|
||||
# Matches a euro amount embedded in free text, e.g. "3,50 €" or "4€".
# Group 1 captures only the number: one or more digits, optionally followed
# by "," or "." and one or two decimal digits. Whitespace before the "€"
# sign is allowed but not captured.
EURO_PRICE_PATTERN = re.compile(r"(\d+(?:[,.]\d{1,2})?)\s*€")
|
||||
|
||||
|
||||
class WebspeiseplanParser:
|
||||
"""Parser for Webspeiseplan menu and outlet data."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Init WebspeiseplanParser object."""
|
||||
logging.basicConfig()
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
def parse_canteen_meta_times(self, outlet: dict):
|
||||
"""Parse the outlet data from outlet.
|
||||
|
||||
Args:
|
||||
outlet (dict): [description]
|
||||
"""
|
||||
self.logger.debug("parse_canteen_meta_times(): %s", outlet["name"])
|
||||
addr_info = outlet["addressInfo"]
|
||||
meta = {
|
||||
"name": outlet["name"],
|
||||
"address": f'{addr_info["street"]}, {addr_info["postalCode"]} '
|
||||
+ f'{addr_info["city"]}',
|
||||
"city": addr_info["city"],
|
||||
"phone": outlet["contactInfo"][0]["phone"],
|
||||
"email": outlet["contactInfo"][0]["email"],
|
||||
"availability": outlet["isPublic"]
|
||||
}
|
||||
|
||||
if outlet["positionInfo"]:
|
||||
meta["location"] = (
|
||||
outlet["positionInfo"]["longitude"],
|
||||
outlet["positionInfo"]["latitude"],
|
||||
)
|
||||
canteen_meta = CanteenMeta(**meta)
|
||||
weekday_dict = {
|
||||
# this approach only lists the first (valid) opening time,
|
||||
# since OpenMensa does not support multiple time specs
|
||||
# (yet).
|
||||
"monday": outlet['moZeit1'] or outlet['moZeit2'],
|
||||
"tuesday": outlet['diZeit1'] or outlet['diZeit2'],
|
||||
"wednesday": outlet['miZeit1'] or outlet['miZeit2'],
|
||||
"thursday": outlet['doZeit1'] or outlet['doZeit2'],
|
||||
"friday": outlet['frZeit1'] or outlet['frZeit2'],
|
||||
"saturday": outlet['saZeit1'] or outlet['saZeit2'],
|
||||
"sunday": outlet['soZeit1'] or outlet['soZeit2'],
|
||||
}
|
||||
|
||||
canteen_times = TimesXML({
|
||||
k: CanteenOpenTimespec(v) for k, v in weekday_dict.items()
|
||||
})
|
||||
canteen = CanteenXML(canteen_meta, canteen_times)
|
||||
return canteen
|
||||
|
||||
def _parse_price(self, value):
|
||||
if value in (None, "", {}):
|
||||
return None
|
||||
return float(str(value).replace(",", "."))
|
||||
|
||||
def _parse_embedded_prices(
|
||||
self, name: str, price: dict[str, float | None]
|
||||
) -> tuple[str, dict[str, float | None]]:
|
||||
if any(price.values()):
|
||||
return name, price
|
||||
|
||||
matches = EURO_PRICE_PATTERN.findall(name)
|
||||
if len(matches) < 2:
|
||||
return name, price
|
||||
|
||||
parsed = [self._parse_price(match) for match in matches]
|
||||
if len(parsed) >= 3:
|
||||
price = {
|
||||
"student": parsed[0],
|
||||
"employee": parsed[1],
|
||||
"other": parsed[2],
|
||||
}
|
||||
elif "Stud" in name and ("Gäste" in name or "Gaeste" in name):
|
||||
price = {
|
||||
"student": parsed[0],
|
||||
"employee": price["employee"],
|
||||
"other": parsed[1],
|
||||
}
|
||||
else:
|
||||
return name, price
|
||||
|
||||
name = EURO_PRICE_PATTERN.sub("", name)
|
||||
name = re.sub(r"\s*/\s*", " ", name)
|
||||
name = re.sub(r"\s+", " ", name).strip()
|
||||
return name, price
|
||||
|
||||
def parse_meals(
|
||||
self, menu_data, meal_categories
|
||||
) -> list[tuple[date, str, MealXML]]:
|
||||
"""Parse the menu and adds it to the builder."""
|
||||
meals = []
|
||||
for menu in menu_data:
|
||||
for meal_data in menu["speiseplanGerichtData"]:
|
||||
info = meal_data["speiseplanAdvancedGericht"]
|
||||
additional_info = meal_data["zusatzinformationen"]
|
||||
price = {
|
||||
"student": self._parse_price(
|
||||
additional_info["mitarbeiterpreisDecimal2"]
|
||||
),
|
||||
"employee": self._parse_price(
|
||||
additional_info["price3Decimal2"]
|
||||
),
|
||||
"other": self._parse_price(
|
||||
additional_info["gaestepreisDecimal2"]
|
||||
),
|
||||
}
|
||||
name, price = self._parse_embedded_prices(
|
||||
info["gerichtname"], price
|
||||
)
|
||||
meal = MealXML(name=name, price=price)
|
||||
day = datetime.fromisoformat(info["datum"]).date()
|
||||
category = meal_categories[info["gerichtkategorieID"]]["name"]
|
||||
meals.append(
|
||||
{"day": day, "category": category, "meal": meal}
|
||||
)
|
||||
self.logger.debug("parse_meals(): %s meals parsed", len(meals))
|
||||
return meals
|
||||
Reference in New Issue
Block a user