From f53ef27d349a40a8a3e92eb1780c49792e5913a7 Mon Sep 17 00:00:00 2001
From: root <root@s17944416.onlinehome-server.info>
Date: Fri, 3 Jul 2015 00:01:05 +0200
Subject: [PATCH] added the parser itself

---
 panels/mensaPlan/mensaParser.py  | 49 ++++++++++++++++++++++++++++++++
 panels/mensaPlan/mensaPlant.json |  1 -
 2 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100755 panels/mensaPlan/mensaParser.py
 delete mode 100755 panels/mensaPlan/mensaPlant.json

diff --git a/panels/mensaPlan/mensaParser.py b/panels/mensaPlan/mensaParser.py
new file mode 100755
index 0000000..c39f804
--- /dev/null
+++ b/panels/mensaPlan/mensaParser.py
@@ -0,0 +1,49 @@
+"""Scrape the weekly Hauptmensa menu from stwdo.de into mensaPlan.json.
+
+Python 2 only: relies on urllib.urlopen() and the unicode builtin.
+"""
+import io
+import json
+import urllib
+
+from htmldom import htmldom
+
+MENU_URL = "http://www.stwdo.de/gastronomie/speiseplaene/hauptmensa/wochenansicht-hauptmensa/"
+WEEKDAYS = ("montag", "dienstag", "mittwoch", "donnerstag", "freitag")
+
+response = urllib.urlopen(MENU_URL)
+try:
+    dom = htmldom.HtmlDom().createDom(response.read())
+finally:
+    # Release the connection even if reading or parsing raises.
+    response.close()
+
+
+def parseDay(dom, dayName):
+    """Return the date label and the dishes listed for one weekday.
+
+    dom     -- parsed page, as returned by HtmlDom.createDom()
+    dayName -- id of the day's div element, also the '#...' target of its link
+
+    Returns {'date': ..., 'gerichte': [...]}, each dish being a dict with
+    the keys 'gericht', 'art' and 'kategorie'.
+    """
+    # Keep the last 10 characters of the link text (presumably the date).
+    date = dom.find('a[href="#' + dayName + '"]').first().text()[-10:]
+    rows = dom.find("div#" + dayName).first().find("tbody").find("tr")
+    gerichte = []
+    for index in range(rows.length()):
+        # Look the cells up once per row instead of once per column.
+        cells = rows[index].find("td")
+        gerichte.append({
+            'gericht': cells[0].text(),
+            'art': cells[1].text(),
+            'kategorie': cells[2].text(),
+        })
+    return {'date': date, 'gerichte': gerichte}
+
+
+result = dict((day, parseDay(dom, day)) for day in WEEKDAYS)
+
+with io.open('mensaPlan.json', 'w', encoding='utf-8') as f:
+    f.write(unicode(json.dumps(result))) #fixUnicodeProblem
diff --git a/panels/mensaPlan/mensaPlant.json b/panels/mensaPlan/mensaPlant.json
deleted file mode 100755
index 7b12a1b..0000000
--- a/panels/mensaPlan/mensaPlant.json
+++ /dev/null
@@ -1 +0,0 @@
-{"timestamp": "BLA"}
\ No newline at end of file
-- 
GitLab