Skip to content
Snippets Groups Projects
Commit f53ef27d authored by root's avatar root
Browse files

added the parser itself

parent e8c745d8
No related tags found
No related merge requests found
import urllib,json,io
from htmldom import htmldom
# Download the week-view menu page of the Hauptmensa and parse it into an
# htmldom tree that the code below queries as ``dom``.
response = urllib.urlopen("http://www.stwdo.de/gastronomie/speiseplaene/hauptmensa/wochenansicht-hauptmensa/")
try:
    page_source = response.read()
finally:
    # Release the connection even when reading the body fails.
    response.close()
dom = htmldom.HtmlDom().createDom(page_source)
def parseDay(dom, dayName):
    """Collect the dishes listed for one weekday of the parsed menu page.

    Args:
        dom: Parsed page; must offer the ``find()`` / ``first()`` / ``text()``
            / ``length()`` / indexing API used below (an htmldom tree in
            this script).
        dayName: Id of the day's ``div`` and fragment of the link pointing
            at it, e.g. ``"montag"``.

    Returns:
        A dict ``{'date': <str>, 'gerichte': [<dish dict>, ...]}`` where each
        dish dict has the keys ``'gericht'``, ``'art'`` and ``'kategorie'``,
        filled from the first three cells of a table row.
    """
    # The date is the last ten characters of the text of the link that points
    # at the day's section (presumably "dd.mm.yyyy" -- TODO confirm on page).
    date = dom.find('a[href="#' + dayName + '"]').first().text()[-10:]
    rows = dom.find("div#" + dayName).first().find("tbody").find("tr")

    gerichte = []
    # Index-based access sticks to the node-list operations this script
    # already relies on (``length()`` and ``[]``).
    for index in range(rows.length()):
        # Look the cells up once per row instead of once per column.
        cells = rows[index].find("td")
        if cells.length() < 3:
            # Not a dish row (too few columns); indexing it would fail.
            continue
        gerichte.append({
            'gericht': cells[0].text(),
            "art": cells[1].text(),
            "kategorie": cells[2].text(),
        })
    return {'date': date, "gerichte": gerichte}
# Parse every weekday section of the page; each key is the weekday id that
# is handed to parseDay, in Monday-to-Friday order.
result = {
    weekday: parseDay(dom, weekday)
    for weekday in ("montag", "dienstag", "mittwoch", "donnerstag", "freitag")
}
# Write the collected plan to mensaPlan.json. The file is opened in text mode
# via io.open, so the JSON string is converted with unicode() before writing.
with io.open('mensaPlan.json', 'w', encoding='utf-8') as plan_file:
    serialized = json.dumps(result)
    plan_file.write(unicode(serialized))
{"timestamp": "BLA"}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment