Skip to content
Snippets Groups Projects
Commit b70f6654 authored by Rico van Endern's avatar Rico van Endern
Browse files

+config -oldParser +JsonStuff +minimalSQL

parent da7a3f21
No related branches found
No related tags found
No related merge requests found
<?php
/*
 * Central configuration for the canteen-menu scraper.
 * The main script requires this file and hands the values below to the
 * parser, the JSON writer and the SQL writer.
 */
// Universal
// Page that is downloaded and parsed (passed to file_get_html by the main script).
$MENSA_URL = "http://www.stwdo.de/gastronomie/speiseplaene/hauptmensa/wochenansicht-hauptmensa/";
// Day identifiers; the main script calls parsDay() once per entry, in this order.
$PARSE_DAYLIST = array (
"montag",
"dienstag",
"mittwoch",
"donnerstag",
"freitag"
);
// Output switches: the main script only calls toSql() / toJson() when the matching flag is true.
$DO_SQL = true;
$DO_JSON = true;
// SQL
// Connection parameters handed to toSql(), which opens a mysqli connection with them.
// NOTE(review): the password is committed in plain text; consider loading it
// from the environment or an untracked file and rotating this credential.
$SQL_SERVER = "ovanier.de";
$SQL_USER = "info_writer";
$SQL_PW = "XAHQTZeGbqsnt8K6";
$SQL_DB = "infoscreen";
// JSON
// Each flag selects one group of members that GERICHT::toJson() writes per dish:
// ORIGINAL -> originalText, SHORT -> shortText,
// ART -> rind/schwein/gefluegel/fisch/vegetarisch/vegan, KIND -> kinderteller,
// IMG -> image, STOFFE -> zusatzStoffe, DATE -> date.
$JSON_INC_ORIGINAL = false;
$JSON_INC_SHORT = true;
$JSON_INC_ART = true;
$JSON_INC_KIND = false;
$JSON_INC_IMG = true;
$JSON_INC_STOFFE = false;
$JSON_INC_DATE = true;
?>
\ No newline at end of file
<?php <?php
class GERICHT { class GERICHT {
private $shortText = "";
private $originalText = ""; private $originalText = "";
private $shortText = "";
private $rind = false; private $rind = false;
private $schwein = false; private $schwein = false;
private $gefluegel = false; private $gefluegel = false;
...@@ -47,12 +47,12 @@ class GERICHT { ...@@ -47,12 +47,12 @@ class GERICHT {
0 0
); );
private $date = ""; private $date = "";
public function getShortText() {
return $this->shortText;
}
public function getOriginalText() { public function getOriginalText() {
return $this->originalText; return $this->originalText;
} }
public function getShortText() {
return $this->shortText;
}
public function isRind() { public function isRind() {
return $this->rind; return $this->rind;
} }
...@@ -152,12 +152,12 @@ class GERICHT { ...@@ -152,12 +152,12 @@ class GERICHT {
public function getDate() { public function getDate() {
return $this->date; return $this->date;
} }
public function setShortText($shortText) {
$this->shortText = $shortText;
}
public function setOriginalText($originalText) { public function setOriginalText($originalText) {
$this->originalText = $originalText; $this->originalText = $originalText;
} }
public function setShortText($shortText) {
$this->shortText = $shortText;
}
public function setRind($rind) { public function setRind($rind) {
$this->rind = $rind; $this->rind = $rind;
} }
...@@ -269,8 +269,59 @@ class GERICHT { ...@@ -269,8 +269,59 @@ class GERICHT {
public function toggleZusatzstoff($in) { public function toggleZusatzstoff($in) {
$this->zusatzStoffe [$in] = ! $this->zusatzStoffe [$in]; $this->zusatzStoffe [$in] = ! $this->zusatzStoffe [$in];
} }
/**
 * Serialise this dish as one JSON object.
 *
 * Every $inc_* flag switches one group of members on or off; the member
 * order is originalText, shortText, the six kind-of-food flags,
 * kinderteller, image, zusatzStoffe, date.
 *
 * The object is produced with json_encode() instead of manual string
 * concatenation. The concatenated version wrote '",' after numeric members,
 * only closed a string member when another group followed, printed boolean
 * false as an empty string and did not escape quotes inside the texts, so
 * its output was not valid JSON for most flag combinations.
 *
 * Flags are written as 0/1, which is what the concatenation produced for
 * true values and for the untouched integer entries of zusatzStoffe.
 *
 * @param bool $inc_original include "originalText"
 * @param bool $inc_short    include "shortText"
 * @param bool $inc_art      include "rind", "schwein", "gefluegel", "fisch", "vegetarisch", "vegan"
 * @param bool $inc_kind     include "kinderteller"
 * @param bool $inc_img      include "image"
 * @param bool $inc_stoffe   include "zusatzStoffe" (object with keys "0" to "33")
 * @param bool $inc_date     include "date"
 * @return string JSON object text; "{}" when no group is selected or encoding fails
 */
public function toJson($inc_original = true, $inc_short = true, $inc_art = true, $inc_kind = true, $inc_img = true, $inc_stoffe = true, $inc_date = true) {
	$data = array ();
	if ($inc_original) {
		$data ['originalText'] = ( string ) $this->originalText;
	}
	if ($inc_short) {
		$data ['shortText'] = ( string ) $this->shortText;
	}
	if ($inc_art) {
		$data ['rind'] = $this->rind ? 1 : 0;
		$data ['schwein'] = $this->schwein ? 1 : 0;
		$data ['gefluegel'] = $this->gefluegel ? 1 : 0;
		$data ['fisch'] = $this->fisch ? 1 : 0;
		$data ['vegetarisch'] = $this->vegetarisch ? 1 : 0;
		$data ['vegan'] = $this->vegan ? 1 : 0;
	}
	if ($inc_kind) {
		$data ['kinderteller'] = $this->kinderteller ? 1 : 0;
	}
	if ($inc_img) {
		$data ['image'] = ( string ) $this->image;
	}
	if ($inc_stoffe) {
		// Same fixed index range (0..33) the concatenated version spelled out.
		$stoffe = array ();
		for($i = 0; $i <= 33; $i ++) {
			$stoffe [$i] = empty ( $this->zusatzStoffe [$i] ) ? 0 : 1;
		}
		$data ['zusatzStoffe'] = $stoffe;
	}
	if ($inc_date) {
		$data ['date'] = ( string ) $this->date;
	}
	// JSON_FORCE_OBJECT keeps both the outer value and zusatzStoffe as
	// objects, even when $data is empty or has only sequential keys.
	$json = json_encode ( $data, JSON_FORCE_OBJECT );
	if ($json === false) {
		// json_encode() rejects strings that are not valid UTF-8; keep the
		// surrounding document well-formed instead of emitting "".
		trigger_error ( 'GERICHT::toJson: json_encode failed with error code ' . json_last_error (), E_USER_WARNING );
		return '{}';
	}
	return $json;
}
public function __toString() { public function __toString() {
for($i = 0; $i < count ( $this->zusatzStoffe ); $i ++) { for($i = 0; $i < count ( $this->zusatzStoffe ); $i ++) {
...@@ -279,7 +330,7 @@ class GERICHT { ...@@ -279,7 +330,7 @@ class GERICHT {
} }
} }
return "Short Text: " . $this->shortText . ",\n" . "Original Text: " . $this->originalText . ",\n" . "Rind: " . booleanToString ( $this->rind ) . ",\n" . "Schwein: " . booleanToString ( $this->schwein ) . ",\n" . "Fisch: " . booleanToString ( $this->fisch ) . ",\n" . "Vegetarisch: " . booleanToString ( $this->vegetarisch ) . ",\n" . "Vegan: " . booleanToString ( $this->vegan ) . ",\n" . "Kinderteller: " . booleanToString ( $this->kinderteller ) . ",\n" . "Image: " . $this->image . ",\n" . "Zusatzstoffe: " . $stoffe . ",\n"; return "Original Text: " . $this->originalText . ",\n" . "Short Text: " . $this->shortText . ",\n" . "Rind: " . booleanToString ( $this->rind ) . ",\n" . "Schwein: " . booleanToString ( $this->schwein ) . ",\n" . "Fisch: " . booleanToString ( $this->fisch ) . ",\n" . "Vegetarisch: " . booleanToString ( $this->vegetarisch ) . ",\n" . "Vegan: " . booleanToString ( $this->vegan ) . ",\n" . "Kinderteller: " . booleanToString ( $this->kinderteller ) . ",\n" . "Image: " . $this->image . ",\n" . "Zusatzstoffe: " . $stoffe . ",\n";
} }
} }
?> ?>
\ No newline at end of file
<?php <?php
$mysqli = new mysqli("ovanier.de", "info_reader", "7ELZqMyUwU8MaJba", "infoscreen"); $mysqli = new mysqli ( "ovanier.de", "info_reader", "7ELZqMyUwU8MaJba", "infoscreen" );
if ($mysqli->connect_errno) { if ($mysqli->connect_errno) {
printf("Connect failed: %s\n", $mysqli->connect_error); printf ( "Connect failed: %s\n", $mysqli->connect_error );
exit(); exit ();
} }
if ($result = $mysqli->query("SELECT * FROM City")) { if ($result = $mysqli->query ( "SELECT * FROM City" )) {
$result->close(); $result->close ();
} }
$mysqli->close(); $mysqli->close ();
?> ?>
\ No newline at end of file
...@@ -6,7 +6,7 @@ function booleanToString($bool) { ...@@ -6,7 +6,7 @@ function booleanToString($bool) {
return "False"; return "False";
} }
} }
function toJson($tage) { function toJson($tage, $inc_original, $inc_short, $inc_art, $inc_kind, $inc_stoffe, $inc_img, $inc_date) {
$kommata = false; $kommata = false;
$json = '{'; $json = '{';
foreach ( $tage as $tag ) { foreach ( $tage as $tag ) {
...@@ -15,10 +15,29 @@ function toJson($tage) { ...@@ -15,10 +15,29 @@ function toJson($tage) {
} else { } else {
$kommata = true; $kommata = true;
} }
$json .= $tag->toJson (); $json .= $tag->toJson ( $inc_original, $inc_short, $inc_art, $inc_kind, $inc_img, $inc_stoffe, $inc_date );
} }
$json .= "}"; $json .= "}";
file_put_contents("mensa.json", $json); file_put_contents ( "mensa.json", $json );
}
/**
 * Write every dish of the given days into the `gerichte` table.
 *
 * Rows are inserted with INSERT ... ON DUPLICATE KEY UPDATE; on a key
 * collision only originalText is refreshed (unchanged from the previous
 * statement text).
 *
 * Changes against the previous version:
 *  - `date` is bound as a string ("s"); the table declares it varchar(10),
 *    and binding it as an integer truncated the value.
 *  - parameters are bound once to real variables, because
 *    mysqli_stmt::bind_param() takes its arguments by reference and cannot
 *    be handed function-call results.
 *  - the stray `echo` of the bind result is gone, and prepare/execute
 *    failures are reported instead of being ignored.
 *
 * @param array  $tage   day objects exposing a `gerichte` list of GERICHT objects
 * @param string $server MySQL host
 * @param string $user   MySQL user
 * @param string $pw     MySQL password
 * @param string $db     database name
 * @return void
 */
function toSql($tage, $server, $user, $pw, $db) {
	$mysqli = new mysqli ( $server, $user, $pw, $db );
	if ($mysqli->connect_errno) {
		printf ( "Connect failed: %s\n", $mysqli->connect_error );
		exit ();
	}
	$stmt = $mysqli->prepare ( "INSERT INTO gerichte (originalText,shortText,rind,schwein,gefluegel,fisch,vegetarisch,vegan,kinderteller,image,date) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE originalText = ?" );
	if ($stmt === false) {
		printf ( "Prepare failed: %s\n", $mysqli->error );
		$mysqli->close ();
		exit ();
	}
	// Bound by reference once; the loop below only reassigns the variables.
	$originalText = $shortText = $image = $date = $updateText = "";
	$rind = $schwein = $gefluegel = $fisch = $vegetarisch = $vegan = $kinderteller = 0;
	$stmt->bind_param ( "ssiiiiiiisss", $originalText, $shortText, $rind, $schwein, $gefluegel, $fisch, $vegetarisch, $vegan, $kinderteller, $image, $date, $updateText );
	foreach ( $tage as $tag ) {
		foreach ( $tag->gerichte as $gericht ) {
			$originalText = $gericht->getOriginalText ();
			$shortText = $gericht->getShortText ();
			$rind = ( int ) $gericht->isRind ();
			$schwein = ( int ) $gericht->isSchwein ();
			$gefluegel = ( int ) $gericht->isGefluegel ();
			$fisch = ( int ) $gericht->isFisch ();
			$vegetarisch = ( int ) $gericht->isVegetarisch ();
			$vegan = ( int ) $gericht->isVegan ();
			$kinderteller = ( int ) $gericht->isKinderteller ();
			$image = $gericht->getImage ();
			$date = $gericht->getDate ();
			$updateText = $originalText;
			if (! $stmt->execute ()) {
				// Report and continue so one bad row does not drop the rest.
				printf ( "Insert failed: %s\n", $stmt->error );
			}
		}
	}
	$stmt->close ();
	$mysqli->close ();
}
function parsDay($html, $day) { function parsDay($html, $day) {
$tag = new TAG ( $day, substr ( $html->find ( 'a[href="#' . $day . '"]', 0 )->innertext, - 10, 10 ) ); $tag = new TAG ( $day, substr ( $html->find ( 'a[href="#' . $day . '"]', 0 )->innertext, - 10, 10 ) );
......
This diff is collapsed.
<?php <?php
include 'simple_html_dom.php'; require 'config.php';
include 'helper.php'; require 'simple_html_dom.php';
include 'gericht.php'; require 'helper.php';
include 'tag.php'; require 'gericht.php';
require 'tag.php';
// Parse // Parse
$html = file_get_html ( 'http://www.stwdo.de/gastronomie/speiseplaene/hauptmensa/wochenansicht-hauptmensa/' ); $html = file_get_html ( $MENSA_URL );
$montag = parsDay ( $html, "montag" ); $tage = array ();
$dienstag = parsDay ( $html, "dienstag" ); foreach ( $PARSE_DAYLIST as $DAYNAME ) {
$mittwoch = parsDay ( $html, "mittwoch" ); array_push ( $tage, parsDay ( $html, $DAYNAME ) );
$donnerstag = parsDay ( $html, "donnerstag" );
$freitag = parsDay ( $html, "freitag" );
$tage = array (
$montag,
$dienstag,
$mittwoch,
$donnerstag,
$freitag
);
// SAVE
$mysqli = new mysqli ( "ovanier.de", "info_writer", "XAHQTZeGbqsnt8K6", "infoscreen" );
if ($mysqli->connect_errno) {
printf ( "Connect failed: %s\n", $mysqli->connect_error );
exit ();
} }
$stmt = mysqli_prepare ( $mysqli, "INSERT INTO gerichte (originalText,shortText,rind,schwein,gefluegel,fisch,vegetarisch,vegan,kinderteller,image,date) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" ); // CREATE JSON
foreach ( $tage as $tag ) { if ($DO_JSON) {
foreach ( $tag->gerichte as $gericht ) { toJson ( $tage, $JSON_INC_ORIGINAL, $JSON_INC_SHORT, $JSON_INC_ART, $JSON_INC_KIND, $JSON_INC_STOFFE, $JSON_INC_IMG, $JSON_INC_DATE );
mysqli_stmt_bind_param ( $stmt, "ssiiiiiiisi", $gericht->getOriginalText (), $gericht->getShortText (), $gericht->isRind (), $gericht->isSchwein (), $gericht->isGefluegel (), $gericht->isFisch (), $gericht->isVegetarisch (), $gericht->isVegan (), $gericht->isKinderteller (), $gericht->getImage (), $gericht->getDate () );
$stmt->execute ();
}
} }
$stmt->close ();
$mysqli->close (); // SAVE
if ($DO_SQL) {
// CREATE JSON toSql ( $tage, $SQL_SERVER, $SQL_USER, $SQL_PW, $SQL_DB );
toJson ( $tage ); }
?> ?>
\ No newline at end of file
import urllib,json,io,re
from htmldom import htmldom
# Download the weekly menu page and parse it into an htmldom tree that
# parseDay() queries below.
# NOTE: urllib.urlopen is the Python 2 API; this script also relies on
# `unicode` further down, so it is Python 2 only as written.
response = urllib.urlopen("http://www.stwdo.de/gastronomie/speiseplaene/hauptmensa/wochenansicht-hauptmensa/")
dom = htmldom.HtmlDom()
dom = dom.createDom(response.read())
# The body has been read completely, so the connection can be released.
response.close()
def parseDay( dom, dayName ):
    """Extract the dishes of one weekday from the parsed menu page.

    dom     -- parsed page; must support the find()/first()/text()/length()
               calls used below (an htmldom tree in this script).
    dayName -- id of the day's <div> and anchor target, e.g. "montag".

    Returns {'date': <last 10 characters of the day's link text>,
             'gerichte': [{'gericht': ..., 'art': ..., 'kategorie': ...}, ...]}.
    """
    gerichte = []
    # The day's tab link ends with the date (presumably dd.mm.yyyy -- confirm
    # against the live page); keep only those last 10 characters.
    date = dom.find('a[href="#'+dayName+'"]').first().text()[-10:]
    rows = dom.find("div#"+dayName).first().find("tbody").find("tr")
    for idx in range(rows.length()):
        tds = rows[idx].find("td")

        gericht = tds[0].text()
        # Strip additive markers such as "(1,2,9)".
        gericht = re.sub(r'\([\d+,]+\)', '', gericht)
        # Shorten the wording.
        gericht = re.sub(r' dazu \d Beilagen nach Wahl', '', gericht)
        gericht = re.sub(r',\sdazu\s', ' + ', gericht)
        gericht = re.sub(r'\sund\s', ' & ', gericht)
        # Cosmetics: normalise spacing around commas and trim the end.
        gericht = re.sub(r'\s,', ',', gericht)
        # Lookahead instead of r',\S': the old pattern also replaced the
        # character following the comma ("a,b" became "a, ").
        gericht = re.sub(r',(?=\S)', ', ', gericht)
        gericht = re.sub(r'\s$', '', gericht)
        gericht = re.sub(r',$', '', gericht)

        art = tds[1].text()
        art = re.sub(r',A', '', art)
        art = re.sub(r',K', '', art)

        # Category icon is hard-coded; reading it from the third cell is
        # disabled in the original.
        kategorie = "icon-aktionsteller.png"
        #kategorie = tds[2].find("img").first().attr( "src" )
        #kategorie = re.sub(r'fileadmin/images/speiseplaene/menuekategorie/', '', kategorie)
        gerichte.append({'gericht': gericht,"art": art,"kategorie":kategorie})
    jday = {'date': date,"gerichte":gerichte}
    return jday
# Parse each weekday of the downloaded page, keyed by its day name.
result = dict(
    (tagName, parseDay(dom, tagName))
    for tagName in ("montag", "dienstag", "mittwoch", "donnerstag", "freitag")
)

# Dump the whole week as JSON text into mensaPlan.json (UTF-8).
with io.open('mensaPlan.json', 'w', encoding='utf-8') as f:
    f.write(unicode(json.dumps(result)))
#multimensa
#style
\ No newline at end of file
-- Dishes scraped from the canteen menu; filled by the PHP writer through
-- INSERT ... ON DUPLICATE KEY UPDATE.
--
-- Changes against the previous definition:
--   * `id` is AUTO_INCREMENT: the writer's INSERT never supplies it, and a
--     NOT NULL column without a default is rejected in strict SQL mode.
--   * `date` keeps its role as PRIMARY KEY, but the extra UNIQUE KEY on the
--     same column is dropped because it only created a second, redundant index.
--   * Constraints are declared at table level with an explicit index name.
--
-- NOTE(review): with `date` as the key only one dish per date can be stored;
-- confirm whether the key should include the dish text as well.
CREATE TABLE IF NOT EXISTS `gerichte` (
  `id` int NOT NULL AUTO_INCREMENT,
  `originalText` text NOT NULL,
  `shortText` text NOT NULL,
  -- kind-of-food flags, 0 = no, 1 = yes
  `rind` tinyint(1) NOT NULL DEFAULT '0',
  `schwein` tinyint(1) NOT NULL DEFAULT '0',
  `gefluegel` tinyint(1) NOT NULL DEFAULT '0',
  `fisch` tinyint(1) NOT NULL DEFAULT '0',
  `vegetarisch` tinyint(1) NOT NULL DEFAULT '0',
  `vegan` tinyint(1) NOT NULL DEFAULT '0',
  `kinderteller` tinyint(1) NOT NULL DEFAULT '0',
  `image` text,
  -- stored as text exactly as scraped (10 characters)
  `date` varchar(10) NOT NULL,
  PRIMARY KEY (`date`),
  -- an AUTO_INCREMENT column has to be indexed
  UNIQUE KEY `gerichte_id_uq` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
\ No newline at end of file
...@@ -31,7 +31,7 @@ class TAG { ...@@ -31,7 +31,7 @@ class TAG {
breakM; breakM;
} }
} }
public function toJson() { public function toJson($inc_original = true, $inc_short = true, $inc_art = true, $inc_kind = true, $inc_img = true, $inc_stoffe = true, $inc_date = true) {
$kommata = false; $kommata = false;
$json = '"' . $this->name . '":['; $json = '"' . $this->name . '":[';
foreach ( $this->gerichte as $gericht ) { foreach ( $this->gerichte as $gericht ) {
...@@ -40,7 +40,7 @@ class TAG { ...@@ -40,7 +40,7 @@ class TAG {
} else { } else {
$kommata = true; $kommata = true;
} }
$json .= $gericht->toJson (); $json .= $gericht->toJson ( $inc_original, $inc_short, $inc_art, $inc_kind, $inc_img, $inc_stoffe, $inc_dat );
} }
$json .= ']'; $json .= ']';
return $json; return $json;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment