Select Git revision
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
parsing.ts 21.84 KiB
import matter from "gray-matter"; // Front matter yaml metadata parsing
import md from "markdown-it"; // Markdown parsing
import bracketed_spans_plugin from "markdown-it-bracketed-spans"; //attributed spans (`[blah]{.fin yes=1)}`)
import attributes from "markdown-it-attrs";
import { full as emoji_plugin } from "markdown-it-emoji"; // `:smile:` -> `😊`
import footnote_plugin from "markdown-it-footnote"; // for `[^1]` and friends
import abbreviation from "markdown-it-abbr";
import definitionList from "markdown-it-deflist";
import inserted from "markdown-it-ins";
import marked from "markdown-it-mark";
import subscript from "markdown-it-sub";
import superscript from "markdown-it-sup";
import container from "markdown-it-container";
import Token from "markdown-it/lib/token.mjs";
import { getMetadataProblems } from "./metadataChecks";
import {
renderResolutionToHtml,
renderTodoToHtml,
renderContainerToAlert,
} from "./rendering";
// Shared markdown-it instance used for all tokenizing and rendering in this module.
// NOTE(review): `html: true` lets raw HTML from the pad through — presumably the pads are trusted input; confirm.
const markdownParser = md({
html: true,
xhtmlOut: true,
breaks: true,
linkify: true,
typographer: true,
// Quote replacement characters (German-style double and single pairs).
quotes: "„“‚‘",
})
.use(attributes)
.use(emoji_plugin)
.use(footnote_plugin)
.use(abbreviation)
.use(definitionList)
.use(inserted)
.use(marked)
.use(subscript)
.use(superscript)
// Legacy
// Bracketed spans carry the old inline card syntax; the three named containers below are rendered as alert boxes.
.use(bracketed_spans_plugin)
.use(container, "warning", {
render: renderContainerToAlert("alert-warning", "alert"),
})
.use(container, "Meinungsbild", {
render: renderContainerToAlert("alert-secondary", "Meinungsbild"),
})
.use(container, "GO-Antrag", {
render: renderContainerToAlert("alert-secondary", "GO-Antrag"),
})
// New
// Registered without a custom renderer: `parse` looks for their `container_<name>_open` tokens
// and replaces the whole block with a pre-rendered HTML token.
.use(container, "resolution")
.use(container, "todo");
// Switch off linkify's `fuzzyEmail` option on the parser's linkify instance.
markdownParser.linkify.set({ fuzzyEmail: false });
/**
 * Everything `parse` returns for a single transcript.
 */
export interface ParseResults {
// Front-matter metadata plus the resolutions collected while walking the document.
meta: FinishedTranscriptMeta;
// HTML rendered from the post-processed token list.
html: string;
// The markdown-it token list after cards and heading badges have been substituted in.
ast: Token[];
}
/**
 * Normalise transcript metadata that comes from the legacy data set.
 *
 * Missing `spec_version` / `no_link` values get defaults, and every
 * resolution receives an `html` field (its escaped `text` wrapped in a
 * `<span>`) as well as a boolean `accepted` flag derived from `result`
 * when it was not given explicitly.
 *
 * @param old Legacy transcript metadata entries.
 * @returns A new array with the normalised entries; the input is not mutated.
 * @throws TypeError if a resolution has no boolean `accepted` and a `result`
 *   other than 'Angenommen', 'Abgelehnt' or `undefined`.
 */
export function parseOld(
    old: FinishedTranscriptMeta[],
): FinishedTranscriptMeta[] {
    return old.map((transcript) => {
        const resolutions = transcript.resolutions?.map((resolution) => {
            let accepted = resolution.accepted;
            if (typeof accepted !== "boolean") {
                // No explicit flag: derive it from the textual result.
                const { result } = resolution;
                if (result === "Angenommen") {
                    accepted = true;
                } else if (result === "Abgelehnt") {
                    accepted = false;
                } else if (result !== undefined) {
                    throw new TypeError(
                        `'result' must either be 'Angenommen' or 'Abgelehnt' or you must specify 'accepted' manually: ${transcript.number}`,
                    );
                }
            }
            const html = `<span>${markdownParser.utils.escapeHtml(resolution.text)}</span>`;
            return { ...resolution, html, accepted };
        });

        return {
            ...transcript,
            // -1 marks transcripts that only exist as PDFs.
            spec_version: transcript.spec_version ?? -1,
            no_link: transcript.no_link ?? false,
            resolutions,
        };
    });
}
/**
 * Find the index of the token that closes the block opened at `start`.
 *
 * Scans forward from the token after `start`. The first token whose type
 * equals `close` wins; meeting any other token at nesting level `level` or
 * shallower first means the block was left without being closed.
 *
 * @param ast Token list to scan.
 * @param start Index of the opening token.
 * @param level Nesting level of the opening token.
 * @param close Token type that terminates the block.
 * @returns Index of the closing token.
 * @throws TypeError if the block is left, or the list ends, before `close` is found.
 */
function _endBlock(ast: Token[], start: number, level: number, close: string) {
    let idx = start + 1;
    while (idx < ast.length) {
        const { type, level: tokenLevel } = ast[idx];
        if (type === close) {
            return idx;
        }
        if (tokenLevel <= level) {
            throw new TypeError(`Left block without close: '${close}'`);
        }
        idx += 1;
    }
    throw new TypeError(`Encountered open without close: '${close}'`);
}
/**
 * Remove `key` from `map` and hand back its (trimmed) value.
 *
 * The key is always deleted, whether or not its value is usable, so that
 * callers can afterwards detect unknown attributes by checking what is left.
 *
 * A value counts as usable when it is non-empty after trimming and passes
 * `extraCheck` (if one is given).
 *
 * @param map Attribute map to take the value from (mutated).
 * @param key Attribute name.
 * @param ok Optional converter applied to a usable value.
 * @param onMissing If given, the message of the TypeError thrown when the value is not usable.
 * @param extraCheck Optional additional validity predicate.
 * @returns The (converted) value, or `undefined` if it is not usable and `onMissing` is not set.
 * @throws TypeError with message `onMissing` if the value is not usable and `onMissing` is a string.
 */
function _popKey<T>(
    map: Map<string, string>,
    key: string,
    ok: (val: string) => T,
    onMissing: string,
    extraCheck?: (val: string) => boolean,
): T;
function _popKey<T>(
    map: Map<string, string>,
    key: string,
    ok: (val: string) => T,
    onMissing?: undefined,
    extraCheck?: (val: string) => boolean,
): T | undefined;
function _popKey(
    map: Map<string, string>,
    key: string,
    ok: undefined,
    onMissing: string,
    extraCheck?: (val: string) => boolean,
): string;
function _popKey(
    map: Map<string, string>,
    key: string,
    ok?: undefined,
    onMissing?: undefined,
    extraCheck?: (val: string) => boolean,
): string | undefined;
function _popKey<T>(
    map: Map<string, string>,
    key: string,
    ok?: (val: string) => T,
    onMissing?: string,
    extraCheck?: (val: string) => boolean,
) {
    const trimmed = map.get(key)?.trim();
    // Consume the key unconditionally so leftovers can be reported as unknown attributes.
    map.delete(key);

    if (
        typeof trimmed === "string"
        && trimmed.length > 0
        && (extraCheck === undefined || extraCheck(trimmed))
    ) {
        return ok === undefined ? trimmed : ok(trimmed);
    }

    if (typeof onMissing === "string") {
        throw new TypeError(onMissing);
    }
    return undefined;
}
/**
 * Fail if any tags are left over.
 *
 * For a `Map` or `Set` the keys are inspected; an array is inspected as is.
 *
 * @param array Remaining attribute names / classes.
 * @param name Singular name of the construct being checked; it is pluralised with an `s` in the message.
 * @throws TypeError listing the leftover tags if there are any.
 */
function _assertEmpty(
    array: Array<string> | Map<string, string> | Set<string>,
    name: string,
) {
    const leftovers = Array.isArray(array) ? array : Array.from(array.keys());
    if (leftovers.length > 0) {
        throw new TypeError(
            `The following tags cannot be used in ${name}s: ${leftovers.join(", ")}`,
        );
    }
}
/**
 * Parse a single transcript (meeting minutes) from markdown.
 *
 * Steps, in order: split off the YAML front matter, merge a trailing
 * `---` / `end: …` / `---` block into the metadata, validate the metadata,
 * tokenize the body, replace resolution/todo cards with pre-rendered HTML
 * tokens, insert badge tokens for `start` / `origin` / `issue` heading
 * attributes, and finally render the token list to HTML.
 *
 * @param markdown Raw markdown from the HedgeDoc pad used
 * @returns The metadata (including the resolutions collected on the way), the rendered HTML and the final token list.
 * @throws A plain string (all metadata errors joined by newlines) if `getMetadataProblems` reports errors.
 * @throws TypeError / Error raised by the card and heading checks (unknown attributes or classes, malformed resolutions, unclosed containers).
 */
export function parse(markdown: string): ParseResults {
// Split the document into front-matter `data` and the markdown `content`.
let { data, content } = matter(markdown, {
language: "yaml",
}) as unknown as {
data: TranscriptMeta;
content: string;
};
// Evil hack:
// Pandoc (used previously) allowed multiple front matters and this was employed to mark the end time at the bottom of the document.
// gray-matter does not allow this, so we cut out everything leading up to `---\nend:`, so that the second meta data
// is now at the beginning of the string again.
//
// Also we compensate for people using inferior line endings
const startOfEndBlock = content.search(/---(\n|\r\n|\r)end:/);
if (startOfEndBlock > -1) {
const endData = matter(content.slice(startOfEndBlock), {
language: "yaml",
}).data;
// Only the `end` key of the trailing block is taken over.
data = { ...data, end: endData.end };
// Remove the trailing block from the body. `.` does not match line breaks here,
// so the `end:` value has to sit on a single line for the block to be removed.
content = content.replace(/---(\n|\r\n|\r)end:.+?(\n|\r\n|\r)---/m, "");
}
// First off, check metadata:
const { errors } = getMetadataProblems(data);
if (errors.length > 0) {
// NOTE(review): this throws a bare string, not an Error (no stack trace). Callers may rely on that — confirm before changing.
throw errors.join("\n");
}
// From now on, assume metadata is valid
const markdownAstTokens = markdownParser.parse(content, {});
// State threaded through all card extractions: one running counter per resolution type
// and the list of resolutions found so far.
const sharedState = {
nextResoNumbers: {
B: 1,
F: 1,
P: 1,
},
resolutions: [],
data,
};
// Walk the top-level token list. The list is modified in place below, so `tokenIdx` always
// refers to the current (possibly already replaced) token.
for (let tokenIdx = 0; tokenIdx < markdownAstTokens.length; tokenIdx++) {
const token = markdownAstTokens[tokenIdx];
// Handle new extractions:
// `mainClass` is the container name if this token is a `container_<name>_open` token.
const mainClass = _extractMainClass(token);
if (typeof mainClass === "string") {
// Index of the matching `container_<name>_close` token.
let endBlock = _endBlock(
markdownAstTokens,
tokenIdx,
token.level,
`container_${mainClass}_close`,
);
// Hand over the whole container, opening and closing token included.
let rendered = tryExtractCards(
markdownAstTokens.slice(tokenIdx, endBlock + 1),
sharedState,
mainClass,
);
if (rendered !== undefined) {
// Sorry for changing the array while iterating
// The entire container collapses into one token at `tokenIdx`, so the loop simply continues behind it.
markdownAstTokens.splice(
tokenIdx,
endBlock - tokenIdx + 1,
rendered,
);
}
}
// Handle old extractions:
// An inline token whose first child opens a span (the bracketed-span syntax).
else if (
token.type === "inline"
&& token.children?.[0]?.type === "span_open"
) {
let rendered = tryExtractCards(
token.children,
sharedState,
undefined,
);
if (rendered !== undefined) {
// Replace the inline token itself; the surrounding tokens stay untouched.
markdownAstTokens[tokenIdx] = rendered;
}
}
// Handle headers
else if (token.type === "heading_open") {
const meta = extractHeaderMeta(token);
// Each known attribute becomes a badge token inserted at `tokenIdx + 3`
// (presumably right behind heading_open, inline and heading_close — confirm).
// All badges are inserted at the same index, so a later insert ends up in front of an earlier one.
_popKey(meta, "start", (start) => {
const startBadgeToken = new Token("html_block", "span", 0);
startBadgeToken.content = `<span class="badge text-bg-info mb-1 me-1">Start: ${start}</span>`;
markdownAstTokens.splice(tokenIdx + 3, 0, startBadgeToken);
});
_popKey(meta, "origin", (origin) => {
const originBadgeToken = new Token("html_block", "span", 0);
originBadgeToken.content = `<span class="badge text-bg-info mb-1 me-1">Quelle: ${origin}</span>`;
markdownAstTokens.splice(tokenIdx + 3, 0, originBadgeToken);
});
_popKey(meta, "issue", (issue) => {
const issueBadgeToken = new Token("html_block", "span", 0);
issueBadgeToken.content = `<a href="https://gitlab.fachschaften.org/tudo-fsinfo/fsr/sitzungen/-/issues/${issue}"><span class="badge text-bg-info mb-1 me-1">GitLab-Issue: #${issue}</span></a>`;
markdownAstTokens.splice(tokenIdx + 3, 0, issueBadgeToken);
});
// Whatever classes remain after dropping the ignored legacy ones are unknown and rejected below.
const classes = _popKey(meta, "class", (classes) =>
classes.split(" ").filter(
(c) =>
![
// Legacy classes we ignore:
"fin",
].includes(c),
),
);
if (classes !== undefined && classes.length !== 0) {
throw new TypeError(
`Unknown class in heading: ${classes.join(", ")}`,
);
}
// Every attribute handled above was removed from `meta`; anything left is unknown.
_assertEmpty(meta, "heading");
}
}
// Render the modified token list with the parser's own options.
const renderedHtml = markdownParser.renderer.render(
markdownAstTokens,
markdownParser.options,
{},
);
return {
meta: { ...data, resolutions: sharedState.resolutions },
html: renderedHtml,
ast: markdownAstTokens,
};
}
/**
 * A single resolution extracted from a transcript (or taken over from the legacy data).
 */
export interface Resolution {
// For freshly parsed resolutions this is the `date` of the transcript metadata.
date?: string;
// `<transcript number>.<running number><type>`, assigned in `tryExtractCards`.
number: string;
// "B": regular resolution, "F": financial resolution, "P": transcript resolution.
type: "B" | "F" | "P";
// For freshly parsed resolutions either "Angenommen" or "Abgelehnt".
result: string;
// Boolean form of `result`.
accepted?: boolean;
/**
* Whether the resolution was accepted and is also not revoked by another resolution.
*/
isActive?: boolean;
// Rendered HTML of the resolution body.
html: string;
// I.e. for search. Is only exported to `index.json`s.
// Required by external tools like `finrefhelper`
text: string;
// Value of the `money-granted` attribute; intended format is `eur,ct` or `0`.
money_granted?: string;
// Raw value of the `provisional` attribute.
provisional?: string;
// Numbers of the resolutions this one modifies (comma-separated `modifies` attribute).
modifies?: string[];
// Numbers of the resolutions this one revokes (comma-separated `revokes` attribute).
revokes?: string[];
// Raw value of the `note` attribute.
note?: string;
votes?:
| {
yes: number;
no: number;
abstention: number;
}
| {
// 288.1
// NOTE(review): presumably a vote with options other than yes/no, as in resolution 288.1 — confirm.
[key: string]: number;
abstention: number;
};
// `till: null` stands for "forever"; `reason` is the value of the `reason` attribute.
relevant?: { till?: Date | null; reason?: string };
no_link?: boolean;
}
/**
 * A todo card extracted from a transcript.
 */
export interface Todo {
// Rendered HTML of the card body.
html: string;
// Plain text of the card body.
text: string;
// Raw value of the `team` attribute, if given.
team?: string;
// Raw value of the `people` attribute, if given.
people?: string;
}
/**
 * Front-matter metadata of a transcript.
 */
interface TranscriptMeta {
// For data coming from `old.json`, most types are optional ;) (sorry, wasn't me)
spec_version: number; // -1: PDF, 2: current
lang: string;
// Transcript number; used as the prefix of resolution numbers.
number?: number;
date: string;
start: string;
// May also be supplied by a trailing `end:` block at the bottom of the document (see `parse`).
end: string;
title: string;
label?: string;
head: string;
author: string;
present: string[];
absent: string[];
guests: string[];
no_link: boolean;
}
/**
 * Transcript metadata together with the resolutions found in the transcript.
 */
export interface FinishedTranscriptMeta extends TranscriptMeta {
// Why the ?, you ask? Because for the old PDF transcripts we
// didn't want to manually get all the resolutions out and just left them undefined.
resolutions?: Resolution[];
}
/**
 * Collect the attributes of a token into a map.
 *
 * @param token Token whose `attrs` pairs are read.
 * @returns A fresh map built from the attribute pairs; empty if the token has no attributes.
 */
function extractHeaderMeta(token: Token): Map<string, string> {
    const attrs = token?.attrs;
    return Array.isArray(attrs) && attrs.length > 0
        ? new Map(attrs)
        : new Map();
}
/**
 * Get the container name out of a `container_<name>_open` token type.
 *
 * @param token Token to inspect.
 * @returns The container name, or `undefined` if the token does not open a container.
 */
function _extractMainClass(token: Token) {
    // The pattern is anchored on both ends, so there is at most one match.
    const [containerOpen] = token.type.matchAll(/^container_(\S+)_open$/g);
    return containerOpen?.[1];
}
/**
 * Gather everything the card extractors need from a run of tokens.
 *
 * The attributes are read from the first token. Its `class` attribute is
 * split on spaces into `classes` (and removed from `spanAttributes`);
 * `mainClass`, if given, is added to `classes` as well.
 *
 * @param tokens Tokens making up the card; must contain at least one token.
 * @param mainClass Container name when the card comes from a container block.
 * @returns The remaining attributes, the class set, the rendered HTML and the plain text.
 * @throws TypeError if `tokens` is empty or an attribute name occurs more than once.
 */
function _extractBase(
    tokens: Token[],
    mainClass?: string,
): {
    spanAttributes: Map<string, string>;
    classes: Set<string>;
    html: string;
    text: string;
} {
    if (tokens.length < 1) {
        throw new TypeError(
            "Parsing this requires an inline span with at least one child",
        );
    }
    const [opener] = tokens;

    // Every occurrence of an attribute name after its first one is reported.
    const seenNames = new Set<string>();
    const duplicateAttrs: string[] = [];
    for (const [attrName] of opener.attrs ?? []) {
        if (seenNames.has(attrName)) {
            duplicateAttrs.push(attrName);
        } else {
            seenNames.add(attrName);
        }
    }
    if (duplicateAttrs.length > 0) {
        throw new TypeError(
            `The following properties appear multiple times in a resolution: ${duplicateAttrs.join(", ")}`,
        );
    }

    const spanAttributes = new Map(opener.attrs);
    const html = markdownParser.renderer.render(
        tokens,
        { breaks: false },
        undefined,
    );

    // Plain-text variant of the card: the content of all text and inline tokens.
    const textParts: string[] = [];
    for (const candidate of tokens) {
        if (candidate.type === "text" || candidate.type === "inline") {
            textParts.push(candidate.content);
        }
    }
    const text = textParts.join(" ").trim();

    const declaredClasses =
        _popKey(spanAttributes, "class", (classes) => classes.split(" "))
        ?? [];
    const classes = new Set(
        mainClass === undefined
            ? declaredClasses
            : [...declaredClasses, mainClass],
    );
    // FIXME: Code blocks in containers with language specified have the class name `undefined<language>`.
    return { spanAttributes, classes, html, text };
}
/**
 * Try to turn a run of tokens into a rendered card (resolution or todo).
 *
 * For a resolution this also assigns its number from the running counters in
 * `sharedState`, verifies that it only modifies/revokes resolutions that came
 * before it, advances the counter and records the resolution in
 * `sharedState.resolutions`.
 *
 * @param tokens Tokens of the candidate card (container block or children of an inline span).
 * @param sharedState Running resolution counters, collected resolutions and transcript metadata (mutated).
 * @param mainClass Container name when the card comes from a container block.
 * @returns An `html_block` token holding the rendered card, or `undefined` if the tokens are not a card.
 * @throws Error if a resolution modifies/revokes itself or a newer resolution, or references the
 *   current transcript without a type suffix.
 * @throws TypeError for unknown attributes or classes on something that is not a known card.
 */
function tryExtractCards(
    tokens: Token[],
    sharedState: {
        nextResoNumbers: {
            B: number;
            F: number;
            P: number;
        };
        resolutions: Resolution[];
        data: FinishedTranscriptMeta;
    },
    mainClass?: string,
): Token | undefined {
    let rendered: undefined | string = undefined;
    const base = _extractBase(tokens, mainClass);

    const resolution = tryExtractResolution(base, sharedState.data.date);
    if (resolution) {
        const currentTranscript = sharedState.data.number;
        resolution.number =
            currentTranscript
            + "."
            + sharedState.nextResoNumbers[resolution.type]
            + resolution.type;

        // Check if Resolutions that are changed are in the past.
        const changedResolutions = (resolution.revokes ?? []).concat(
            resolution.modifies ?? [],
        );
        const touchesSelfOrNewer = changedResolutions.some((other: string) => {
            const transcriptMatch = /^(\d+)/.exec(other);
            if (transcriptMatch === null) {
                throw new Error(
                    `Revokes or modifies tag ${other} should start with a Number.`,
                );
            }
            const otherTranscriptNumber = parseInt(transcriptMatch[1], 10);
            if (currentTranscript !== undefined) {
                if (otherTranscriptNumber > currentTranscript) return true;
                if (otherTranscriptNumber < currentTranscript) return false;
            }

            // Same transcript: compare against the running counter of the referenced type.
            // Legacy numbers without a type suffix cannot be compared, so they get a
            // descriptive error instead of a null dereference.
            const counterMatch = /\.(\d+)([FBP])$/.exec(other);
            if (counterMatch === null) {
                throw new Error(
                    `Revokes or modifies tag ${other} refers to the current transcript but does not end in a resolution type (B, F or P).`,
                );
            }
            const otherType =
                counterMatch[2] as keyof typeof sharedState.nextResoNumbers;
            // The counter still holds the number of the resolution being processed,
            // so `>=` also catches a resolution referencing itself.
            return (
                parseInt(counterMatch[1], 10)
                >= sharedState.nextResoNumbers[otherType]
            );
        });
        if (touchesSelfOrNewer) {
            throw new Error(
                `Resolutions should not revoke themselves or newer resolutions. But ${resolution.number} is doing so.`,
            );
        }

        sharedState.nextResoNumbers[resolution.type] += 1;
        rendered = renderResolutionToHtml(resolution);
        sharedState.resolutions.push(resolution);
    }

    const todo = tryExtractNewTodo(base);
    if (todo) rendered = renderTodoToHtml(todo);

    if (rendered !== undefined) {
        const res = new Token("html_block", "div", 0);
        res.content = rendered;
        return res;
    }

    // Not a card: nothing may be left over, except for the classes of the known legacy containers.
    const { spanAttributes, classes } = base;
    _assertEmpty(spanAttributes, "unknown card");
    const isKnownContainer = ["warning", "Meinungsbild", "GO-Antrag"].some(
        (known) => classes.has(known),
    );
    if (!isKnownContainer) {
        _assertEmpty(classes, "unknown card");
    }
    return undefined;
}
/**
 * Try to interpret an extracted card as a resolution.
 *
 * Consumes the `resolution` class (plus `fin` / `transcript` for the type) and
 * all known attributes from `base`; anything left over is reported as an error.
 * The returned resolution carries the placeholder number "???"; the caller
 * assigns the real one.
 *
 * @param base Attributes, classes, HTML and text of the card (attributes and classes are mutated).
 * @param date Date of the transcript; used as the resolution date and as the reference for `relevant-till`.
 * @returns The resolution, or `null` if the card does not have the `resolution` class.
 * @throws TypeError for missing or invalid attributes, unknown attributes/classes, or a
 *   result that contradicts the vote counts.
 * @throws Error for malformed `modifies` / `revokes` references.
 */
function tryExtractResolution(
    base: {
        spanAttributes: Map<string, string>;
        classes: Set<string>;
        html: string;
        text: string;
    },
    date: string,
): Resolution | null {
    const { spanAttributes, classes, html, text } = base;
    if (!classes.delete("resolution")) {
        // no resolution class given -> bail gracefully, as this is simply no resolution
        return null;
    }

    let type: Resolution["type"] = "B"; // regular resolution
    if (classes.delete("fin")) {
        type = "F"; // financial resolution
    } else if (classes.delete("transcript")) {
        type = "P"; // transcript resolution
    }

    const yes =
        _popKey(
            spanAttributes,
            "yes",
            (yes) => parseInt(yes, 10),
            "Beschluss muss ein `yes`-Attribut (Ja-Stimmen) enthalten",
        ) ?? NaN;
    const no =
        _popKey(
            spanAttributes,
            "no",
            (no) => parseInt(no, 10),
            "Beschluss muss ein `no`-Attribut (Nein-Stimmen) enthalten",
        ) ?? NaN;
    const abstention =
        _popKey(
            spanAttributes,
            "abstention",
            (abstention) => parseInt(abstention, 10),
            "Beschluss muss ein `abstention`-Attribut (Enthaltungen) enthalten",
        ) ?? NaN;
    if (Number.isNaN(yes) || Number.isNaN(no) || Number.isNaN(abstention)) {
        throw new TypeError(
            "\n❌❌❌❌❌❌\nBeschlüsse müssen Integer-Werte für Ja, Nein und Enthaltungen aufweisen. Schämen und korrigieren!\n❌❌❌❌❌❌",
        );
    }

    const result = _popKey(
        spanAttributes,
        "result",
        undefined,
        "Beschluss muss ein `result`-Attribut (Ergebnis 'Angenommen' oder 'Abgelehnt') enthalten",
        (result) => result === "Angenommen" || result === "Abgelehnt",
    );

    const moneyGranted = _popKey(
        spanAttributes,
        "money-granted",
        (moneyGranted) => {
            // The alternation must be grouped: without the group the pattern reads
            // "starts with eur,ct" OR "ends in 0" and lets values such as `12,345` or `abc0` through.
            if (/^(?:\d+,\d\d|0)$/.test(moneyGranted)) {
                return moneyGranted;
            } else {
                throw new TypeError(
                    "'money-granted' must be in the format 'eur,ct' or '0'. Furthermore those should be numbers.",
                );
            }
        },
    );
    const provisional = _popKey(spanAttributes, "provisional");
    const modifies = _popKey(spanAttributes, "modifies", (modifies) =>
        modifies.split(","),
    );
    const revokes = _popKey(spanAttributes, "revokes", (revokes) =>
        revokes.split(","),
    );

    // Check for Well-Formed Resolution numbers
    for (const changedResolution of (modifies ?? []).concat(revokes ?? [])) {
        const otherTranscriptNumber = /^(\d+)\./.exec(changedResolution);
        if (otherTranscriptNumber === null) {
            throw new Error(
                `Revokes or modifies tag ${changedResolution} should start with a Number.`,
            );
        }
        // Handle old resolution numbers: only references to transcript 418 and later carry a type suffix.
        const testRegex =
            parseInt(otherTranscriptNumber[1], 10) >= 418
                ? /^\d+\.\d+[BFP]$/
                : /^\d+\.\d+$/;
        if (!testRegex.test(changedResolution)) {
            throw new Error(
                `Revokes or modifies tag ${changedResolution} is malformed.`,
            );
        }
    }

    const note = _popKey(spanAttributes, "note");
    let reason = _popKey(spanAttributes, "reason");
    // `forever` is represented as `null`; anything else is parsed as a date.
    let till = _popKey(spanAttributes, "relevant-till", (till) =>
        till === "forever" ? null : new Date(till),
    );
    if (till === undefined && reason !== undefined) {
        throw new TypeError("Auslaufgrund impliziert Auslaufdatum.");
    }
    if (till instanceof Date && Number.isNaN(till.valueOf())) {
        throw new TypeError(
            "Auslaufdatum muss ein gültiges Datum oder `forever` sein.",
        );
    }
    if (till instanceof Date && till < new Date(date)) {
        throw new TypeError(
            "Auslaufdatum darf nicht in der Vergangenheit liegen.",
        );
    }
    if (till === undefined && type === "F" && moneyGranted !== undefined) {
        // I love hardcoding.
        // Financial resolutions granting money default to five months after the transcript date, with reason "314.1".
        till = new Date(date);
        till.setMonth(till.getMonth() + 5);
        reason = "314.1";
    }
    const relevant = till === undefined ? undefined : { till, reason };

    // Opt-out class for the plausibility check of result vs. votes below.
    const nocheck = classes.delete("no_check_correct_result_resolution");
    _assertEmpty(spanAttributes, "resolution");
    _assertEmpty(classes, "resolution");

    const accepted = result === "Angenommen";
    if (!nocheck) {
        if (type === "F") {
            // Accepted financial resolutions need at least two thirds of all votes cast (abstentions included).
            const belowTwoThirds = (yes * 3) / 2 < yes + no + abstention;
            if (belowTwoThirds === accepted) {
                throw new TypeError(
                    `Ein Fin Beschluss, hat ein nicht schlüssiges Ergebnis. Mit ${yes} Ja-Stimmen, ${no} Nein-Stimmen und ${abstention} Enthaltungen sollte nicht ${result} sein.`,
                );
            }
        } else {
            // All other resolutions are accepted exactly when there are more yes than no votes.
            const notMoreYesThanNo = yes <= no;
            if (notMoreYesThanNo === accepted) {
                throw new TypeError(
                    `Ein normaler Beschluss, hat ein nicht schlüssiges Ergebnis. Mit ${yes} Ja-Stimmen, ${no} Nein-Stimmen und ${abstention} Enthaltungen sollte nicht ${result} sein.`,
                );
            }
        }
    }

    return {
        date,
        // Placeholder; the caller assigns the real number.
        number: "???",
        type: type,
        result: result,
        accepted: accepted,
        html,
        text,
        votes: {
            yes: yes,
            no: no,
            abstention: abstention,
        },
        money_granted: moneyGranted,
        provisional,
        modifies,
        revokes,
        note,
        relevant,
        no_link: false,
    };
}
/**
 * Try to interpret an extracted card as a todo.
 *
 * Consumes the `todo` class and the `team` / `people` attributes from `base`;
 * anything left over afterwards is reported as an error.
 *
 * @param base Attributes, classes, HTML and text of the card (attributes and classes are mutated).
 * @returns The todo, or `null` if the card does not have the `todo` class.
 * @throws TypeError if unknown attributes or classes remain.
 */
function tryExtractNewTodo(base: {
    spanAttributes: Map<string, string>;
    classes: Set<string>;
    html: string;
    text: string;
}): Todo | null {
    const isTodo = base.classes.delete("todo");
    if (!isTodo) {
        // Without the todo class this simply is not a todo; not an error.
        return null;
    }

    const team = _popKey(base.spanAttributes, "team");
    const people = _popKey(base.spanAttributes, "people");

    _assertEmpty(base.spanAttributes, "todo");
    _assertEmpty(base.classes, "todo");

    return { html: base.html, text: base.text, team, people };
}