Skip to content
Snippets Groups Projects
Select Git revision
  • 56e43a3cc3522390ba19f041906210e204b28462
  • main default protected
  • renovate/node-24.x
  • renovate/bootstrap-5.x
  • renovate/vitest-3.x-lockfile
  • renovate/node-22.x-lockfile
  • renovate/esbuild-0.x-lockfile
  • renovate/prettier-3.x-lockfile
  • feat/prettier-attendence-sum
  • local-test
  • show_missing
  • feat/show-which-resolutions-are-void
  • cedric-hates-this
13 results

parsing.ts

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    parsing.ts 21.84 KiB
    import matter from "gray-matter"; // Front matter yaml metadata parsing
    import md from "markdown-it"; // Markdown parsing
    import bracketed_spans_plugin from "markdown-it-bracketed-spans"; //attributed spans (`[blah]{.fin yes=1)}`)
    import attributes from "markdown-it-attrs";
    import { full as emoji_plugin } from "markdown-it-emoji"; // `:smile:` -> `😊`
    import footnote_plugin from "markdown-it-footnote"; // for `[^1]` and friends
    import abbreviation from "markdown-it-abbr";
    import definitionList from "markdown-it-deflist";
    import inserted from "markdown-it-ins";
    import marked from "markdown-it-mark";
    import subscript from "markdown-it-sub";
    import superscript from "markdown-it-sup";
    import container from "markdown-it-container";
    import Token from "markdown-it/lib/token.mjs";
    import { getMetadataProblems } from "./metadataChecks";
    import {
        renderResolutionToHtml,
        renderTodoToHtml,
        renderContainerToAlert,
    } from "./rendering";
    
    // Shared markdown-it instance. It is used both to tokenize transcripts and to
    // render token streams back to HTML throughout this file.
    const markdownParser = md({
        html: true, // allow raw HTML tags in the markdown source
        xhtmlOut: true, // close void tags XHTML-style (`<br />`)
        breaks: true, // turn single newlines inside paragraphs into `<br>`
        linkify: true, // auto-convert URL-like text into links
        typographer: true, // typographic replacements and quote beautification
        quotes: "„“‚‘", // replacement quote characters used by the typographer
    })
        .use(attributes)
        .use(emoji_plugin)
        .use(footnote_plugin)
        .use(abbreviation)
        .use(definitionList)
        .use(inserted)
        .use(marked)
        .use(subscript)
        .use(superscript)
        // Legacy: bracketed spans (`[text]{.class key=value}`) and alert-style containers
        // that are rendered directly through `renderContainerToAlert`.
        .use(bracketed_spans_plugin)
        .use(container, "warning", {
            render: renderContainerToAlert("alert-warning", "alert"),
        })
        .use(container, "Meinungsbild", {
            render: renderContainerToAlert("alert-secondary", "Meinungsbild"),
        })
        .use(container, "GO-Antrag", {
            render: renderContainerToAlert("alert-secondary", "GO-Antrag"),
        })
        // New: these containers get no custom renderer here; `parse` looks for their
        // `container_<name>_open` tokens and replaces the whole block with a rendered card.
        .use(container, "resolution")
        .use(container, "todo");
    // NOTE(review): presumably stops linkify from turning bare e-mail addresses
    // (without `mailto:`) into links — confirm against the linkify-it options.
    markdownParser.linkify.set({ fuzzyEmail: false });
    
    /** Everything `parse` produces for a single transcript. */
    export interface ParseResults {
        /** Front-matter metadata plus the resolutions collected while parsing. */
        meta: FinishedTranscriptMeta;
        /** HTML rendered from the final (modified) token stream. */
        html: string;
        /** The token stream after cards and heading badges have been spliced in. */
        ast: Token[];
    }
    
    /**
     * Normalize transcript metadata that was stored in the legacy format.
     *
     * For every transcript this fills in defaults for `spec_version` (-1) and
     * `no_link` (false). For every resolution it derives `accepted` from the textual
     * `result` when no boolean `accepted` is present, and regenerates `html` as the
     * HTML-escaped `text` wrapped in a `<span>`.
     *
     * @param old Transcript metadata entries in the legacy format.
     * @returns New metadata objects; the input entries are not modified.
     * @throws TypeError if a resolution has no boolean `accepted` and its `result`
     *         is defined but neither 'Angenommen' nor 'Abgelehnt'.
     */
    export function parseOld(
        old: FinishedTranscriptMeta[],
    ): FinishedTranscriptMeta[] {
        return old.map((transcript) => {
            const resolutions = transcript.resolutions?.map((entry) => {
                let accepted = entry.accepted;
                if (typeof accepted !== "boolean") {
                    // No explicit flag: fall back to the textual result.
                    if (entry.result === "Angenommen") {
                        accepted = true;
                    } else if (entry.result === "Abgelehnt") {
                        accepted = false;
                    } else if (entry.result !== undefined) {
                        throw new TypeError(
                            `'result' must either be 'Angenommen' or 'Abgelehnt' or you must specify 'accepted' manually: ${transcript.number}`,
                        );
                    }
                    // A missing `result` leaves `accepted` as it was.
                }
                const html = `<span>${markdownParser.utils.escapeHtml(entry.text)}</span>`;
                return { ...entry, html, accepted };
            });

            return {
                ...transcript,
                // Fallbacks for options that legacy entries may lack:
                spec_version: transcript.spec_version ?? -1, // PDFs
                no_link: transcript.no_link ?? false,
                resolutions,
            };
        });
    }
    
    /**
     * Locate the token that closes a block.
     *
     * Scans forward from the token after `start` until a token of type `close` is
     * found.
     *
     * @param ast Token stream to search.
     * @param start Index of the opening token; the search begins right after it.
     * @param level Nesting level of the opening token.
     * @param close Token type that terminates the block.
     * @returns Index of the closing token.
     * @throws TypeError if a token at `level` or shallower is reached before the
     *         closing token, or if the stream ends without one.
     */
    function _endBlock(ast: Token[], start: number, level: number, close: string) {
        let cursor = start + 1;
        while (cursor < ast.length) {
            const current = ast[cursor];
            if (current.type === close) {
                return cursor;
            }
            // Anything at the opener's level (or above) means we already left the block.
            if (current.level <= level) {
                throw new TypeError(`Left block without close: '${close}'`);
            }
            cursor += 1;
        }
        throw new TypeError(`Encountered open without close: '${close}'`);
    }
    
    function _popKey<T>(
        map: Map<string, string>,
        key: string,
        ok: (val: string) => T,
        onMissing: string,
        extraCheck?: (val: string) => boolean,
    ): T;

    function _popKey<T>(
        map: Map<string, string>,
        key: string,
        ok: (val: string) => T,
        onMissing?: undefined,
        extraCheck?: (val: string) => boolean,
    ): T | undefined;

    function _popKey(
        map: Map<string, string>,
        key: string,
        ok: undefined,
        onMissing: string,
        extraCheck?: (val: string) => boolean,
    ): string;

    function _popKey(
        map: Map<string, string>,
        key: string,
        ok?: undefined,
        onMissing?: undefined,
        extraCheck?: (val: string) => boolean,
    ): string | undefined;

    /**
     * Remove `key` from `map` and hand back its trimmed value.
     *
     * The key is always removed, whether or not its value is usable, so that callers
     * can afterwards treat whatever is left in the map as unknown attributes.
     *
     * A value counts as unusable when the key is absent, the trimmed value is empty,
     * or `extraCheck` is given and rejects it.
     *
     * @param map Attribute map to take the key from (mutated).
     * @param key Attribute name to remove.
     * @param ok Optional converter applied to a usable value; its result is returned.
     * @param onMissing If given, the message of the TypeError thrown for an unusable
     *        value; if omitted, `undefined` is returned instead.
     * @param extraCheck Optional additional validity predicate for the trimmed value.
     * @returns The trimmed value (or `ok`'s result), or `undefined` when unusable and
     *          `onMissing` is not set.
     * @throws TypeError with message `onMissing` when the value is unusable.
     */
    function _popKey<T>(
        map: Map<string, string>,
        key: string,
        ok?: (val: string) => T,
        onMissing?: string,
        extraCheck?: (val: string) => boolean,
    ) {
        const val = map.get(key)?.trim();
        map.delete(key); // Always consume the key so leftovers can be reported as unknown.

        const rejected =
            val === undefined
            || val === ""
            || (extraCheck !== undefined && !extraCheck(val));
        if (rejected) {
            if (typeof onMissing === "string") {
                throw new TypeError(onMissing);
            }
            return undefined;
        }

        // `rejected` being false guarantees a non-empty string here.
        const usable = val as string;
        return ok === undefined ? usable : ok(usable);
    }
    
    /**
     * Fail if any entries are left over.
     *
     * Used after all known attributes/classes have been consumed: whatever remains
     * is reported as not allowed.
     *
     * @param array Leftover names; for a Map its keys are reported, for a Set its
     *        members.
     * @param name Kind of element being checked; used (pluralized with "s") in the
     *        error message.
     * @throws TypeError listing the leftover names if there are any.
     */
    function _assertEmpty(
        array: Array<string> | Map<string, string> | Set<string>,
        name: string,
    ) {
        const leftovers =
            array instanceof Map || array instanceof Set
                ? [...array.keys()]
                : array;
        if (leftovers.length > 0) {
            throw new TypeError(
                `The following tags cannot be used in ${name}s: ${leftovers.join(", ")}`,
            );
        }
    }
    
    /**
     * Parse a single transcript (meeting minutes) from markdown.
     *
     * Steps, in order:
     * 1. Split off the YAML front matter; merge in an `end:` value from a second
     *    front-matter block further down the document, if there is one.
     * 2. Validate the metadata via `getMetadataProblems`.
     * 3. Tokenize the body and walk the token stream once, replacing resolution/todo
     *    blocks by rendered cards and turning heading attributes into badges.
     * 4. Render the modified token stream to HTML.
     *
     * @param markdown Raw markdown from the HedgeDoc pad used
     * @returns The metadata (including all resolutions found), the rendered HTML and
     *          the final token stream.
     * @throws A plain string (metadata errors joined by newlines) if the metadata is
     *         invalid. Errors thrown by the extraction helpers (`_endBlock`,
     *         `tryExtractCards`, `_assertEmpty`) and the TypeError for unknown
     *         heading classes propagate unchanged.
     */
    export function parse(markdown: string): ParseResults {
        let { data, content } = matter(markdown, {
            language: "yaml",
        }) as unknown as {
            data: TranscriptMeta;
            content: string;
        };

        // Evil hack:
        // Pandoc (used previously) allowed multiple front matters and this was employed to mark the end time at the bottom of the document.
        // gray-matter does not allow this, so we cut out everything leading up to `---\nend:`, so that the second meta data
        // is now at the beginning of the string again.
        //
        // Also we compensate for people using inferior line endings
        const startOfEndBlock = content.search(/---(\n|\r\n|\r)end:/);
        if (startOfEndBlock > -1) {
            const endData = matter(content.slice(startOfEndBlock), {
                language: "yaml",
            }).data;
            // Only the `end` key of the second block is taken over.
            data = { ...data, end: endData.end };
            // Strip the second front-matter block from the body. `.` does not match line
            // terminators here, so this only removes a block whose sole line is `end: ...`.
            content = content.replace(/---(\n|\r\n|\r)end:.+?(\n|\r\n|\r)---/m, "");
        }

        // First off, check metadata:
        const { errors } = getMetadataProblems(data);
        if (errors.length > 0) {
            // NOTE(review): this throws a string, not an Error, so there is no stack trace
            // and `instanceof Error` checks fail. Callers may depend on the string —
            // confirm before changing.
            throw errors.join("\n");
        }
        // From now on, assume metadata is valid

        const markdownAstTokens = markdownParser.parse(content, {});

        // State shared across all card extractions of this transcript: the next running
        // number per resolution type, the resolutions collected so far, and the metadata.
        const sharedState = {
            nextResoNumbers: {
                B: 1,
                F: 1,
                P: 1,
            },
            resolutions: [],
            data,
        };

        // The array is modified while iterating (see the splices below), which is why
        // the length is re-read on every pass.
        for (let tokenIdx = 0; tokenIdx < markdownAstTokens.length; tokenIdx++) {
            const token = markdownAstTokens[tokenIdx];

            // Handle new extractions:
            // any `container_<name>_open` token starts a block that may be a card.
            const mainClass = _extractMainClass(token);
            if (typeof mainClass === "string") {
                let endBlock = _endBlock(
                    markdownAstTokens,
                    tokenIdx,
                    token.level,
                    `container_${mainClass}_close`,
                );
                let rendered = tryExtractCards(
                    markdownAstTokens.slice(tokenIdx, endBlock + 1),
                    sharedState,
                    mainClass,
                );
                if (rendered !== undefined) {
                    // Sorry for changing the array while iterating
                    // The whole open..close range collapses into the single rendered token.
                    markdownAstTokens.splice(
                        tokenIdx,
                        endBlock - tokenIdx + 1,
                        rendered,
                    );
                }
            }
            // Handle old extractions:
            // inline content whose first child opens a (bracketed) span.
            else if (
                token.type === "inline"
                && token.children?.[0]?.type === "span_open"
            ) {
                let rendered = tryExtractCards(
                    token.children,
                    sharedState,
                    undefined,
                );
                if (rendered !== undefined) {
                    markdownAstTokens[tokenIdx] = rendered;
                }
            }
            // Handle headers
            else if (token.type === "heading_open") {
                const meta = extractHeaderMeta(token);
                // Each known attribute becomes a badge token inserted at `tokenIdx + 3`.
                // NOTE(review): assumes `heading_open` is followed by exactly `inline` and
                // `heading_close`, so that index is right after the heading — confirm.
                // Every insert uses the same index, so later badges end up before
                // earlier ones in the output.
                _popKey(meta, "start", (start) => {
                    const startBadgeToken = new Token("html_block", "span", 0);
                    startBadgeToken.content = `<span class="badge text-bg-info mb-1 me-1">Start: ${start}</span>`;
                    markdownAstTokens.splice(tokenIdx + 3, 0, startBadgeToken);
                });
                _popKey(meta, "origin", (origin) => {
                    const originBadgeToken = new Token("html_block", "span", 0);
                    originBadgeToken.content = `<span class="badge text-bg-info mb-1 me-1">Quelle: ${origin}</span>`;
                    markdownAstTokens.splice(tokenIdx + 3, 0, originBadgeToken);
                });
                _popKey(meta, "issue", (issue) => {
                    const issueBadgeToken = new Token("html_block", "span", 0);
                    issueBadgeToken.content = `<a href="https://gitlab.fachschaften.org/tudo-fsinfo/fsr/sitzungen/-/issues/${issue}"><span class="badge text-bg-info mb-1 me-1">GitLab-Issue: #${issue}</span></a>`;
                    markdownAstTokens.splice(tokenIdx + 3, 0, issueBadgeToken);
                });
                // Classes on headings: everything except the ignored legacy ones is an error.
                const classes = _popKey(meta, "class", (classes) =>
                    classes.split(" ").filter(
                        (c) =>
                            ![
                                // Legacy classes we ignore:
                                "fin",
                            ].includes(c),
                    ),
                );
                if (classes !== undefined && classes.length !== 0) {
                    throw new TypeError(
                        `Unknown class in heading: ${classes.join(", ")}`,
                    );
                }
                // All known attributes were popped above; anything left is unknown.
                _assertEmpty(meta, "heading");
            }
        }

        const renderedHtml = markdownParser.renderer.render(
            markdownAstTokens,
            markdownParser.options,
            {},
        );

        return {
            meta: { ...data, resolutions: sharedState.resolutions },
            html: renderedHtml,
            ast: markdownAstTokens,
        };
    }
    
    /** A single resolution extracted from a transcript. */
    export interface Resolution {
        /** Date of the transcript the resolution belongs to. */
        date?: string;
        /**
         * Identifier of the resolution. For freshly parsed transcripts it is built as
         * `<transcript number>.<running number per type><type>`.
         */
        number: string;
        /** Kind of resolution: "B" regular, "F" financial, "P" transcript resolution. */
        type: "B" | "F" | "P";
        /** Textual outcome; parsing only accepts 'Angenommen' or 'Abgelehnt'. */
        result: string;
        /** Whether the resolution was accepted (`result` is 'Angenommen'). */
        accepted?: boolean;
        /**
         * Whether the resolution was accepted and is also not revoked by another resolution.
         */
        isActive?: boolean;
        /** Rendered HTML of the resolution's content. */
        html: string;
        // I.e. for search. Is only exported to `index.json`s.
        // Required by external tools like `finrefhelper`
        text: string;
        /** Granted amount, from the `money-granted` attribute ('eur,ct' or '0'). */
        money_granted?: string;
        /** Raw value of the `provisional` attribute; its meaning is not defined in this file. */
        provisional?: string;
        /** Numbers of the resolutions this one modifies. */
        modifies?: string[];
        /** Numbers of the resolutions this one revokes. */
        revokes?: string[];
        /** Raw value of the `note` attribute. */
        note?: string;
        /** Vote counts. */
        votes?:
            | {
                  yes: number;
                  no: number;
                  abstention: number;
              }
            | {
                  // 288.1
                  [key: string]: number;
                  abstention: number;
              };
        /**
         * Until when the resolution is relevant (`null` when the attribute was
         * `forever`) and, optionally, the reason for that expiry.
         */
        relevant?: { till?: Date | null; reason?: string };
        /** Set to `false` for freshly parsed resolutions; how it is used is not visible here. */
        no_link?: boolean;
    }
    
    /** A todo item extracted from a transcript. */
    export interface Todo {
        /** Rendered HTML of the todo's content. */
        html: string;
        /** Plain text of the todo's content. */
        text: string;
        /** Raw value of the `team` attribute, if given. */
        team?: string;
        /** Raw value of the `people` attribute, if given. */
        people?: string;
    }
    
    /** Metadata read from a transcript's YAML front matter (see `parse`). */
    interface TranscriptMeta {
        // For data coming from `old.json`, most types are optional ;) (sorry, wasn't me)
        spec_version: number; // -1: PDF, 2: current
        lang: string;
        // Transcript number; used as the prefix of resolution numbers.
        number?: number;
        date: string;
        start: string;
        // May come from a second front-matter block at the bottom of the document.
        end: string;
        title: string;
        label?: string;
        head: string;
        author: string;
        present: string[];
        absent: string[];
        guests: string[];
        no_link: boolean;
    }
    
    /** Transcript metadata together with the resolutions found in the transcript. */
    export interface FinishedTranscriptMeta extends TranscriptMeta {
        // Why the ?, you ask? Because for the old PDF transcripts we
        // didn't want to manually get all the resolutions out and just left them undefined
        resolutions?: Resolution[];
    }
    
    /**
     * Collect a token's attributes into a map.
     *
     * @param token Token whose `attrs` pairs should be read.
     * @returns A fresh map of attribute name to value; empty when the token has no
     *          attributes (or no token was given).
     */
    function extractHeaderMeta(token: Token): Map<string, string> {
        const pairs = token?.attrs;
        if (Array.isArray(pairs) && pairs.length > 0) {
            return new Map(pairs);
        }
        return new Map();
    }
    
    /**
     * Extract the container name from a `container_<name>_open` token.
     *
     * @param token Token to inspect.
     * @returns The container name, or `undefined` if the token does not open a
     *          container.
     */
    function _extractMainClass(token: Token) {
        const opener = /^container_(\S+)_open$/.exec(token.type);
        return opener?.[1];
    }
    
    /**
     * Gather what every card type needs from a run of tokens.
     *
     * The attributes are read from the first token. The `class` attribute is removed
     * from the returned attribute map and split into the returned class set, to
     * which `mainClass` (the container name, if any) is added.
     *
     * @param tokens Tokens making up the card; must not be empty.
     * @param mainClass Name of the surrounding container, if the card came from one.
     * @returns The remaining attributes, the class set, the rendered HTML of all
     *          tokens and their plain text.
     * @throws TypeError if `tokens` is empty or the first token carries the same
     *         attribute more than once.
     */
    function _extractBase(
        tokens: Token[],
        mainClass?: string,
    ): {
        spanAttributes: Map<string, string>;
        classes: Set<string>;
        html: string;
        text: string;
    } {
        if (tokens.length === 0) {
            throw TypeError(
                "Parsing this requires an inline span with at least one child",
            );
        }
        const [head] = tokens;

        // A Map would silently keep only the last of several equal keys, so
        // repeated attributes are rejected up front.
        const seenKeys = new Set<string>();
        const duplicateAttrs: string[] = [];
        for (const [attrName] of head.attrs ?? []) {
            if (seenKeys.has(attrName)) {
                duplicateAttrs.push(attrName);
            } else {
                seenKeys.add(attrName);
            }
        }
        if (duplicateAttrs.length > 0) {
            throw TypeError(
                `The following properties appear multiple times in a resolution: ${duplicateAttrs.join(", ")}`,
            );
        }

        const spanAttributes = new Map(head.attrs);
        const html = markdownParser.renderer.render(
            tokens,
            { breaks: false },
            undefined,
        );

        // Plain text: the content of all text/inline tokens, joined by single spaces.
        const pieces: string[] = [];
        for (const piece of tokens) {
            if (piece.type === "text" || piece.type === "inline") {
                pieces.push(piece.content);
            }
        }
        const text = pieces.join(" ").trim();

        const ownClasses =
            _popKey(spanAttributes, "class", (raw) => raw.split(" ")) ?? [];
        const classes = new Set(
            mainClass === undefined ? ownClasses : [...ownClasses, mainClass],
        );
        // FIXME: Code blocks in containers with language specified have the class name `undefined<language>`.

        return { spanAttributes, classes, html, text };
    }
    
    /**
     * Try to turn a run of tokens into a rendered resolution or todo card.
     *
     * If the tokens describe a resolution, it is numbered, checked against the
     * resolutions it revokes/modifies, appended to `sharedState.resolutions` and
     * rendered. If they describe a todo, the todo is rendered. Otherwise the tokens
     * must not carry any attributes, and — unless they belong to one of the alert
     * containers (`warning`, `Meinungsbild`, `GO-Antrag`) — no classes either.
     *
     * @param tokens Tokens making up the potential card.
     * @param sharedState Per-transcript state; `nextResoNumbers` and `resolutions`
     *        are updated when a resolution is found.
     * @param mainClass Name of the surrounding container, if the tokens came from one.
     * @returns An `html_block` token holding the rendered card, or `undefined` if the
     *          tokens are neither a resolution nor a todo.
     * @throws Error if a resolution revokes/modifies itself or a newer resolution.
     * @throws TypeError if a revoked/modified resolution number cannot be parsed, or
     *         if unknown attributes/classes are present (plus whatever the
     *         extraction helpers throw).
     */
    function tryExtractCards(
        tokens: Token[],
        sharedState: {
            nextResoNumbers: {
                B: number;
                F: number;
                P: number;
            };
            resolutions: Resolution[];
            data: FinishedTranscriptMeta;
        },
        mainClass?: string,
    ): Token | undefined {
        let rendered: undefined | string = undefined;

        const base = _extractBase(tokens, mainClass);

        const resolution = tryExtractResolution(base, sharedState.data.date);
        if (resolution) {
            resolution.number =
                sharedState.data.number
                + "."
                + sharedState.nextResoNumbers[resolution.type]
                + resolution.type;

            // Metadata has been validated by the caller before any card is extracted.
            const currentTranscriptNumber = sharedState.data.number!;

            // Check if Resolutions that are changed are in the past.
            const changedResolutions = (resolution.revokes ?? []).concat(
                resolution.modifies ?? [],
            );
            const touchesSelfOrFuture = changedResolutions.some(
                (other: string) => {
                    const transcriptMatch = /^(\d+)/.exec(other);
                    if (transcriptMatch === null) {
                        throw new TypeError(
                            `Revokes or modifies tag ${other} should start with a Number.`,
                        );
                    }
                    const otherTranscriptNumber = parseInt(
                        transcriptMatch[1],
                        10,
                    );

                    if (otherTranscriptNumber > currentTranscriptNumber)
                        return true;

                    if (otherTranscriptNumber < currentTranscriptNumber)
                        return false;

                    // Same transcript: the referenced resolution must already have
                    // been numbered, i.e. lie below the next free number of its type.
                    const resolutionMatch = /\.(\d+)([FBP])$/.exec(other);
                    if (resolutionMatch === null) {
                        throw new TypeError(
                            `Revokes or modifies tag ${other} is malformed.`,
                        );
                    }
                    const otherResolutionNumber = parseInt(
                        resolutionMatch[1],
                        10,
                    );
                    // The regex only admits F, B or P here.
                    const otherType = resolutionMatch[2] as Resolution["type"];

                    return (
                        otherResolutionNumber
                        >= sharedState.nextResoNumbers[otherType]
                    );
                },
            );
            if (touchesSelfOrFuture) {
                throw new Error(
                    `Resolutions should not revoke themselves or newer resolutions. But ${resolution.number} is doing so.`,
                );
            }

            sharedState.nextResoNumbers[resolution.type] += 1;

            rendered = renderResolutionToHtml(resolution);

            sharedState.resolutions.push(resolution);
        }

        const todo = tryExtractNewTodo(base);
        if (todo) rendered = renderTodoToHtml(todo);

        if (rendered !== undefined) {
            const res = new Token("html_block", "div", 0);
            res.content = rendered;
            return res;
        }

        // Neither resolution nor todo: nothing may be left that we do not understand.
        const { spanAttributes, classes } = base;
        _assertEmpty(spanAttributes, "unknown card");
        const isAlertContainer = ["warning", "Meinungsbild", "GO-Antrag"].some(
            (alertClass) => classes.has(alertClass),
        );
        if (!isAlertContainer) {
            _assertEmpty(classes, "unknown card");
        }
        return undefined;
    }
    
    /**
     * Try to interpret an extracted card as a resolution.
     *
     * Consumes the `resolution` class and all resolution attributes from `base`
     * (both its class set and its attribute map are mutated) and validates them.
     *
     * @param base Attributes, classes, HTML and text of the card, as produced by
     *        `_extractBase`.
     * @param date Date of the transcript; stored on the resolution and used as the
     *        reference point for expiry checks.
     * @returns The resolution with the placeholder number "???", or `null` if the
     *          card does not carry the `resolution` class.
     * @throws TypeError if a required attribute (`yes`, `no`, `abstention`,
     *         `result`) is missing or invalid, `money-granted` is malformed, the
     *         expiry attributes are inconsistent, unknown attributes/classes remain,
     *         or the stated result contradicts the vote counts.
     * @throws Error if an entry of `modifies`/`revokes` is not a well-formed
     *         resolution number.
     */
    function tryExtractResolution(
        base: {
            spanAttributes: Map<string, string>;
            classes: Set<string>;
            html: string;
            text: string;
        },
        date: string,
    ): Resolution | null {
        const { spanAttributes, classes, html, text } = base;

        if (!classes.delete("resolution")) {
            // no resolution class given -> bail gracefully, as this is simply no resolution
            return null;
        }

        let type: Resolution["type"] = "B"; // regular resolution
        if (classes.delete("fin")) {
            type = "F"; // financial resolution
        } else if (classes.delete("transcript")) {
            type = "P"; // transcript resolution
        }

        const yes =
            _popKey(
                spanAttributes,
                "yes",
                (yes) => parseInt(yes, 10),
                "Beschluss muss ein `yes`-Attribut (Ja-Stimmen) enthalten",
            ) ?? NaN;

        const no =
            _popKey(
                spanAttributes,
                "no",
                (no) => parseInt(no, 10),
                "Beschluss muss ein `no`-Attribut (Nein-Stimmen) enthalten",
            ) ?? NaN;

        const abstention =
            _popKey(
                spanAttributes,
                "abstention",
                (abstention) => parseInt(abstention, 10),
                "Beschluss muss ein `abstention`-Attribut (Enthaltungen) enthalten",
            ) ?? NaN;

        if (Number.isNaN(yes) || Number.isNaN(no) || Number.isNaN(abstention)) {
            throw new TypeError(
                "\n❌❌❌❌❌❌\nBeschlüsse müssen Integer-Werte für Ja, Nein und Enthaltungen aufweisen. Schämen und korrigieren!\n❌❌❌❌❌❌",
            );
        }

        const result = _popKey(
            spanAttributes,
            "result",
            undefined,
            "Beschluss muss ein `result`-Attribut (Ergebnis 'Angenommen' oder 'Abgelehnt') enthalten",
            (result) => result === "Angenommen" || result === "Abgelehnt",
        );

        const moneyGranted = _popKey(
            spanAttributes,
            "money-granted",
            (moneyGranted) => {
                // The alternation must be grouped: without the group the pattern reads
                // as "starts with eur,ct" OR "ends with 0" and lets e.g. "12,345" or
                // "abc0" through.
                if (/^(?:\d+,\d\d|0)$/.test(moneyGranted)) {
                    return moneyGranted;
                } else {
                    throw new TypeError(
                        "'money-granted' must be in the format 'eur,ct' or '0'. Furthermore those should be numbers.",
                    );
                }
            },
        );

        const provisional = _popKey(spanAttributes, "provisional");
        const modifies = _popKey(spanAttributes, "modifies", (modifies) =>
            modifies.split(","),
        );
        const revokes = _popKey(spanAttributes, "revokes", (revokes) =>
            revokes.split(","),
        );
        // Check for Well-Formed Resolution numbers
        for (const changedResolution of (modifies ?? []).concat(revokes ?? [])) {
            const otherTranscriptNumber = /^(\d+)\./.exec(changedResolution);
            if (otherTranscriptNumber === null) {
                throw new Error(
                    `Revokes or modifies tag ${changedResolution} should start with a Number.`,
                );
            }

            // Handle old resolution numbers: only transcripts from number 418 on
            // carry the type letter suffix.
            const testRegex =
                parseInt(otherTranscriptNumber[1], 10) >= 418
                    ? /^\d+\.\d+[BFP]$/
                    : /^\d+\.\d+$/;
            if (!testRegex.test(changedResolution)) {
                throw new Error(
                    `Revokes or modifies tag ${changedResolution} is malformed.`,
                );
            }
        }

        const note = _popKey(spanAttributes, "note");
        let reason = _popKey(spanAttributes, "reason");
        // `forever` maps to null, anything else is parsed as a date.
        let till = _popKey(spanAttributes, "relevant-till", (till) =>
            till === "forever" ? null : new Date(till),
        );
        if (till === undefined && reason !== undefined) {
            throw new TypeError("Auslaufgrund impliziert Auslaufdatum.");
        }

        if (till instanceof Date && Number.isNaN(till.valueOf())) {
            throw new TypeError(
                "Auslaufdatum muss ein gültiges Datum oder `forever` sein.",
            );
        }
        if (till instanceof Date && till < new Date(date)) {
            throw new TypeError(
                "Auslaufdatum darf nicht in der Vergangenheit liegen.",
            );
        }

        if (till === undefined && type === "F" && moneyGranted !== undefined) {
            // I love hardcoding.
            // Financial resolutions with granted money and no explicit expiry expire
            // five months after the transcript date, with the fixed reason "314.1".
            till = new Date(date);
            till.setMonth(till.getMonth() + 5);
            reason = "314.1";
        }

        const relevant = till === undefined ? undefined : { till, reason };

        // Opt-out switch for the plausibility check of result vs. votes below.
        const nocheck = classes.delete("no_check_correct_result_resolution");

        // Everything known has been consumed; leftovers are unknown and rejected.
        _assertEmpty(spanAttributes, "resolution");
        _assertEmpty(classes, "resolution");

        const accepted = result === "Angenommen";

        if (!nocheck) {
            if (type === "F") {
                // Financial resolutions: yes votes times 1.5 must reach the total
                // number of votes, i.e. a two-thirds majority of all votes.
                const lacksTwoThirds = (yes * 3) / 2 < yes + no + abstention;
                if (lacksTwoThirds === accepted) {
                    throw new TypeError(
                        `Ein Fin Beschluss, hat ein nicht schlüssiges Ergebnis. Mit ${yes} Ja-Stimmen, ${no} Nein-Stimmen und ${abstention} Enthaltungen sollte nicht ${result} sein.`,
                    );
                }
            } else {
                // Other resolutions: more yes than no votes are needed.
                const lacksMajority = yes <= no;
                if (lacksMajority === accepted) {
                    throw new TypeError(
                        `Ein normaler Beschluss, hat ein nicht schlüssiges Ergebnis. Mit ${yes} Ja-Stimmen, ${no} Nein-Stimmen und ${abstention} Enthaltungen sollte nicht ${result} sein.`,
                    );
                }
            }
        }

        return {
            date,
            number: "???", // placeholder; the caller assigns the real number
            type: type,
            result: result,
            accepted: accepted,
            html,
            text,
            votes: {
                yes: yes,
                no: no,
                abstention: abstention,
            },
            money_granted: moneyGranted,
            provisional,
            modifies,
            revokes,
            note,
            relevant,
            no_link: false,
        };
    }
    
    /**
     * Try to interpret an extracted card as a todo.
     *
     * Consumes the `todo` class and the `team`/`people` attributes from `base`
     * (both its class set and its attribute map are mutated).
     *
     * @param base Attributes, classes, HTML and text of the card.
     * @returns The todo, or `null` if the card does not carry the `todo` class.
     * @throws TypeError if attributes or classes other than the known ones remain.
     */
    function tryExtractNewTodo(base: {
        spanAttributes: Map<string, string>;
        classes: Set<string>;
        html: string;
        text: string;
    }): Todo | null {
        const markedAsTodo = base.classes.delete("todo");
        if (!markedAsTodo) {
            // Not a todo at all; leave everything else untouched.
            return null;
        }

        const attrs = base.spanAttributes;
        const team = _popKey(attrs, "team");
        const people = _popKey(attrs, "people");

        // Whatever is still present is not understood by todos.
        _assertEmpty(attrs, "todo");
        _assertEmpty(base.classes, "todo");

        return { html: base.html, text: base.text, team, people };
    }