Facts4Chat v1.0.0

b386fbd6 · Nico Jeske · b386fbd6 · b386fbd6 · b386fbd6 · b386fbd6
Commit b386fbd6 authored 9 months ago by Nico Jeske
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/python
+{
+    "name": "Python 3",
+    "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
+    "postCreateCommand": "bash ./.devcontainer/post-install.sh",
+    "postStartCommand": "git config --global --add safe.directory ${containerWorkspaceFolder}",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "ms-python.python",
+                "vivaxy.vscode-conventional-commits",
+                "ms-python.black-formatter",
+                "ms-python.isort"
+            ],
+            "settings": {
+                "[python]": {
+                    "editor.defaultFormatter": "ms-python.black-formatter",
+                    "editor.formatOnSave": true,
+                    "editor.codeActionsOnSave": {
+                        "source.organizeImports": "always"
+                    }
+                },
+                "isort.args": [
+                    "--profile",
+                    "black"
+                ],
+                "python.analysis.typeCheckingMode": "basic",
+                "python.analysis.autoImportCompletions": true
+            }
+        }
+    },
+    "remoteUser": "vscode",
+    // Uncomment "--gpus=all" below to allow the container to use the GPU
+    "runArgs": [
+        // "--gpus=all"
+    ]
+}
--- a/.devcontainer/post-install.sh
+++ b/.devcontainer/post-install.sh
+#!/bin/bash
+git config --global --add safe.directory /workspaces/backend  # mark the workspace checkout as a safe git directory
+
+echo "Installing poetry..."
+pipx install poetry  # Step 1: install Poetry via pipx
+
+# Step 2: Ask the user for their GitLab PAT (Personal Access Token)
+read -p "Please enter your PAT for GITLAB (see readme): " pat  # NOTE(review): input is echoed to the terminal; consider `read -rs`
+
+# Use the PAT to access the private GitLab repository
+echo "Storing PAT in poetry config..."
+poetry config http-basic.database_api "__token__" "$pat"  # HTTP basic credentials for the "database_api" source
+
+# Step 3: Install poetry dependencies (including dev)
+echo "Installing dependencies..."
+poetry install --with=dev --no-root
+
+poetry run pre-commit install  # install the default pre-commit hook
+poetry run pre-commit install --hook-type commit-msg  # additionally install the commit-msg hook
--- a/.dockerignore
+++ b/.dockerignore
+.devcontainer
+.gitlab
+.vscode
+.env
+venv
--- a/.gitignore
+++ b/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+.devcontainer/tempo-data/
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
+# This file is a template, and might need editing before it works on your project.
+# To contribute improvements to CI/CD templates, please follow the Development guide at:
+# https://docs.gitlab.com/ee/development/cicd/templates.html
+# This specific template is located at:
+# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Docker.gitlab-ci.yml
+
+# Build a Docker image with CI/CD and push to the GitLab registry.
+# Docker-in-Docker documentation: https://docs.gitlab.com/ee/ci/docker/using_docker_build.html
+#
+# This template uses one generic job with conditional builds
+# for the default branch and all other (MR) branches.
+
+docker-build:
+  # Use the official docker image.
+  image: docker:cli
+  stage: build
+  services:
+    - docker:dind
+  variables:
+    DOCKER_IMAGE_NAME: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
+  before_script:
+    - docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY
+  # All branches are tagged with $DOCKER_IMAGE_NAME (defaults to commit ref slug)
+  # Default branch is also tagged with `latest`
+  script:
+    - docker pull $CI_REGISTRY_IMAGE:dev || true
+    - docker build --pull -t "$DOCKER_IMAGE_NAME" --cache-from $CI_REGISTRY_IMAGE:dev . --build-arg GITLAB_TOKEN_USER=$BACKEND_REPO_TOKEN
+    - docker push "$DOCKER_IMAGE_NAME"
+    - |
+      if [[ "$CI_COMMIT_BRANCH" == "$CI_DEFAULT_BRANCH" ]]; then
+        docker tag "$DOCKER_IMAGE_NAME" "$CI_REGISTRY_IMAGE:latest"
+        docker push "$CI_REGISTRY_IMAGE:latest"
+      fi
+  # Run this job in a branch where a Dockerfile exists
+  rules:
+    - if: $CI_COMMIT_BRANCH
+      exists:
+        - Dockerfile
--- a/.gitlab/merge_request_templates/default.md
+++ b/.gitlab/merge_request_templates/default.md
+<!--
+This is the default MR template for this project.
+Consider using this format. Or propose a new format, if this is insufficient :)
+-->
+
+## Summary
+
+This is my new feature/bugfix/whatever...
+
+## TODOs
+
+<!--
+A list of open tasks that have to be finished before merging the MR.
+-->
+- [ ] task1
+- [ ] task2
+- [ ] task3
+
+## Proposed merge commit message
+
+<!--
+This is an example commit for the MR.
+All commits of a merge request will be squashed into one MR commit.
+The following message should be used as the squashed commit message.
+Please follow https://www.conventionalcommits.org
+-->
+```text
+feat(my_feature): ✨ add my_features
+```
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
+# Hooks based on
+# - https://www.architecture-performance.fr/ap_blog/some-pre-commit-git-hooks-for-python/
+# - https://github.com/pre-commit/pre-commit-hooks
+#
+# All package versions are the latest as of 2021-10-20
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-ast # Checks that Python code can be parsed
+      - id: check-merge-conflict # Check for files that contain merge conflict strings
+      - id: mixed-line-ending # Replace with most frequent line ending
+        args:
+          - "--fix=lf"
+      - id: no-commit-to-branch # Prevent direct commits to protected branches (main and master by default)
+      - id: trailing-whitespace # Trims trailing whitespace
+      - id: end-of-file-fixer # Makes sure files end in a newline and only a newline
+
+  - repo: https://github.com/psf/black # Code formatter (PEP 8 compliant) also works with Jupyter notebooks
+    rev: 23.10.0
+    hooks:
+    -   id: black-jupyter
+
+  - repo: https://github.com/hadialqattan/pycln # removes unused imports
+    rev: v2.3.0
+    hooks:
+    -   id: pycln
+        args: [--config=pyproject.toml]
+
+  - repo: https://github.com/pycqa/isort # Sorts imports
+    rev: 5.12.0
+    hooks:
+    -   id: isort
+        files: "\\.(py)$"
+        args: [--settings-path=pyproject.toml]
+
+  - repo: https://github.com/compilerla/conventional-pre-commit # enforce conventional commit messages
+    rev: v2.4.0
+    hooks:
+      - id: conventional-pre-commit
+        stages: [commit-msg]
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Django",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/manage.py",
+            "args": [
+                "runserver"
+            ],
+            "django": true,
+            "justMyCode": true
+        }
+    ]
+}
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
+{
+    "conventionalCommits.scopes": [
+        "Query",
+        "Voting",
+        "Retrieval",
+        "Docker",
+        "Configuration",
+        "LLM",
+        "API",
+        "User_QA",
+        "Assistants",
+        "data-tracking"
+    ],
+    "python.testing.pytestArgs": [
+        "."
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}
--- a/Dockerfile
+++ b/Dockerfile
+# syntax=docker/dockerfile:1
+# Keep this syntax directive! It's used to enable Docker BuildKit
+ARG GITLAB_TOKEN_USER
+
+################################
+# PYTHON-BASE
+# Sets up all our shared environment variables
+################################
+FROM python:3.12-slim as python-base
+
+    # Python
+ENV PYTHONUNBUFFERED=1 \
+    # pip
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    PIP_DEFAULT_TIMEOUT=100 \
+    # Poetry
+    # https://python-poetry.org/docs/configuration/#using-environment-variables
+    POETRY_VERSION=1.7.1 \
+    POETRY_HOME="/opt/poetry" \
+    POETRY_NO_INTERACTION=1 \
+    POETRY_VIRTUALENVS_CREATE=false \
+    VIRTUAL_ENV="/venv"
+
+# prepend poetry and venv to path
+ENV PATH="$POETRY_HOME/bin:$VIRTUAL_ENV/bin:$PATH"
+
+# prepare virtual env
+RUN python -m venv $VIRTUAL_ENV
+
+# working directory and Python path
+WORKDIR /app
+ENV PYTHONPATH="/app:$PYTHONPATH"
+
+################################
+# BUILDER-BASE
+# Used to build deps + create our virtual environment
+################################
+FROM python-base as builder-base
+ARG GITLAB_TOKEN_USER
+RUN apt-get update && \
+    apt-get install -y \
+    apt-transport-https \
+    gnupg \
+    ca-certificates \
+    curl
+
+
+# install poetry - respects $POETRY_VERSION & $POETRY_HOME
+# The --mount will mount the buildx cache directory to where
+# Poetry and Pip store their cache so that they can re-use it
+RUN --mount=type=cache,target=/root/.cache \
+    curl -sSL https://install.python-poetry.org | python -
+
+# used to init dependencies
+WORKDIR /app
+COPY poetry.lock pyproject.toml ./
+
+# Login to private gitlab registry
+RUN poetry config http-basic.database_api "__token__" "$GITLAB_TOKEN_USER"
+
+# install runtime deps to $VIRTUAL_ENV
+RUN --mount=type=cache,target=/root/.cache \
+    poetry install --no-root --only main
+
+################################
+# PRODUCTION
+# Final image used for runtime
+################################
+FROM python-base as production
+
+# copy in our built poetry + venv
+COPY --from=builder-base $POETRY_HOME $POETRY_HOME
+COPY --from=builder-base $VIRTUAL_ENV $VIRTUAL_ENV
+
+WORKDIR /app
+COPY ./entrypoint.sh .
+RUN chmod 755 /app/entrypoint.sh
+COPY . .
+
+# Use gunicorn as the WSGI server instead of the development server
+ENTRYPOINT ["/app/entrypoint.sh"]
+CMD ["gunicorn", "--bind", "0.0.0.0:8000", "djangoConfig.wsgi:application", "--worker-class", "gevent"]
--- a/LICENSE
+++ b/LICENSE
+MIT License
+
+Copyright (c) 2024 Ahmed Asakrah, Robin Ebbinghaus, Michael Frichert, Nico Jeske, Colin Kolbe, Benedikt Kordus, Jonas Röger, Daniel Spenner, project-group-662-tu-dortmund-computer-science
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
+# Facts4Chat Backend
+
+## Getting Started
+
+If you are using the devcontainer, you can skip to step 5.
+
+1. Generate a personal access token (PAT) for the Gitlab API.
+    - Go to https://gitlab.fachschaften.org/-/user_settings/personal_access_tokens
+    - The token needs the following scopes:
+        - read_api
+        - read_repository
+        - read_registry
+2. Install [Poetry](https://python-poetry.org/docs/#installation).
+3. Add credentials for the private [Database-API](https://gitlab.fachschaften.org/PG-Facts4Chat/datacollection/database-api) package registry:
+    ```bash
+    poetry config http-basic.database_api __token__ <your-access-token>
+    ```
+4. Install dependencies with `poetry install --with=dev --no-root`.
+5. Copy `djangoConfig/.env.template` to `djangoConfig/.env`.
+6. Fill in all options in `djangoConfig/.env`.
+7. Run `poetry run python manage.py migrate` to create the database.
+8. Run `poetry run python manage.py runserver` to start the server.
+
+The API is now available at `http://localhost:8000/api/`.
+Documentation is available at `http://localhost:8000/api/docs/`.
+
+## Access the admin panel
+
+To change configuration and add data, access the admin panel at `http://localhost:8000/admin/`.
+Create a superuser with `poetry run python manage.py createsuperuser`.
+
+## Changing models
+
+After changing models run `poetry run python manage.py makemigrations` and `poetry run python manage.py migrate` to update the database.
+
+## Using the external model and retriever
+
+Use SSH tunneling (port forwarding) to access the external model, the retriever, and S3. For example:
+```bash
+ssh ... -L 1234:localhost:1234 -L 7000:localhost:7000 -L 9000:localhost:9000
+```
+Then set the model and retriever endpoints to `http://localhost:1234` and `http://localhost:7000` and the s3 endpoint to `http://localhost:9000`.
+
+If you are running the project in a docker container using devcontainer, you can (normally) use `host.docker.internal` instead of `localhost` to access the host machine.
--- a/configuration/__init__.py
+++ b/configuration/__init__.py
--- a/configuration/admin.py
+++ b/configuration/admin.py
+from django.contrib import admin
+from solo.admin import SingletonModelAdmin  # type: ignore
+
+from configuration.models import (
+    ChromaDBRetrievalConfig,
+    EmbeddingModel,
+    Index,
+    ModelConfig,
+    OpenSearchRetrievalConfig,
+    RetrievalConfig,
+)
+from services.query.models import ConfigSnapshot
+
+# Register the configuration models with the Django admin site.
+admin.site.register(
+    [
+        ModelConfig,
+        RetrievalConfig,
+        OpenSearchRetrievalConfig,
+        ChromaDBRetrievalConfig,
+    ],
+    SingletonModelAdmin,  # admin class imported from solo.admin, applied to all four models above
+)
+
+
+class ConfigSnapshotAdmin(admin.ModelAdmin):  # admin options for ConfigSnapshot
+    list_display = (  # fields shown as columns in the admin change list
+        "id",
+        "modelConfig",
+        "retrieverConfig",
+        "openSearchRetrievalConfig",
+        "chromaDbRetrievalConfig",
+    )
+
+
+admin.site.register(ConfigSnapshot, ConfigSnapshotAdmin)
+admin.site.register(  # no admin class given, so the default ModelAdmin is used
+    [
+        EmbeddingModel,
+        Index,
+    ]
+)
--- a/configuration/apps.py
+++ b/configuration/apps.py
+from django.apps import AppConfig
+
+
+class ConfigurationConfig(AppConfig):  # Django application config for the "configuration" app
+    default_auto_field = "django.db.models.BigAutoField"  # implicit primary-key field type for this app's models
+    name = "configuration"  # dotted path of the app this config applies to
--- a/configuration/configurationHelper.py
+++ b/configuration/configurationHelper.py
+from typing import Type, TypeVar
+
+from django.db import models
+
+from configuration.models import (
+    ChromaDBRetrievalConfigHistory,
+    ModelConfig,
+    ModelConfigHistory,
+    OpenSearchRetrievalConfigHistory,
+    RetrievalConfig,
+    RetrievalConfigHistory,
+)
+from services.query.models import ConfigSnapshot
+
+T = TypeVar("T", bound=models.Model)
+
+
+class ConfigurationHelper:  # static accessors for the current config models and their history tables
+    @staticmethod
+    def _get_singleton_instance(type: Type[T]) -> T:  # NOTE(review): parameter name shadows the builtin ``type``
+        instance: T | None = type.objects.first()  # first row of the model's table, or None if there is none
+
+        if instance is None:
+            raise Exception(f"No {type} found")  # NOTE(review): generic Exception; callers cannot catch it selectively
+
+        return instance
+
+    @staticmethod
+    def get_model_config() -> ModelConfig:  # return the stored ModelConfig row; raises if none exists
+        return ConfigurationHelper._get_singleton_instance(ModelConfig)
+
+    @staticmethod
+    def get_retrieval_config() -> RetrievalConfig:  # return the stored RetrievalConfig row; raises if none exists
+        return ConfigurationHelper._get_singleton_instance(RetrievalConfig)
+
+    @staticmethod
+    def get_current_config_snapshot() -> ConfigSnapshot:  # build a ConfigSnapshot from the newest history rows; save() is not called here
+        modelConfig = ModelConfigHistory.objects.latest("created_at")  # presumably raises DoesNotExist when the table is empty - verify
+        retrievalConfig = RetrievalConfigHistory.objects.latest("created_at")
+        openSearchRetrievalConfig = OpenSearchRetrievalConfigHistory.objects.latest(
+            "created_at"
+        )
+        chromaDbRetrievalConfig = ChromaDBRetrievalConfigHistory.objects.latest(
+            "created_at"
+        )
+
+        return ConfigSnapshot(
+            modelConfig=modelConfig,
+            retrieverConfig=retrievalConfig,
+            openSearchRetrievalConfig=openSearchRetrievalConfig,
+            chromaDbRetrievalConfig=chromaDbRetrievalConfig,
+        )
--- a/configuration/migrations/0001_initial.py
+++ b/configuration/migrations/0001_initial.py
+# Generated by Django 4.2.7 on 2023-12-02 15:36
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # initial schema: creates ModelConfig, OpenSearchRetrievalConfig, RetrievalConfig and TgiConfig
+    initial = True
+
+    dependencies = []  # first migration of the app, nothing to depend on
+
+    operations = [
+        migrations.CreateModel(
+            name="ModelConfig",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "model_type",
+                    models.CharField(
+                        choices=[
+                            ("MOCK", "Mock Model"),
+                            ("TGI", "Text Generation Inference Model"),
+                        ],
+                        default="TGI",
+                        max_length=4,
+                    ),
+                ),
+                ("system_prompt", models.TextField(default="This is a test prompt.")),
+            ],
+        ),
+        migrations.CreateModel(
+            name="OpenSearchRetrievalConfig",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("operator", models.CharField(default="or", max_length=10)),
+                ("indices", models.TextField(default="development")),
+                ("analyzer", models.CharField(default="english", max_length=50)),
+                ("fuzziness", models.CharField(default="AUTO", max_length=10)),
+                ("prefix_length", models.IntegerField(default=2)),
+                ("minimum_should_match", models.IntegerField(default=1)),
+                ("max_length", models.IntegerField(default=5000)),
+            ],
+        ),
+        migrations.CreateModel(
+            name="RetrievalConfig",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "retrieval_type",
+                    models.CharField(
+                        choices=[
+                            ("MOCK", "Mock Retrieval"),
+                            ("OPNS", "OpenSearch Retrieval"),
+                        ],
+                        default="MOCK",
+                        max_length=4,
+                    ),
+                ),
+            ],
+        ),
+        migrations.CreateModel(
+            name="TgiConfig",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("max_new_tokens", models.IntegerField(default=512)),
+            ],
+        ),
+    ]
--- a/configuration/migrations/0002_modelconfig_huggingface_id.py
+++ b/configuration/migrations/0002_modelconfig_huggingface_id.py
+# Generated by Django 4.2.7 on 2023-12-20 12:32
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # adds the optional ModelConfig.huggingface_id column
+    dependencies = [
+        ("configuration", "0001_initial"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="modelconfig",
+            name="huggingface_id",
+            field=models.CharField(
+                blank=True,
+                default="",
+                help_text="Huggingface model id used for tokenization",
+                max_length=50,
+            ),
+        ),
+    ]
--- a/configuration/migrations/0003_modelconfig_max_tokens.py
+++ b/configuration/migrations/0003_modelconfig_max_tokens.py
+# Generated by Django 4.2.7 on 2023-12-20 13:51
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # adds ModelConfig.max_tokens (integer, default 1024)
+    dependencies = [
+        ("configuration", "0002_modelconfig_huggingface_id"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="modelconfig",
+            name="max_tokens",
+            field=models.IntegerField(default=1024),
+        ),
+    ]
--- a/configuration/migrations/0004_opensearchretrievalconfig_maximum_results_and_more.py
+++ b/configuration/migrations/0004_opensearchretrievalconfig_maximum_results_and_more.py
+# Generated by Django 4.2.7 on 2024-01-19 17:36
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # adds maximum_results and rerank_model to OpenSearchRetrievalConfig; adds help text to ModelConfig.max_tokens
+    dependencies = [
+        ("configuration", "0003_modelconfig_max_tokens"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="opensearchretrievalconfig",
+            name="maximum_results",
+            field=models.IntegerField(default=100),
+        ),
+        migrations.AddField(
+            model_name="opensearchretrievalconfig",
+            name="rerank_model",
+            field=models.CharField(
+                blank=True,
+                default="all-MiniLM-L6-v2",
+                help_text="model id used for reranking",
+                max_length=50,
+            ),
+        ),
+        migrations.AlterField(  # same type and default as before; only help_text is new
+            model_name="modelconfig",
+            name="max_tokens",
+            field=models.IntegerField(
+                default=1024, help_text="Maximum number of tokens"
+            ),
+        ),
+    ]