# Source code for backend.app.api.datasets

from typing import Annotated

from fastapi import APIRouter, Depends, HTTPException
from sqlmodel import Session, col, func, select  # noqa: F401

from backend.app.models.dataset import Dataset
from backend.app.models.site import Site
from backend.app.models.voc_subclass import VocSubclass
from backend.app.utils.query_utils import (
    get_all_voc_subclass_ancestor_ids_by_voc_subclass_id,
)
from backend.database import get_db

dataset_router = APIRouter(prefix="/datasets", tags=["Datasets"])


@dataset_router.post("/")
async def create_dataset(
    dataset: Dataset,
    db: Annotated[Session, Depends(get_db)],
):
    """
    Store a new dataset.

    The submitted dataset is added to the session, committed, and then
    refreshed from the database before being handed back to the caller.

    :param dataset: The dataset to be stored.
    :type dataset: Dataset
    :param db: The database session.
    :type db: Session
    :return: The stored dataset, refreshed after the commit.
    :rtype: Dataset
    """
    db.add(dataset)
    db.commit()
    # Reload the instance's state from the database after the commit.
    db.refresh(dataset)
    return dataset
@dataset_router.get("/")
async def get_datasets(db: Annotated[Session, Depends(get_db)]):
    """
    Retrieve all datasets.

    This endpoint retrieves all datasets stored in the database.

    :param db: The database session.
    :type db: Session
    :return: A list of all datasets.
    :rtype: list[Dataset]
    """
    # Use the select()/exec() API like the other endpoints of this router
    # instead of the legacy Session.query() interface.
    return db.exec(select(Dataset)).all()
@dataset_router.get("/by-site/{site_id}")
async def get_datasets_by_site_id(
    site_id: int,
    db: Annotated[Session, Depends(get_db)],
):
    """
    List the datasets that belong to one site.

    Selects every dataset whose ``site_id`` equals the given ID.

    :param site_id: The ID of the site.
    :type site_id: int
    :param db: The database session.
    :type db: Session
    :return: All datasets whose ``site_id`` matches; empty if there are none.
    :rtype: list[Dataset]
    """
    belongs_to_site = Dataset.site_id == site_id
    return db.exec(select(Dataset).where(belongs_to_site)).all()
@dataset_router.get("/by-country/{country}")
async def get_datasets_by_country(
    country: str, db: Annotated[Session, Depends(get_db)]
):
    """
    Retrieve datasets by country.

    This endpoint retrieves datasets whose site lies in a specific country.

    :param country: The name of the country (taken from the URL path).
    :type country: str
    :param db: The database session.
    :type db: Session
    :return: A list of datasets associated with the specified country.
    :rtype: list[Dataset]
    """
    # The path placeholder must be a valid identifier that matches the
    # function parameter, otherwise the value is never bound from the URL.
    #
    # A column of a related model cannot be reached through the class-level
    # relationship attribute (``Dataset.site.country``); join to Site and
    # filter on its column instead.
    # NOTE(review): assumes ``Dataset.site`` is the relationship to Site.
    statement = (
        select(Dataset).join(Dataset.site).where(Site.country == country)
    )
    return db.exec(statement).all()
@dataset_router.get("/by-subclass/{subclass_name}")
async def get_datasets_by_subclass_name(
    subclass_name: str, db: Annotated[Session, Depends(get_db)]
):
    """
    Retrieve datasets by VOC subclass name.

    This endpoint retrieves datasets associated with a specific VOC subclass.
    This checks multiple cases:

    - is VOC subclass directly associated with dataset?
    - is VOC subclass indirectly associated with dataset, \
      i.e. is an ancestor of directly linked VOC Subclass
    - is VOC subclass associated with specific VOC, \
      that is directly associated with this dataset?
    - is VOC subclass indirectly associated with specific VOC, \
      that is directly associated with this dataset, i.e.\
      is VOC Subclass ancestor of directly linked VOCs Subclass

    :param subclass_name: The name of the VOC subclass.
    :type subclass_name: str
    :param db: The database session.
    :type db: Session
    :return: A list of datasets associated with the specified VOC subclass.
    :rtype: list[Dataset]
    :raise HTTPException: 404 if the subclass name is unknown or if no
        dataset refers to it
    """
    # Resolve the name to its subclass; an unknown name is a client error
    # (404) rather than an unhandled "no result" exception (500).
    given_subclass = db.exec(
        select(VocSubclass).where(VocSubclass.name == subclass_name)
    ).one_or_none()
    if given_subclass is None:
        raise HTTPException(
            status_code=404,
            detail=f"VOC subclass '{subclass_name}' not found",
        )
    given_subclass_id = given_subclass.id

    # The same subclass is typically linked to many datasets; remember the
    # ancestor lookup per subclass id so it runs only once per request.
    ancestor_cache = {}

    def ancestor_ids_of(voc_subclass_id):
        if voc_subclass_id not in ancestor_cache:
            ancestor_cache[voc_subclass_id] = tuple(
                get_all_voc_subclass_ancestor_ids_by_voc_subclass_id(
                    voc_subclass_id, db
                )
            )
        return ancestor_cache[voc_subclass_id]

    # TODO: There might be a more elegant solution for this function
    # by utilizing more sql functionality
    relevant_datasets = []
    for dataset in db.exec(select(Dataset)):
        # Subclasses linked to the dataset itself ...
        linked_subclass_ids = [
            subclass.id for subclass in dataset.voc_subclasses
        ]
        # ... plus the subclasses of the VOCs linked to the dataset.
        linked_subclass_ids.extend(
            voc.voc_subclass.id for voc in dataset.vocs if voc.voc_subclass
        )

        # The dataset is relevant when the requested subclass appears in the
        # ancestor ids reported for any of its linked subclasses.
        if any(
            given_subclass_id in ancestor_ids_of(linked_id)
            for linked_id in linked_subclass_ids
        ):
            relevant_datasets.append(dataset)

    if not relevant_datasets:
        raise HTTPException(
            status_code=404,
            detail="No datasets found that refer to your"
            " specified VOC subgroup",
        )

    return relevant_datasets
# The router already carries the "/datasets" prefix, so the route itself
# must not repeat it (that would yield "/datasets/datasets/by-area/...").
@dataset_router.get("/by-area/{min_lon}/{min_lat}/{max_lon}/{max_lat}/")
async def get_datasets_within_area(
    min_lon: float,
    min_lat: float,
    max_lon: float,
    max_lat: float,
    db: Annotated[Session, Depends(get_db)],
):
    """
    Retrieve datasets within a specified area.

    This endpoint retrieves datasets whose site is \
    located within a specified bounding box.

    :param min_lon: Minimum longitude of the bounding box.
    :type min_lon: float
    :param min_lat: Minimum latitude of the bounding box.
    :type min_lat: float
    :param max_lon: Maximum longitude of the bounding box.
    :type max_lon: float
    :param max_lat: Maximum latitude of the bounding box.
    :type max_lat: float
    :param db: The database session.
    :type db: Session
    :return: A list of datasets within the specified bounding box.
    :rtype: list[Dataset]
    :raise HTTPException: no datasets found in that area
    """
    # Closed WKT ring describing the bounding box (first point == last point).
    bbox = (
        f"POLYGON(({min_lon} {min_lat}, {min_lon} {max_lat}, {max_lon}"
        f" {max_lat}, {max_lon} {min_lat}, {min_lon} {min_lat}))"
    )

    # SQL functions are generated through ``func`` directly, and the Site
    # columns become available by joining via the relationship;
    # ``Dataset.site.func`` is not a valid expression.
    # NOTE(review): assumes ``Dataset.site`` is the relationship to Site.
    statement = (
        select(Dataset)
        .join(Dataset.site)
        .where(
            func.ST_Within(
                Site.geo_location, func.ST_GeomFromText(bbox, 4326)
            )
        )
    )

    results = db.exec(statement).all()

    if not results:
        raise HTTPException(
            status_code=404, detail="No datasets found in the specified area"
        )

    return results