Source code for data.pybbbc_loader

import sys
from pybbbc import BBBC021


[docs]
def download_bbbc021(data_root: str = "/scratch/cv-course2025/group8") -> None:
    """Download the complete BBBC021 dataset into ``data_root``.

    This is a thin wrapper: the actual work is delegated to
    ``BBBC021.download`` from the pybbbc library, after which a short
    confirmation message is printed.

    Args:
        data_root: Root directory where the dataset will be stored.

    Note:
        On RAMSES the default path can be used as-is; the data should
        already be present there.

    Dataset Structure:
        After the download, the directory layout is expected to look
        like::

            data_root/raw/
            ├── images/
            │   ├── Week1_22123/
            │   ├── Week1_22141/
            │   ├── Week1_22161/
            │   └── ... (all experimental weeks)
            └── some_metadata_file.csv

    Example:
        >>> # Download to the default location
        >>> download_bbbc021()

        >>> # Download to a custom location
        >>> download_bbbc021("/path/to/my/data/bbbc021")

    References:
        Dataset: https://bbbc.broadinstitute.org/BBBC021
        pybbbc docs: https://github.com/giacomodeodato/pybbbc
    """
    # Hand the download over to pybbbc.
    BBBC021.download(root_path=data_root)

    # Report where the files were placed.
    status = f"BBBC021 dataset downloaded and extracted to {data_root}."
    print(status)

    

[docs]
def preprocess_bbbc021(data_root: str = "/scratch/cv-course2025/group8") -> None:
    """Preprocess the BBBC021 dataset located under ``data_root``.

    The preprocessing itself is carried out by ``BBBC021.make_dataset``
    from the pybbbc library; see the pybbbc documentation for details on
    what it does. A short confirmation message is printed afterwards.

    Args:
        data_root: Root directory where the raw dataset is stored.
    """
    # Let pybbbc build the dataset structure from the raw files.
    BBBC021.make_dataset(root_path=data_root)

    status = f"BBBC021 dataset preprocessed and ready for use at {data_root}."
    print(status)


if __name__ == "__main__":
    # An optional first command-line argument overrides the default data root.
    data_root = sys.argv[1] if len(sys.argv) > 1 else "/scratch/cv-course2025/group8"

    # Fetch the raw data first, then run the preprocessing on the same root.
    download_bbbc021(data_root)
    preprocess_bbbc021(data_root)
    print(f"BBBC021 dataset is ready for use at {data_root}/processed")

# Usage example: python3 data/pybbbc_loader.py /custom/data/path