Source code for data.pybbbc_loader
import sys
from pybbbc import BBBC021
[docs]
def download_bbbc021(
    data_root: str = "/scratch/cv-course2025/group8",
) -> None:
    """Fetch the full BBBC021 dataset into ``data_root``.

    This is a thin convenience wrapper: all of the work is delegated to
    ``BBBC021.download`` from the pybbbc library, after which a short
    confirmation line is printed.

    Args:
        data_root: Root directory under which the dataset is stored.

    Note:
        On RAMSES the default path can be used as-is; the data should
        already be present there.

    Dataset Structure:
        Once the download has finished, the layout is expected to look
        like::

            data_root/raw/
            ├── images/
            │   ├── Week1_22123/
            │   ├── Week1_22141/
            │   ├── Week1_22161/
            │   └── ... (all experimental weeks)
            └── some_metadata_file.csv

    Example:
        >>> # Use the default location
        >>> download_bbbc021()
        >>> # Use a custom location
        >>> download_bbbc021("/path/to/my/data/bbbc021")

    References:
        Dataset: https://bbbc.broadinstitute.org/BBBC021
        pybbbc docs: https://github.com/giacomodeodato/pybbbc
    """
    # pybbbc handles fetching the dataset files.
    BBBC021.download(root_path=data_root)

    completion_msg = f"BBBC021 dataset downloaded and extracted to {data_root}."
    print(completion_msg)
[docs]
def preprocess_bbbc021(
    data_root: str = "/scratch/cv-course2025/group8",
) -> None:
    """Run pybbbc's preprocessing over a downloaded BBBC021 dataset.

    The work is delegated to ``BBBC021.make_dataset``; see the pybbbc
    documentation for details of what the preprocessing does. A short
    confirmation line is printed once the call returns.

    Args:
        data_root: Root directory where the raw dataset is stored.
    """
    # pybbbc builds the dataset structure from the raw files.
    BBBC021.make_dataset(root_path=data_root)

    completion_msg = f"BBBC021 dataset preprocessed and ready for use at {data_root}."
    print(completion_msg)
if __name__ == "__main__":
    # Usage: python3 data/pybbbc_loader.py /custom/data/path
    #
    # The first command-line argument, when supplied, overrides the default
    # data root; any further arguments are ignored.
    data_root = sys.argv[1] if len(sys.argv) > 1 else "/scratch/cv-course2025/group8"

    # Download first, then preprocess the same root.
    download_bbbc021(data_root)
    preprocess_bbbc021(data_root)

    print(f"BBBC021 dataset is ready for use at {data_root}/processed")