Source code for squadds.database.hf_dataset_ops

"""Helpers for Hugging Face dataset naming and uploads."""

from __future__ import annotations

import hashlib
import os
from datetime import datetime


def build_dataset_name(
    components: list[str],
    data_type: str,
    data_nature: str,
    data_source: str,
    institute: str | None,
    pi_name: str | None,
    date: str | None = None,
) -> str:
    """Compose the legacy dataset repository name with a short hash suffix.

    The name is the underscore-joined sequence
    ``<components>_<data_type>_<data_nature>_<data_source>_<institute>_<pi_name>_<date>``
    followed by ``_`` and the first 8 hex characters of the SHA-256 digest of
    that sequence.

    Args:
        components: Component names; joined with ``-`` to form the first field.
        data_type: Second field of the name.
        data_nature: Third field of the name.
        data_source: Fourth field of the name.
        institute: Fifth field; ``None`` is rendered as the text ``None``.
        pi_name: Sixth field; ``None`` is rendered as the text ``None``.
        date: Date field. When falsy (``None`` or ``""``), the current local
            date formatted as ``%Y%m%d`` is used instead.

    Returns:
        The full name, i.e. the joined fields plus the 8-character hash suffix.
    """
    component_field = "-".join(components)
    stamp = date if date else datetime.now().strftime("%Y%m%d")

    fields = (
        component_field,
        data_type,
        data_nature,
        data_source,
        institute,
        pi_name,
        stamp,
    )
    # format(value) is what an f-string placeholder applies, so None and
    # other non-string values render exactly as they did in the legacy name.
    stem = "_".join(format(field) for field in fields)

    digest = hashlib.sha256(stem.encode()).hexdigest()
    return f"{stem}_{digest[:8]}"
def ensure_dataset_repository(api, token: str | None, dataset_name: str) -> None:
    """Try to create the dataset repository, reporting the outcome via print.

    This is a best-effort call: any ``Exception`` raised while creating the
    repository is caught and printed rather than propagated, which keeps the
    existing print-based user experience.

    Args:
        api: Object exposing ``create_repo(repo_id=..., token=...,
            repo_type=...)`` (presumably a ``huggingface_hub.HfApi``; confirm
            against the caller).
        token: Access token forwarded to ``create_repo``; may be ``None``.
        dataset_name: Repository id to create.
    """
    request = {
        "repo_id": dataset_name,
        "token": token,
        "repo_type": "dataset",
    }
    try:
        api.create_repo(**request)
        print(f"Dataset repository {dataset_name} created.")
    except Exception as exc:
        # Deliberately broad: creation failures are reported, never raised.
        print(f"Error creating dataset repository: {exc}")
def upload_dataset_files(api, token: str | None, dataset_name: str, files: list[str]) -> None:
    """Upload each file to the dataset repository, printing one line per file.

    Every file is attempted independently: an ``Exception`` raised for one
    file is caught and printed, and the loop moves on to the next file.

    Args:
        api: Object exposing ``upload_file(path_or_fileobj=..., path_in_repo=...,
            repo_id=..., repo_type=..., token=...)`` (presumably a
            ``huggingface_hub.HfApi``; confirm against the caller).
        token: Access token forwarded to ``upload_file``; may be ``None``.
        dataset_name: Repository id that receives the files.
        files: Local paths to upload. Each file is stored in the repository
            under its base name only, without its directory part.
    """
    for local_path in files:
        try:
            # The base name is computed inside the try so that an unusable
            # path entry is reported like any other upload failure.
            request = {
                "path_or_fileobj": local_path,
                "path_in_repo": os.path.basename(local_path),
                "repo_id": dataset_name,
                "repo_type": "dataset",
                "token": token,
            }
            api.upload_file(**request)
            print(f"Uploaded {local_path} to {dataset_name}.")
        except Exception as exc:
            # Deliberately broad: one failed upload must not stop the rest.
            print(f"Error uploading file {local_path}: {exc}")