Module `qute.project`

Manage training projects.

Classes

class Project (config: qute.config._config.Config, clean: bool = False)

Sets up Project directory structure.

Parameters

config : Config: Configuration for current project.
clean : bool = False: Set to True to clean the project directory from failed or incomplete runs.

Expand source code

class Project:
    """Set up and manage the directory structure of a training project.

    The project directory contains a ``runs`` folder with one time-stamped
    sub-folder per run (each holding ``models`` and ``results`` sub-folders).
    Unless the configuration provides a target for predictions, a
    ``predictions`` folder with a time-stamped sub-folder is created as well.
    """

    # Format of the run (and default prediction) folder names: YYYYMMDD_HHMMSS
    _RUN_NAME_FORMAT = "%Y%m%d_%H%M%S"

    def __init__(self, config: "Config", clean: bool = False):
        """Create the project folders and start a new run.

        Parameters
        ----------

        config: qute.config.Config
            Configuration for current project.

        clean: bool = False
            Set to True to clean the project directory from failed or incomplete runs.

        Raises
        ------

        IOError
            If the source model path from the configuration does not exist or
            does not contain trained models.
        """

        # Store reference to the configuration
        self._config = config

        # Internal properties
        self._project_dir: Path = self._config.project_dir
        self._runs_dir: Path = self._project_dir / "runs"
        self._data_dir: Path = self._config.data_dir
        self._selected_model_path: Union[None, Path, str] = None
        self._target_for_prediction_path = self._config.target_for_prediction
        self._source_for_prediction_path = self._config.source_for_prediction

        # Set the model path (validates it and raises if it is not usable)
        self._set_selected_model(self._config.source_model_path)

        # Set up project
        self._project_dir.mkdir(exist_ok=True, parents=True)
        self._runs_dir.mkdir(exist_ok=True, parents=True)
        self._data_dir.mkdir(exist_ok=True, parents=True)

        # Create new run
        self._run_dir = None
        self._models_dir = None
        self._results_dir = None
        self.new_run()

        # Clean if needed (only an explicit True triggers the destructive clean-up)
        if clean is True:
            self.clean()

    @property
    def data_dir(self) -> Path:
        """Data directory taken from the configuration (read-only)."""
        return self._data_dir

    @data_dir.setter
    def data_dir(self, data_dir: Union[Path, str]):
        raise RuntimeError("Cannot override data_dir!")

    @property
    def selected_model_path(self) -> Union[None, Path]:
        """Path of the selected model (file or folder), or None if none is set."""
        return self._selected_model_path

    @selected_model_path.setter
    def selected_model_path(self, selected_model_path: Union[Path, str]):
        """Override the model from the configuration file."""
        self._set_selected_model(selected_model_path)

    @property
    def source_for_prediction(self) -> Path:
        """Source for prediction as read from the configuration."""
        return self._source_for_prediction_path

    @property
    def target_for_prediction(self) -> Path:
        """Folder where predictions are written."""
        return self._target_for_prediction_path

    @property
    def models_dir(self) -> Path:
        """``models`` sub-folder of the current run (read-only)."""
        return self._models_dir

    @models_dir.setter
    def models_dir(self, models_dir: Union[Path, str]):
        raise RuntimeError("Cannot override models_dir!")

    @property
    def results_dir(self) -> Path:
        """``results`` sub-folder of the current run (read-only)."""
        return self._results_dir

    @results_dir.setter
    def results_dir(self, results_dir: Union[Path, str]):
        raise RuntimeError("Cannot override results_dir!")

    @property
    def run_dir(self) -> Path:
        """Folder of the current run (read-only)."""
        return self._run_dir

    @run_dir.setter
    def run_dir(self, run_dir: Union[Path, str]):
        raise RuntimeError("Cannot override run_dir!")

    def new_run(self):
        """Create a new run with model and results subfolders.

        The run folder is named after the current time with one-second
        resolution. If a folder with that name already exists (e.g. two runs
        started within the same second), the time stamp is advanced one second
        at a time until an unused name is found, so that runs never collide.
        """
        stamp = datetime.now()
        while True:
            name = stamp.strftime(self._RUN_NAME_FORMAT)
            run_dir = self._runs_dir / name
            try:
                # Creating the folder is the uniqueness test: no check-then-create race
                run_dir.mkdir(parents=True)
            except FileExistsError:
                stamp = datetime.fromtimestamp(stamp.timestamp() + 1)
            else:
                break

        self._run_dir = run_dir
        self._models_dir = run_dir / "models"
        self._models_dir.mkdir()
        self._results_dir = run_dir / "results"
        self._results_dir.mkdir()
        (self._results_dir / "lightning_logs").mkdir()

        if self._target_for_prediction_path is None:
            # Create standard predictions location
            self._target_for_prediction_path = self._project_dir / "predictions" / name

        # Make sure the target exists (create if necessary)
        self._target_for_prediction_path.mkdir(exist_ok=True, parents=True)

    def copy_configuration_file(self):
        """Copy configuration file to run directory."""
        shutil.copy(self._config.config_file, self._run_dir)

    def store_best_score(self, monitor: str, score: float, fold: int = -1):
        """Store the best score to the run directory.

        Parameters
        ----------

        monitor: str
            Name of the monitored metric; written as the label of the score.

        score: float
            Best score; converted with `float()` before being written.

        fold: int = -1
            If >= 0, the score is written to `fold_{fold}_best_score.txt`,
            otherwise to `best_score.txt`.
        """
        if fold >= 0:
            out_file = self._run_dir / f"fold_{fold}_best_score.txt"
        else:
            out_file = self._run_dir / "best_score.txt"
        with open(out_file, "w", encoding="utf-8") as file:
            file.write(f"{monitor}: {float(score)}")

    def _set_selected_model(self, model_path: Union[None, Path, str] = None):
        """Validate and store the selected model path.

        `None` or an empty string resets the selection. A folder must contain
        at least one `fold_0/*.ckpt` file; anything else must be an existing file.

        Raises
        ------

        IOError
            If the folder contains no trained models or the file does not exist.
        """
        if model_path is None or model_path == "":
            self._selected_model_path = None
            return

        model_path = Path(model_path)
        if model_path.is_dir():
            # The model folder must contain at least one checkpoint for the first fold
            if not any(model_path.glob("fold_0/*.ckpt")):
                raise IOError(
                    f"The selected model folder {model_path} does not contain trained models."
                )
        elif not model_path.is_file():
            raise IOError(f"The selected model {model_path} does not exist.")
        self._selected_model_path = model_path

    def models(self) -> List[Path]:
        """Return a list of all models (`*.ckpt` files) found in the current run."""
        if self._run_dir is None:
            return []
        return list(self._run_dir.rglob("*.ckpt"))

    def _is_valid_run_name(self, run) -> bool:
        """Check whether the run has a valid name.

        A valid name has exactly the form `YYYYMMDD_HHMMSS`: eight ASCII
        digits, an underscore and six ASCII digits. The check is deliberately
        strict because `clean()` only deletes folders that pass it.
        """
        name = run.name
        if len(name) != 15 or name[8] != "_":
            return False
        return all(ch in "0123456789" for ch in name[:8] + name[9:])

    @staticmethod
    def _is_incomplete_run(run: Path) -> bool:
        """Return True if the run folder lacks checkpoints or logged results."""
        models_dir = run / "models"
        if not models_dir.is_dir() or not any(models_dir.rglob("*.ckpt")):
            return True
        results_dir = run / "results"
        return not results_dir.is_dir() or not any(results_dir.rglob("*version*"))

    @staticmethod
    def _remove_tree(folder: Path):
        """Remove a folder recursively; failures are reported but not raised."""
        try:
            shutil.rmtree(folder)
        except OSError as e:
            print(e)

    def clean(self):
        """Clean incomplete runs and predictions.

        Run folders (other than the current one) with a valid run name are
        removed if they contain no `*.ckpt` file under `models` or no
        `*version*` entry under `results`. Empty prediction folders are removed
        only when predictions are stored in the project's own `predictions`
        folder; an external target folder is never touched.
        """

        # Check runs
        for run in self._runs_dir.iterdir():
            if not run.is_dir():
                continue

            # Only touch folders that follow the run naming scheme
            if not self._is_valid_run_name(run):
                continue

            # The current run has no models or results yet: never delete it
            if run == self._run_dir:
                continue

            if self._is_incomplete_run(run):
                self._remove_tree(run)

        # Check predictions: we only clean if the target for predictions is contained
        # in the project's standard predictions folder
        default_predictions_dir = self._project_dir / "predictions"
        if self._target_for_prediction_path.parent != default_predictions_dir:
            # This is an external folder, we do not clean it
            return

        for pred in default_predictions_dir.iterdir():
            if not pred.is_dir():
                continue

            # Only touch folders that follow the run naming scheme
            if not self._is_valid_run_name(pred):
                continue

            # The current prediction folder has no predictions yet: never delete it
            if pred == self._target_for_prediction_path:
                continue

            # Are there predictions (or anything else)? We only clean empty folders.
            if not any(pred.rglob("*")):
                self._remove_tree(pred)

Instance variables

prop data_dir : pathlib.Path

Expand source code

@property
def data_dir(self) -> Path:
    """Return the data directory stored on the project."""
    return self._data_dir

prop models_dir : pathlib.Path

Expand source code

@property
def models_dir(self) -> Path:
    """Return the `models` folder of the current run."""
    return self._models_dir

prop results_dir : pathlib.Path

Expand source code

@property
def results_dir(self) -> Path:
    """Return the `results` folder of the current run."""
    return self._results_dir

prop run_dir : pathlib.Path

Expand source code

@property
def run_dir(self) -> Path:
    """Return the folder of the current run."""
    return self._run_dir

prop selected_model_path : pathlib.Path

Expand source code

@property
def selected_model_path(self) -> Union[None, Path]:
    """Return the selected model path, or None if no model is selected."""
    return self._selected_model_path

prop source_for_prediction : pathlib.Path

Expand source code

@property
def source_for_prediction(self) -> Path:
    """Return the source for prediction as read from the configuration."""
    return self._source_for_prediction_path

prop target_for_prediction : pathlib.Path

Expand source code

@property
def target_for_prediction(self) -> Path:
    """Return the folder where predictions are written."""
    return self._target_for_prediction_path

Methods

def clean(self): Clean incomplete runs and predictions.
def copy_configuration_file(self): Copy configuration file to run directory.
def models(self) ‑> List[pathlib.Path]: Return a list of all models available in the project.
def new_run(self): Create a new run with model and results subfolders.
def store_best_score(self, monitor: str, score: float, fold: int = -1): Store the best score to the run directory.