class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI):
    """V1 API implementation for estimation procedures.

    Fetches estimation procedures from the v1 XML API endpoint.
    """

    def list(self) -> list[OpenMLEstimationProcedure]:
        """Return all estimation procedures listed by the v1 server.

        Returns
        -------
        procedures : list of OpenMLEstimationProcedure
            One object per listed procedure, carrying its id, task type,
            name and type. Procedures whose task type id is not a valid
            ``TaskType`` are skipped with a ``RuntimeWarning``.

        Raises
        ------
        ValueError
            If the returned XML lacks the ``oml:estimationprocedures`` root,
            its ``@xmlns:oml`` attribute, the expected namespace value, or
            any ``oml:estimationprocedure`` entry.
        """
        response = self._http.get("estimationprocedure/list")
        # xmltodict collapses a lone child element into a dict; force_list keeps
        # the entries a list even when the server returns a single procedure.
        procs_dict = xmltodict.parse(response.text, force_list=("oml:estimationprocedure",))

        # Minimalistic check if the XML is useful
        if "oml:estimationprocedures" not in procs_dict:
            raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.")

        root = procs_dict["oml:estimationprocedures"]
        if "@xmlns:oml" not in root:
            raise ValueError(
                "Error in return XML, does not contain tag "
                "@xmlns:oml as a child of oml:estimationprocedures.",
            )

        namespace = root["@xmlns:oml"]
        if namespace != "http://openml.org/openml":
            raise ValueError(
                "Error in return XML, value of "
                "oml:estimationprocedures/@xmlns:oml is not "
                f"http://openml.org/openml, but {namespace}",
            )

        # Fail with a descriptive error instead of a bare KeyError further down.
        if "oml:estimationprocedure" not in root:
            raise ValueError('Error in return XML, does not contain "oml:estimationprocedure"')

        procs: list[OpenMLEstimationProcedure] = []
        for proc_ in root["oml:estimationprocedure"]:
            task_type_int = int(proc_["oml:ttid"])
            # Only the enum conversion is guarded, so a malformed id or a missing
            # field is not misreported as an unknown task type.
            try:
                task_type_id = TaskType(task_type_int)
            except ValueError as e:
                warnings.warn(
                    f"Could not create task type id for {task_type_int} due to error {e}",
                    RuntimeWarning,
                    stacklevel=2,
                )
                continue

            procs.append(
                OpenMLEstimationProcedure(
                    id=int(proc_["oml:id"]),
                    task_type_id=task_type_id,
                    name=proc_["oml:name"],
                    type=proc_["oml:type"],
                )
            )

        return procs
@dataclass
class OpenMLEstimationProcedure:
    """Meta-information about a single OpenML estimation procedure.

    Parameters
    ----------
    id : int
        ID of the estimation procedure.
    task_type_id : TaskType
        Associated task type.
    name : str
        Name of the estimation procedure.
    type : str
        Type of the estimation procedure.
    """

    id: int
    task_type_id: TaskType
    name: str
    type: str

    def _to_dict(self) -> dict:
        """Return the fields as a dict keyed by ``id``, ``task_type_id``, ``name`` and ``type``."""
        return asdict(self)

    def __repr__(self) -> str:
        """Return a multi-line summary: an underlined title followed by one line per field."""
        title = "OpenML Estimation Procedure"
        header = f"{title}\n{'=' * len(title)}\n"

        fields = {
            "ID": self.id,
            "Name": self.name,
            "Type": self.type,
            "Task Type": self.task_type_id,
        }
        # Pad every label with dots up to the longest label so the values line up.
        width = max(len(label) for label in fields)
        body = "\n".join(f"{label:.<{width}}: {value}" for label, value in fields.items())
        return header + body
a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -9,7 +9,6 @@ import numpy as np import pandas as pd -import xmltodict import openml import openml._api_calls @@ -167,24 +166,8 @@ def list_estimation_procedures() -> list[str]: ------- list """ - api_call = "estimationprocedure/list" - xml_string = openml._api_calls._perform_api_call(api_call, "get") - api_results = xmltodict.parse(xml_string) - - # Minimalistic check if the XML is useful - if "oml:estimationprocedures" not in api_results: - raise ValueError('Error in return XML, does not contain "oml:estimationprocedures"') - - if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]: - raise ValueError('Error in return XML, does not contain "oml:estimationprocedure"') - - if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list): - raise TypeError('Error in return XML, does not contain "oml:estimationprocedure" as a list') - - return [ - prod["oml:name"] - for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"] - ] + result = openml._backend.estimation_procedure.list() + return [i.name for i in result] def list_evaluations_setups( diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 3fbc7adee..22fb26f9b 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -80,50 +80,8 @@ def _get_estimation_procedure_list() -> list[dict[str, Any]]: a dictionary containing the following information: id, task type id, name, type, repeats, folds, stratified. 
""" - url_suffix = "estimationprocedure/list" - xml_string = openml._api_calls._perform_api_call(url_suffix, "get") - - procs_dict = xmltodict.parse(xml_string) - # Minimalistic check if the XML is useful - if "oml:estimationprocedures" not in procs_dict: - raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.") - - if "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]: - raise ValueError( - "Error in return XML, does not contain tag " - "@xmlns:oml as a child of oml:estimationprocedures.", - ) - - if procs_dict["oml:estimationprocedures"]["@xmlns:oml"] != "http://openml.org/openml": - raise ValueError( - "Error in return XML, value of " - "oml:estimationprocedures/@xmlns:oml is not " - "http://openml.org/openml, but {}".format( - str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"]) - ), - ) - - procs: list[dict[str, Any]] = [] - for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]: - task_type_int = int(proc_["oml:ttid"]) - try: - task_type_id = TaskType(task_type_int) - procs.append( - { - "id": int(proc_["oml:id"]), - "task_type_id": task_type_id, - "name": proc_["oml:name"], - "type": proc_["oml:type"], - }, - ) - except ValueError as e: - warnings.warn( - f"Could not create task type id for {task_type_int} due to error {e}", - RuntimeWarning, - stacklevel=2, - ) - - return procs + result = openml._backend.estimation_procedure.list() + return [i._to_dict() for i in result] def list_tasks( # noqa: PLR0913 diff --git a/tests/test_api/test_estimation_procedure.py b/tests/test_api/test_estimation_procedure.py new file mode 100644 index 000000000..fff7bb91b --- /dev/null +++ b/tests/test_api/test_estimation_procedure.py @@ -0,0 +1,32 @@ +# License: BSD 3-Clause +from __future__ import annotations + +import pytest +from openml._api import EstimationProcedureV1API, EstimationProcedureV2API +from openml.exceptions import OpenMLNotSupportedError +from openml.estimation_procedures import 
@pytest.fixture
def estimation_procedure_v1(http_client_v1, minio_client) -> EstimationProcedureV1API:
    """Provide a V1 estimation procedure API wired to the V1 HTTP client."""
    api = EstimationProcedureV1API(http=http_client_v1, minio=minio_client)
    return api


@pytest.fixture
def estimation_procedure_v2(http_client_v2, minio_client) -> EstimationProcedureV2API:
    """Provide a V2 estimation procedure API wired to the V2 HTTP client."""
    api = EstimationProcedureV2API(http=http_client_v2, minio=minio_client)
    return api


@pytest.mark.test_server()
def test_v1_list(estimation_procedure_v1):
    """The V1 listing returns a non-empty list of OpenMLEstimationProcedure objects."""
    procedures = estimation_procedure_v1.list()

    assert isinstance(procedures, list)
    assert len(procedures) > 0
    for procedure in procedures:
        assert isinstance(procedure, OpenMLEstimationProcedure)


@pytest.mark.test_server()
def test_v2_list(estimation_procedure_v2):
    """Listing through the V2 API raises OpenMLNotSupportedError."""
    with pytest.raises(OpenMLNotSupportedError):
        estimation_procedure_v2.list()