diff --git a/.flake8 b/.flake8 index 08bb8ea10..211234f22 100644 --- a/.flake8 +++ b/.flake8 @@ -5,6 +5,7 @@ select = C,E,F,W,B,T ignore = E203, E402, W503 per-file-ignores = *__init__.py:F401 + *cli.py:T001 exclude = venv examples diff --git a/doc/progress.rst b/doc/progress.rst index f27dd1137..2fbf95b31 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -15,6 +15,7 @@ Changelog * DOC #1051: Document existing extensions to OpenML-Python besides the shipped scikit-learn extension. * FIX #1035: Render class attributes and methods again. +* ADD #1049: Add a command line tool for configuration openml-python. * FIX #1042: Fixes a rare concurrency issue with OpenML-Python and joblib which caused the joblib worker pool to fail. * FIX #1053: Fixes a bug which could prevent importing the package in a docker container. diff --git a/doc/usage.rst b/doc/usage.rst index 23ef4ec84..e106e6d60 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -59,6 +59,10 @@ which are separated by newlines. The following keys are defined: * 1: info output * 2: debug output +This file is easily configurable by the ``openml`` command line interface. +To see where the file is stored, and what its values are, use `openml configure none`. +Set any field with ``openml configure FIELD`` or even all fields with just ``openml configure``. + ~~~~~~~~~~~~ Key concepts ~~~~~~~~~~~~ diff --git a/examples/20_basic/introduction_tutorial.py b/examples/20_basic/introduction_tutorial.py index 151692fdc..737362e49 100644 --- a/examples/20_basic/introduction_tutorial.py +++ b/examples/20_basic/introduction_tutorial.py @@ -42,13 +42,17 @@ # * After logging in, open your account page (avatar on the top right) # * Open 'Account Settings', then 'API authentication' to find your API key. # -# There are two ways to authenticate: +# There are two ways to permanently authenticate: # +# * Use the ``openml`` CLI tool with ``openml configure apikey MYKEY``, +# replacing **MYKEY** with your API key. # * Create a plain text file **~/.openml/config** with the line # **'apikey=MYKEY'**, replacing **MYKEY** with your API key. The config # file must be in the directory ~/.openml/config and exist prior to # importing the openml module. -# * Run the code below, replacing 'YOURKEY' with your API key. +# +# Alternatively, by running the code below and replacing 'YOURKEY' with your API key, +# you authenticate for the duration of the python process. # # .. warning:: This example uploads data. For that reason, this example # connects to the test server instead. This prevents the live server from diff --git a/openml/cli.py b/openml/cli.py new file mode 100644 index 000000000..b26e67d2e --- /dev/null +++ b/openml/cli.py @@ -0,0 +1,331 @@ +"""" Command Line Interface for `openml` to configure its settings. """ + +import argparse +import os +import pathlib +import string +from typing import Union, Callable +from urllib.parse import urlparse + + +from openml import config + + +def is_hex(string_: str) -> bool: + return all(c in string.hexdigits for c in string_) + + +def looks_like_url(url: str) -> bool: + # There's no thorough url parser, but we only seem to use netloc. + try: + return bool(urlparse(url).netloc) + except Exception: + return False + + +def wait_until_valid_input( + prompt: str, check: Callable[[str], str], sanitize: Union[Callable[[str], str], None] +) -> str: + """ Asks `prompt` until an input is received which returns True for `check`. + + Parameters + ---------- + prompt: str + message to display + check: Callable[[str], str] + function to call with the given input, that provides an error message if the input is not + valid otherwise, and False-like otherwise. + sanitize: Callable[[str], str], optional + A function which attempts to sanitize the user input (e.g. auto-complete). + + Returns + ------- + valid input + + """ + + while True: + response = input(prompt) + if sanitize: + response = sanitize(response) + error_message = check(response) + if error_message: + print(error_message, end="\n\n") + else: + return response + + +def print_configuration(): + file = config.determine_config_file_path() + header = f"File '{file}' contains (or defaults to):" + print(header) + + max_key_length = max(map(len, config.get_config_as_dict())) + for field, value in config.get_config_as_dict().items(): + print(f"{field.ljust(max_key_length)}: {value}") + + +def verbose_set(field, value): + config.set_field_in_config_file(field, value) + print(f"{field} set to '{value}'.") + + +def configure_apikey(value: str) -> None: + def check_apikey(apikey: str) -> str: + if len(apikey) != 32: + return f"The key should contain 32 characters but contains {len(apikey)}." + if not is_hex(apikey): + return "Some characters are not hexadecimal." + return "" + + instructions = ( + f"Your current API key is set to: '{config.apikey}'. " + "You can get an API key at https://new.openml.org. " + "You must create an account if you don't have one yet:\n" + " 1. Log in with the account.\n" + " 2. Navigate to the profile page (top right circle > Your Profile). \n" + " 3. Click the API Key button to reach the page with your API key.\n" + "If you have any difficulty following these instructions, let us know on Github." + ) + + configure_field( + field="apikey", + value=value, + check_with_message=check_apikey, + intro_message=instructions, + input_message="Please enter your API key:", + ) + + +def configure_server(value: str) -> None: + def check_server(server: str) -> str: + is_shorthand = server in ["test", "production"] + if is_shorthand or looks_like_url(server): + return "" + return "Must be 'test', 'production' or a url." + + def replace_shorthand(server: str) -> str: + if server == "test": + return "https://test.openml.org/api/v1/xml" + if server == "production": + return "https://www.openml.org/api/v1/xml" + return server + + configure_field( + field="server", + value=value, + check_with_message=check_server, + intro_message="Specify which server you wish to connect to.", + input_message="Specify a url or use 'test' or 'production' as a shorthand: ", + sanitize=replace_shorthand, + ) + + +def configure_cachedir(value: str) -> None: + def check_cache_dir(path: str) -> str: + p = pathlib.Path(path) + if p.is_file(): + return f"'{path}' is a file, not a directory." + expanded = p.expanduser() + if not expanded.is_absolute(): + return f"'{path}' is not absolute (even after expanding '~')." + if not expanded.exists(): + try: + os.mkdir(expanded) + except PermissionError: + return f"'{path}' does not exist and there are not enough permissions to create it." + return "" + + configure_field( + field="cachedir", + value=value, + check_with_message=check_cache_dir, + intro_message="Configuring the cache directory. It can not be a relative path.", + input_message="Specify the directory to use (or create) as cache directory: ", + ) + print("NOTE: Data from your old cache directory is not moved over.") + + +def configure_connection_n_retries(value: str) -> None: + def valid_connection_retries(n: str) -> str: + if not n.isdigit(): + return f"Must be an integer number (smaller than {config.max_retries})." + if int(n) > config.max_retries: + return f"connection_n_retries may not exceed {config.max_retries}." + if int(n) == 0: + return "connection_n_retries must be non-zero." + return "" + + configure_field( + field="connection_n_retries", + value=value, + check_with_message=valid_connection_retries, + intro_message="Configuring the number of times to attempt to connect to the OpenML Server", + input_message=f"Enter an integer between 0 and {config.max_retries}: ", + ) + + +def configure_avoid_duplicate_runs(value: str) -> None: + def is_python_bool(bool_: str) -> str: + if bool_ in ["True", "False"]: + return "" + return "Must be 'True' or 'False' (mind the capital)." + + def autocomplete_bool(bool_: str) -> str: + if bool_.lower() in ["n", "no", "f", "false", "0"]: + return "False" + if bool_.lower() in ["y", "yes", "t", "true", "1"]: + return "True" + return bool_ + + intro_message = ( + "If set to True, when `run_flow_on_task` or similar methods are called a lookup is " + "performed to see if there already exists such a run on the server. " + "If so, download those results instead. " + "If set to False, runs will always be executed." + ) + + configure_field( + field="avoid_duplicate_runs", + value=value, + check_with_message=is_python_bool, + intro_message=intro_message, + input_message="Enter 'True' or 'False': ", + sanitize=autocomplete_bool, + ) + + +def configure_verbosity(value: str) -> None: + def is_zero_through_two(verbosity: str) -> str: + if verbosity in ["0", "1", "2"]: + return "" + return "Must be '0', '1' or '2'." + + intro_message = ( + "Set the verbosity of log messages which should be shown by openml-python." + " 0: normal output (warnings and errors)" + " 1: info output (some high-level progress output)" + " 2: debug output (detailed information (for developers))" + ) + + configure_field( + field="verbosity", + value=value, + check_with_message=is_zero_through_two, + intro_message=intro_message, + input_message="Enter '0', '1' or '2': ", + ) + + +def configure_field( + field: str, + value: Union[None, str], + check_with_message: Callable[[str], str], + intro_message: str, + input_message: str, + sanitize: Union[Callable[[str], str], None] = None, +) -> None: + """ Configure `field` with `value`. If `value` is None ask the user for input. + + `value` and user input are first corrected/auto-completed with `convert_value` if provided, + then validated with `check_with_message` function. + If the user input a wrong value in interactive mode, the user gets to input a new value. + The new valid value is saved in the openml configuration file. + In case an invalid `value` is supplied directly (non-interactive), no changes are made. + + Parameters + ---------- + field: str + Field to set. + value: str, None + Value to field to. If `None` will ask user for input. + check_with_message: Callable[[str], str] + Function which validates `value` or user input, and returns either an error message if it + is invalid, or a False-like value if `value` is valid. + intro_message: str + Message that is printed once if user input is requested (e.g. instructions). + input_message: str + Message that comes with the input prompt. + sanitize: Union[Callable[[str], str], None] + A function to convert user input to 'more acceptable' input, e.g. for auto-complete. + If no correction of user input is possible, return the original value. + If no function is provided, don't attempt to correct/auto-complete input. + """ + if value is not None: + if sanitize: + value = sanitize(value) + malformed_input = check_with_message(value) + if malformed_input: + print(malformed_input) + quit() + else: + print(intro_message) + value = wait_until_valid_input( + prompt=input_message, check=check_with_message, sanitize=sanitize, + ) + verbose_set(field, value) + + +def configure(args: argparse.Namespace): + """ Calls the right submenu(s) to edit `args.field` in the configuration file. """ + set_functions = { + "apikey": configure_apikey, + "server": configure_server, + "cachedir": configure_cachedir, + "connection_n_retries": configure_connection_n_retries, + "avoid_duplicate_runs": configure_avoid_duplicate_runs, + "verbosity": configure_verbosity, + } + + def not_supported_yet(_): + print(f"Setting '{args.field}' is not supported yet.") + + if args.field not in ["all", "none"]: + set_functions.get(args.field, not_supported_yet)(args.value) + else: + if args.value is not None: + print(f"Can not set value ('{args.value}') when field is specified as '{args.field}'.") + quit() + print_configuration() + + if args.field == "all": + for set_field_function in set_functions.values(): + print() # Visually separating the output by field. + set_field_function(args.value) + + +def main() -> None: + subroutines = {"configure": configure} + + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(dest="subroutine") + + parser_configure = subparsers.add_parser( + "configure", + description="Set or read variables in your configuration file. For more help also see " + "'https://openml.github.io/openml-python/master/usage.html#configuration'.", + ) + + configurable_fields = [f for f in config._defaults if f not in ["max_retries"]] + + parser_configure.add_argument( + "field", + type=str, + choices=[*configurable_fields, "all", "none"], + default="all", + nargs="?", + help="The field you wish to edit. " + "Choosing 'all' lets you configure all fields one by one. " + "Choosing 'none' will print out the current configuration.", + ) + + parser_configure.add_argument( + "value", type=str, default=None, nargs="?", help="The value to set the FIELD to.", + ) + + args = parser.parse_args() + subroutines.get(args.subroutine, lambda _: parser.print_help())(args) + + +if __name__ == "__main__": + main() diff --git a/openml/config.py b/openml/config.py index 4516e96e1..7295ea82e 100644 --- a/openml/config.py +++ b/openml/config.py @@ -9,7 +9,7 @@ import os from pathlib import Path import platform -from typing import Tuple, cast +from typing import Tuple, cast, Any from io import StringIO import configparser @@ -177,6 +177,16 @@ def stop_using_configuration_for_example(cls): cls._start_last_called = False +def determine_config_file_path() -> Path: + if platform.system() == "Linux": + config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path("~") / ".config" / "openml")) + else: + config_dir = Path("~") / ".openml" + # Still use os.path.expanduser to trigger the mock in the unit test + config_dir = Path(os.path.expanduser(config_dir)) + return config_dir / "config" + + def _setup(config=None): """Setup openml package. Called on first import. @@ -193,13 +203,8 @@ def _setup(config=None): global connection_n_retries global max_retries - if platform.system() == "Linux": - config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path("~") / ".config" / "openml")) - else: - config_dir = Path("~") / ".openml" - # Still use os.path.expanduser to trigger the mock in the unit test - config_dir = Path(os.path.expanduser(config_dir)) - config_file = config_dir / "config" + config_file = determine_config_file_path() + config_dir = config_file.parent # read config file, create directory for config file if not os.path.exists(config_dir): @@ -258,6 +263,27 @@ def _get(config, key): ) +def set_field_in_config_file(field: str, value: Any): + """ Overwrites the `field` in the configuration file with the new `value`. """ + if field not in _defaults: + return ValueError(f"Field '{field}' is not valid and must be one of '{_defaults.keys()}'.") + + globals()[field] = value + config_file = determine_config_file_path() + config = _parse_config(str(config_file)) + with open(config_file, "w") as fh: + for f in _defaults.keys(): + # We can't blindly set all values based on globals() because when the user + # sets it through config.FIELD it should not be stored to file. + # There doesn't seem to be a way to avoid writing defaults to file with configparser, + # because it is impossible to distinguish from an explicitly set value that matches + # the default value, to one that was set to its default because it was omitted. + value = config.get("FAKE_SECTION", f) + if f == field: + value = globals()[f] + fh.write(f"{f} = {value}\n") + + def _parse_config(config_file: str): """ Parse the config file, set up defaults. """ config = configparser.RawConfigParser(defaults=_defaults) diff --git a/setup.py b/setup.py index 2d2a638b5..bad7da2b4 100644 --- a/setup.py +++ b/setup.py @@ -102,4 +102,5 @@ "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", ], + entry_points={"console_scripts": ["openml=openml.cli:main"]}, )