From 50553eff9dc48a84024bed4f2e60b0f2bc29628d Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Thu, 24 Nov 2022 22:18:09 +0100 Subject: [PATCH 1/3] feat(minio): Allow for proxies --- openml/_api_calls.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 87511693c..8416d9e7b 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -10,6 +10,7 @@ import urllib.parse import xml import xmltodict +from urllib3 import ProxyManager from typing import Dict, Optional, Union import minio @@ -23,6 +24,26 @@ ) +def resolve_env_proxies(url: str) -> Optional[str]: + """Attempt to find a suitable proxy for this url. + + Relies on ``requests`` internals to remain consistent. To disable this from the + environment, please set the environment variable ``no_proxy="*"``. + + Parameters + ---------- + url : str + The url endpoint + + Returns + ------- + Optional[str] + The proxy url if found, else None + """ + resolved_proxies = requests.utils.get_environ_proxies(url) + selected_proxy = requests.utils.select_proxy(url, resolved_proxies) + return selected_proxy + def _create_url_from_endpoint(endpoint: str) -> str: url = config.server if not url.endswith("/"): @@ -84,6 +105,7 @@ def _download_minio_file( source: str, destination: Union[str, pathlib.Path], exists_ok: bool = True, + proxy: Union[bool, str] = True ) -> None: """Download file ``source`` from a MinIO Bucket and store it at ``destination``. @@ -95,7 +117,10 @@ def _download_minio_file( Path to store the file to, if a directory is provided the original filename is used. exists_ok : bool, optional (default=True) If False, raise FileExists if a file already exists in ``destination``. - + proxy: Union[bool, str] + If True (default), environemnt variables will be parsed using ``requests`` to find a + suitable proxy. If False, no proxy will be used. Finally, pass a str to use a + custom proxy. 
To disable this from the environment varialbes, use ``no_proxy="*"``. """ destination = pathlib.Path(destination) parsed_url = urllib.parse.urlparse(source) @@ -107,7 +132,18 @@ def _download_minio_file( if destination.is_file() and not exists_ok: raise FileExistsError(f"File already exists in {destination}.") - client = minio.Minio(endpoint=parsed_url.netloc, secure=False) + if isinstance(proxy, str): + proxy_client = ProxyManager(proxy) + else: + proxy_url = resolve_env_proxies(parsed_url.geturl()) if proxy else None + if proxy_url: + proxy_client = ProxyManager(proxy_url) + + client = minio.Minio( + endpoint=parsed_url.netloc, + secure=False, + http_client=proxy_client + ) try: client.fget_object( From 389f14d776e3eb98c612929e706bba44cd4f411a Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Thu, 24 Nov 2022 22:39:38 +0100 Subject: [PATCH 2/3] fix: Declared proxy_client as None --- openml/_api_calls.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 8416d9e7b..1a60558fe 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -132,6 +132,7 @@ def _download_minio_file( if destination.is_file() and not exists_ok: raise FileExistsError(f"File already exists in {destination}.") + proxy_client = None if isinstance(proxy, str): proxy_client = ProxyManager(proxy) else: From 9cd3b44f3f4d0917c4dc374790b03c4ea62d6282 Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Fri, 25 Nov 2022 12:41:34 +0100 Subject: [PATCH 3/3] refactor(proxy): Change to `str | None` with "auto" --- openml/_api_calls.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 1a60558fe..7db1155cc 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -105,7 +105,7 @@ def _download_minio_file( source: str, destination: Union[str, pathlib.Path], exists_ok: bool = True, - proxy: Union[bool, str] = True + proxy: Optional[str] = "auto", ) -> None: 
"""Download file ``source`` from a MinIO Bucket and store it at ``destination``. @@ -117,10 +117,10 @@ def _download_minio_file( Path to store the file to, if a directory is provided the original filename is used. exists_ok : bool, optional (default=True) If False, raise FileExists if a file already exists in ``destination``. - proxy: Union[bool, str] - If True (default), environemnt variables will be parsed using ``requests`` to find a - suitable proxy. If False, no proxy will be used. Finally, pass a str to use a - custom proxy. To disable this from the environment varialbes, use ``no_proxy="*"``. + proxy: str, optional (default = "auto") + The proxy server to use. By default it's "auto" which uses ``requests`` to + automatically find the proxy to use. Pass None or the environment variable + ``no_proxy="*"`` to disable proxies. """ destination = pathlib.Path(destination) parsed_url = urllib.parse.urlparse(source) @@ -132,13 +132,10 @@ def _download_minio_file( if destination.is_file() and not exists_ok: raise FileExistsError(f"File already exists in {destination}.") - proxy_client = None - if isinstance(proxy, str): - proxy_client = ProxyManager(proxy) - else: - proxy_url = resolve_env_proxies(parsed_url.geturl()) if proxy else None - if proxy_url: - proxy_client = ProxyManager(proxy_url) + if proxy == "auto": + proxy = resolve_env_proxies(parsed_url.geturl()) + + proxy_client = ProxyManager(proxy) if proxy else None client = minio.Minio( endpoint=parsed_url.netloc,