From b9405d1b319621f2f3df94124a8cbbf7dda66c42 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 21 Dec 2022 15:00:46 -0800
Subject: [PATCH 1/2] WEB: Add backoff + retry for Github API calls

---
 web/pandas_web.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/web/pandas_web.py b/web/pandas_web.py
index d054e273cde5e..82ab6b06f10fe 100755
--- a/web/pandas_web.py
+++ b/web/pandas_web.py
@@ -42,6 +42,12 @@
 import requests
 import yaml
 
+retries = requests.adapters.Retry(total=5, backoff_factor=0.25, status_forcelist=[403])
+session = requests.Session()
+session.mount(
+    "https://api.github.com/", requests.adapters.HTTPAdapter(max_retries=retries)
+)
+
 
 class Preprocessors:
     """
@@ -166,7 +172,7 @@ def maintainers_add_info(context):
         for kind in ("active", "inactive"):
             context["maintainers"][f"{kind}_with_github_info"] = []
             for user in context["maintainers"][kind]:
-                resp = requests.get(f"https://api.github.com/users/{user}")
+                resp = session.get(f"https://api.github.com/users/{user}")
                 if context["ignore_io_errors"] and resp.status_code == 403:
                     return context
                 resp.raise_for_status()
@@ -178,7 +184,7 @@ def home_add_releases(context):
         context["releases"] = []
 
         github_repo_url = context["main"]["github_repo_url"]
-        resp = requests.get(f"https://api.github.com/repos/{github_repo_url}/releases")
+        resp = session.get(f"https://api.github.com/repos/{github_repo_url}/releases")
         if context["ignore_io_errors"] and resp.status_code == 403:
             return context
         resp.raise_for_status()
@@ -243,7 +249,7 @@ def roadmap_pdeps(context):
 
         # under discussion
         github_repo_url = context["main"]["github_repo_url"]
-        resp = requests.get(
+        resp = session.get(
             "https://api.github.com/search/issues?"
             f"q=is:pr is:open label:PDEP repo:{github_repo_url}"
         )

From 33abfcff8b8743a9b8d8c286c0be5a4ee6aa88a6 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 27 Dec 2022 14:19:19 -0800
Subject: [PATCH 2/2] Try with token

---
 .github/workflows/docbuild-and-upload.yml |  1 +
 web/pandas_web.py                         | 24 ++++++++++++++---------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml
index ee79c10c12d4e..908259597cafb 100644
--- a/.github/workflows/docbuild-and-upload.yml
+++ b/.github/workflows/docbuild-and-upload.yml
@@ -15,6 +15,7 @@ on:
 env:
   ENV_FILE: environment.yml
   PANDAS_CI: 1
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
 permissions:
   contents: read
diff --git a/web/pandas_web.py b/web/pandas_web.py
index 82ab6b06f10fe..e9e8e70066b3f 100755
--- a/web/pandas_web.py
+++ b/web/pandas_web.py
@@ -42,11 +42,11 @@
 import requests
 import yaml
 
-retries = requests.adapters.Retry(total=5, backoff_factor=0.25, status_forcelist=[403])
-session = requests.Session()
-session.mount(
-    "https://api.github.com/", requests.adapters.HTTPAdapter(max_retries=retries)
-)
+api_token = os.environ.get("GITHUB_TOKEN")
+if api_token is not None:
+    GITHUB_API_HEADERS = {"Authorization": f"Bearer {api_token}"}
+else:
+    GITHUB_API_HEADERS = {}
 
 
 class Preprocessors:
@@ -172,7 +172,9 @@ def maintainers_add_info(context):
         for kind in ("active", "inactive"):
             context["maintainers"][f"{kind}_with_github_info"] = []
             for user in context["maintainers"][kind]:
-                resp = session.get(f"https://api.github.com/users/{user}")
+                resp = requests.get(
+                    f"https://api.github.com/users/{user}", headers=GITHUB_API_HEADERS
+                )
                 if context["ignore_io_errors"] and resp.status_code == 403:
                     return context
                 resp.raise_for_status()
@@ -184,7 +186,10 @@ def home_add_releases(context):
         context["releases"] = []
 
         github_repo_url = context["main"]["github_repo_url"]
-        resp = session.get(f"https://api.github.com/repos/{github_repo_url}/releases")
+        resp = requests.get(
+            f"https://api.github.com/repos/{github_repo_url}/releases",
+            headers=GITHUB_API_HEADERS,
+        )
         if context["ignore_io_errors"] and resp.status_code == 403:
             return context
         resp.raise_for_status()
@@ -249,9 +254,10 @@ def roadmap_pdeps(context):
 
         # under discussion
         github_repo_url = context["main"]["github_repo_url"]
-        resp = session.get(
+        resp = requests.get(
             "https://api.github.com/search/issues?"
-            f"q=is:pr is:open label:PDEP repo:{github_repo_url}"
+            f"q=is:pr is:open label:PDEP repo:{github_repo_url}",
+            headers=GITHUB_API_HEADERS,
         )
         if context["ignore_io_errors"] and resp.status_code == 403:
             return context