From 4d9339b6964755859aa0dfca8805ce8aa2e33478 Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 14:23:48 +0200 Subject: [PATCH 01/11] [skip changelog] Add stats fetching from Arduino CDN using AWS Athena --- .github/tools/fetch_athena_stats.sh | 93 ++++++++++++++++++++++++++++ .github/workflows/arduino-stats.yaml | 45 ++++++++++++++ 2 files changed, 138 insertions(+) create mode 100755 .github/tools/fetch_athena_stats.sh create mode 100644 .github/workflows/arduino-stats.yaml diff --git a/.github/tools/fetch_athena_stats.sh b/.github/tools/fetch_athena_stats.sh new file mode 100755 index 00000000000..dec5fd27ac0 --- /dev/null +++ b/.github/tools/fetch_athena_stats.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash + +# This script performs the following: +# 1. Run the query, use jq to capture the QueryExecutionId, and then capture that into bash variable +# 2. Wait for the query to finish running (240 seconds). +# 3. Get the results. +# 4. Json data points struct build + +# Expected env variables are: +# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for accessing AWS resources +# AWS_ATHENA_SOURCE_TABLE +# AWS_ATHENA_OUTPUT_LOCATION +# GITHUB_REPOSITORY + +set -euo pipefail + +! read -r -d '' query << EOM +select +replace(url_extract_path("d.url"), '/arduino-cli/arduino-cli_', '') as flavor, +count("id") as gauge +from ${AWS_ATHENA_SOURCE_TABLE} +where "d.url" like 'https://downloads.arduino.cc/arduino-cli/arduino-cli_%' +and "d.url" not like '%latest%' -- exclude latest redirect +and "d.url" not like '%alpha%' -- exclude early alpha releases +and "d.url" not like '%.tar.bz2%' -- exclude very old releases archive formats +group by 1 +EOM + +queryExecutionId=$( +aws athena start-query-execution \ +--query-string "${query}" \ +--query-execution-context "Database=demo_books" \ +--result-configuration "OutputLocation=${AWS_ATHENA_OUTPUT_LOCATION}" \ +--region us-east-1 | jq -r ".QueryExecutionId" +) + +echo "QueryExecutionId is ${queryExecutionId}" +for i in $(seq 1 120); do + queryState=$( aws athena get-query-execution \ + --query-execution-id "${queryExecutionId}" \ + --region us-east-1 | jq -r ".QueryExecution.Status.State" + ); + + if [[ "${queryState}" == "SUCCEEDED" ]]; then + break; + fi; + + echo "QueryExecutionId ${queryExecutionId} - state is ${queryState}" + + if [[ "${queryState}" == "FAILED" ]]; then + exit 1; + fi; + + sleep 2 +done + +echo "Query succeeded. Processing data" +queryResult=$( aws athena get-query-results \ +--query-execution-id "${queryExecutionId}" \ +--region us-east-1 | jq --compact-output +); + +! read -r -d '' jsonTemplate << EOM +{ +"type": "gauge", +"name": "arduino.downloads.total", +"value": "%s", +"host": "${GITHUB_REPOSITORY}", +"tags": [ +"version:%s", +"os:%s", +"arch:%s", +"cdn:downloads.arduino.cc", +"project:arduino-cli" +] +}, +EOM + +datapoints="[" +for row in $(echo "${queryResult}" |jq 'del(.ResultSet.Rows[0])' | jq -r '.ResultSet.Rows[] | .Data' --compact-output); do + value=$(jq -r ".[1].VarCharValue" <<< "${row}") + tag=$(jq -r ".[0].VarCharValue" <<< "${row}") + # Some splitting to obtain 0.6.0, Windows, 32bit elements from string 0.6.0_Windows_32bit.zip + split=($(echo "$tag" | tr '_' '\n')) + if [[ ${#split[@]} -ne 3 ]]; then + continue + fi + archSplit=($(echo "${split[2]}" | tr '.' '\n')) + datapoints+=$(printf "${jsonTemplate}" "${value}" "${split[0]}" "${split[1]}" "${archSplit[0]}") +done +datapoints="${datapoints::-1}]" + +echo "::set-output name=result::$(jq --compact-output <<< "${datapoints}")" diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml new file mode 100644 index 00000000000..0977adb9b54 --- /dev/null +++ b/.github/workflows/arduino-stats.yaml @@ -0,0 +1,45 @@ +name: download-stats + +on: + push: + branches: + - rsora/downloads-stats-action + schedule: + # run every day at 12:00:00 + - cron: '* 12 * * *' + +jobs: + push-stats: + runs-on: ubuntu-latest + + steps: + - name: Fetch downloads count form Arduino CDN using AWS Athena + id: fetch + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_ATHENA_SOURCE_TABLE: ${{ secrets.AWS_ATHENA_SOURCE_TABLE }} + AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.AWS_ATHENA_OUTPUT_LOCATION }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: ./tools/fetch_athena_stats.sh + + - name: Send metrics + uses: masci/datadog@v1 + with: + api-key: ${{ secrets.DD_API_KEY }} + # Metrics input expects YAML but JSON will work just right. + metrics: ${{steps.fetch.outputs.result}} + + - name: Report failure + if: failure() + uses: masci/datadog@v1 + with: + api-key: ${{ secrets.DD_API_KEY }} + events: | + - title: "Arduino CLI stats failing" + text: "Stats collection failed" + alert_type: "error" + host: ${{ github.repository }} + tags: + - "project:arduino-cli" + - "cdn:downloads.arduino.cc" From 47557793e41e1d2b1f5c92cf0c929154777d7045 Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 14:36:13 +0200 Subject: [PATCH 02/11] Fix path typo for athena stats --- .github/workflows/arduino-stats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index 0977adb9b54..95adf40d9d3 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -21,7 +21,7 @@ jobs: AWS_ATHENA_SOURCE_TABLE: ${{ secrets.AWS_ATHENA_SOURCE_TABLE }} AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.AWS_ATHENA_OUTPUT_LOCATION }} GITHUB_REPOSITORY: ${{ github.repository }} - run: ./tools/fetch_athena_stats.sh + run: .github/tools/fetch_athena_stats.sh - name: Send metrics uses: masci/datadog@v1 From 49ec7bfffe75e25dc1866bd9e476fa58f9dc1a98 Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 14:38:02 +0200 Subject: [PATCH 03/11] Fix path typo for athena stats again --- .github/workflows/arduino-stats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index 95adf40d9d3..b8dfb2345d2 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -21,7 +21,7 @@ jobs: AWS_ATHENA_SOURCE_TABLE: ${{ secrets.AWS_ATHENA_SOURCE_TABLE }} AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.AWS_ATHENA_OUTPUT_LOCATION }} GITHUB_REPOSITORY: ${{ github.repository }} - run: .github/tools/fetch_athena_stats.sh + run: ./.github/tools/fetch_athena_stats.sh - name: Send metrics uses: masci/datadog@v1 From f67930167be09d22cd95992bf3fc015da953bf13 Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 14:42:07 +0200 Subject: [PATCH 04/11] Add sh --- .github/workflows/arduino-stats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index b8dfb2345d2..9d580d44df6 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -21,7 +21,7 @@ jobs: AWS_ATHENA_SOURCE_TABLE: ${{ secrets.AWS_ATHENA_SOURCE_TABLE }} AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.AWS_ATHENA_OUTPUT_LOCATION }} GITHUB_REPOSITORY: ${{ github.repository }} - run: ./.github/tools/fetch_athena_stats.sh + run: sh .github/tools/fetch_athena_stats.sh - name: Send metrics uses: masci/datadog@v1 From 8a0f37e7cab98520480a104a7e81cb61843337d2 Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 14:45:11 +0200 Subject: [PATCH 05/11] Add checkout step --- .github/workflows/arduino-stats.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index 9d580d44df6..53c301348b5 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -13,6 +13,9 @@ jobs: runs-on: ubuntu-latest steps: + - name: Checkout + uses: actions/checkout@v1 + - name: Fetch downloads count form Arduino CDN using AWS Athena id: fetch env: @@ -21,7 +24,7 @@ jobs: AWS_ATHENA_SOURCE_TABLE: ${{ secrets.AWS_ATHENA_SOURCE_TABLE }} AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.AWS_ATHENA_OUTPUT_LOCATION }} GITHUB_REPOSITORY: ${{ github.repository }} - run: sh .github/tools/fetch_athena_stats.sh + run: .github/tools/fetch_athena_stats.sh - name: Send metrics uses: masci/datadog@v1 From dc5ee84439533e97a4ef79690bf49f35bb61b750 Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 15:35:48 +0200 Subject: [PATCH 06/11] Add STATS_ prefix to stats secret and env vars --- .github/workflows/arduino-stats.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index 53c301348b5..6484ceb1852 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -19,10 +19,10 @@ jobs: - name: Fetch downloads count form Arduino CDN using AWS Athena id: fetch env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_ATHENA_SOURCE_TABLE: ${{ secrets.AWS_ATHENA_SOURCE_TABLE }} - AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.AWS_ATHENA_OUTPUT_LOCATION }} + AWS_ACCESS_KEY_ID: ${{ secrets.STATS_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.STATS_AWS_SECRET_ACCESS_KEY }} + AWS_ATHENA_SOURCE_TABLE: ${{ secrets.STATS_AWS_ATHENA_SOURCE_TABLE }} + AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.STATS_AWS_ATHENA_OUTPUT_LOCATION }} GITHUB_REPOSITORY: ${{ github.repository }} run: .github/tools/fetch_athena_stats.sh From 697d19740c7f4953c32b07ffc59b5aae1f9a9f2b Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 15:54:27 +0200 Subject: [PATCH 07/11] Use latest version of jq --- .github/tools/fetch_athena_stats.sh | 2 +- .github/workflows/arduino-stats.yaml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/tools/fetch_athena_stats.sh b/.github/tools/fetch_athena_stats.sh index dec5fd27ac0..1d22d662a24 100755 --- a/.github/tools/fetch_athena_stats.sh +++ b/.github/tools/fetch_athena_stats.sh @@ -77,7 +77,7 @@ queryResult=$( aws athena get-query-results \ EOM datapoints="[" -for row in $(echo "${queryResult}" |jq 'del(.ResultSet.Rows[0])' | jq -r '.ResultSet.Rows[] | .Data' --compact-output); do +for row in $(echo "${queryResult}" | jq 'del(.ResultSet.Rows[0])' | jq -r '.ResultSet.Rows[] | .Data' --compact-output); do value=$(jq -r ".[1].VarCharValue" <<< "${row}") tag=$(jq -r ".[0].VarCharValue" <<< "${row}") # Some splitting to obtain 0.6.0, Windows, 32bit elements from string 0.6.0_Windows_32bit.zip diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index 6484ceb1852..10bba2b2022 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -16,6 +16,12 @@ jobs: - name: Checkout uses: actions/checkout@v1 + - name: Get latest jq version + run: | + wget -q https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 + chmod +x jq-linux64 + alias jq="${{ github.workspace }}/jq-linux64" + - name: Fetch downloads count form Arduino CDN using AWS Athena id: fetch env: From 6820110088d8ee65022ab560f61c26b783919c69 Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 16:05:11 +0200 Subject: [PATCH 08/11] Use latest version of jq inside fetch action --- .github/workflows/arduino-stats.yaml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index 10bba2b2022..0f3a4cbecbc 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -16,12 +16,6 @@ jobs: - name: Checkout uses: actions/checkout@v1 - - name: Get latest jq version - run: | - wget -q https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 - chmod +x jq-linux64 - alias jq="${{ github.workspace }}/jq-linux64" - - name: Fetch downloads count form Arduino CDN using AWS Athena id: fetch env: @@ -30,7 +24,12 @@ jobs: AWS_ATHENA_SOURCE_TABLE: ${{ secrets.STATS_AWS_ATHENA_SOURCE_TABLE }} AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.STATS_AWS_ATHENA_OUTPUT_LOCATION }} GITHUB_REPOSITORY: ${{ github.repository }} - run: .github/tools/fetch_athena_stats.sh + run: | + # Fetch jq 1.6 as VM has only 1.5 ATM + wget -q https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 + chmod +x jq-linux64 + alias jq="${{ github.workspace }}/jq-linux64" + .github/tools/fetch_athena_stats.sh - name: Send metrics uses: masci/datadog@v1 From 1d9b0628cf6aa6bddef5f12f37040ea654e0e988 Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 16:14:07 +0200 Subject: [PATCH 09/11] Use PATH override to use latest version of jq inside fetch action --- .github/workflows/arduino-stats.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index 0f3a4cbecbc..98d4d02fda3 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -26,9 +26,9 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} run: | # Fetch jq 1.6 as VM has only 1.5 ATM - wget -q https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 - chmod +x jq-linux64 - alias jq="${{ github.workspace }}/jq-linux64" + wget -q https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 -O jq + chmod +x jq + PATH=${{ github.workspace }}:$PATH${{ github.workspace }} .github/tools/fetch_athena_stats.sh - name: Send metrics From 866d63fdeccd513077e37f20b94bee2682a72272 Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 17:58:01 +0200 Subject: [PATCH 10/11] Fix path typo --- .github/workflows/arduino-stats.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index 98d4d02fda3..fb55eadde41 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -28,7 +28,7 @@ jobs: # Fetch jq 1.6 as VM has only 1.5 ATM wget -q https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 -O jq chmod +x jq - PATH=${{ github.workspace }}:$PATH${{ github.workspace }} + PATH=${{ github.workspace }}:$PATH .github/tools/fetch_athena_stats.sh - name: Send metrics From fde9a7bb399c1f25f49d7b1a907257f65476983b Mon Sep 17 00:00:00 2001 From: rsora Date: Wed, 22 Apr 2020 18:30:07 +0200 Subject: [PATCH 11/11] Remove push event --- .github/workflows/arduino-stats.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/arduino-stats.yaml b/.github/workflows/arduino-stats.yaml index fb55eadde41..4b9ce973b7d 100644 --- a/.github/workflows/arduino-stats.yaml +++ b/.github/workflows/arduino-stats.yaml @@ -1,9 +1,6 @@ name: download-stats on: - push: - branches: - - rsora/downloads-stats-action schedule: # run every day at 12:00:00 - cron: '* 12 * * *'