Skip to content

Commit e682df0

Browse files
committed
Code review, gather logs from gcloud
1 parent 38a3006 commit e682df0

File tree

5 files changed

+40
-53
lines changed

5 files changed

+40
-53
lines changed

tests/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ run-tests-on-vm: ## Run the functional tests on a GCP VM
8080

8181
.PHONY: nfr-test
8282
nfr-test: ## Run the NFR tests on a GCP VM
83-
bash scripts/run-tests-gcp-vm.sh true
83+
NFR=true bash scripts/run-tests-gcp-vm.sh
8484

8585
.PHONY: start-longevity-test
8686
start-longevity-test: ## Start the longevity test to run for 4 days in GKE

tests/README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,17 @@ To start the longevity test, set up your VM (`create-and-setup-vm`) and run
218218
make start-longevity-test
219219
```
220220

221+
<!-- -->
222+
> Note: If you want to change the time period for which the test runs, update the `wrk` commands in `suite/scripts/longevity-wrk.sh` to the time period you want, and run `make sync-files-to-vm`.
223+
224+
<!-- -->
221225
> Note: If you want to re-run the longevity test, you need to clear out the `cafe.example.com` entry from the `/etc/hosts` file on your VM.
222226
223227
You can verify the test is working by checking nginx logs to see traffic flow, and check that the cronjob is running and redeploying apps.
224228

225-
To complete the longevity test and collect results, first visit the [GCP Monitoring Dashboards](https://console.cloud.google.com/monitoring/dashboards) page and select the `NGF Longevity Test` dashboard. Take PNG screenshots of each chart for the time period in which your test ran, and save those to be added to the results file.
229+
After 4 days (96h), you can complete the longevity test and collect results. To ensure that the traffic has stopped flowing, SSH into the VM using `gcloud compute ssh` and run `ps aux | grep wrk` to verify that the `wrk` commands are no longer running. Then, visit the [GCP Monitoring Dashboards](https://console.cloud.google.com/monitoring/dashboards) page and select the `NGF Longevity Test` dashboard. Take PNG screenshots of each chart for the time period in which your test ran, and save them to be added to the results file.
226230

227-
Next, run:
231+
Finally, run:
228232

229233
```makefile
230234
make stop-longevity-test

tests/scripts/create-gke-cluster.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ gcloud container clusters create ${GKE_CLUSTER_NAME} \
1515
--enable-private-nodes \
1616
--master-ipv4-cidr 172.16.${ip_random_digit}.32/28 \
1717
--metadata=block-project-ssh-keys=TRUE \
18-
--monitoring=SYSTEM,POD,DEPLOYMENT
18+
--monitoring=SYSTEM,POD,DEPLOYMENT \
19+
--logging=SYSTEM,WORKLOAD
1920

2021
# Add current IP to GKE master control node access, if this script is not invoked during a CI run.
2122
if [ "${IS_CI}" = "false" ]; then

tests/scripts/run-tests-gcp-vm.sh

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
44

5-
NFR=${1:-false}
6-
75
source scripts/vars.env
86

97
SCRIPT=run-tests.sh
@@ -21,3 +19,27 @@ gcloud compute ssh --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} username@
2119
if [ "${NFR}" = "true" ]; then
2220
gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} --recurse username@${RESOURCE_NAME}:~/nginx-gateway-fabric/tests/results .
2321
fi
22+
23+
## If tearing down the longevity test, we need to collect logs from gcloud and add them to the results file
24+
if [ "${STOP_LONGEVITY}" = "true" ]; then
25+
version=${NGF_VERSION}
26+
if [ "$version" = "" ]; then
27+
version=${TAG}
28+
fi
29+
30+
results="${SCRIPT_DIR}/../results/longevity/$version/$version.md"
31+
printf "\n## Error Logs\n\n" >> $results
32+
33+
## ngf error logs
34+
ngfErrText=$(gcloud logging read --project=${GKE_PROJECT} 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx-gateway AND severity=ERROR AND SEARCH("error")' --format "value(textPayload)")
35+
ngfErrJSON=$(gcloud logging read --project=${GKE_PROJECT} 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx-gateway AND severity=ERROR AND SEARCH("error")' --format "value(jsonPayload)")
36+
printf "### nginx-gateway\n$ngfErrText\n$ngfErrJSON\n\n" >> $results
37+
38+
## nginx error logs
39+
ngxErr=$(gcloud logging read --project=${GKE_PROJECT} 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx AND severity=ERROR AND SEARCH("`[warn]`") OR SEARCH("`[error]`") OR SEARCH("`[emerg]`")' --format "value(textPayload)")
40+
printf "### nginx\n$ngxErr\n\n" >> $results
41+
42+
## nginx non-200 responses (also filter out 499 since wrk cancels connections)
43+
ngxNon200=$(gcloud logging read --project=${GKE_PROJECT} 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx AND "GET" "HTTP/1.1" -"200" -"499" -"client prematurely closed connection"' --format "value(textPayload)")
44+
printf "$ngxNon200\n\n" >> $results
45+
fi

tests/suite/longevity_test.go

Lines changed: 7 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
11
package suite
22

33
import (
4-
"bufio"
5-
"bytes"
6-
"context"
74
"fmt"
8-
"io"
95
"os"
106
"path/filepath"
117
"strings"
@@ -81,57 +77,21 @@ var _ = Describe("Longevity", Label("longevity-setup", "longevity-teardown"), fu
8177
Expect(err).ToNot(HaveOccurred())
8278

8379
Expect(framework.WriteContent(resultsFile, "\n## Traffic\n"))
84-
writeTrafficResults(resultsFile, homeDir, "coffee.txt", "HTTP")
85-
writeTrafficResults(resultsFile, homeDir, "tea.txt", "HTTPS")
86-
87-
// gather any error logs
88-
names, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout)
89-
Expect(err).ToNot(HaveOccurred())
90-
91-
Expect(framework.WriteContent(resultsFile, "\n## Error Logs\n"))
92-
writeErrorLogs(resultsFile, names[0], "nginx-gateway")
93-
writeErrorLogs(resultsFile, names[0], "nginx")
80+
Expect(writeTrafficResults(resultsFile, homeDir, "coffee.txt", "HTTP")).To(Succeed())
81+
Expect(writeTrafficResults(resultsFile, homeDir, "tea.txt", "HTTPS")).To(Succeed())
9482

9583
Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed())
9684
Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed())
9785
})
9886
})
9987

100-
func writeTrafficResults(resultsFile *os.File, homeDir, filename, testname string) {
88+
func writeTrafficResults(resultsFile *os.File, homeDir, filename, testname string) error {
10189
file := fmt.Sprintf("%s/%s", homeDir, filename)
10290
content, err := os.ReadFile(file)
103-
Expect(err).ToNot(HaveOccurred())
104-
105-
formattedContent := fmt.Sprintf("%s:\n\n```text\n%s```\n", testname, string(content))
106-
Expect(framework.WriteContent(resultsFile, formattedContent)).To(Succeed())
107-
}
108-
109-
func writeErrorLogs(resultsFile *os.File, pod, container string) {
110-
logReq := clientGoClient.CoreV1().Pods(ngfNamespace).GetLogs(pod, &core.PodLogOptions{Container: container})
111-
112-
ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout)
113-
defer cancel()
114-
115-
logs, err := logReq.Stream(ctx)
116-
Expect(err).ToNot(HaveOccurred())
117-
defer logs.Close()
118-
119-
buf := new(bytes.Buffer)
120-
_, err = io.Copy(buf, logs)
121-
Expect(err).ToNot(HaveOccurred())
122-
123-
Expect(framework.WriteContent(resultsFile, fmt.Sprintf("\n### %s\n", container)))
124-
125-
scanner := bufio.NewScanner(strings.NewReader(buf.String()))
126-
for scanner.Scan() {
127-
line := scanner.Text()
128-
if isError(line) {
129-
Expect(framework.WriteContent(resultsFile, line)).To(Succeed())
130-
}
91+
if err != nil {
92+
return err
13193
}
132-
Expect(scanner.Err()).ToNot(HaveOccurred())
133-
}
13494

135-
func isError(line string) bool {
136-
return strings.Contains(line, "error") || strings.Contains(line, "warn") || strings.Contains(line, "emerg")
95+
formattedContent := fmt.Sprintf("%s:\n\n```text\n%s```\n", testname, string(content))
96+
return framework.WriteContent(resultsFile, formattedContent)
13797
}

0 commit comments

Comments
 (0)