Added e2e test to reproduce issue #340 (#397)

Merged
3 commits merged on Jun 1, 2023
1 change: 1 addition & 0 deletions go.mod
@@ -3,6 +3,7 @@ module github.com/project-codeflare/multi-cluster-app-dispatcher
go 1.18

require (
github.com/eapache/go-resiliency v1.3.0
github.com/emicklei/go-restful v2.16.0+incompatible
github.com/golang/protobuf v1.4.3
github.com/hashicorp/go-multierror v1.1.1
2 changes: 2 additions & 0 deletions go.sum
@@ -96,6 +96,8 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3
github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/eapache/go-resiliency v1.3.0 h1:RRL0nge+cWGlxXbUzJ7yMcq6w2XBEr19dCN6HECGaT0=
github.com/eapache/go-resiliency v1.3.0/go.mod h1:5yPzW0MIvSe0JDsv0v+DvcjEv2FyD6iZYSs1ZI+iQho=
github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
10 changes: 9 additions & 1 deletion hack/run-e2e-kind.sh
@@ -33,6 +33,7 @@ export CLUSTER_CONTEXT="--name test"
# Using an older image due to the older version of the Kubernetes cluster
export IMAGE_ECHOSERVER="kicbase/echo-server:1.0"
export IMAGE_UBUNTU_LATEST="ubuntu:latest"
export IMAGE_UBI_LATEST="registry.access.redhat.com/ubi8/ubi:latest"
export KIND_OPT=${KIND_OPT:=" --config ${ROOT_DIR}/hack/e2e-kind-config.yaml"}
export KA_BIN=_output/bin
export WAIT_TIME="20s"
@@ -220,6 +221,13 @@ function kind-up-cluster {
exit 1
fi

docker pull ${IMAGE_UBI_LATEST}
if [ $? -ne 0 ]
then
echo "Failed to pull ${IMAGE_UBI_LATEST}"
exit 1
fi

if [[ "$MCAD_IMAGE_PULL_POLICY" = "Always" ]]
then
docker pull ${IMAGE_MCAD}
@@ -236,7 +244,7 @@
fi
docker images

for image in ${IMAGE_ECHOSERVER} ${IMAGE_UBUNTU_LATEST} ${IMAGE_MCAD}
for image in ${IMAGE_ECHOSERVER} ${IMAGE_UBUNTU_LATEST} ${IMAGE_MCAD} ${IMAGE_UBI_LATEST}
do
kind load docker-image ${image} ${CLUSTER_CONTEXT}
if [ $? -ne 0 ]
pkg/quotaplugins/quota-forest/quota-manager/quota/quotamanager.go
@@ -530,20 +530,19 @@ func (qm *QuotaManager) Fits(aw *arbv1.AppWrapper, awResDemands *clusterstateapi
klog.V(4).Infof("[Fits] Sending quota allocation request: %#v ", consumerInfo)
allocResponse, err := qm.quotaManagerBackend.AllocateForest(QuotaManagerForestName, consumerID)
if err != nil {
qm.removeConsumer(consumerID)
klog.Errorf("[Fits] Error allocating consumer: %s/%s, err=%#v.", aw.Namespace, aw.Name, err)
return false, nil, err.Error()
}
if allocResponse != nil && len(strings.TrimSpace(allocResponse.GetMessage())) > 0 {
klog.Errorf("[Fits] Error allocating consumer: %s/%s, msg=%s, err=%#v.",
aw.Namespace, aw.Name, allocResponse.GetMessage(), err)
return false, nil, allocResponse.GetMessage()
}
klog.V(4).Infof("[Fits] allocation response received. Quota Allocated: %t, Message: '%s', Preempted app wrappers count: %d", allocResponse.IsAllocated(),
strings.TrimSpace(allocResponse.GetMessage()), len(allocResponse.GetPreemptedIds()))
doesFit := allocResponse.IsAllocated()
if !doesFit {
qm.removeConsumer(consumerID)
return doesFit, preemptIds, strings.TrimSpace(allocResponse.GetMessage())
}
preemptIds = qm.getAppWrappers(allocResponse.GetPreemptedIds())

return doesFit, preemptIds, ""
return doesFit, preemptIds, strings.TrimSpace(allocResponse.GetMessage())
}

func (qm *QuotaManager) getAppWrappers(preemptIds []string) []*arbv1.AppWrapper {
@@ -614,3 +613,14 @@ func (qm *QuotaManager) Release(aw *arbv1.AppWrapper) bool {

return released
}
func (qm *QuotaManager) removeConsumer(consumerID string) {
// removing the consumer to allow for the consumer to be added if and when
// the function is called for the same app wrapper
removed, err := qm.quotaManagerBackend.RemoveConsumer(consumerID)
if err != nil {
klog.Warningf("Failed to remove consumer %s, %#v", consumerID, err)
}
if !removed {
klog.Warningf("Failed to remove consumer %s", consumerID)
}
}
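With this change, Fits calls the new removeConsumer helper both when AllocateForest returns an error and when the response reports that the AppWrapper does not fit, so the consumer registered for that AppWrapper is cleaned out of the quota backend and a later Fits call for the same AppWrapper can register it again (as the comment in removeConsumer states). A minimal sketch of that register/allocate/clean-up pattern, using a hypothetical simplified backend rather than the real quota-manager API:

```go
package main

import "fmt"

// quotaBackend is a hypothetical, simplified stand-in for the real quota
// backend: it only tracks which consumer IDs are currently registered.
type quotaBackend struct {
	consumers map[string]bool
}

// AddConsumer fails if the consumer is already registered, which is what a
// stale entry left behind by a failed allocation would cause.
func (b *quotaBackend) AddConsumer(id string) error {
	if b.consumers[id] {
		return fmt.Errorf("consumer %s already exists", id)
	}
	b.consumers[id] = true
	return nil
}

func (b *quotaBackend) RemoveConsumer(id string) {
	delete(b.consumers, id)
}

// tryAllocate mimics the Fits flow: register the consumer, attempt the
// allocation, and remove the consumer again whenever the allocation fails,
// so a later attempt for the same AppWrapper starts from a clean slate.
func tryAllocate(b *quotaBackend, id string, allocate func() bool) bool {
	if err := b.AddConsumer(id); err != nil {
		return false
	}
	if !allocate() {
		b.RemoveConsumer(id) // the cleanup this PR adds to Fits
		return false
	}
	return true
}

func main() {
	b := &quotaBackend{consumers: map[string]bool{}}
	// First attempt fails (no quota available); without the cleanup, the
	// second attempt would be rejected with "consumer already exists".
	fmt.Println(tryAllocate(b, "aw-1", func() bool { return false })) // false
	fmt.Println(tryAllocate(b, "aw-1", func() bool { return true }))  // true
}
```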
168 changes: 168 additions & 0 deletions pkg/quotaplugins/quota-forest/quota-manager/quota/quotamanager_test.go
@@ -19,7 +19,9 @@ package quota_test
import (
"strings"
"testing"
"time"

"github.com/eapache/go-resiliency/retrier"
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/quotaplugins/quota-forest/quota-manager/quota"
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/quotaplugins/quota-forest/quota-manager/quota/utils"
"github.com/stretchr/testify/assert"
@@ -225,3 +227,169 @@ func TestNewQuotaManagerConsumerAllocationRelease(t *testing.T) {
})
}
}
func TestQuotaManagerQuotaUsedLongRunningConsumers(t *testing.T) {
forestName := "unit-test-1"
qmManagerUnderTest := quota.NewManager()

err := qmManagerUnderTest.AddForest(forestName)
assert.NoError(t, err, "No error expected when adding a forest")
testTreeName, err := qmManagerUnderTest.AddTreeFromString(
`{
"kind": "QuotaTree",
"metadata": {
"name": "test-tree"
},
"spec": {
"resourceNames": [
"cpu",
"memory"
],
"nodes": {
"root": {
"parent": "nil",
"hard": "true",
"quota": {
"cpu": "10",
"memory": "256"
}
},
"gold": {
"parent": "root",
"hard": "true",
"quota": {
"cpu": "10",
"memory": "256"
}
}
}
}
}`)
if err != nil {
assert.Fail(t, "No error expected when adding a tree to forest")
}
err = qmManagerUnderTest.AddTreeToForest(forestName, testTreeName)
assert.NoError(t, err, "No error expected when adding a tree from forest")
modeSet := qmManagerUnderTest.SetMode(quota.Normal)
assert.True(t, modeSet, "Setting the mode should not fail.")

// Define the test table
var tests = []struct {
name string
consumer utils.JConsumer
}{
// the table itself
{"Gold consumer 1",
utils.JConsumer{
Kind: "Consumer",
MetaData: utils.JMetaData{
Name: "gpld-consumer-data",
},
Spec: utils.JConsumerSpec{
ID: "gold-consumer-1",
Trees: []utils.JConsumerTreeSpec{
{
TreeName: testTreeName,
GroupID: "gold",
Request: map[string]int{
"cpu": 10,
"memory": 4,
"gpu": 0,
},
Priority: 0,
CType: 0,
UnPreemptable: false,
},
},
},
},
},
// the table itself
{"Gold consumer 2",
utils.JConsumer{
Kind: "Consumer",
MetaData: utils.JMetaData{
Name: "gpld-consumer-data",
},
Spec: utils.JConsumerSpec{
ID: "gold-consumer-2",
Trees: []utils.JConsumerTreeSpec{
{
TreeName: testTreeName,
GroupID: "gold",
Request: map[string]int{
"cpu": 10,
"memory": 4,
"gpu": 0,
},
Priority: 0,
CType: 0,
UnPreemptable: false,
},
},
},
},
},
}
// Execute tests in parallel
for _, tc := range tests {
tc := tc // capture range variable
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
// Get list of quota management tree IDs
qmTreeIDs := qmManagerUnderTest.GetTreeNames()

consumerInfo, err := quota.NewConsumerInfo(tc.consumer)
assert.NoError(t, err, "No error expected when building consumer")
assert.Contains(t, qmTreeIDs, tc.consumer.Spec.Trees[0].TreeName)

retryAllocation := retrier.New(retrier.LimitedExponentialBackoff(10, 10*time.Millisecond, 1*time.Second),
&AllocationClassifier{})

err = retryAllocation.Run(func() error {
added, err2 := qmManagerUnderTest.AddConsumer(consumerInfo)
assert.NoError(t, err2, "No error expected when adding consumer")
assert.True(t, added, "Consumer is expected to be added")

response, err2 := qmManagerUnderTest.AllocateForest(forestName, consumerInfo.GetID())
if err2 == nil {
assert.Equal(t, 0, len(strings.TrimSpace(response.GetMessage())), "An empty response is expected")
assert.True(t, response.IsAllocated(), "The allocation should succeed")
} else {
removed, err3 := qmManagerUnderTest.RemoveConsumer(consumerInfo.GetID())
assert.NoError(t, err3, "No Error expected when removing consumer")
assert.True(t, removed, "Removal of consumer should succeed")
}
return err2

})
if err != nil {
assert.Failf(t, "Allocation of quota should have succeeded", "%v", err)
}

// Simulate a long-running consumer that holds its allocated quota
time.Sleep(10 * time.Millisecond)

deAllocated := qmManagerUnderTest.DeAllocateForest(forestName, consumerInfo.GetID())
assert.True(t, deAllocated, "De-allocation expected to succeed")

removed, err := qmManagerUnderTest.RemoveConsumer(consumerInfo.GetID())
assert.NoError(t, err, "No Error expected when removing consumer")
assert.True(t, removed, "Removal of consumer should succeed")

})
}

}

type AllocationClassifier struct {
}

func (c *AllocationClassifier) Classify(err error) retrier.Action {
if err == nil {
return retrier.Succeed
}
if strings.TrimSpace(err.Error()) == "Failed to allocate quota on quota designation 'test-tree'" {
return retrier.Retry
}
return retrier.Fail
}
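The test runs its two gold consumers in parallel against a tree whose quota only fits one of them at a time, so an AllocateForest call can legitimately fail while the other consumer holds the quota; the retrier from github.com/eapache/go-resiliency (the dependency added to go.mod above) retries only that contention error via AllocationClassifier and fails fast on anything else. A standalone sketch of the same retry pattern, using a hypothetical transient error outside the quota manager:

```go
package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/eapache/go-resiliency/retrier"
)

var errBusy = errors.New("resource busy")

// transientClassifier retries only errors considered transient and fails
// fast on anything else, mirroring AllocationClassifier above.
type transientClassifier struct{}

func (transientClassifier) Classify(err error) retrier.Action {
	if err == nil {
		return retrier.Succeed
	}
	if errors.Is(err, errBusy) {
		return retrier.Retry
	}
	return retrier.Fail
}

func main() {
	// Up to 10 attempts, starting at 10ms, doubling and capped at 1s per wait,
	// the same settings the test uses.
	r := retrier.New(retrier.LimitedExponentialBackoff(10, 10*time.Millisecond, time.Second),
		transientClassifier{})

	attempts := 0
	err := r.Run(func() error {
		attempts++
		if attempts < 3 {
			return errBusy // transient: retried with back-off
		}
		return nil // succeeds on the third attempt
	})
	fmt.Println(attempts, err) // 3 <nil>
}
```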
25 changes: 25 additions & 0 deletions test/e2e-kuttl/quota-errors/00-assert.yaml
@@ -0,0 +1,25 @@
# Verify CRDs existence
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: appwrappers.mcad.ibm.com
status:
  acceptedNames:
    kind: AppWrapper
    listKind: AppWrapperList
    plural: appwrappers
    singular: appwrapper
  storedVersions:
    - v1beta1
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: quotasubtrees.ibm.com
status:
  acceptedNames:
    kind: QuotaSubtree
    singular: quotasubtree
    plural: quotasubtrees
  storedVersions:
    - v1
24 changes: 24 additions & 0 deletions test/e2e-kuttl/quota-errors/01-assert.yaml
@@ -0,0 +1,24 @@
# Verify subtree creations
apiVersion: ibm.com/v1
kind: QuotaSubtree
metadata:
  name: context-root
  namespace: kube-system
  labels:
    tree: quota_context
---
apiVersion: ibm.com/v1
kind: QuotaSubtree
metadata:
  name: service-root
  namespace: kube-system
  labels:
    tree: quota_service
---
apiVersion: ibm.com/v1
kind: QuotaSubtree
metadata:
  name: context-root-children
  namespace: kube-system
  labels:
    tree: quota_context
5 changes: 5 additions & 0 deletions test/e2e-kuttl/quota-errors/02-assert.yaml
@@ -0,0 +1,5 @@
# Verify test namespace existence
apiVersion: v1
kind: Namespace
metadata:
  name: quota-errors
4 changes: 4 additions & 0 deletions test/e2e-kuttl/quota-errors/02-install.yaml
@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: quota-errors
27 changes: 27 additions & 0 deletions test/e2e-kuttl/quota-errors/03-assert.yaml
@@ -0,0 +1,27 @@
# Verify AppWrapper was dispatched and pod was created
apiVersion: mcad.ibm.com/v1beta1
kind: AppWrapper
metadata:
  name: deployment-silver-lo-pri-1replica
  namespace: quota-errors
  labels:
    quota_context: "silver"
    quota_service: "service-root"
status:
  state: Running
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deployment-silver-lo-pri-1replica
  namespace: quota-errors
  labels:
    app: deployment-silver-lo-pri-1replica
    appwrapper.mcad.ibm.com: deployment-silver-lo-pri-1replica
    resourceName: deployment-silver-lo-pri-1replica
status:
  availableReplicas: 1
  observedGeneration: 1
  readyReplicas: 1
  replicas: 1
  updatedReplicas: 1