From b33a4a332484a447878fa7b3a6a37cb0a9954e7c Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Tue, 8 Jun 2021 14:45:08 +0000 Subject: [PATCH 01/23] Potential fix for pod startup issue in GBU CNE env after node drain/repave operation. --- .../kubernetes/ItParameterizedDomain.java | 11 --- .../operator/DomainProcessorImpl.java | 8 +- .../kubernetes/operator/JobWatcher.java | 19 +++++ .../operator/MakeRightDomainOperation.java | 2 + .../kubernetes/operator/PodWatcher.java | 75 +++++++++++++++++++ .../kubernetes/operator/TuningParameters.java | 5 +- .../operator/TuningParametersImpl.java | 3 +- .../kubernetes/operator/WaitForReadyStep.java | 56 +++++--------- .../operator/helpers/DomainPresenceInfo.java | 13 ++++ .../kubernetes/operator/WatcherTestBase.java | 2 +- .../ManagedServerUpIteratorStepTest.java | 2 +- 11 files changed, 143 insertions(+), 53 deletions(-) diff --git a/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItParameterizedDomain.java b/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItParameterizedDomain.java index 96d51d6f6dd..8bae052c138 100644 --- a/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItParameterizedDomain.java +++ b/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItParameterizedDomain.java @@ -137,7 +137,6 @@ import static oracle.weblogic.kubernetes.utils.DeployUtil.deployUsingWlst; import static oracle.weblogic.kubernetes.utils.FileUtils.doesFileExistInPod; import static oracle.weblogic.kubernetes.utils.K8sEvents.DOMAIN_CHANGED; -import static oracle.weblogic.kubernetes.utils.K8sEvents.DOMAIN_PROCESSING_COMPLETED; import static oracle.weblogic.kubernetes.utils.K8sEvents.DOMAIN_PROCESSING_STARTING; import static oracle.weblogic.kubernetes.utils.K8sEvents.POD_STARTED; import static oracle.weblogic.kubernetes.utils.K8sEvents.POD_TERMINATED; @@ -740,16 +739,6 @@ public void testMultiClustersRollingRestart() { .until(checkDomainEvent(opNamespace, miiDomainNamespace, miiDomainUid, DOMAIN_PROCESSING_STARTING, "Normal", timestamp)); - withStandardRetryPolicy - .conditionEvaluationListener( - condition -> logger.info("Waiting for domain event {0} to be logged " - + "(elapsed time {1}ms, remaining time {2}ms)", - DOMAIN_PROCESSING_COMPLETED, - condition.getElapsedTimeInMS(), - condition.getRemainingTimeInMS())) - .until(checkDomainEvent(opNamespace, miiDomainNamespace, miiDomainUid, - DOMAIN_PROCESSING_COMPLETED, "Normal", timestamp)); - // Verify that pod termination and started events are logged only once for each managed server in each cluster for (int i = 1; i <= NUMBER_OF_CLUSTERS_MIIDOMAIN; i++) { for (int j = 1; j <= replicaCount; j++) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java b/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java index 57164921262..84bb9798d0e 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java +++ b/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java @@ -746,7 +746,7 @@ private void addServerToMaps(Map serverHealthMap, */ class MakeRightDomainOperationImpl implements MakeRightDomainOperation { - private final DomainPresenceInfo liveInfo; + private DomainPresenceInfo liveInfo; private boolean explicitRecheck; private boolean deleting; private boolean willInterrupt; @@ -851,6 +851,12 @@ public void setInspectionRun() { inspectionRun = true; } + @Override + public void setLiveInfo(DomainPresenceInfo info) { + this.liveInfo = info; + } + + @Override public boolean wasInspectionRun() { return inspectionRun; diff --git a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java index 250792f2552..4036408f59a 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java @@ -12,6 +12,7 @@ import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import javax.annotation.Nonnull; @@ -25,13 +26,16 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; +import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.ResponseStep; import oracle.kubernetes.operator.logging.LoggingFacade; import oracle.kubernetes.operator.logging.LoggingFactory; import oracle.kubernetes.operator.logging.MessageKeys; +import oracle.kubernetes.operator.steps.DefaultResponseStep; import oracle.kubernetes.operator.watcher.WatchListener; +import oracle.kubernetes.operator.work.NextAction; import oracle.kubernetes.operator.work.Packet; import oracle.kubernetes.operator.work.Step; import oracle.kubernetes.utils.SystemClock; @@ -302,6 +306,21 @@ Throwable createTerminationException(V1Job job) { void logWaiting(String name) { LOGGER.fine(MessageKeys.WAITING_FOR_JOB_READY, name); } + + @Override + protected DefaultResponseStep resumeIfReady(Callback callback) { + return new DefaultResponseStep<>(null) { + @Override + public NextAction onSuccess(Packet packet, CallResponse callResponse) { + if (isReady(callResponse.getResult())) { + callback.proceedFromWait(callResponse.getResult()); + return doNext(packet); + } + return doDelay(createReadAndIfReadyCheckStep(callback), packet, + getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); + } + }; + } } static class DeadlineExceededException extends Exception { diff --git a/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java b/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java index df2414966fe..4bc55b66c6d 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java +++ b/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java @@ -36,6 +36,8 @@ public interface MakeRightDomainOperation { void setInspectionRun(); + void setLiveInfo(DomainPresenceInfo info); + boolean wasInspectionRun(); private static boolean wasInspectionRun(Packet packet) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index 9f34fd907b9..231ed18b608 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import javax.annotation.Nonnull; @@ -28,7 +29,9 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; +import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; +import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.LegalNames; import oracle.kubernetes.operator.helpers.PodHelper; @@ -36,8 +39,14 @@ import oracle.kubernetes.operator.logging.LoggingFacade; import oracle.kubernetes.operator.logging.LoggingFactory; import oracle.kubernetes.operator.logging.MessageKeys; +import oracle.kubernetes.operator.steps.DefaultResponseStep; import oracle.kubernetes.operator.watcher.WatchListener; +import oracle.kubernetes.operator.work.NextAction; +import oracle.kubernetes.operator.work.Packet; import oracle.kubernetes.operator.work.Step; +import oracle.kubernetes.weblogic.domain.model.Domain; + +import static oracle.kubernetes.operator.ProcessingConstants.MAKE_RIGHT_DOMAIN_OPERATION; /** * Watches for changes to pods. @@ -322,6 +331,55 @@ V1ObjectMeta getMetadata(V1Pod pod) { Step createReadAsyncStep(String name, String namespace, String domainUid, ResponseStep responseStep) { return new CallBuilder().readPodAsync(name, namespace, domainUid, responseStep); } + + @Override + protected DefaultResponseStep resumeIfReady(Callback callback) { + return new DefaultResponseStep<>(null) { + @Override + public NextAction onSuccess(Packet packet, CallResponse callResponse) { + DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); + Optional.ofNullable(callResponse.getResult()).ifPresent(pod -> setServerPodFromEvent(info, pod)); + if (isReady(callResponse.getResult())) { + resetWatchBackstopRecheckCount(info); + return proceedFromWait(packet, callResponse); + } + if (shouldWait(info)) { + // Watch backstop recheck count is less than or equal to the configured recheck count, delay. + return doDelay(createReadAndIfReadyCheckStep(callback), packet, + getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); + } else { + // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. + return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), + info.getNamespace(), new MakeRightDomainStep(null)), packet); + } + } + + private void resetWatchBackstopRecheckCount(DomainPresenceInfo info) { + Optional.ofNullable(info).ifPresent(DomainPresenceInfo::resetWatchBackstopRecheckCount); + } + + private void setServerPodFromEvent(DomainPresenceInfo info, V1Pod pod) { + Optional.ofNullable(info).ifPresent(i -> i.setServerPodFromEvent(getPodLabel(pod), pod)); + } + + private NextAction proceedFromWait(Packet packet, CallResponse callResponse) { + callback.proceedFromWait(callResponse.getResult()); + return doNext(packet); + } + + private boolean shouldWait(DomainPresenceInfo info) { + return info == null || info.incrementAndGetWatchBackstopRecheckCount() <= getWatchBackstopRecheckCount(); + } + + private String getPodLabel(V1Pod pod) { + return Optional.ofNullable(pod) + .map(V1Pod::getMetadata) + .map(V1ObjectMeta::getLabels) + .map(m -> m.get(LabelConstants.SERVERNAME_LABEL)) + .orElse(null); + } + }; + } } private class WaitForPodReadyStep extends WaitForPodStatusStep { @@ -360,6 +418,7 @@ protected void removeCallback(String podName, Consumer callback) { protected void logWaiting(String name) { LOGGER.fine(MessageKeys.WAITING_FOR_POD_READY, name); } + } private class WaitForPodDeleteStep extends WaitForPodStatusStep { @@ -383,4 +442,20 @@ protected void removeCallback(String podName, Consumer callback) { removeOnDeleteCallback(podName, callback); } } + + private static class MakeRightDomainStep extends DefaultResponseStep { + MakeRightDomainStep(Step next) { + super(next); + } + + @Override + public NextAction onSuccess(Packet packet, CallResponse callResponse) { + MakeRightDomainOperation makeRightDomainOperation = + (MakeRightDomainOperation)packet.get(MAKE_RIGHT_DOMAIN_OPERATION); + makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain)callResponse.getResult())); + makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); + return super.onSuccess(packet, callResponse); + } + } + } diff --git a/operator/src/main/java/oracle/kubernetes/operator/TuningParameters.java b/operator/src/main/java/oracle/kubernetes/operator/TuningParameters.java index 041f59a1f35..445a97a44bd 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/TuningParameters.java +++ b/operator/src/main/java/oracle/kubernetes/operator/TuningParameters.java @@ -194,6 +194,7 @@ class WatchTuning { public final int watchLifetime; public final int watchMinimumDelay; public final int watchBackstopRecheckDelay; + public final int watchBackstopRecheckCount; /** * Create watch tuning. @@ -201,10 +202,12 @@ class WatchTuning { * @param watchMinimumDelay Minimum delay before accepting new events to prevent hot loops * @param watchBackstopRecheckDelay Recheck delay for get while waiting for a status to backstop missed watch events */ - public WatchTuning(int watchLifetime, int watchMinimumDelay, int watchBackstopRecheckDelay) { + public WatchTuning(int watchLifetime, int watchMinimumDelay, int watchBackstopRecheckDelay, + int watchBackstopRecheckCount) { this.watchLifetime = watchLifetime; this.watchMinimumDelay = watchMinimumDelay; this.watchBackstopRecheckDelay = watchBackstopRecheckDelay; + this.watchBackstopRecheckCount = watchBackstopRecheckCount; } @Override diff --git a/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java b/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java index cdfe665d9d3..3ab4ed06bf5 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java +++ b/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java @@ -75,7 +75,8 @@ private void update() { new WatchTuning( (int) readTuningParameter("watchLifetime", 300), (int) readTuningParameter("watchMinimumDelay", 5), - (int) readTuningParameter("watchBackstopRecheckDelaySeconds", 5)); + (int) readTuningParameter("watchBackstopRecheckDelaySeconds", 5), + (int) readTuningParameter("watchBackstopRecheckCount", 24)); PodTuning pod = new PodTuning( diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index ce82cd3529b..8c5d83a3287 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -4,16 +4,12 @@ package oracle.kubernetes.operator; import java.util.Optional; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import io.kubernetes.client.openapi.models.V1ObjectMeta; -import io.kubernetes.client.openapi.models.V1Pod; -import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.ResponseStep; -import oracle.kubernetes.operator.steps.DefaultResponseStep; import oracle.kubernetes.operator.work.AsyncFiber; import oracle.kubernetes.operator.work.NextAction; import oracle.kubernetes.operator.work.Packet; @@ -27,7 +23,9 @@ * @param the type of resource handled by this step */ abstract class WaitForReadyStep extends Step { + private static final int DEFAULT_RECHECK_SECONDS = 5; + private static final int DEFAULT_RECHECK_COUNT = 24; static int getWatchBackstopRecheckDelaySeconds() { return Optional.ofNullable(TuningParameters.getInstance()) @@ -36,6 +34,13 @@ static int getWatchBackstopRecheckDelaySeconds() { .orElse(DEFAULT_RECHECK_SECONDS); } + static int getWatchBackstopRecheckCount() { + return Optional.ofNullable(TuningParameters.getInstance()) + .map(TuningParameters::getWatchTuning) + .map(t -> t.watchBackstopRecheckCount) + .orElse(DEFAULT_RECHECK_COUNT); + } + private final T initialResource; private final String resourceName; @@ -173,14 +178,16 @@ private void checkUpdatedResource(Packet packet, AsyncFiber fiber, Callback call null); } - private Step createReadAndIfReadyCheckStep(Callback callback) { + Step createReadAndIfReadyCheckStep(Callback callback) { if (initialResource != null) { return createReadAsyncStep(getName(), getNamespace(), getDomainUid(), resumeIfReady(callback)); } else { - return new ReadAndIfReadyCheckStep(getName(), callback, getNext()); + return new ReadAndIfReadyCheckStep(getName(), callback, resumeIfReady(callback), getNext()); } } + protected abstract ResponseStep resumeIfReady(Callback callback); + private String getNamespace() { return getMetadata(initialResource).getNamespace(); } @@ -193,53 +200,28 @@ public String getName() { return initialResource != null ? getMetadata(initialResource).getName() : resourceName; } - private DefaultResponseStep resumeIfReady(Callback callback) { - return new DefaultResponseStep<>(null) { - @Override - public NextAction onSuccess(Packet packet, CallResponse callResponse) { - if ((callResponse != null) && (callResponse.getResult() instanceof V1Pod)) { - V1Pod pod = (V1Pod) callResponse.getResult(); - Optional.ofNullable(packet.getSpi(DomainPresenceInfo.class)) - .ifPresent(i -> i.setServerPodFromEvent(getPodLabel(pod, LabelConstants.SERVERNAME_LABEL), pod)); - } - if (isReady(callResponse.getResult())) { - callback.proceedFromWait(callResponse.getResult()); - return doNext(packet); - } - return doDelay(createReadAndIfReadyCheckStep(callback), packet, - getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); - } - - private String getPodLabel(V1Pod pod, String labelName) { - return Optional.ofNullable(pod) - .map(V1Pod::getMetadata) - .map(V1ObjectMeta::getLabels) - .map(m -> m.get(labelName)) - .orElse(null); - } - }; - } private class ReadAndIfReadyCheckStep extends Step { private final Callback callback; private final String resourceName; + private final ResponseStep responseStep; - ReadAndIfReadyCheckStep(String resourceName, Callback callback, Step next) { + ReadAndIfReadyCheckStep(String resourceName, Callback callback, ResponseStep responseStep, Step next) { super(next); this.callback = callback; this.resourceName = resourceName; + this.responseStep = responseStep; } @Override public NextAction apply(Packet packet) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); return doNext(createReadAsyncStep(resourceName, info.getNamespace(), - info.getDomainUid(), resumeIfReady(callback)), packet); + info.getDomainUid(), responseStep), packet); } - } - private class Callback implements Consumer { + class Callback implements Consumer { private final AsyncFiber fiber; private final Packet packet; private final AtomicBoolean didResume = new AtomicBoolean(false); @@ -258,7 +240,7 @@ public void accept(T resource) { } // The resource has now either completed or failed, so we can continue processing. - private void proceedFromWait(T resource) { + void proceedFromWait(T resource) { removeCallback(getName(), this); if (mayResumeFiber()) { handleResourceReady(fiber, packet, resource); diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java index a13b7535308..6eb4c3c14a8 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java @@ -55,6 +55,7 @@ public class DomainPresenceInfo { private final AtomicBoolean isDeleting = new AtomicBoolean(false); private final AtomicBoolean isPopulated = new AtomicBoolean(false); private final AtomicInteger retryCount = new AtomicInteger(0); + private final AtomicInteger watchBackstopRecheckCount = new AtomicInteger(0); private final AtomicReference> serverStartupInfo; private final AtomicReference> serverShutdownInfo; @@ -559,6 +560,18 @@ int getRetryCount() { return retryCount.get(); } + public void resetWatchBackstopRecheckCount() { + watchBackstopRecheckCount.set(0); + } + + public int incrementAndGetWatchBackstopRecheckCount() { + return watchBackstopRecheckCount.incrementAndGet(); + } + + public int getWatchBackstopRecheckCount() { + return watchBackstopRecheckCount.get(); + } + /** Sets the last completion time to now. */ public void complete() { resetFailureCount(); diff --git a/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java b/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java index 16f11f7660a..567f02e169e 100644 --- a/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java +++ b/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java @@ -40,7 +40,7 @@ public abstract class WatcherTestBase extends ThreadFactoryTestBase implements A private final List mementos = new ArrayList<>(); private final List> callBacks = new ArrayList<>(); private final AtomicBoolean stopping = new AtomicBoolean(false); - final WatchTuning tuning = new WatchTuning(30, 0, 5); + final WatchTuning tuning = new WatchTuning(30, 0, 5, 24); private BigInteger resourceVersion = INITIAL_RESOURCE_VERSION; private V1ObjectMeta createMetaData() { diff --git a/operator/src/test/java/oracle/kubernetes/operator/steps/ManagedServerUpIteratorStepTest.java b/operator/src/test/java/oracle/kubernetes/operator/steps/ManagedServerUpIteratorStepTest.java index c2ba266c0e5..73e4b1ef8df 100644 --- a/operator/src/test/java/oracle/kubernetes/operator/steps/ManagedServerUpIteratorStepTest.java +++ b/operator/src/test/java/oracle/kubernetes/operator/steps/ManagedServerUpIteratorStepTest.java @@ -95,7 +95,7 @@ public class ManagedServerUpIteratorStepTest extends ThreadFactoryTestBase imple private final AtomicBoolean stopping = new AtomicBoolean(false); private static final BigInteger INITIAL_RESOURCE_VERSION = new BigInteger("234"); private final PodWatcher watcher = createWatcher(NS, stopping, INITIAL_RESOURCE_VERSION); - final TuningParameters.WatchTuning tuning = new TuningParameters.WatchTuning(30, 0, 5); + final TuningParameters.WatchTuning tuning = new TuningParameters.WatchTuning(30, 0, 5, 24); @Nonnull private static String getManagedServerName(int n) { From 9c83cb0d03daa924515be813ce410ff8857f37b7 Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Wed, 9 Jun 2021 15:44:25 +0000 Subject: [PATCH 02/23] Increase the pod ready wait timeout to 5 min (instead of 2 min) to avoid the intermittent test failures. --- .../java/oracle/kubernetes/operator/TuningParametersImpl.java | 2 +- .../main/java/oracle/kubernetes/operator/WaitForReadyStep.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java b/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java index 3ab4ed06bf5..749c32f69f7 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java +++ b/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java @@ -76,7 +76,7 @@ private void update() { (int) readTuningParameter("watchLifetime", 300), (int) readTuningParameter("watchMinimumDelay", 5), (int) readTuningParameter("watchBackstopRecheckDelaySeconds", 5), - (int) readTuningParameter("watchBackstopRecheckCount", 24)); + (int) readTuningParameter("watchBackstopRecheckCount", 60)); PodTuning pod = new PodTuning( diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 8c5d83a3287..9076dd37d97 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -25,7 +25,7 @@ abstract class WaitForReadyStep extends Step { private static final int DEFAULT_RECHECK_SECONDS = 5; - private static final int DEFAULT_RECHECK_COUNT = 24; + private static final int DEFAULT_RECHECK_COUNT = 60; static int getWatchBackstopRecheckDelaySeconds() { return Optional.ofNullable(TuningParameters.getInstance()) From 4d6749c27e868740fc41972b184dfa7316307f1b Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Wed, 9 Jun 2021 20:47:25 +0000 Subject: [PATCH 03/23] Undo refactoring related changes. --- .../kubernetes/operator/JobWatcher.java | 19 ----- .../kubernetes/operator/PodWatcher.java | 75 ----------------- .../kubernetes/operator/WaitForReadyStep.java | 81 ++++++++++++++++--- 3 files changed, 70 insertions(+), 105 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java index 4036408f59a..250792f2552 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java @@ -12,7 +12,6 @@ import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import javax.annotation.Nonnull; @@ -26,16 +25,13 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; -import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.ResponseStep; import oracle.kubernetes.operator.logging.LoggingFacade; import oracle.kubernetes.operator.logging.LoggingFactory; import oracle.kubernetes.operator.logging.MessageKeys; -import oracle.kubernetes.operator.steps.DefaultResponseStep; import oracle.kubernetes.operator.watcher.WatchListener; -import oracle.kubernetes.operator.work.NextAction; import oracle.kubernetes.operator.work.Packet; import oracle.kubernetes.operator.work.Step; import oracle.kubernetes.utils.SystemClock; @@ -306,21 +302,6 @@ Throwable createTerminationException(V1Job job) { void logWaiting(String name) { LOGGER.fine(MessageKeys.WAITING_FOR_JOB_READY, name); } - - @Override - protected DefaultResponseStep resumeIfReady(Callback callback) { - return new DefaultResponseStep<>(null) { - @Override - public NextAction onSuccess(Packet packet, CallResponse callResponse) { - if (isReady(callResponse.getResult())) { - callback.proceedFromWait(callResponse.getResult()); - return doNext(packet); - } - return doDelay(createReadAndIfReadyCheckStep(callback), packet, - getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); - } - }; - } } static class DeadlineExceededException extends Exception { diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index 231ed18b608..9f34fd907b9 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -11,7 +11,6 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import javax.annotation.Nonnull; @@ -29,9 +28,7 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; -import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; -import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.LegalNames; import oracle.kubernetes.operator.helpers.PodHelper; @@ -39,14 +36,8 @@ import oracle.kubernetes.operator.logging.LoggingFacade; import oracle.kubernetes.operator.logging.LoggingFactory; import oracle.kubernetes.operator.logging.MessageKeys; -import oracle.kubernetes.operator.steps.DefaultResponseStep; import oracle.kubernetes.operator.watcher.WatchListener; -import oracle.kubernetes.operator.work.NextAction; -import oracle.kubernetes.operator.work.Packet; import oracle.kubernetes.operator.work.Step; -import oracle.kubernetes.weblogic.domain.model.Domain; - -import static oracle.kubernetes.operator.ProcessingConstants.MAKE_RIGHT_DOMAIN_OPERATION; /** * Watches for changes to pods. @@ -331,55 +322,6 @@ V1ObjectMeta getMetadata(V1Pod pod) { Step createReadAsyncStep(String name, String namespace, String domainUid, ResponseStep responseStep) { return new CallBuilder().readPodAsync(name, namespace, domainUid, responseStep); } - - @Override - protected DefaultResponseStep resumeIfReady(Callback callback) { - return new DefaultResponseStep<>(null) { - @Override - public NextAction onSuccess(Packet packet, CallResponse callResponse) { - DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); - Optional.ofNullable(callResponse.getResult()).ifPresent(pod -> setServerPodFromEvent(info, pod)); - if (isReady(callResponse.getResult())) { - resetWatchBackstopRecheckCount(info); - return proceedFromWait(packet, callResponse); - } - if (shouldWait(info)) { - // Watch backstop recheck count is less than or equal to the configured recheck count, delay. - return doDelay(createReadAndIfReadyCheckStep(callback), packet, - getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); - } else { - // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. - return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), - info.getNamespace(), new MakeRightDomainStep(null)), packet); - } - } - - private void resetWatchBackstopRecheckCount(DomainPresenceInfo info) { - Optional.ofNullable(info).ifPresent(DomainPresenceInfo::resetWatchBackstopRecheckCount); - } - - private void setServerPodFromEvent(DomainPresenceInfo info, V1Pod pod) { - Optional.ofNullable(info).ifPresent(i -> i.setServerPodFromEvent(getPodLabel(pod), pod)); - } - - private NextAction proceedFromWait(Packet packet, CallResponse callResponse) { - callback.proceedFromWait(callResponse.getResult()); - return doNext(packet); - } - - private boolean shouldWait(DomainPresenceInfo info) { - return info == null || info.incrementAndGetWatchBackstopRecheckCount() <= getWatchBackstopRecheckCount(); - } - - private String getPodLabel(V1Pod pod) { - return Optional.ofNullable(pod) - .map(V1Pod::getMetadata) - .map(V1ObjectMeta::getLabels) - .map(m -> m.get(LabelConstants.SERVERNAME_LABEL)) - .orElse(null); - } - }; - } } private class WaitForPodReadyStep extends WaitForPodStatusStep { @@ -418,7 +360,6 @@ protected void removeCallback(String podName, Consumer callback) { protected void logWaiting(String name) { LOGGER.fine(MessageKeys.WAITING_FOR_POD_READY, name); } - } private class WaitForPodDeleteStep extends WaitForPodStatusStep { @@ -442,20 +383,4 @@ protected void removeCallback(String podName, Consumer callback) { removeOnDeleteCallback(podName, callback); } } - - private static class MakeRightDomainStep extends DefaultResponseStep { - MakeRightDomainStep(Step next) { - super(next); - } - - @Override - public NextAction onSuccess(Packet packet, CallResponse callResponse) { - MakeRightDomainOperation makeRightDomainOperation = - (MakeRightDomainOperation)packet.get(MAKE_RIGHT_DOMAIN_OPERATION); - makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain)callResponse.getResult())); - makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); - return super.onSuccess(packet, callResponse); - } - } - } diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 9076dd37d97..58a00da7d41 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -4,17 +4,25 @@ package oracle.kubernetes.operator; import java.util.Optional; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; +import io.kubernetes.client.openapi.models.V1Job; import io.kubernetes.client.openapi.models.V1ObjectMeta; +import io.kubernetes.client.openapi.models.V1Pod; +import oracle.kubernetes.operator.calls.CallResponse; +import oracle.kubernetes.operator.helpers.CallBuilder; import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.ResponseStep; +import oracle.kubernetes.operator.steps.DefaultResponseStep; import oracle.kubernetes.operator.work.AsyncFiber; import oracle.kubernetes.operator.work.NextAction; import oracle.kubernetes.operator.work.Packet; import oracle.kubernetes.operator.work.Step; +import oracle.kubernetes.weblogic.domain.model.Domain; +import static oracle.kubernetes.operator.ProcessingConstants.MAKE_RIGHT_DOMAIN_OPERATION; import static oracle.kubernetes.operator.helpers.KubernetesUtils.getDomainUidLabel; /** @@ -23,7 +31,6 @@ * @param the type of resource handled by this step */ abstract class WaitForReadyStep extends Step { - private static final int DEFAULT_RECHECK_SECONDS = 5; private static final int DEFAULT_RECHECK_COUNT = 60; @@ -178,16 +185,14 @@ private void checkUpdatedResource(Packet packet, AsyncFiber fiber, Callback call null); } - Step createReadAndIfReadyCheckStep(Callback callback) { + private Step createReadAndIfReadyCheckStep(Callback callback) { if (initialResource != null) { return createReadAsyncStep(getName(), getNamespace(), getDomainUid(), resumeIfReady(callback)); } else { - return new ReadAndIfReadyCheckStep(getName(), callback, resumeIfReady(callback), getNext()); + return new ReadAndIfReadyCheckStep(getName(), callback, getNext()); } } - protected abstract ResponseStep resumeIfReady(Callback callback); - private String getNamespace() { return getMetadata(initialResource).getNamespace(); } @@ -200,28 +205,82 @@ public String getName() { return initialResource != null ? getMetadata(initialResource).getName() : resourceName; } + private DefaultResponseStep resumeIfReady(Callback callback) { + return new DefaultResponseStep<>(null) { + @Override + public NextAction onSuccess(Packet packet, CallResponse callResponse) { + DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); + if ((callResponse != null) && (callResponse.getResult() instanceof V1Pod)) { + V1Pod pod = (V1Pod) callResponse.getResult(); + Optional.ofNullable(info) + .ifPresent(i -> i.setServerPodFromEvent(getPodLabel(pod, LabelConstants.SERVERNAME_LABEL), pod)); + } + if (isReady(callResponse.getResult())) { + callback.proceedFromWait(callResponse.getResult()); + Optional.ofNullable(info).ifPresent(i -> i.resetWatchBackstopRecheckCount()); + return doNext(packet); + } + if (shouldWait(callResponse, info)) { + // Watch backstop recheck count is less than or equal to the configured recheck count, delay. + return doDelay(createReadAndIfReadyCheckStep(callback), packet, + getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); + } else { + // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. + return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), + info.getNamespace(), new MakeRightDomainStep(null)), packet); + } + } + + private boolean shouldWait(CallResponse callResponse, DomainPresenceInfo info) { + return info == null || (info.incrementAndGetWatchBackstopRecheckCount() <= getWatchBackstopRecheckCount()) + || ((callResponse != null) && (callResponse.getResult() instanceof V1Job)); + } + + private String getPodLabel(V1Pod pod, String labelName) { + return Optional.ofNullable(pod) + .map(V1Pod::getMetadata) + .map(V1ObjectMeta::getLabels) + .map(m -> m.get(labelName)) + .orElse(null); + } + }; + } private class ReadAndIfReadyCheckStep extends Step { private final Callback callback; private final String resourceName; - private final ResponseStep responseStep; - ReadAndIfReadyCheckStep(String resourceName, Callback callback, ResponseStep responseStep, Step next) { + ReadAndIfReadyCheckStep(String resourceName, Callback callback, Step next) { super(next); this.callback = callback; this.resourceName = resourceName; - this.responseStep = responseStep; } @Override public NextAction apply(Packet packet) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); return doNext(createReadAsyncStep(resourceName, info.getNamespace(), - info.getDomainUid(), responseStep), packet); + info.getDomainUid(), resumeIfReady(callback)), packet); + } + + } + + private class MakeRightDomainStep extends DefaultResponseStep { + MakeRightDomainStep(Step next) { + super(next); + } + + @Override + public NextAction onSuccess(Packet packet, CallResponse callResponse) { + MakeRightDomainOperation makeRightDomainOperation = + (MakeRightDomainOperation)packet.get(MAKE_RIGHT_DOMAIN_OPERATION); + makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain)callResponse.getResult())); + makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); + return super.onSuccess(packet, callResponse); } } - class Callback implements Consumer { + private class Callback implements Consumer { private final AsyncFiber fiber; private final Packet packet; private final AtomicBoolean didResume = new AtomicBoolean(false); @@ -240,7 +299,7 @@ public void accept(T resource) { } // The resource has now either completed or failed, so we can continue processing. - void proceedFromWait(T resource) { + private void proceedFromWait(T resource) { removeCallback(getName(), this); if (mayResumeFiber()) { handleResourceReady(fiber, packet, resource); From 291d6d290b4c3b609798aa04d5ac433b4601f4de Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Thu, 10 Jun 2021 17:24:40 +0000 Subject: [PATCH 04/23] Fix to not increment count for introspector job. --- .../java/oracle/kubernetes/operator/WaitForReadyStep.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 58a00da7d41..cf3f34a3f0f 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -232,8 +232,9 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { } private boolean shouldWait(CallResponse callResponse, DomainPresenceInfo info) { - return info == null || (info.incrementAndGetWatchBackstopRecheckCount() <= getWatchBackstopRecheckCount()) - || ((callResponse != null) && (callResponse.getResult() instanceof V1Job)); + return ((callResponse != null) && (callResponse.getResult() instanceof V1Job)) + || (info == null) + || (info.incrementAndGetWatchBackstopRecheckCount() <= getWatchBackstopRecheckCount()); } private String getPodLabel(V1Pod pod, String labelName) { From 6d106c7c8fee29cf787d5084ff17a149f58a710d Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Thu, 10 Jun 2021 21:53:21 +0000 Subject: [PATCH 05/23] Potential fix for the race condition in the introspector job completion detection. --- .../oracle/kubernetes/operator/WaitForReadyStep.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index cf3f34a3f0f..35125571b86 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -215,7 +215,7 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { Optional.ofNullable(info) .ifPresent(i -> i.setServerPodFromEvent(getPodLabel(pod, LabelConstants.SERVERNAME_LABEL), pod)); } - if (isReady(callResponse.getResult())) { + if (isReady(callResponse.getResult()) || callback.didResume.get()) { callback.proceedFromWait(callResponse.getResult()); Optional.ofNullable(info).ifPresent(i -> i.resetWatchBackstopRecheckCount()); return doNext(packet); @@ -227,7 +227,7 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { } else { // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), - info.getNamespace(), new MakeRightDomainStep(null)), packet); + info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); } } @@ -267,8 +267,11 @@ public NextAction apply(Packet packet) { } private class MakeRightDomainStep extends DefaultResponseStep { - MakeRightDomainStep(Step next) { + private final Callback callback; + + MakeRightDomainStep(Callback callback, Step next) { super(next); + this.callback = callback; } @Override @@ -276,6 +279,7 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { MakeRightDomainOperation makeRightDomainOperation = (MakeRightDomainOperation)packet.get(MAKE_RIGHT_DOMAIN_OPERATION); makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain)callResponse.getResult())); + callback.fiber.terminate(null, packet); makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); return super.onSuccess(packet, callResponse); } From 3fb128f14ffc9259bafd6d5ece80bb2f1d720527 Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Thu, 10 Jun 2021 22:59:11 +0000 Subject: [PATCH 06/23] Reduce timeout interval to 2 min. --- .../java/oracle/kubernetes/operator/TuningParametersImpl.java | 2 +- .../main/java/oracle/kubernetes/operator/WaitForReadyStep.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java b/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java index 749c32f69f7..3ab4ed06bf5 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java +++ b/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java @@ -76,7 +76,7 @@ private void update() { (int) readTuningParameter("watchLifetime", 300), (int) readTuningParameter("watchMinimumDelay", 5), (int) readTuningParameter("watchBackstopRecheckDelaySeconds", 5), - (int) readTuningParameter("watchBackstopRecheckCount", 60)); + (int) readTuningParameter("watchBackstopRecheckCount", 24)); PodTuning pod = new PodTuning( diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 35125571b86..843e99bc6f3 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -32,7 +32,7 @@ */ abstract class WaitForReadyStep extends Step { private static final int DEFAULT_RECHECK_SECONDS = 5; - private static final int DEFAULT_RECHECK_COUNT = 60; + private static final int DEFAULT_RECHECK_COUNT = 24; static int getWatchBackstopRecheckDelaySeconds() { return Optional.ofNullable(TuningParameters.getInstance()) From 70eab49be16f5fa2af1f92546ef27263f252c118 Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Fri, 11 Jun 2021 00:12:25 +0000 Subject: [PATCH 07/23] Add previously removed event validation. --- .../weblogic/kubernetes/ItParameterizedDomain.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItParameterizedDomain.java b/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItParameterizedDomain.java index 8bae052c138..96d51d6f6dd 100644 --- a/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItParameterizedDomain.java +++ b/integration-tests/src/test/java/oracle/weblogic/kubernetes/ItParameterizedDomain.java @@ -137,6 +137,7 @@ import static oracle.weblogic.kubernetes.utils.DeployUtil.deployUsingWlst; import static oracle.weblogic.kubernetes.utils.FileUtils.doesFileExistInPod; import static oracle.weblogic.kubernetes.utils.K8sEvents.DOMAIN_CHANGED; +import static oracle.weblogic.kubernetes.utils.K8sEvents.DOMAIN_PROCESSING_COMPLETED; import static oracle.weblogic.kubernetes.utils.K8sEvents.DOMAIN_PROCESSING_STARTING; import static oracle.weblogic.kubernetes.utils.K8sEvents.POD_STARTED; import static oracle.weblogic.kubernetes.utils.K8sEvents.POD_TERMINATED; @@ -739,6 +740,16 @@ public void testMultiClustersRollingRestart() { .until(checkDomainEvent(opNamespace, miiDomainNamespace, miiDomainUid, DOMAIN_PROCESSING_STARTING, "Normal", timestamp)); + withStandardRetryPolicy + .conditionEvaluationListener( + condition -> logger.info("Waiting for domain event {0} to be logged " + + "(elapsed time {1}ms, remaining time {2}ms)", + DOMAIN_PROCESSING_COMPLETED, + condition.getElapsedTimeInMS(), + condition.getRemainingTimeInMS())) + .until(checkDomainEvent(opNamespace, miiDomainNamespace, miiDomainUid, + DOMAIN_PROCESSING_COMPLETED, "Normal", timestamp)); + // Verify that pod termination and started events are logged only once for each managed server in each cluster for (int i = 1; i <= NUMBER_OF_CLUSTERS_MIIDOMAIN; i++) { for (int j = 1; j <= replicaCount; j++) { From 4dbbe1c4cead6a4ac9d7fad4144c70a92d9836ef Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Sat, 12 Jun 2021 02:34:41 +0000 Subject: [PATCH 08/23] Changes to check if the cached pod is not found on read then execute make-right domain. --- .../kubernetes/operator/JobWatcher.java | 7 +++++ .../kubernetes/operator/PodWatcher.java | 18 +++++++++++ .../kubernetes/operator/WaitForReadyStep.java | 30 +++++++++++++++---- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java index 250792f2552..befbbe21eb8 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java @@ -25,7 +25,9 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; +import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; +import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.ResponseStep; import oracle.kubernetes.operator.logging.LoggingFacade; @@ -243,6 +245,11 @@ boolean isReady(V1Job job) { return isComplete(job) || isFailed(job); } + @Override + boolean onReadNotFoundForCachedPod(CallResponse callResponse, DomainPresenceInfo info, String serverName) { + return false; + } + // Ignore modified callbacks from different jobs (identified by having different creation times) or those // where the job is not yet ready. @Override diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index 9f34fd907b9..a3440e82b69 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -28,7 +28,9 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; +import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; +import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.LegalNames; import oracle.kubernetes.operator.helpers.PodHelper; @@ -360,6 +362,17 @@ protected void removeCallback(String podName, Consumer callback) { protected void logWaiting(String name) { LOGGER.fine(MessageKeys.WAITING_FOR_POD_READY, name); } + + @Override + protected boolean onReadNotFoundForCachedPod(CallResponse callResponse, + DomainPresenceInfo info, String serverName) { + if ((info.getServerPod(serverName) != null) && callResponse.getResult() == null) { + // Initial resource is not null but live info is null. + return true; + } + return false; + } + } private class WaitForPodDeleteStep extends WaitForPodStatusStep { @@ -367,6 +380,11 @@ private WaitForPodDeleteStep(V1Pod pod, Step next) { super(pod, next); } + @Override + protected boolean onReadNotFoundForCachedPod(CallResponse callResponse, DomainPresenceInfo info, String name) { + return false; + } + // A pod is considered deleted when reading its value from Kubernetes returns null. @Override protected boolean isReady(V1Pod result) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 843e99bc6f3..2ae28412c5c 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -23,6 +23,7 @@ import oracle.kubernetes.weblogic.domain.model.Domain; import static oracle.kubernetes.operator.ProcessingConstants.MAKE_RIGHT_DOMAIN_OPERATION; +import static oracle.kubernetes.operator.ProcessingConstants.SERVER_NAME; import static oracle.kubernetes.operator.helpers.KubernetesUtils.getDomainUidLabel; /** @@ -73,6 +74,8 @@ static int getWatchBackstopRecheckCount() { */ abstract boolean isReady(T resource); + abstract boolean onReadNotFoundForCachedPod(CallResponse callResponse, DomainPresenceInfo info, String serverName); + /** * Returns true if the callback for this resource should be processed. This is typically used to exclude * resources which have changed but are not yet ready, or else different instances with the same name. @@ -209,12 +212,19 @@ private DefaultResponseStep resumeIfReady(Callback callback) { return new DefaultResponseStep<>(null) { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); - if ((callResponse != null) && (callResponse.getResult() instanceof V1Pod)) { - V1Pod pod = (V1Pod) callResponse.getResult(); - Optional.ofNullable(info) - .ifPresent(i -> i.setServerPodFromEvent(getPodLabel(pod, LabelConstants.SERVERNAME_LABEL), pod)); + if (callResponse != null) { + if (callResponse.getResult() instanceof V1Pod) { + Optional.ofNullable(info) + .ifPresent(i -> i.setServerPodFromEvent(getPodLabel((V1Pod) callResponse.getResult(), + LabelConstants.SERVERNAME_LABEL), (V1Pod) callResponse.getResult())); + } + if (isMakeRightNeeded(callResponse, info, (String)packet.get(SERVER_NAME))) + return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), + info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); } + if (isReady(callResponse.getResult()) || callback.didResume.get()) { callback.proceedFromWait(callResponse.getResult()); Optional.ofNullable(info).ifPresent(i -> i.resetWatchBackstopRecheckCount()); @@ -231,6 +241,14 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { } } + private boolean isMakeRightNeeded(CallResponse callResponse, + DomainPresenceInfo info, String name) { + if ((info != null) && onReadNotFoundForCachedPod(callResponse, info, name)) { + return true; + } + return false; + } + private boolean shouldWait(CallResponse callResponse, DomainPresenceInfo info) { return ((callResponse != null) && (callResponse.getResult() instanceof V1Job)) || (info == null) @@ -276,11 +294,13 @@ private class MakeRightDomainStep extends DefaultResponseStep { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + String name = initialResource == null ? resourceName : getMetadata(initialResource).getName(); MakeRightDomainOperation makeRightDomainOperation = (MakeRightDomainOperation)packet.get(MAKE_RIGHT_DOMAIN_OPERATION); makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain)callResponse.getResult())); - callback.fiber.terminate(null, packet); makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); + removeCallback(name, callback); + callback.fiber.terminate(null, packet); return super.onSuccess(packet, callResponse); } } From 240130d5f636a0037ada65dbe3fdfb1063bef0a8 Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Sat, 12 Jun 2021 03:05:21 +0000 Subject: [PATCH 09/23] Fix checkstyle error. --- .../main/java/oracle/kubernetes/operator/WaitForReadyStep.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 2ae28412c5c..992455b5c96 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -220,9 +220,10 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { .ifPresent(i -> i.setServerPodFromEvent(getPodLabel((V1Pod) callResponse.getResult(), LabelConstants.SERVERNAME_LABEL), (V1Pod) callResponse.getResult())); } - if (isMakeRightNeeded(callResponse, info, (String)packet.get(SERVER_NAME))) + if (isMakeRightNeeded(callResponse, info, (String)packet.get(SERVER_NAME))) { return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); + } } if (isReady(callResponse.getResult()) || callback.didResume.get()) { From b0352574318ff0e453f718a6dacef0d03c44ee69 Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Sat, 12 Jun 2021 15:41:11 +0000 Subject: [PATCH 10/23] Minor refactoring and increase the timeout value. --- .../kubernetes/operator/JobWatcher.java | 4 +- .../kubernetes/operator/PodWatcher.java | 14 ++----- .../operator/TuningParametersImpl.java | 2 +- .../kubernetes/operator/WaitForReadyStep.java | 40 ++++++++----------- 4 files changed, 23 insertions(+), 37 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java index befbbe21eb8..690aa24f776 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java @@ -25,9 +25,7 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; -import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; -import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.ResponseStep; import oracle.kubernetes.operator.logging.LoggingFacade; @@ -246,7 +244,7 @@ boolean isReady(V1Job job) { } @Override - boolean onReadNotFoundForCachedPod(CallResponse callResponse, DomainPresenceInfo info, String serverName) { + boolean onReadNotFoundForCachedResource(V1Job cachedJob, boolean isNotFoundOnRead) { return false; } diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index a3440e82b69..6a17104a16d 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -28,9 +28,7 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; -import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; -import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.LegalNames; import oracle.kubernetes.operator.helpers.PodHelper; @@ -364,13 +362,9 @@ protected void logWaiting(String name) { } @Override - protected boolean onReadNotFoundForCachedPod(CallResponse callResponse, - DomainPresenceInfo info, String serverName) { - if ((info.getServerPod(serverName) != null) && callResponse.getResult() == null) { - // Initial resource is not null but live info is null. - return true; - } - return false; + protected boolean onReadNotFoundForCachedResource(V1Pod cachedPod, boolean isNotFoundOnRead) { + // Return true if cached pod is not null but pod not found in explicit read, false otherwise. + return ((cachedPod != null) && isNotFoundOnRead) ? true : false; } } @@ -381,7 +375,7 @@ private WaitForPodDeleteStep(V1Pod pod, Step next) { } @Override - protected boolean onReadNotFoundForCachedPod(CallResponse callResponse, DomainPresenceInfo info, String name) { + protected boolean onReadNotFoundForCachedResource(V1Pod cachedPod, boolean isNotFoundOnRead) { return false; } diff --git a/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java b/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java index 3ab4ed06bf5..749c32f69f7 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java +++ b/operator/src/main/java/oracle/kubernetes/operator/TuningParametersImpl.java @@ -76,7 +76,7 @@ private void update() { (int) readTuningParameter("watchLifetime", 300), (int) readTuningParameter("watchMinimumDelay", 5), (int) readTuningParameter("watchBackstopRecheckDelaySeconds", 5), - (int) readTuningParameter("watchBackstopRecheckCount", 24)); + (int) readTuningParameter("watchBackstopRecheckCount", 60)); PodTuning pod = new PodTuning( diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 992455b5c96..64b63d9bf22 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -33,7 +33,7 @@ */ abstract class WaitForReadyStep extends Step { private static final int DEFAULT_RECHECK_SECONDS = 5; - private static final int DEFAULT_RECHECK_COUNT = 24; + private static final int DEFAULT_RECHECK_COUNT = 60; static int getWatchBackstopRecheckDelaySeconds() { return Optional.ofNullable(TuningParameters.getInstance()) @@ -74,7 +74,14 @@ static int getWatchBackstopRecheckCount() { */ abstract boolean isReady(T resource); - abstract boolean onReadNotFoundForCachedPod(CallResponse callResponse, DomainPresenceInfo info, String serverName); + /** + * Returns true if the cached pod is not found during periodic listing. + * @param cachedResource cached resource to check + * @param isNotFoundOnRead Boolean indicating if resource is not found in call response. + * + * @return true if cached pod not found on read + */ + abstract boolean onReadNotFoundForCachedResource(T cachedResource, boolean isNotFoundOnRead); /** * Returns true if the callback for this resource should be processed. This is typically used to exclude @@ -215,14 +222,13 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); if (callResponse != null) { - if (callResponse.getResult() instanceof V1Pod) { - Optional.ofNullable(info) - .ifPresent(i -> i.setServerPodFromEvent(getPodLabel((V1Pod) callResponse.getResult(), - LabelConstants.SERVERNAME_LABEL), (V1Pod) callResponse.getResult())); - } - if (isMakeRightNeeded(callResponse, info, (String)packet.get(SERVER_NAME))) { - return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), + if ((info != null) && (callResponse.getResult() instanceof V1Pod)) { + String serverName = (String)packet.get(SERVER_NAME); + info.setServerPodFromEvent(serverName, (V1Pod) callResponse.getResult()); + if (onReadNotFoundForCachedResource((T) info.getServerPod(serverName), isNotFoundOnRead(callResponse))) { + return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); + } } } @@ -242,12 +248,8 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { } } - private boolean isMakeRightNeeded(CallResponse callResponse, - DomainPresenceInfo info, String name) { - if ((info != null) && onReadNotFoundForCachedPod(callResponse, info, name)) { - return true; - } - return false; + private boolean isNotFoundOnRead(CallResponse callResponse) { + return callResponse.getResult() == null; } private boolean shouldWait(CallResponse callResponse, DomainPresenceInfo info) { @@ -255,14 +257,6 @@ private boolean shouldWait(CallResponse callResponse, DomainPresenceInfo info || (info == null) || (info.incrementAndGetWatchBackstopRecheckCount() <= getWatchBackstopRecheckCount()); } - - private String getPodLabel(V1Pod pod, String labelName) { - return Optional.ofNullable(pod) - .map(V1Pod::getMetadata) - .map(V1ObjectMeta::getLabels) - .map(m -> m.get(labelName)) - .orElse(null); - } }; } From e988050a2189efdf8527b301ffd8bbd9d21ff89a Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Sat, 12 Jun 2021 16:00:43 +0000 Subject: [PATCH 11/23] Fix comments and minor change. --- .../kubernetes/operator/WaitForReadyStep.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 64b63d9bf22..c442211f397 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -75,11 +75,11 @@ static int getWatchBackstopRecheckCount() { abstract boolean isReady(T resource); /** - * Returns true if the cached pod is not found during periodic listing. + * Returns true if the cached resource is not found during periodic listing. * @param cachedResource cached resource to check * @param isNotFoundOnRead Boolean indicating if resource is not found in call response. * - * @return true if cached pod not found on read + * @return true if cached resource not found on read */ abstract boolean onReadNotFoundForCachedResource(T cachedResource, boolean isNotFoundOnRead); @@ -221,14 +221,13 @@ private DefaultResponseStep resumeIfReady(Callback callback) { public NextAction onSuccess(Packet packet, CallResponse callResponse) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); - if (callResponse != null) { - if ((info != null) && (callResponse.getResult() instanceof V1Pod)) { - String serverName = (String)packet.get(SERVER_NAME); + String serverName = (String)packet.get(SERVER_NAME); + if ((info != null) && (callResponse != null)) { + if (callResponse.getResult() instanceof V1Pod) { info.setServerPodFromEvent(serverName, (V1Pod) callResponse.getResult()); - if (onReadNotFoundForCachedResource((T) info.getServerPod(serverName), isNotFoundOnRead(callResponse))) { + } else if (onReadNotFoundForCachedResource(getServerPod(info, serverName), isNotFoundOnRead(callResponse))) { return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); - } } } @@ -248,6 +247,10 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { } } + private T getServerPod(DomainPresenceInfo info, String serverName) { + return (T) info.getServerPod(serverName); + } + private boolean isNotFoundOnRead(CallResponse callResponse) { return callResponse.getResult() == null; } From b00989866374c820fa65d417e080770ee0fcb06e Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Mon, 14 Jun 2021 00:59:10 +0000 Subject: [PATCH 12/23] Minor changes. --- .../kubernetes/operator/WaitForReadyStep.java | 23 +++++++++++++++---- .../operator/logging/MessageKeys.java | 1 + .../src/main/resources/Operator.properties | 1 + 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index c442211f397..308e70b68f6 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -15,6 +15,8 @@ import oracle.kubernetes.operator.helpers.CallBuilder; import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.ResponseStep; +import oracle.kubernetes.operator.logging.LoggingFacade; +import oracle.kubernetes.operator.logging.LoggingFactory; import oracle.kubernetes.operator.steps.DefaultResponseStep; import oracle.kubernetes.operator.work.AsyncFiber; import oracle.kubernetes.operator.work.NextAction; @@ -25,6 +27,7 @@ import static oracle.kubernetes.operator.ProcessingConstants.MAKE_RIGHT_DOMAIN_OPERATION; import static oracle.kubernetes.operator.ProcessingConstants.SERVER_NAME; import static oracle.kubernetes.operator.helpers.KubernetesUtils.getDomainUidLabel; +import static oracle.kubernetes.operator.logging.MessageKeys.EXECUTE_MAKE_RIGHT_DOMAIN; /** * This class is the base for steps that must suspend while waiting for a resource to become ready. It is typically @@ -32,6 +35,7 @@ * @param the type of resource handled by this step */ abstract class WaitForReadyStep extends Step { + private static final LoggingFacade LOGGER = LoggingFactory.getLogger("Operator", "Operator"); private static final int DEFAULT_RECHECK_SECONDS = 5; private static final int DEFAULT_RECHECK_COUNT = 60; @@ -221,12 +225,14 @@ private DefaultResponseStep resumeIfReady(Callback callback) { public NextAction onSuccess(Packet packet, CallResponse callResponse) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); - String serverName = (String)packet.get(SERVER_NAME); if ((info != null) && (callResponse != null)) { + String serverName = (String)packet.get(SERVER_NAME); if (callResponse.getResult() instanceof V1Pod) { - info.setServerPodFromEvent(serverName, (V1Pod) callResponse.getResult()); + info.setServerPodFromEvent(getPodLabel((V1Pod) callResponse.getResult(), + LabelConstants.SERVERNAME_LABEL), (V1Pod) callResponse.getResult()); } else if (onReadNotFoundForCachedResource(getServerPod(info, serverName), isNotFoundOnRead(callResponse))) { - return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), + LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, info.getWatchBackstopRecheckCount()); + return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); } } @@ -241,14 +247,23 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { return doDelay(createReadAndIfReadyCheckStep(callback), packet, getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); } else { + LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, info.getWatchBackstopRecheckCount()); // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); } } + private String getPodLabel(V1Pod pod, String labelName) { + return Optional.ofNullable(pod) + .map(V1Pod::getMetadata) + .map(V1ObjectMeta::getLabels) + .map(m -> m.get(labelName)) + .orElse(null); + } + private T getServerPod(DomainPresenceInfo info, String serverName) { - return (T) info.getServerPod(serverName); + return Optional.ofNullable(serverName).map(s -> (T) info.getServerPod(s)).orElse(null); } private boolean isNotFoundOnRead(CallResponse callResponse) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java b/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java index b73ee66d0c7..81c8f42234a 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java +++ b/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java @@ -140,6 +140,7 @@ public class MessageKeys { public static final String HTTP_REQUEST_GOT_THROWABLE = "WLSKO-0189"; public static final String DOMAIN_ROLL_STARTING = "WLSKO-0190"; public static final String DOMAIN_ROLL_COMPLETED = "WLSKO-0191"; + public static final String EXECUTE_MAKE_RIGHT_DOMAIN="WLSKO-0192"; // domain status messages public static final String DUPLICATE_SERVER_NAME_FOUND = "WLSDO-0001"; diff --git a/operator/src/main/resources/Operator.properties b/operator/src/main/resources/Operator.properties index 87d432e3c77..5d7410355d4 100644 --- a/operator/src/main/resources/Operator.properties +++ b/operator/src/main/resources/Operator.properties @@ -139,6 +139,7 @@ WLSKO-0187=Stop managing namespace {0} WLSKO-0189=HTTP request method {0} to {1} failed with exception {2}. WLSKO-0190=Rolling restart WebLogic server pods in domain {0} because {1} WLSKO-0191=Rolling restart of domain {0} completed +WLSKO-0192=Executing make right domain operation, watch backstop recheck count is {0} # Domain status messages From 9a913de3aa37feea4702cb8ffe251af910a4925c Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Mon, 14 Jun 2021 02:22:36 +0000 Subject: [PATCH 13/23] Fix checkstyle error. --- .../java/oracle/kubernetes/operator/logging/MessageKeys.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java b/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java index 81c8f42234a..94eaa3c1357 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java +++ b/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java @@ -140,7 +140,7 @@ public class MessageKeys { public static final String HTTP_REQUEST_GOT_THROWABLE = "WLSKO-0189"; public static final String DOMAIN_ROLL_STARTING = "WLSKO-0190"; public static final String DOMAIN_ROLL_COMPLETED = "WLSKO-0191"; - public static final String EXECUTE_MAKE_RIGHT_DOMAIN="WLSKO-0192"; + public static final String EXECUTE_MAKE_RIGHT_DOMAIN = "WLSKO-0192"; // domain status messages public static final String DUPLICATE_SERVER_NAME_FOUND = "WLSDO-0001"; From 2093c8fb065983ad6c8750928db172375d5b18af Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Mon, 14 Jun 2021 15:53:38 +0000 Subject: [PATCH 14/23] PR review comments - create new MakeRightDomainOperation instead of using the one in the packet. --- .../oracle/kubernetes/operator/DomainProcessorImpl.java | 8 +------- .../src/main/java/oracle/kubernetes/operator/Main.java | 9 +++++++-- .../kubernetes/operator/MakeRightDomainOperation.java | 2 -- .../oracle/kubernetes/operator/WaitForReadyStep.java | 4 +--- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java b/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java index 84bb9798d0e..57164921262 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java +++ b/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java @@ -746,7 +746,7 @@ private void addServerToMaps(Map serverHealthMap, */ class MakeRightDomainOperationImpl implements MakeRightDomainOperation { - private DomainPresenceInfo liveInfo; + private final DomainPresenceInfo liveInfo; private boolean explicitRecheck; private boolean deleting; private boolean willInterrupt; @@ -851,12 +851,6 @@ public void setInspectionRun() { inspectionRun = true; } - @Override - public void setLiveInfo(DomainPresenceInfo info) { - this.liveInfo = info; - } - - @Override public boolean wasInspectionRun() { return inspectionRun; diff --git a/operator/src/main/java/oracle/kubernetes/operator/Main.java b/operator/src/main/java/oracle/kubernetes/operator/Main.java index 5282ad38bfb..b6e28fa7be4 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/Main.java +++ b/operator/src/main/java/oracle/kubernetes/operator/Main.java @@ -33,6 +33,7 @@ import oracle.kubernetes.operator.helpers.CallBuilder; import oracle.kubernetes.operator.helpers.ClientPool; import oracle.kubernetes.operator.helpers.CrdHelper; +import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.HealthCheckHelper; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.KubernetesVersion; @@ -78,7 +79,7 @@ public class Main { new AtomicReference<>(SystemClock.now()); private static final Semaphore shutdownSignal = new Semaphore(0); private static final int DEFAULT_STUCK_POD_RECHECK_SECONDS = 30; - + private static final Main main; private final MainDelegate delegate; private final StuckPodProcessing stuckPodProcessing; private NamespaceWatcher namespaceWatcher; @@ -103,6 +104,7 @@ private static String getConfiguredServiceAccount() { LOGGER.warning(MessageKeys.EXCEPTION, e); throw new RuntimeException(e); } + main = createMain(getBuildProperties()); } static { @@ -123,6 +125,10 @@ Object getOperatorNamespaceEventWatcher() { return operatorNamespaceEventWatcher; } + public static MakeRightDomainOperation createMakeRightOperation(DomainPresenceInfo info) { + return main.delegate.getDomainProcessor().createMakeRightOperation(info); + } + static class MainDelegateImpl implements MainDelegate, DomainProcessorDelegate { private final String serviceAccountName = Optional.ofNullable(getConfiguredServiceAccount()).orElse("default"); @@ -256,7 +262,6 @@ public ScheduledFuture scheduleWithFixedDelay(Runnable command, long initialD * @param args none, ignored */ public static void main(String[] args) { - Main main = createMain(getBuildProperties()); try { main.startOperator(main::completeBegin); diff --git a/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java b/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java index 4bc55b66c6d..df2414966fe 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java +++ b/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java @@ -36,8 +36,6 @@ public interface MakeRightDomainOperation { void setInspectionRun(); - void setLiveInfo(DomainPresenceInfo info); - boolean wasInspectionRun(); private static boolean wasInspectionRun(Packet packet) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 308e70b68f6..0e3867e8b66 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -24,7 +24,6 @@ import oracle.kubernetes.operator.work.Step; import oracle.kubernetes.weblogic.domain.model.Domain; -import static oracle.kubernetes.operator.ProcessingConstants.MAKE_RIGHT_DOMAIN_OPERATION; import static oracle.kubernetes.operator.ProcessingConstants.SERVER_NAME; import static oracle.kubernetes.operator.helpers.KubernetesUtils.getDomainUidLabel; import static oracle.kubernetes.operator.logging.MessageKeys.EXECUTE_MAKE_RIGHT_DOMAIN; @@ -309,8 +308,7 @@ private class MakeRightDomainStep extends DefaultResponseStep { public NextAction onSuccess(Packet packet, CallResponse callResponse) { String name = initialResource == null ? resourceName : getMetadata(initialResource).getName(); MakeRightDomainOperation makeRightDomainOperation = - (MakeRightDomainOperation)packet.get(MAKE_RIGHT_DOMAIN_OPERATION); - makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain)callResponse.getResult())); + Main.createMakeRightOperation(new DomainPresenceInfo((Domain)callResponse.getResult())); makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); removeCallback(name, callback); callback.fiber.terminate(null, packet); From b3f23622e29f3e9bca017e68c8107e4ae7f0c64c Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Mon, 14 Jun 2021 19:07:46 +0000 Subject: [PATCH 15/23] Revert previous change to create a new make-right operation and implement a clear method instead. --- .../operator/DomainProcessorImpl.java | 18 +++++++++++++++++- .../java/oracle/kubernetes/operator/Main.java | 9 ++------- .../operator/MakeRightDomainOperation.java | 4 ++++ .../kubernetes/operator/WaitForReadyStep.java | 5 ++++- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java b/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java index 57164921262..d259106dee6 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java +++ b/operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java @@ -746,7 +746,7 @@ private void addServerToMaps(Map serverHealthMap, */ class MakeRightDomainOperationImpl implements MakeRightDomainOperation { - private final DomainPresenceInfo liveInfo; + private DomainPresenceInfo liveInfo; private boolean explicitRecheck; private boolean deleting; private boolean willInterrupt; @@ -851,6 +851,22 @@ public void setInspectionRun() { inspectionRun = true; } + @Override + public void setLiveInfo(DomainPresenceInfo info) { + this.liveInfo = info; + } + + @Override + public void clear() { + this.liveInfo = null; + this.eventData = null; + this.explicitRecheck = false; + this.deleting = false; + this.willInterrupt = false; + this.inspectionRun = false; + } + + @Override public boolean wasInspectionRun() { return inspectionRun; diff --git a/operator/src/main/java/oracle/kubernetes/operator/Main.java b/operator/src/main/java/oracle/kubernetes/operator/Main.java index b6e28fa7be4..5282ad38bfb 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/Main.java +++ b/operator/src/main/java/oracle/kubernetes/operator/Main.java @@ -33,7 +33,6 @@ import oracle.kubernetes.operator.helpers.CallBuilder; import oracle.kubernetes.operator.helpers.ClientPool; import oracle.kubernetes.operator.helpers.CrdHelper; -import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.HealthCheckHelper; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.KubernetesVersion; @@ -79,7 +78,7 @@ public class Main { new AtomicReference<>(SystemClock.now()); private static final Semaphore shutdownSignal = new Semaphore(0); private static final int DEFAULT_STUCK_POD_RECHECK_SECONDS = 30; - private static final Main main; + private final MainDelegate delegate; private final StuckPodProcessing stuckPodProcessing; private NamespaceWatcher namespaceWatcher; @@ -104,7 +103,6 @@ private static String getConfiguredServiceAccount() { LOGGER.warning(MessageKeys.EXCEPTION, e); throw new RuntimeException(e); } - main = createMain(getBuildProperties()); } static { @@ -125,10 +123,6 @@ Object getOperatorNamespaceEventWatcher() { return operatorNamespaceEventWatcher; } - public static MakeRightDomainOperation createMakeRightOperation(DomainPresenceInfo info) { - return main.delegate.getDomainProcessor().createMakeRightOperation(info); - } - static class MainDelegateImpl implements MainDelegate, DomainProcessorDelegate { private final String serviceAccountName = Optional.ofNullable(getConfiguredServiceAccount()).orElse("default"); @@ -262,6 +256,7 @@ public ScheduledFuture scheduleWithFixedDelay(Runnable command, long initialD * @param args none, ignored */ public static void main(String[] args) { + Main main = createMain(getBuildProperties()); try { main.startOperator(main::completeBegin); diff --git a/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java b/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java index df2414966fe..1ddde62fb9a 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java +++ b/operator/src/main/java/oracle/kubernetes/operator/MakeRightDomainOperation.java @@ -36,6 +36,10 @@ public interface MakeRightDomainOperation { void setInspectionRun(); + void setLiveInfo(DomainPresenceInfo info); + + void clear(); + boolean wasInspectionRun(); private static boolean wasInspectionRun(Packet packet) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 0e3867e8b66..ef2ee3eed8f 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -24,6 +24,7 @@ import oracle.kubernetes.operator.work.Step; import oracle.kubernetes.weblogic.domain.model.Domain; +import static oracle.kubernetes.operator.ProcessingConstants.MAKE_RIGHT_DOMAIN_OPERATION; import static oracle.kubernetes.operator.ProcessingConstants.SERVER_NAME; import static oracle.kubernetes.operator.helpers.KubernetesUtils.getDomainUidLabel; import static oracle.kubernetes.operator.logging.MessageKeys.EXECUTE_MAKE_RIGHT_DOMAIN; @@ -308,7 +309,9 @@ private class MakeRightDomainStep extends DefaultResponseStep { public NextAction onSuccess(Packet packet, CallResponse callResponse) { String name = initialResource == null ? resourceName : getMetadata(initialResource).getName(); MakeRightDomainOperation makeRightDomainOperation = - Main.createMakeRightOperation(new DomainPresenceInfo((Domain)callResponse.getResult())); + (MakeRightDomainOperation)packet.get(MAKE_RIGHT_DOMAIN_OPERATION); + makeRightDomainOperation.clear(); + makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain)callResponse.getResult())); makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); removeCallback(name, callback); callback.fiber.terminate(null, packet); From b4710585d15306f45763b1f39d2b317a2124cdcd Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Mon, 14 Jun 2021 23:16:23 +0000 Subject: [PATCH 16/23] Refactoring changes. --- .../kubernetes/operator/JobWatcher.java | 19 ++++ .../kubernetes/operator/PodWatcher.java | 68 ++++++++++++++- .../kubernetes/operator/WaitForReadyStep.java | 86 +++---------------- 3 files changed, 98 insertions(+), 75 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java index 690aa24f776..b57e161ff63 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java @@ -12,6 +12,7 @@ import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import javax.annotation.Nonnull; @@ -25,13 +26,16 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; +import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.ResponseStep; import oracle.kubernetes.operator.logging.LoggingFacade; import oracle.kubernetes.operator.logging.LoggingFactory; import oracle.kubernetes.operator.logging.MessageKeys; +import oracle.kubernetes.operator.steps.DefaultResponseStep; import oracle.kubernetes.operator.watcher.WatchListener; +import oracle.kubernetes.operator.work.NextAction; import oracle.kubernetes.operator.work.Packet; import oracle.kubernetes.operator.work.Step; import oracle.kubernetes.utils.SystemClock; @@ -307,6 +311,21 @@ Throwable createTerminationException(V1Job job) { void logWaiting(String name) { LOGGER.fine(MessageKeys.WAITING_FOR_JOB_READY, name); } + + @Override + protected DefaultResponseStep resumeIfReady(Callback callback) { + return new DefaultResponseStep<>(null) { + @Override + public NextAction onSuccess(Packet packet, CallResponse callResponse) { + if (isReady(callResponse.getResult()) || callback.didResume.get()) { + callback.proceedFromWait(callResponse.getResult()); + return doNext(packet); + } + return doDelay(createReadAndIfReadyCheckStep(callback), packet, + getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); + } + }; + } } static class DeadlineExceededException extends Exception { diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index 6a17104a16d..1e155b40123 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -11,6 +11,7 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import javax.annotation.Nonnull; @@ -28,7 +29,9 @@ import io.kubernetes.client.util.Watchable; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.WatchBuilder; +import oracle.kubernetes.operator.calls.CallResponse; import oracle.kubernetes.operator.helpers.CallBuilder; +import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.KubernetesUtils; import oracle.kubernetes.operator.helpers.LegalNames; import oracle.kubernetes.operator.helpers.PodHelper; @@ -36,9 +39,15 @@ import oracle.kubernetes.operator.logging.LoggingFacade; import oracle.kubernetes.operator.logging.LoggingFactory; import oracle.kubernetes.operator.logging.MessageKeys; +import oracle.kubernetes.operator.steps.DefaultResponseStep; import oracle.kubernetes.operator.watcher.WatchListener; +import oracle.kubernetes.operator.work.NextAction; +import oracle.kubernetes.operator.work.Packet; import oracle.kubernetes.operator.work.Step; +import static oracle.kubernetes.operator.ProcessingConstants.SERVER_NAME; +import static oracle.kubernetes.operator.logging.MessageKeys.EXECUTE_MAKE_RIGHT_DOMAIN; + /** * Watches for changes to pods. */ @@ -322,6 +331,63 @@ V1ObjectMeta getMetadata(V1Pod pod) { Step createReadAsyncStep(String name, String namespace, String domainUid, ResponseStep responseStep) { return new CallBuilder().readPodAsync(name, namespace, domainUid, responseStep); } + + protected DefaultResponseStep resumeIfReady(Callback callback) { + return new DefaultResponseStep<>(null) { + @Override + public NextAction onSuccess(Packet packet, CallResponse callResponse) { + + DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); + if ((info != null) && (callResponse != null)) { + String serverName = (String)packet.get(SERVER_NAME); + info.setServerPodFromEvent(getPodLabel(callResponse.getResult()), callResponse.getResult()); + if (onReadNotFoundForCachedResource(getServerPod(info, serverName), isNotFoundOnRead(callResponse))) { + LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, info.getWatchBackstopRecheckCount()); + return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), + info.getNamespace(), new MakeRightDomainStep(callback,null)), packet); + } + } + + if (isReady(callResponse.getResult()) || callback.didResume.get()) { + callback.proceedFromWait(callResponse.getResult()); + info.resetWatchBackstopRecheckCount(); + return doNext(packet); + } + + if (shouldWait(info)) { + // Watch backstop recheck count is less than or equal to the configured recheck count, delay. + return doDelay(createReadAndIfReadyCheckStep(callback), packet, + getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); + } else { + LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, info.getWatchBackstopRecheckCount()); + // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. + return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), + info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); + } + } + + private String getPodLabel(V1Pod pod) { + return Optional.ofNullable(pod) + .map(V1Pod::getMetadata) + .map(V1ObjectMeta::getLabels) + .map(m -> m.get(LabelConstants.SERVERNAME_LABEL)) + .orElse(null); + } + + private V1Pod getServerPod(DomainPresenceInfo info, String serverName) { + return Optional.ofNullable(serverName).map(info::getServerPod).orElse(null); + } + + private boolean isNotFoundOnRead(CallResponse callResponse) { + return callResponse.getResult() == null; + } + + private boolean shouldWait(DomainPresenceInfo info) { + return info == null || (info.incrementAndGetWatchBackstopRecheckCount() <= getWatchBackstopRecheckCount()); + } + }; + } + } private class WaitForPodReadyStep extends WaitForPodStatusStep { @@ -364,7 +430,7 @@ protected void logWaiting(String name) { @Override protected boolean onReadNotFoundForCachedResource(V1Pod cachedPod, boolean isNotFoundOnRead) { // Return true if cached pod is not null but pod not found in explicit read, false otherwise. - return ((cachedPod != null) && isNotFoundOnRead) ? true : false; + return (cachedPod != null) && isNotFoundOnRead; } } diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index ef2ee3eed8f..ca7e7113143 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -4,15 +4,11 @@ package oracle.kubernetes.operator; import java.util.Optional; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; -import io.kubernetes.client.openapi.models.V1Job; import io.kubernetes.client.openapi.models.V1ObjectMeta; -import io.kubernetes.client.openapi.models.V1Pod; import oracle.kubernetes.operator.calls.CallResponse; -import oracle.kubernetes.operator.helpers.CallBuilder; import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.ResponseStep; import oracle.kubernetes.operator.logging.LoggingFacade; @@ -25,9 +21,7 @@ import oracle.kubernetes.weblogic.domain.model.Domain; import static oracle.kubernetes.operator.ProcessingConstants.MAKE_RIGHT_DOMAIN_OPERATION; -import static oracle.kubernetes.operator.ProcessingConstants.SERVER_NAME; import static oracle.kubernetes.operator.helpers.KubernetesUtils.getDomainUidLabel; -import static oracle.kubernetes.operator.logging.MessageKeys.EXECUTE_MAKE_RIGHT_DOMAIN; /** * This class is the base for steps that must suspend while waiting for a resource to become ready. It is typically @@ -199,14 +193,16 @@ private void checkUpdatedResource(Packet packet, AsyncFiber fiber, Callback call null); } - private Step createReadAndIfReadyCheckStep(Callback callback) { + Step createReadAndIfReadyCheckStep(Callback callback) { if (initialResource != null) { return createReadAsyncStep(getName(), getNamespace(), getDomainUid(), resumeIfReady(callback)); } else { - return new ReadAndIfReadyCheckStep(getName(), callback, getNext()); + return new ReadAndIfReadyCheckStep(getName(), resumeIfReady(callback), getNext()); } } + protected abstract ResponseStep resumeIfReady(Callback callback); + private String getNamespace() { return getMetadata(initialResource).getNamespace(); } @@ -219,85 +215,27 @@ public String getName() { return initialResource != null ? getMetadata(initialResource).getName() : resourceName; } - private DefaultResponseStep resumeIfReady(Callback callback) { - return new DefaultResponseStep<>(null) { - @Override - public NextAction onSuccess(Packet packet, CallResponse callResponse) { - - DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); - if ((info != null) && (callResponse != null)) { - String serverName = (String)packet.get(SERVER_NAME); - if (callResponse.getResult() instanceof V1Pod) { - info.setServerPodFromEvent(getPodLabel((V1Pod) callResponse.getResult(), - LabelConstants.SERVERNAME_LABEL), (V1Pod) callResponse.getResult()); - } else if (onReadNotFoundForCachedResource(getServerPod(info, serverName), isNotFoundOnRead(callResponse))) { - LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, info.getWatchBackstopRecheckCount()); - return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), - info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); - } - } - - if (isReady(callResponse.getResult()) || callback.didResume.get()) { - callback.proceedFromWait(callResponse.getResult()); - Optional.ofNullable(info).ifPresent(i -> i.resetWatchBackstopRecheckCount()); - return doNext(packet); - } - if (shouldWait(callResponse, info)) { - // Watch backstop recheck count is less than or equal to the configured recheck count, delay. - return doDelay(createReadAndIfReadyCheckStep(callback), packet, - getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); - } else { - LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, info.getWatchBackstopRecheckCount()); - // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. - return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), - info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); - } - } - - private String getPodLabel(V1Pod pod, String labelName) { - return Optional.ofNullable(pod) - .map(V1Pod::getMetadata) - .map(V1ObjectMeta::getLabels) - .map(m -> m.get(labelName)) - .orElse(null); - } - - private T getServerPod(DomainPresenceInfo info, String serverName) { - return Optional.ofNullable(serverName).map(s -> (T) info.getServerPod(s)).orElse(null); - } - - private boolean isNotFoundOnRead(CallResponse callResponse) { - return callResponse.getResult() == null; - } - - private boolean shouldWait(CallResponse callResponse, DomainPresenceInfo info) { - return ((callResponse != null) && (callResponse.getResult() instanceof V1Job)) - || (info == null) - || (info.incrementAndGetWatchBackstopRecheckCount() <= getWatchBackstopRecheckCount()); - } - }; - } private class ReadAndIfReadyCheckStep extends Step { - private final Callback callback; private final String resourceName; + private final ResponseStep responseStep; - ReadAndIfReadyCheckStep(String resourceName, Callback callback, Step next) { + ReadAndIfReadyCheckStep(String resourceName, ResponseStep responseStep, Step next) { super(next); - this.callback = callback; this.resourceName = resourceName; + this.responseStep = responseStep; } @Override public NextAction apply(Packet packet) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); return doNext(createReadAsyncStep(resourceName, info.getNamespace(), - info.getDomainUid(), resumeIfReady(callback)), packet); + info.getDomainUid(), responseStep), packet); } } - private class MakeRightDomainStep extends DefaultResponseStep { + class MakeRightDomainStep extends DefaultResponseStep { private final Callback callback; MakeRightDomainStep(Callback callback, Step next) { @@ -319,10 +257,10 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { } } - private class Callback implements Consumer { + class Callback implements Consumer { private final AsyncFiber fiber; private final Packet packet; - private final AtomicBoolean didResume = new AtomicBoolean(false); + final AtomicBoolean didResume = new AtomicBoolean(false); Callback(AsyncFiber fiber, Packet packet) { this.fiber = fiber; @@ -338,7 +276,7 @@ public void accept(T resource) { } // The resource has now either completed or failed, so we can continue processing. - private void proceedFromWait(T resource) { + void proceedFromWait(T resource) { removeCallback(getName(), this); if (mayResumeFiber()) { handleResourceReady(fiber, packet, resource); From 7b2bc9f352e95484f25d328798f9318e54347d37 Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Tue, 15 Jun 2021 00:17:01 +0000 Subject: [PATCH 17/23] Check for null call result. --- .../src/main/java/oracle/kubernetes/operator/PodWatcher.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index 1e155b40123..54110812796 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -340,7 +340,8 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); if ((info != null) && (callResponse != null)) { String serverName = (String)packet.get(SERVER_NAME); - info.setServerPodFromEvent(getPodLabel(callResponse.getResult()), callResponse.getResult()); + Optional.ofNullable(callResponse.getResult()) + .ifPresent(result -> info.setServerPodFromEvent(getPodLabel(result), result)); if (onReadNotFoundForCachedResource(getServerPod(info, serverName), isNotFoundOnRead(callResponse))) { LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, info.getWatchBackstopRecheckCount()); return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), From 5a040ccb2d841ced5bfb27d3be55c0b3210d5f6d Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Tue, 15 Jun 2021 01:43:25 +0000 Subject: [PATCH 18/23] More refactoring changes. --- .../oracle/kubernetes/operator/JobWatcher.java | 2 +- .../oracle/kubernetes/operator/PodWatcher.java | 17 ++++++++--------- .../kubernetes/operator/WaitForReadyStep.java | 16 +++++++++++++++- .../operator/helpers/DomainPresenceInfo.java | 13 ------------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java index b57e161ff63..31ef8b72902 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java @@ -317,7 +317,7 @@ protected DefaultResponseStep resumeIfReady(Callback callback) { return new DefaultResponseStep<>(null) { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { - if (isReady(callResponse.getResult()) || callback.didResume.get()) { + if (isReady(callResponse.getResult()) || callback.didResumeFiber()) { callback.proceedFromWait(callResponse.getResult()); return doNext(packet); } diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index 54110812796..faea26bf9a2 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -340,27 +340,26 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); if ((info != null) && (callResponse != null)) { String serverName = (String)packet.get(SERVER_NAME); - Optional.ofNullable(callResponse.getResult()) - .ifPresent(result -> info.setServerPodFromEvent(getPodLabel(result), result)); + Optional.ofNullable(callResponse.getResult()).ifPresent(result -> + info.setServerPodFromEvent(getPodLabel(result), result)); if (onReadNotFoundForCachedResource(getServerPod(info, serverName), isNotFoundOnRead(callResponse))) { - LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, info.getWatchBackstopRecheckCount()); + LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, callback.getRecheckCount()); return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), info.getNamespace(), new MakeRightDomainStep(callback,null)), packet); } } - if (isReady(callResponse.getResult()) || callback.didResume.get()) { + if (isReady(callResponse.getResult()) || callback.didResumeFiber()) { callback.proceedFromWait(callResponse.getResult()); - info.resetWatchBackstopRecheckCount(); return doNext(packet); } - if (shouldWait(info)) { + if (shouldWait()) { // Watch backstop recheck count is less than or equal to the configured recheck count, delay. return doDelay(createReadAndIfReadyCheckStep(callback), packet, getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); } else { - LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, info.getWatchBackstopRecheckCount()); + LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, callback.getRecheckCount()); // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); @@ -383,8 +382,8 @@ private boolean isNotFoundOnRead(CallResponse callResponse) { return callResponse.getResult() == null; } - private boolean shouldWait(DomainPresenceInfo info) { - return info == null || (info.incrementAndGetWatchBackstopRecheckCount() <= getWatchBackstopRecheckCount()); + private boolean shouldWait() { + return callback.incrementAndGetRecheckCount() <= getWatchBackstopRecheckCount(); } }; } diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index ca7e7113143..74e7c347930 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -5,6 +5,7 @@ import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import io.kubernetes.client.openapi.models.V1ObjectMeta; @@ -260,7 +261,8 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { class Callback implements Consumer { private final AsyncFiber fiber; private final Packet packet; - final AtomicBoolean didResume = new AtomicBoolean(false); + private final AtomicBoolean didResume = new AtomicBoolean(false); + private final AtomicInteger recheckCount = new AtomicInteger(0); Callback(AsyncFiber fiber, Packet packet) { this.fiber = fiber; @@ -289,6 +291,18 @@ void proceedFromWait(T resource) { private boolean mayResumeFiber() { return didResume.compareAndSet(false, true); } + + boolean didResumeFiber() { + return didResume.get(); + } + + int incrementAndGetRecheckCount() { + return recheckCount.incrementAndGet(); + } + + int getRecheckCount() { + return recheckCount.get(); + } } private void handleResourceReady(AsyncFiber fiber, Packet packet, T resource) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java index 6eb4c3c14a8..a13b7535308 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java @@ -55,7 +55,6 @@ public class DomainPresenceInfo { private final AtomicBoolean isDeleting = new AtomicBoolean(false); private final AtomicBoolean isPopulated = new AtomicBoolean(false); private final AtomicInteger retryCount = new AtomicInteger(0); - private final AtomicInteger watchBackstopRecheckCount = new AtomicInteger(0); private final AtomicReference> serverStartupInfo; private final AtomicReference> serverShutdownInfo; @@ -560,18 +559,6 @@ int getRetryCount() { return retryCount.get(); } - public void resetWatchBackstopRecheckCount() { - watchBackstopRecheckCount.set(0); - } - - public int incrementAndGetWatchBackstopRecheckCount() { - return watchBackstopRecheckCount.incrementAndGet(); - } - - public int getWatchBackstopRecheckCount() { - return watchBackstopRecheckCount.get(); - } - /** Sets the last completion time to now. */ public void complete() { resetFailureCount(); From b974ff81d563f443af831042083bad53f706bd08 Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Tue, 15 Jun 2021 02:57:18 +0000 Subject: [PATCH 19/23] Added debug message. --- .../java/oracle/kubernetes/operator/JobWatcher.java | 2 +- .../java/oracle/kubernetes/operator/PodWatcher.java | 12 +++++++++--- .../kubernetes/operator/logging/MessageKeys.java | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java index 31ef8b72902..e7d1c248c4c 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java @@ -204,7 +204,7 @@ public void receivedResponse(Watch.Response item) { switch (item.type) { case "ADDED": case "MODIFIED": - dispatchCallback(getJobName(item), item.object); + //dispatchCallback(getJobName(item), item.object); break; case "DELETED": case "ERROR": diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index faea26bf9a2..c49c7063d46 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -47,6 +47,7 @@ import static oracle.kubernetes.operator.ProcessingConstants.SERVER_NAME; import static oracle.kubernetes.operator.logging.MessageKeys.EXECUTE_MAKE_RIGHT_DOMAIN; +import static oracle.kubernetes.operator.logging.MessageKeys.LOG_WAITING_COUNT; /** * Watches for changes to pods. @@ -314,6 +315,8 @@ public Step waitForDelete(V1Pod pod, Step next) { private abstract static class WaitForPodStatusStep extends WaitForReadyStep { + public static final int RECHECK_DEBUG_COUNT = 10; + private WaitForPodStatusStep(V1Pod pod, Step next) { super(pod, next); } @@ -338,12 +341,12 @@ protected DefaultResponseStep resumeIfReady(Callback callback) { public NextAction onSuccess(Packet packet, CallResponse callResponse) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); + String serverName = (String)packet.get(SERVER_NAME); if ((info != null) && (callResponse != null)) { - String serverName = (String)packet.get(SERVER_NAME); Optional.ofNullable(callResponse.getResult()).ifPresent(result -> info.setServerPodFromEvent(getPodLabel(result), result)); if (onReadNotFoundForCachedResource(getServerPod(info, serverName), isNotFoundOnRead(callResponse))) { - LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, callback.getRecheckCount()); + LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, serverName, callback.getRecheckCount()); return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), info.getNamespace(), new MakeRightDomainStep(callback,null)), packet); } @@ -355,11 +358,14 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { } if (shouldWait()) { + if ((callback.getRecheckCount() % RECHECK_DEBUG_COUNT) == 0) { + LOGGER.fine(LOG_WAITING_COUNT, serverName, callback.getRecheckCount()); + } // Watch backstop recheck count is less than or equal to the configured recheck count, delay. return doDelay(createReadAndIfReadyCheckStep(callback), packet, getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); } else { - LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, callback.getRecheckCount()); + LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, serverName, callback.getRecheckCount()); // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); diff --git a/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java b/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java index 94eaa3c1357..4049ed381ef 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java +++ b/operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java @@ -141,6 +141,7 @@ public class MessageKeys { public static final String DOMAIN_ROLL_STARTING = "WLSKO-0190"; public static final String DOMAIN_ROLL_COMPLETED = "WLSKO-0191"; public static final String EXECUTE_MAKE_RIGHT_DOMAIN = "WLSKO-0192"; + public static final String LOG_WAITING_COUNT = "WLSKO-0193"; // domain status messages public static final String DUPLICATE_SERVER_NAME_FOUND = "WLSDO-0001"; From 0616eb6510596b9c9aaf50cb37c2a70ed73b2da9 Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Tue, 15 Jun 2021 03:00:42 +0000 Subject: [PATCH 20/23] Minor changes. --- .../src/main/java/oracle/kubernetes/operator/JobWatcher.java | 2 +- operator/src/main/resources/Operator.properties | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java index e7d1c248c4c..31ef8b72902 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java @@ -204,7 +204,7 @@ public void receivedResponse(Watch.Response item) { switch (item.type) { case "ADDED": case "MODIFIED": - //dispatchCallback(getJobName(item), item.object); + dispatchCallback(getJobName(item), item.object); break; case "DELETED": case "ERROR": diff --git a/operator/src/main/resources/Operator.properties b/operator/src/main/resources/Operator.properties index 5d7410355d4..6d1d85acc8c 100644 --- a/operator/src/main/resources/Operator.properties +++ b/operator/src/main/resources/Operator.properties @@ -139,7 +139,8 @@ WLSKO-0187=Stop managing namespace {0} WLSKO-0189=HTTP request method {0} to {1} failed with exception {2}. WLSKO-0190=Rolling restart WebLogic server pods in domain {0} because {1} WLSKO-0191=Rolling restart of domain {0} completed -WLSKO-0192=Executing make right domain operation, watch backstop recheck count is {0} +WLSKO-0192=Executing make right domain operation, recheck count for server {0} is {1}. +WLSKO-0193=Waiting for server {0} to start, recheck count is {1}. # Domain status messages From 1c3b6a03630232c7f000d574b00f3dcbdc963eee Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Wed, 16 Jun 2021 03:27:27 +0000 Subject: [PATCH 21/23] added unit test to test that wait for ready timeout executes make right domain step. --- .../kubernetes/operator/PodWatcher.java | 11 ++--- .../kubernetes/operator/WaitForReadyStep.java | 41 ++++++++++++++----- .../kubernetes/operator/PodWatcherTest.java | 32 ++++++++++++++- .../kubernetes/operator/WatcherTestBase.java | 23 ++++++++++- .../helpers/TuningParametersStub.java | 2 +- 5 files changed, 90 insertions(+), 19 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index c49c7063d46..66b7e30ff78 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -336,19 +336,20 @@ Step createReadAsyncStep(String name, String namespace, String domainUid, Respon } protected DefaultResponseStep resumeIfReady(Callback callback) { - return new DefaultResponseStep<>(null) { + return new DefaultResponseStep<>(getNext()) { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); String serverName = (String)packet.get(SERVER_NAME); + String resource = initialResource == null ? resourceName : getMetadata(initialResource).getName(); if ((info != null) && (callResponse != null)) { Optional.ofNullable(callResponse.getResult()).ifPresent(result -> info.setServerPodFromEvent(getPodLabel(result), result)); if (onReadNotFoundForCachedResource(getServerPod(info, serverName), isNotFoundOnRead(callResponse))) { LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, serverName, callback.getRecheckCount()); - return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), - info.getNamespace(), new MakeRightDomainStep(callback,null)), packet); + removeCallback(resource, callback); + return doNext(NEXT_STEP_FACTORY.createMakeDomainRightStep(callback, info, resource, getNext()), packet); } } @@ -366,9 +367,9 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { getWatchBackstopRecheckDelaySeconds(), TimeUnit.SECONDS); } else { LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, serverName, callback.getRecheckCount()); + removeCallback(resource, callback); // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. - return doNext(new CallBuilder().readDomainAsync(info.getDomainUid(), - info.getNamespace(), new MakeRightDomainStep(callback, null)), packet); + return doNext(NEXT_STEP_FACTORY.createMakeDomainRightStep(callback, info, resource, getNext()), packet); } } diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 74e7c347930..455cb4bbe61 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -10,6 +10,7 @@ import io.kubernetes.client.openapi.models.V1ObjectMeta; import oracle.kubernetes.operator.calls.CallResponse; +import oracle.kubernetes.operator.helpers.CallBuilder; import oracle.kubernetes.operator.helpers.DomainPresenceInfo; import oracle.kubernetes.operator.helpers.ResponseStep; import oracle.kubernetes.operator.logging.LoggingFacade; @@ -34,6 +35,15 @@ abstract class WaitForReadyStep extends Step { private static final int DEFAULT_RECHECK_SECONDS = 5; private static final int DEFAULT_RECHECK_COUNT = 60; + static NextStepFactory NEXT_STEP_FACTORY = + (callback, info, name, next) -> createMakeDomainRightStep(callback, info, name, next); + + protected static Step createMakeDomainRightStep(WaitForReadyStep.Callback callback, + DomainPresenceInfo info, String name, Step next) { + return new CallBuilder().readDomainAsync(info.getDomainUid(), + info.getNamespace(), new MakeRightDomainStep(callback, name, null)); + } + static int getWatchBackstopRecheckDelaySeconds() { return Optional.ofNullable(TuningParameters.getInstance()) .map(TuningParameters::getWatchTuning) @@ -48,8 +58,8 @@ static int getWatchBackstopRecheckCount() { .orElse(DEFAULT_RECHECK_COUNT); } - private final T initialResource; - private final String resourceName; + final T initialResource; + final String resourceName; /** * Creates a step which will only proceed once the specified resource is ready. @@ -236,24 +246,26 @@ public NextAction apply(Packet packet) { } - class MakeRightDomainStep extends DefaultResponseStep { - private final Callback callback; + static class MakeRightDomainStep extends DefaultResponseStep { + private final WaitForReadyStep.Callback callback; + private final String name; - MakeRightDomainStep(Callback callback, Step next) { + MakeRightDomainStep(WaitForReadyStep.Callback callback, String name, Step next) { super(next); this.callback = callback; + this.name = name; } @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { - String name = initialResource == null ? resourceName : getMetadata(initialResource).getName(); MakeRightDomainOperation makeRightDomainOperation = (MakeRightDomainOperation)packet.get(MAKE_RIGHT_DOMAIN_OPERATION); - makeRightDomainOperation.clear(); - makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain)callResponse.getResult())); - makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); - removeCallback(name, callback); - callback.fiber.terminate(null, packet); + if (makeRightDomainOperation != null) { + makeRightDomainOperation.clear(); + makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain) callResponse.getResult())); + makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); + } + callback.fiber.terminate(new Exception("timeout exceeded"), packet); return super.onSuccess(packet, callResponse); } } @@ -311,4 +323,11 @@ private void handleResourceReady(AsyncFiber fiber, Packet packet, T resource) { fiber.terminate(createTerminationException(resource), packet); } } + + // an interface to provide a hook for unit testing. + interface NextStepFactory { + Step createMakeDomainRightStep(WaitForReadyStep.Callback callback, + DomainPresenceInfo info, String name, Step next); + } + } diff --git a/operator/src/test/java/oracle/kubernetes/operator/PodWatcherTest.java b/operator/src/test/java/oracle/kubernetes/operator/PodWatcherTest.java index 42e5a915251..fb5aeb53d7c 100644 --- a/operator/src/test/java/oracle/kubernetes/operator/PodWatcherTest.java +++ b/operator/src/test/java/oracle/kubernetes/operator/PodWatcherTest.java @@ -34,7 +34,9 @@ import static oracle.kubernetes.operator.LabelConstants.CREATEDBYOPERATOR_LABEL; import static oracle.kubernetes.operator.LabelConstants.DOMAINUID_LABEL; import static oracle.kubernetes.operator.helpers.LegalNames.DEFAULT_INTROSPECTOR_JOB_NAME_SUFFIX; +import static oracle.kubernetes.operator.logging.MessageKeys.EXECUTE_MAKE_RIGHT_DOMAIN; import static oracle.kubernetes.operator.logging.MessageKeys.INTROSPECTOR_POD_FAILED; +import static oracle.kubernetes.utils.LogMatcher.containsFine; import static oracle.kubernetes.utils.LogMatcher.containsInfo; import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.hasEntry; @@ -69,7 +71,8 @@ public void setUp() throws Exception { private String[] getMessageKeys() { return new String[] { - getPodFailedMessageKey() + getPodFailedMessageKey(), + getMakeRightDomainStepKey() }; } @@ -77,6 +80,10 @@ private String getPodFailedMessageKey() { return INTROSPECTOR_POD_FAILED; } + private String getMakeRightDomainStepKey() { + return EXECUTE_MAKE_RIGHT_DOMAIN; + } + @Override public void receivedResponse(Watch.Response response) { recordCallBack(response); @@ -245,6 +252,16 @@ public void whenPodCreatedAndReadyLater_runNextStep() { assertThat(terminalStep.wasRun(), is(true)); } + @Test + public void whenPodCreatedAndNotReadyAfterTimeout_executeMakeRightDomain() { + executeWaitForReady(); + + testSupport.setTime(10, TimeUnit.SECONDS); + + assertThat(terminalStep.wasRun(), is(true)); + assertThat(logRecords, containsFine(getMakeRightDomainStepKey())); + } + @Test public void whenPodNotReadyLater_dontRunNextStep() { sendPodModifiedWatchAfterWaitForReady(this::dontChangePod); @@ -318,6 +335,19 @@ private void sendPodModifiedWatchAfterResourceCreatedAndWaitForReady(Function... modifiers) { diff --git a/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java b/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java index 567f02e169e..eb8d9af5196 100644 --- a/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java +++ b/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java @@ -9,11 +9,15 @@ import java.util.concurrent.atomic.AtomicBoolean; import com.meterware.simplestub.Memento; +import com.meterware.simplestub.StaticStubSupport; import io.kubernetes.client.openapi.models.V1ObjectMeta; import io.kubernetes.client.util.Watch; import oracle.kubernetes.operator.TuningParameters.WatchTuning; import oracle.kubernetes.operator.builders.StubWatchFactory; import oracle.kubernetes.operator.builders.WatchEvent; +import oracle.kubernetes.operator.helpers.DomainPresenceInfo; +import oracle.kubernetes.operator.helpers.TuningParametersStub; +import oracle.kubernetes.operator.work.Step; import oracle.kubernetes.utils.TestUtils; import oracle.kubernetes.utils.TestUtils.ConsoleHandlerMemento; import org.junit.jupiter.api.AfterEach; @@ -80,8 +84,10 @@ protected ConsoleHandlerMemento configureOperatorLogger() { return TestUtils.silenceOperatorLogger().ignoringLoggedExceptions(hasNextException); } - final void addMemento(Memento memento) { + final void addMemento(Memento memento) throws NoSuchFieldException { mementos.add(memento); + mementos.add(TuningParametersStub.install()); + mementos.add(TestStepFactory.install()); } @AfterEach @@ -253,4 +259,19 @@ private Watcher createAndRunWatcher(String nameSpace, AtomicBoolean stopping, } protected abstract Watcher createWatcher(String ns, AtomicBoolean stopping, BigInteger rv); + + static class TestStepFactory implements WaitForReadyStep.NextStepFactory { + + private static TestStepFactory factory = new TestStepFactory(); + + private static Memento install() throws NoSuchFieldException { + return StaticStubSupport.install(WaitForReadyStep.class, "NEXT_STEP_FACTORY", factory); + } + + @Override + public Step createMakeDomainRightStep(WaitForReadyStep.Callback callback, + DomainPresenceInfo info, String name, Step next) { + return next; + } + } } diff --git a/operator/src/test/java/oracle/kubernetes/operator/helpers/TuningParametersStub.java b/operator/src/test/java/oracle/kubernetes/operator/helpers/TuningParametersStub.java index 3cddec8c6dc..e40bb720b4c 100644 --- a/operator/src/test/java/oracle/kubernetes/operator/helpers/TuningParametersStub.java +++ b/operator/src/test/java/oracle/kubernetes/operator/helpers/TuningParametersStub.java @@ -79,7 +79,7 @@ public CallBuilderTuning getCallBuilderTuning() { @Override public WatchTuning getWatchTuning() { - return null; + return new TuningParameters.WatchTuning(30, 0, 5, 1); } @Override From a61ddea56a3654111a4e629f8e0436d17b834d77 Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Wed, 16 Jun 2021 03:30:54 +0000 Subject: [PATCH 22/23] Minor change. --- .../java/oracle/kubernetes/operator/WaitForReadyStep.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index 455cb4bbe61..fb71f0ca62a 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -247,6 +247,7 @@ public NextAction apply(Packet packet) { } static class MakeRightDomainStep extends DefaultResponseStep { + public static final String WAIT_TIMEOUT_EXCEEDED = "Wait timeout exceeded"; private final WaitForReadyStep.Callback callback; private final String name; @@ -265,9 +266,10 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { makeRightDomainOperation.setLiveInfo(new DomainPresenceInfo((Domain) callResponse.getResult())); makeRightDomainOperation.withExplicitRecheck().interrupt().execute(); } - callback.fiber.terminate(new Exception("timeout exceeded"), packet); + callback.fiber.terminate(new Exception(WAIT_TIMEOUT_EXCEEDED), packet); return super.onSuccess(packet, callResponse); } + } class Callback implements Consumer { From 23b60ae2bebcc5d70fe555fac0cd1e47f3eaeb0a Mon Sep 17 00:00:00 2001 From: Anil Kedia Date: Wed, 16 Jun 2021 04:22:48 +0000 Subject: [PATCH 23/23] Remove unused variable and potential fix for integration test failure. --- .../java/oracle/kubernetes/operator/PodWatcher.java | 6 +++--- .../oracle/kubernetes/operator/WaitForReadyStep.java | 12 +++++------- .../oracle/kubernetes/operator/WatcherTestBase.java | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java index 66b7e30ff78..de5dc1881e4 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java +++ b/operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java @@ -349,13 +349,13 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { if (onReadNotFoundForCachedResource(getServerPod(info, serverName), isNotFoundOnRead(callResponse))) { LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, serverName, callback.getRecheckCount()); removeCallback(resource, callback); - return doNext(NEXT_STEP_FACTORY.createMakeDomainRightStep(callback, info, resource, getNext()), packet); + return doNext(NEXT_STEP_FACTORY.createMakeDomainRightStep(callback, info, getNext()), packet); } } if (isReady(callResponse.getResult()) || callback.didResumeFiber()) { callback.proceedFromWait(callResponse.getResult()); - return doNext(packet); + return null; } if (shouldWait()) { @@ -369,7 +369,7 @@ public NextAction onSuccess(Packet packet, CallResponse callResponse) { LOGGER.fine(EXECUTE_MAKE_RIGHT_DOMAIN, serverName, callback.getRecheckCount()); removeCallback(resource, callback); // Watch backstop recheck count is more than configured recheck count, proceed to make-right step. - return doNext(NEXT_STEP_FACTORY.createMakeDomainRightStep(callback, info, resource, getNext()), packet); + return doNext(NEXT_STEP_FACTORY.createMakeDomainRightStep(callback, info, getNext()), packet); } } diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index fb71f0ca62a..392028e5b52 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -36,12 +36,12 @@ abstract class WaitForReadyStep extends Step { private static final int DEFAULT_RECHECK_COUNT = 60; static NextStepFactory NEXT_STEP_FACTORY = - (callback, info, name, next) -> createMakeDomainRightStep(callback, info, name, next); + (callback, info, next) -> createMakeDomainRightStep(callback, info, next); protected static Step createMakeDomainRightStep(WaitForReadyStep.Callback callback, - DomainPresenceInfo info, String name, Step next) { + DomainPresenceInfo info, Step next) { return new CallBuilder().readDomainAsync(info.getDomainUid(), - info.getNamespace(), new MakeRightDomainStep(callback, name, null)); + info.getNamespace(), new MakeRightDomainStep(callback, null)); } static int getWatchBackstopRecheckDelaySeconds() { @@ -249,12 +249,10 @@ public NextAction apply(Packet packet) { static class MakeRightDomainStep extends DefaultResponseStep { public static final String WAIT_TIMEOUT_EXCEEDED = "Wait timeout exceeded"; private final WaitForReadyStep.Callback callback; - private final String name; - MakeRightDomainStep(WaitForReadyStep.Callback callback, String name, Step next) { + MakeRightDomainStep(WaitForReadyStep.Callback callback, Step next) { super(next); this.callback = callback; - this.name = name; } @Override @@ -329,7 +327,7 @@ private void handleResourceReady(AsyncFiber fiber, Packet packet, T resource) { // an interface to provide a hook for unit testing. interface NextStepFactory { Step createMakeDomainRightStep(WaitForReadyStep.Callback callback, - DomainPresenceInfo info, String name, Step next); + DomainPresenceInfo info, Step next); } } diff --git a/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java b/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java index eb8d9af5196..a19ea254be4 100644 --- a/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java +++ b/operator/src/test/java/oracle/kubernetes/operator/WatcherTestBase.java @@ -270,7 +270,7 @@ private static Memento install() throws NoSuchFieldException { @Override public Step createMakeDomainRightStep(WaitForReadyStep.Callback callback, - DomainPresenceInfo info, String name, Step next) { + DomainPresenceInfo info, Step next) { return next; } }