Skip to content

Commit 7e5c723

Browse files
committed
Cherry-pick branch 'owls-112700-cohchk-stopscript' into 'main'
1 parent 756cb63 commit 7e5c723

File tree

15 files changed

+175
-6
lines changed

15 files changed

+175
-6
lines changed

documentation/domains/Cluster.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,11 @@
376376
"Shutdown": {
377377
"type": "object",
378378
"properties": {
379+
"skipWaitingCohEndangeredState": {
380+
"default": false,
381+
"description": "For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false.",
382+
"type": "boolean"
383+
},
379384
"ignoreSessions": {
380385
"default": false,
381386
"description": "For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false.",

documentation/domains/Cluster.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ The specification of the operation of the WebLogic cluster. Required.
115115
| --- | --- | --- |
116116
| `ignoreSessions` | Boolean | For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false. |
117117
| `shutdownType` | string | Specifies how the operator will shut down server instances. Legal values are `Graceful` and `Forced`. Defaults to `Graceful`. |
118+
| `skipWaitingCohEndangeredState` | Boolean | For graceful shutdown only, set to true to skip waiting for the Coherence Cache Cluster service MBean HAStatus to reach a safe state before shutdown. By default, the operator will wait until it is safe to shut down the Coherence Cache Cluster. Defaults to false. |
118119
| `timeoutSeconds` | integer | For graceful shutdown only, number of seconds to wait before aborting in-flight work and shutting down the server. Defaults to 30 seconds. |
119120
| `waitForAllSessions` | Boolean | For graceful shutdown only, set to true to wait for all HTTP sessions during in-flight work handling; false to wait for non-persisted HTTP sessions only. Defaults to false. |
120121

documentation/domains/Domain.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,6 +1187,11 @@
11871187
"Shutdown": {
11881188
"type": "object",
11891189
"properties": {
1190+
"skipWaitingCohEndangeredState": {
1191+
"default": false,
1192+
"description": "For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus in safe state before shutdown. By default, the operator will wait until it is safe to shutdown the Coherence Cache Cluster. Defaults to false.",
1193+
"type": "boolean"
1194+
},
11901195
"ignoreSessions": {
11911196
"default": false,
11921197
"description": "For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false.",

documentation/domains/Domain.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ The current status of the operation of the WebLogic domain. Updated automaticall
278278
| --- | --- | --- |
279279
| `ignoreSessions` | Boolean | For graceful shutdown only, indicates to ignore pending HTTP sessions during in-flight work handling. Defaults to false. |
280280
| `shutdownType` | string | Specifies how the operator will shut down server instances. Legal values are `Graceful` and `Forced`. Defaults to `Graceful`. |
281+
| `skipWaitingCohEndangeredState` | Boolean | For graceful shutdown only, set to true to skip waiting for the Coherence Cache Cluster service MBean HAStatus to reach a safe state before shutdown. By default, the operator will wait until it is safe to shut down the Coherence Cache Cluster. Defaults to false. |
281282
| `timeoutSeconds` | integer | For graceful shutdown only, number of seconds to wait before aborting in-flight work and shutting down the server. Defaults to 30 seconds. |
282283
| `waitForAllSessions` | Boolean | For graceful shutdown only, set to true to wait for all HTTP sessions during in-flight work handling; false to wait for non-persisted HTTP sessions only. Defaults to false. |
283284

kubernetes/crd/cluster-crd.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ apiVersion: apiextensions.k8s.io/v1
55
kind: CustomResourceDefinition
66
metadata:
77
annotations:
8-
weblogic.sha256: 9ac551460201264f4eb69fabd7c54120c2e30492143ad81f47965567f2a7ae79
8+
weblogic.sha256: 7146de067298f75bf803e2c8acf9f88980d1d5770e375122bcbb1fe4e6f9a64b
99
name: clusters.weblogic.oracle
1010
spec:
1111
group: weblogic.oracle
@@ -2390,6 +2390,14 @@ spec:
23902390
description: Configures how the operator should shut down the
23912391
server instance.
23922392
properties:
2393+
skipWaitingCohEndangeredState:
2394+
default: false
2395+
description: For graceful shutdown only, set to true to skip
2396+
waiting for Coherence Cache Cluster service MBean HAStatus
2397+
in safe state before shutdown. By default, the operator
2398+
will wait until it is safe to shutdown the Coherence Cache
2399+
Cluster. Defaults to false.
2400+
type: boolean
23932401
ignoreSessions:
23942402
default: false
23952403
description: For graceful shutdown only, indicates to ignore

kubernetes/crd/domain-crd.yaml

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ apiVersion: apiextensions.k8s.io/v1
55
kind: CustomResourceDefinition
66
metadata:
77
annotations:
8-
weblogic.sha256: 1b5fbb6128eb44897c07ef1304cf64bc8682f7f4ef2454dde6e861a04941b635
8+
weblogic.sha256: d33dc80911f1901a703fe95438c2e4fc39271775bfecddd35893f8422d97caa3
99
name: domains.weblogic.oracle
1010
spec:
1111
group: weblogic.oracle
@@ -3331,6 +3331,14 @@ spec:
33313331
description: Configures how the operator should shut down
33323332
the server instance.
33333333
properties:
3334+
skipWaitingCohEndangeredState:
3335+
default: false
3336+
description: For graceful shutdown only, set to true to
3337+
skip waiting for Coherence Cache Cluster service MBean
3338+
HAStatus in safe state before shutdown. By default,
3339+
the operator will wait until it is safe to shutdown
3340+
the Coherence Cache Cluster. Defaults to false.
3341+
type: boolean
33343342
ignoreSessions:
33353343
default: false
33363344
description: For graceful shutdown only, indicates to
@@ -6414,6 +6422,14 @@ spec:
64146422
description: Configures how the operator should shut down the
64156423
server instance.
64166424
properties:
6425+
skipWaitingCohEndangeredState:
6426+
default: false
6427+
description: For graceful shutdown only, set to true to skip
6428+
waiting for Coherence Cache Cluster service MBean HAStatus
6429+
in safe state before shutdown. By default, the operator
6430+
will wait until it is safe to shutdown the Coherence Cache
6431+
Cluster. Defaults to false.
6432+
type: boolean
64176433
ignoreSessions:
64186434
default: false
64196435
description: For graceful shutdown only, indicates to ignore
@@ -9155,6 +9171,14 @@ spec:
91559171
the server instance.
91569172
type: object
91579173
properties:
9174+
skipWaitingCohEndangeredState:
9175+
default: false
9176+
description: For graceful shutdown only, set to true
9177+
to skip waiting for Coherence Cache Cluster service
9178+
MBean HAStatus in safe state before shutdown. By default,
9179+
the operator will wait until it is safe to shutdown
9180+
the Coherence Cache Cluster. Defaults to false.
9181+
type: boolean
91589182
ignoreSessions:
91599183
default: false
91609184
description: For graceful shutdown only, indicates to

operator/src/main/java/oracle/kubernetes/operator/helpers/PodStepContext.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,11 @@ private void updateEnvForShutdown(List<V1EnvVar> env) {
613613
addDefaultEnvVarIfMissing(env, "SHUTDOWN_WAIT_FOR_ALL_SESSIONS",
614614
String.valueOf(shutdown.getWaitForAllSessions()));
615615
}
616+
if (!shutdown.getSkipWaitingCohEndangeredState()
617+
.equals(Shutdown.DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE)) {
618+
addDefaultEnvVarIfMissing(env, "SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE",
619+
String.valueOf(shutdown.getSkipWaitingCohEndangeredState()));
620+
}
616621
}
617622

618623
private Shutdown getShutdownSpec() {

operator/src/main/java/oracle/kubernetes/operator/steps/ShutdownManagedServerStep.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,16 @@ protected PortDetails getPortDetails() {
221221
private Integer getWlsServerPort() {
222222
Integer listenPort = Optional.ofNullable(getWlsServerConfig()).map(WlsServerConfig::getListenPort)
223223
.orElse(null);
224-
224+
Integer adminPort = Optional.ofNullable(getWlsServerConfig()).map(WlsServerConfig::getAdminPort)
225+
.orElse(null);
226+
Integer sslListenPort = Optional.ofNullable(getWlsServerConfig()).map(WlsServerConfig::getSslListenPort)
227+
.orElse(null);
228+
if (adminPort != null) {
229+
return adminPort;
230+
}
231+
if (sslListenPort != null) {
232+
return sslListenPort;
233+
}
225234
if (listenPort == null) {
226235
// This can only happen if the running server pod does not exist in the WLS Domain.
227236
// This is a rare case where the server was deleted from the WLS Domain config.

operator/src/main/java/oracle/kubernetes/weblogic/domain/model/BaseConfiguration.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,12 @@ void addLimitRequirement(String resource, String quantity) {
274274
serverPod.addLimitRequirement(resource, quantity);
275275
}
276276

277+
void setShutdown(Shutdown shutdown) {
278+
serverPod.setShutdown(shutdown.getShutdownType(),
279+
shutdown.getTimeoutSeconds(), shutdown.getIgnoreSessions(), shutdown.getWaitForAllSessions(),
280+
shutdown.getSkipWaitingCohEndangeredState());
281+
}
282+
277283
V1PodSecurityContext getPodSecurityContext() {
278284
return Optional.ofNullable(serverPod.getPodSecurityContext()).orElse(getDefaultPodSecurityContext());
279285
}

operator/src/main/java/oracle/kubernetes/weblogic/domain/model/ServerPod.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,12 +362,14 @@ Shutdown getShutdown() {
362362
return this.shutdown;
363363
}
364364

365-
void setShutdown(ShutdownType shutdownType, Long timeoutSeconds, Boolean ignoreSessions, Boolean waitForAllSessions) {
365+
void setShutdown(ShutdownType shutdownType, Long timeoutSeconds, Boolean ignoreSessions, Boolean waitForAllSessions,
366+
Boolean skipWaitingCohEndangeredState) {
366367
this.shutdown
367368
.shutdownType(shutdownType)
368369
.timeoutSeconds(timeoutSeconds)
369370
.ignoreSessions(ignoreSessions)
370-
.waitForAllSessions(waitForAllSessions);
371+
.waitForAllSessions(waitForAllSessions)
372+
.skipWaitingCohEndangeredState(skipWaitingCohEndangeredState);
371373
}
372374

373375
ProbeTuning getReadinessProbeTuning() {

operator/src/main/java/oracle/kubernetes/weblogic/domain/model/Shutdown.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ public class Shutdown {
1717
public static final Long DEFAULT_TIMEOUT = 30L;
1818
public static final Boolean DEFAULT_IGNORESESSIONS = Boolean.FALSE;
1919
public static final Boolean DEFAULT_WAIT_FOR_ALL_SESSIONS = Boolean.FALSE;
20+
public static final Boolean DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE = Boolean.FALSE;
2021

2122
@Description(
2223
"Specifies how the operator will shut down server instances."
@@ -44,6 +45,14 @@ public class Shutdown {
4445
@Default(boolDefault = false)
4546
private Boolean waitForAllSessions;
4647

48+
@Description(
49+
"For graceful shutdown only, set to true to skip waiting for Coherence Cache Cluster service MBean HAStatus"
50+
+ " in safe state before shutdown. By default, the operator will wait until it is"
51+
+ " safe to shutdown the Coherence Cache Cluster."
52+
+ " Defaults to false.")
53+
@Default(boolDefault = false)
54+
private Boolean skipWaitingCohEndangeredState;
55+
4756
void copyValues(Shutdown fromShutdown) {
4857
if (shutdownType == null) {
4958
shutdownType(fromShutdown.shutdownType);
@@ -58,6 +67,10 @@ void copyValues(Shutdown fromShutdown) {
5867
if (waitForAllSessions == null) {
5968
waitForAllSessions(fromShutdown.waitForAllSessions);
6069
}
70+
71+
if (skipWaitingCohEndangeredState == null) {
72+
skipWaitingCohEndangeredState(fromShutdown.skipWaitingCohEndangeredState);
73+
}
6174
}
6275

6376
public ShutdownType getShutdownType() {
@@ -96,13 +109,24 @@ public Shutdown waitForAllSessions(Boolean waitForAllSessions) {
96109
return this;
97110
}
98111

112+
public Boolean getSkipWaitingCohEndangeredState() {
113+
return Optional.ofNullable(skipWaitingCohEndangeredState)
114+
.orElse(DEFAULT_SKIP_WAIT_COH_ENDANGERED_STATE);
115+
}
116+
117+
public Shutdown skipWaitingCohEndangeredState(Boolean skipWaitingCohEndangeredState) {
118+
this.skipWaitingCohEndangeredState = skipWaitingCohEndangeredState;
119+
return this;
120+
}
121+
99122
@Override
100123
public String toString() {
101124
return new ToStringBuilder(this)
102125
.append("shutdownType", shutdownType)
103126
.append("timeoutSeconds", timeoutSeconds)
104127
.append("ignoreSessions", ignoreSessions)
105128
.append("waitForAllSessions", waitForAllSessions)
129+
.append("skipWaitingCohEndangeredState", skipWaitingCohEndangeredState)
106130
.toString();
107131
}
108132

@@ -123,6 +147,7 @@ public boolean equals(Object o) {
123147
.append(timeoutSeconds, that.timeoutSeconds)
124148
.append(ignoreSessions, that.ignoreSessions)
125149
.append(waitForAllSessions, that.waitForAllSessions)
150+
.append(skipWaitingCohEndangeredState, that.skipWaitingCohEndangeredState)
126151
.isEquals();
127152
}
128153

@@ -133,6 +158,7 @@ public int hashCode() {
133158
.append(timeoutSeconds)
134159
.append(ignoreSessions)
135160
.append(waitForAllSessions)
161+
.append(skipWaitingCohEndangeredState)
136162
.toHashCode();
137163
}
138164
}

operator/src/main/resources/scripts/stop-server.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,9 @@ def checkCoherenceClusterExist(configData):
106106
def waitUntilCoherenceSafe():
107107
print ('Shutdown: getting all service Coherence MBeans')
108108
query='Coherence:type=PartitionAssignment,service=*,*'
109-
109+
if 'SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE' in os.environ and \
110+
os.environ['SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE'] == 'true':
111+
return
110112
# By default, Coherence will use a single WebLogic Runtime MBean server to manage
111113
# its MBeans. That server will correspond to the current Coherence senior member,
112114
# which means that the Coherence MBeans will migrate to the oldest cluster member
@@ -169,6 +171,10 @@ def waitUntilServiceSafeToShutdown(objectName):
169171
if status != "ENDANGERED":
170172
break
171173

174+
safe_status_ha = _checkCacheSafeStatusHA(objectName)
175+
if safe_status_ha:
176+
break
177+
172178
# Coherence caches are ENDANGERED meaning that we may lose data
173179
print ('Shutdown: Waiting until it is safe to shutdown Coherence server ...')
174180
systime.sleep(5)
@@ -180,6 +186,57 @@ def waitUntilServiceSafeToShutdown(objectName):
180186
systime.sleep(10)
181187
pass
182188

189+
# Checks whether the cache is safe to shut down. The logic is adapted from the Coherence Operator's
190+
# OperatorRestServer.areCacheServicesHA.
191+
192+
def _checkCacheSafeStatusHA(objectName):
193+
try:
194+
fields = ["StorageEnabled", "StorageEnabledCount", "BackupCount", "PartitionsAll",
195+
"OwnedPartitionsPrimary", "StatusHA", "OutgoingTransferCount", "PartitionsEndangered", "PartitionsVulnerable" ]
196+
197+
if objectName.getKeyProperty('service') == 'PartitionedCache':
198+
partitioned_cache_query = "Coherence:name=PartitionedCache,type=Service,*"
199+
partitioned_cache_mbeans = mbs.queryMBeans(ObjectName(partitioned_cache_query), None)
200+
for item in partitioned_cache_mbeans:
201+
fields_dict = {}
202+
attrs = mbs.getAttributes(item.getObjectName(), fields)
203+
for attr in attrs:
204+
fields_dict.update({ attr.getName(): attr.getValue()})
205+
206+
storage_enabled = fields_dict['StorageEnabled']
207+
storage_enabled_count = fields_dict['StorageEnabledCount']
208+
backup_count = fields_dict['BackupCount']
209+
status_ha = fields_dict['StatusHA']
210+
outgoing_transfer_count = fields_dict['OutgoingTransferCount']
211+
partitions_endangered = fields_dict['PartitionsEndangered']
212+
partitions_vulnerable = fields_dict['PartitionsVulnerable']
213+
partitions_all = fields_dict['PartitionsAll']
214+
owned_partitions_primary = fields_dict['OwnedPartitionsPrimary']
215+
216+
if storage_enabled:
217+
if storage_enabled_count > 1 and backup_count > 0 and "ENDANGERED" == status_ha:
218+
print("StatusHA check failed. Service %s has HA status of %s" % (objectName, status_ha))
219+
return False
220+
221+
if outgoing_transfer_count > 0:
222+
print("StatusHA check failed. Service %s distribution in progress" % (objectName))
223+
return False
224+
225+
persistent_query = "Coherence:service=PartitionedCache,type=Persistence,*"
226+
persistent_mbeans = mbs.queryMBeans(ObjectName(persistent_query), None)
227+
if persistent_mbeans.size() == 1:
228+
idle = mbs.getAttribute(persistent_mbeans[0].getObjectName(), 'Idle')
229+
if not idle:
230+
print("StatusHA check failed. Service %s persistence is not idle" % (objectName))
231+
return False
232+
233+
except:
234+
print("Error _checkCacheSafeStatusHA function:")
235+
dumpStack()
236+
237+
return True
238+
239+
183240

184241
#----------------------------------
185242
# Main script

operator/src/test/java/oracle/kubernetes/operator/helpers/ManagedPodHelperTest.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import oracle.kubernetes.operator.work.Step.StepAndPacket;
3636
import oracle.kubernetes.weblogic.domain.DomainConfigurator;
3737
import oracle.kubernetes.weblogic.domain.ServerConfigurator;
38+
import oracle.kubernetes.weblogic.domain.model.Shutdown;
3839
import org.hamcrest.Description;
3940
import org.hamcrest.TypeSafeDiagnosingMatcher;
4041
import org.junit.jupiter.api.Disabled;
@@ -1293,6 +1294,15 @@ void whenOnlyAdminAndSslPortsAvailable_monitoringExporterSpecifiesAdminPort() {
12931294
both(hasJavaOption("-DWLS_PORT=8001")).and(hasJavaOption("-DWLS_SECURE=true")));
12941295
}
12951296

1297+
@Test
1298+
void whenDomainSetShutdownSkippingCoherenceEndangeredStateHasEnvSet() {
1299+
Shutdown shutdown = new Shutdown();
1300+
shutdown.skipWaitingCohEndangeredState(true);
1301+
getConfigurator().withServerPodShutdownSpec(shutdown);
1302+
assertThat(
1303+
getCreatedPodSpecContainer().getEnv(),
1304+
allOf(hasEnvVar("SHUTDOWN_SKIP_WAIT_COH_ENDANGERED_STATE", "true")));
1305+
}
12961306

12971307
@Override
12981308
void setServerPort(int port) {

operator/src/test/java/oracle/kubernetes/weblogic/domain/DomainConfigurator.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import oracle.kubernetes.weblogic.domain.model.DomainSpec;
3333
import oracle.kubernetes.weblogic.domain.model.InitializeDomainOnPV;
3434
import oracle.kubernetes.weblogic.domain.model.Model;
35+
import oracle.kubernetes.weblogic.domain.model.Shutdown;
3536

3637
/**
3738
* Configures a domain, adding settings independently of the version of the domain representation.
@@ -424,6 +425,8 @@ public abstract DomainConfigurator withFluentdConfiguration(boolean watchIntrosp
424425
String credentialName, String fluentdConfig,
425426
List<String> args, List<String> command);
426427

428+
public abstract DomainConfigurator withServerPodShutdownSpec(Shutdown shutdown);
429+
427430
/**
428431
* Adds a default server configuration to the domain, if not already present.
429432
*

operator/src/test/java/oracle/kubernetes/weblogic/domain/model/DomainCommonConfigurator.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,12 @@ public DomainConfigurator withFluentdConfiguration(boolean watchIntrospectorLog,
239239
return this;
240240
}
241241

242+
@Override
243+
public DomainConfigurator withServerPodShutdownSpec(Shutdown shutdown) {
244+
getDomainSpec().setShutdown(shutdown);
245+
return this;
246+
}
247+
242248
private AdminServer getOrCreateAdminServer() {
243249
return getDomainSpec().getOrCreateAdminServer();
244250
}
@@ -821,6 +827,7 @@ public ServerConfigurator withPriorityClassName(String priorityClassName) {
821827
getDomainSpec().setPriorityClassName(priorityClassName);
822828
return this;
823829
}
830+
824831
}
825832

826833
class ClusterConfiguratorImpl implements ClusterConfigurator {

0 commit comments

Comments
 (0)