From d5de1f9d114808ef415e799c687efec2ea0f56f0 Mon Sep 17 00:00:00 2001 From: Alan Rominger Date: Mon, 20 Feb 2023 09:56:38 -0500 Subject: [PATCH 1/4] Make use of new keepalive messages from ansible-runner Make setting API configurable and process keepalive events when seen in the event callback Use env var in pod spec and make it specific to K8S --- awx/main/conf.py | 10 ++++++++++ awx/main/tasks/callback.py | 2 ++ awx/main/tasks/receptor.py | 4 ++++ awx/settings/defaults.py | 5 +++++ 4 files changed, 21 insertions(+) diff --git a/awx/main/conf.py b/awx/main/conf.py index dab0543a1a8b..99b995b113d5 100644 --- a/awx/main/conf.py +++ b/awx/main/conf.py @@ -282,6 +282,16 @@ placeholder={'HTTP_PROXY': 'myproxy.local:8080'}, ) +register( + 'AWX_RUNNER_KEEPALIVE_SECONDS', + field_class=fields.IntegerField, + label=_('K8S Ansible Runner Keep-Alive Message Interval'), + help_text=_('Only applies to K8S deployments and container_group jobs. If not 0, send a message every so-many seconds to keep connection open.'), + category=_('Jobs'), + category_slug='jobs', + placeholder=240, # intended to be under common 5 minute idle timeout +) + register( 'GALAXY_TASK_ENV', field_class=fields.KeyValueField, diff --git a/awx/main/tasks/callback.py b/awx/main/tasks/callback.py index 92bfc4036882..0046d07d823b 100644 --- a/awx/main/tasks/callback.py +++ b/awx/main/tasks/callback.py @@ -85,6 +85,8 @@ def event_handler(self, event_data): # which generate job events from two 'streams': # ansible-inventory and the awx.main.commands.inventory_import # logger + if event_data.get('event') == 'keepalive': + return if event_data.get(self.event_data_key, None): if self.event_data_key != 'job_id': diff --git a/awx/main/tasks/receptor.py b/awx/main/tasks/receptor.py index 006c805943c8..9cb4d49efe53 100644 --- a/awx/main/tasks/receptor.py +++ b/awx/main/tasks/receptor.py @@ -526,6 +526,10 @@ def pod_definition(self): pod_spec['spec']['containers'][0]['image'] = ee.image pod_spec['spec']['containers'][0]['args'] = ['ansible-runner', 'worker', '--private-data-dir=/runner'] + if settings.AWX_RUNNER_KEEPALIVE_SECONDS: + pod_spec['spec']['containers'][0].setdefault('env', []) + pod_spec['spec']['containers'][0]['env'].append({'name': 'ANSIBLE_RUNNER_KEEPALIVE_SECONDS', 'value': str(settings.AWX_RUNNER_KEEPALIVE_SECONDS)}) + # Enforce EE Pull Policy pull_options = {"always": "Always", "missing": "IfNotPresent", "never": "Never"} if self.task and self.task.instance.execution_environment: diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index 4d18540bcda6..74a36b3e2de7 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -929,6 +929,11 @@ # Allow ansible-runner to save ansible output (may cause performance issues) AWX_RUNNER_SUPPRESS_OUTPUT_FILE = True +# https://github.com/ansible/ansible-runner/pull/1191/files +# Interval in seconds between the last message and keep-alive messages that +# ansible-runner will send +AWX_RUNNER_KEEPALIVE_SECONDS = 0 + # Delete completed work units in receptor RECEPTOR_RELEASE_WORK = True From 6fa22f5be29262d52e9ee95bc37dc133ccb54de9 Mon Sep 17 00:00:00 2001 From: Alan Rominger Date: Tue, 21 Feb 2023 16:17:48 -0500 Subject: [PATCH 2/4] Add UI for the new setting --- .../screens/Setting/Jobs/JobsEdit/JobsEdit.js | 5 +++++ .../Setting/Jobs/JobsEdit/JobsEdit.test.js | 1 + .../shared/data.allSettingOptions.json | 19 +++++++++++++++++++ .../Setting/shared/data.allSettings.json | 1 + .../Setting/shared/data.jobSettings.json | 1 + 5 files changed, 27 insertions(+) diff --git a/awx/ui/src/screens/Setting/Jobs/JobsEdit/JobsEdit.js b/awx/ui/src/screens/Setting/Jobs/JobsEdit/JobsEdit.js index 52e216e41efa..d258fdec4630 100644 --- a/awx/ui/src/screens/Setting/Jobs/JobsEdit/JobsEdit.js +++ b/awx/ui/src/screens/Setting/Jobs/JobsEdit/JobsEdit.js @@ -150,6 +150,11 @@ function JobsEdit() { type={options?.SCHEDULE_MAX_JOBS ? 'number' : undefined} isRequired={Boolean(options?.SCHEDULE_MAX_JOBS)} /> + ', () => { const { EVENT_STDOUT_MAX_BYTES_DISPLAY, STDOUT_MAX_BYTES_DISPLAY, + AWX_RUNNER_KEEPALIVE_SECONDS, ...jobRequest } = mockJobSettings; expect(SettingsAPI.updateAll).toHaveBeenCalledWith(jobRequest); diff --git a/awx/ui/src/screens/Setting/shared/data.allSettingOptions.json b/awx/ui/src/screens/Setting/shared/data.allSettingOptions.json index b654d1bd90d0..3eaf93eff035 100644 --- a/awx/ui/src/screens/Setting/shared/data.allSettingOptions.json +++ b/awx/ui/src/screens/Setting/shared/data.allSettingOptions.json @@ -344,6 +344,16 @@ "category_slug": "jobs", "default": 10 }, + "AWX_RUNNER_KEEPALIVE_SECONDS": { + "type": "integer", + "required": true, + "label": "K8S Ansible Runner Keep-Alive Message Interval", + "help_text": "Only applies to K8S deployments and container_group jobs. If not 0, send a message every so-many seconds to keep connection open.", + "category": "Jobs", + "category_slug": "jobs", + "placeholder": 240, + "default": 0 + }, "AWX_ANSIBLE_CALLBACK_PLUGINS": { "type": "list", "required": false, @@ -4098,6 +4108,15 @@ "category_slug": "jobs", "defined_in_file": false }, + "AWX_RUNNER_KEEPALIVE_SECONDS": { + "type": "integer", + "label": "K8S Ansible Runner Keep-Alive Message Interval", + "help_text": "Only applies to K8S deployments and container_group jobs. If not 0, send a message every so-many seconds to keep connection open.", + "category": "Jobs", + "category_slug": "jobs", + "placeholder": 240, + "default": 0 + }, "AWX_ANSIBLE_CALLBACK_PLUGINS": { "type": "list", "label": "Ansible Callback Plugins", diff --git a/awx/ui/src/screens/Setting/shared/data.allSettings.json b/awx/ui/src/screens/Setting/shared/data.allSettings.json index e5136f4b5842..b2eaea2a1245 100644 --- a/awx/ui/src/screens/Setting/shared/data.allSettings.json +++ b/awx/ui/src/screens/Setting/shared/data.allSettings.json @@ -51,6 +51,7 @@ "STDOUT_MAX_BYTES_DISPLAY":1048576, "EVENT_STDOUT_MAX_BYTES_DISPLAY":1024, "SCHEDULE_MAX_JOBS":10, + "AWX_RUNNER_KEEPALIVE_SECONDS": 0, "AWX_ANSIBLE_CALLBACK_PLUGINS":[], "DEFAULT_JOB_TIMEOUT":0, "DEFAULT_JOB_IDLE_TIMEOUT":0, diff --git a/awx/ui/src/screens/Setting/shared/data.jobSettings.json b/awx/ui/src/screens/Setting/shared/data.jobSettings.json index 29567a8f8cc1..6c001cca3f8b 100644 --- a/awx/ui/src/screens/Setting/shared/data.jobSettings.json +++ b/awx/ui/src/screens/Setting/shared/data.jobSettings.json @@ -19,6 +19,7 @@ "STDOUT_MAX_BYTES_DISPLAY": 1048576, "EVENT_STDOUT_MAX_BYTES_DISPLAY": 1024, "SCHEDULE_MAX_JOBS": 10, + "AWX_RUNNER_KEEPALIVE_SECONDS": 0, "AWX_ANSIBLE_CALLBACK_PLUGINS": [], "DEFAULT_JOB_TIMEOUT": 0, "DEFAULT_JOB_IDLE_TIMEOUT": 0, From b143df31834fe98afb2059999d07be81727dfa21 Mon Sep 17 00:00:00 2001 From: Michael Abashian Date: Tue, 21 Feb 2023 17:05:14 -0500 Subject: [PATCH 3/4] Fix broken UI test --- awx/ui/src/screens/Setting/Jobs/JobsEdit/JobsEdit.test.js | 1 - 1 file changed, 1 deletion(-) diff --git a/awx/ui/src/screens/Setting/Jobs/JobsEdit/JobsEdit.test.js b/awx/ui/src/screens/Setting/Jobs/JobsEdit/JobsEdit.test.js index 0d9d2a9ca276..3f8b6dd220bb 100644 --- a/awx/ui/src/screens/Setting/Jobs/JobsEdit/JobsEdit.test.js +++ b/awx/ui/src/screens/Setting/Jobs/JobsEdit/JobsEdit.test.js @@ -79,7 +79,6 @@ describe('', () => { const { EVENT_STDOUT_MAX_BYTES_DISPLAY, STDOUT_MAX_BYTES_DISPLAY, - AWX_RUNNER_KEEPALIVE_SECONDS, ...jobRequest } = mockJobSettings; expect(SettingsAPI.updateAll).toHaveBeenCalledWith(jobRequest); From 90f54b98cd0183091228b78c46d289f175f94e93 Mon Sep 17 00:00:00 2001 From: Alan Rominger Date: Wed, 22 Feb 2023 14:32:30 -0500 Subject: [PATCH 4/4] Update keepalive setting help_text to be more direct Co-authored-by: Shane McDonald --- awx/main/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awx/main/conf.py b/awx/main/conf.py index 99b995b113d5..2dbf5e127e39 100644 --- a/awx/main/conf.py +++ b/awx/main/conf.py @@ -286,7 +286,7 @@ 'AWX_RUNNER_KEEPALIVE_SECONDS', field_class=fields.IntegerField, label=_('K8S Ansible Runner Keep-Alive Message Interval'), - help_text=_('Only applies to K8S deployments and container_group jobs. If not 0, send a message every so-many seconds to keep connection open.'), + help_text=_('Only applies to jobs running in a Container Group. If not 0, send a message every so-many seconds to keep connection open.'), category=_('Jobs'), category_slug='jobs', placeholder=240, # intended to be under common 5 minute idle timeout