From 53457cbc6a431561d299554842e2ad4affade13d Mon Sep 17 00:00:00 2001 From: Alputer Date: Fri, 28 Mar 2025 09:33:29 +0100 Subject: [PATCH] feat(openapi): add K8s resource requests and limits in reana.yaml (#486) Allow users to set CPU and memory requests/limits via `reana.yaml`. If not specified, default values configured by cluster admins will be applied. Closes reanahub/reana#883 --- reana_commons/api_client.py | 14 ++- reana_commons/config.py | 5 +- reana_commons/errors.py | 26 ++++- reana_commons/job_utils.py | 23 ++++- .../openapi_specifications/reana_server.json | 97 ++++++++++++++++++- .../schemas/reana_analysis_schema.json | 15 +++ tests/test_job_utils.py | 46 ++++++++- 7 files changed, 218 insertions(+), 8 deletions(-) diff --git a/reana_commons/api_client.py b/reana_commons/api_client.py index 7e342a43..a5cde10a 100644 --- a/reana_commons/api_client.py +++ b/reana_commons/api_client.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of REANA. -# Copyright (C) 2018, 2019, 2020, 2021, 2022 CERN. +# Copyright (C) 2018, 2019, 2020, 2021, 2022, 2025 CERN. # # REANA is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
@@ -99,6 +99,9 @@ def submit( # noqa: C901 compute_backend=None, kerberos=False, kubernetes_uid=None, + kubernetes_cpu_request=None, + kubernetes_cpu_limit=None, + kubernetes_memory_request=None, kubernetes_memory_limit=None, unpacked_img=False, voms_proxy=False, @@ -166,6 +169,15 @@ def submit( # noqa: C901 if kubernetes_uid: job_spec["kubernetes_uid"] = kubernetes_uid + if kubernetes_cpu_request: + job_spec["kubernetes_cpu_request"] = kubernetes_cpu_request + + if kubernetes_cpu_limit: + job_spec["kubernetes_cpu_limit"] = kubernetes_cpu_limit + + if kubernetes_memory_request: + job_spec["kubernetes_memory_request"] = kubernetes_memory_request + if kubernetes_memory_limit: job_spec["kubernetes_memory_limit"] = kubernetes_memory_limit diff --git a/reana_commons/config.py b/reana_commons/config.py index 87296b29..291600b7 100644 --- a/reana_commons/config.py +++ b/reana_commons/config.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of REANA. -# Copyright (C) 2018, 2019, 2020, 2021, 2022, 2023, 2024 CERN. +# Copyright (C) 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025 CERN. # # REANA is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -420,6 +420,9 @@ def default_workspace(): ) """Kubernetes valid memory format regular expression e.g. Ki, M, Gi, G, etc.""" +KUBERNETES_CPU_FORMAT = r"^(?P<value_millicpu>[1-9]\d*)m$|^(?P<value_cpu>(0*[1-9]\d*(\.\d+)?|0*\.\d*[1-9]\d*))$" +"""Kubernetes valid CPU format regex. Supports formats such as "0.1" (or "100m"), "0.9" (or "900m"). 
Values must be greater than 0.""" + statuses = os.getenv("REANA_RUNTIME_KUBERNETES_KEEP_ALIVE_JOBS_WITH_STATUSES", []) REANA_RUNTIME_KUBERNETES_KEEP_ALIVE_JOBS_WITH_STATUSES = ( statuses.split(",") if statuses else statuses diff --git a/reana_commons/errors.py b/reana_commons/errors.py index 206fa8bd..6697f7b8 100644 --- a/reana_commons/errors.py +++ b/reana_commons/errors.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of REANA. -# Copyright (C) 2018, 2019, 2020, 2021, 2022 CERN. +# Copyright (C) 2018, 2019, 2020, 2021, 2022, 2025 CERN. # # REANA is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -86,6 +86,30 @@ def __init__(self, message): self.message = message +class REANAKubernetesWrongCPUFormat(Exception): + """Kubernetes CPU value has wrong format.""" + + def __init__(self, message): + """Initialize REANAKubernetesWrongCPUFormat exception.""" + self.message = message + + +class REANAKubernetesCPULimitExceeded(Exception): + """Kubernetes CPU value exceeds max limit.""" + + def __init__(self, message): + """Initialize REANAKubernetesCPULimitExceeded exception.""" + self.message = message + + +class REANAKubernetesRequestExceedsLimit(Exception): + """Kubernetes resource request exceeds its corresponding limit.""" + + def __init__(self, message): + """Initialize REANAKubernetesRequestExceedsLimit exception.""" + self.message = message + + class REANAJobControllerSubmissionError(Exception): """REANA Job submission exception.""" diff --git a/reana_commons/job_utils.py b/reana_commons/job_utils.py index 239ad2b6..f5825780 100644 --- a/reana_commons/job_utils.py +++ b/reana_commons/job_utils.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of REANA. -# Copyright (C) 2021 CERN. +# Copyright (C) 2021, 2025 CERN. # # REANA is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. 
@@ -10,7 +10,7 @@ import base64 import re -from reana_commons.config import KUBERNETES_MEMORY_FORMAT +from reana_commons.config import KUBERNETES_CPU_FORMAT, KUBERNETES_MEMORY_FORMAT from reana_commons.errors import REANAKubernetesWrongMemoryFormat @@ -24,6 +24,11 @@ def deserialise_job_command(command): return base64.b64decode(command).decode("utf-8") +def validate_kubernetes_cpu(memory): + """Verify that provided value matches the Kubernetes cpu format.""" + return re.match(KUBERNETES_CPU_FORMAT, memory) is not None + + def validate_kubernetes_memory(memory): """Verify that provided value matches the Kubernetes memory format.""" return re.match(KUBERNETES_MEMORY_FORMAT, memory) is not None @@ -53,3 +58,17 @@ def kubernetes_memory_to_bytes(memory): } return value * multiplier[unit][power] + + +def kubernetes_cpu_to_millicores(cpu): + """Convert Kubernetes CPU format to millicores (mCPU).""" + match = re.match(KUBERNETES_CPU_FORMAT, str(cpu)) + if not match: + raise ValueError(f"Kubernetes CPU value '{cpu}' has wrong format.") + + cpu_values = match.groupdict() + + if cpu_values.get("value_millicpu"): + return int(cpu_values.get("value_millicpu")) + else: + return round(float(cpu_values.get("value_cpu")) * 1000) diff --git a/reana_commons/openapi_specifications/reana_server.json b/reana_commons/openapi_specifications/reana_server.json index 327c70a8..ec5d1b4e 100644 --- a/reana_commons/openapi_specifications/reana_server.json +++ b/reana_commons/openapi_specifications/reana_server.json @@ -2,7 +2,7 @@ "info": { "description": "Submit workflows to be run on REANA Cloud", "title": "REANA Server", - "version": "0.9.3" + "version": "0.9.4" }, "paths": { "/account/settings/linkedaccounts/": {}, @@ -366,14 +366,38 @@ "title": "Default workspace", "value": "/usr/share" }, + "kubernetes_cpu_limit": { + "title": "Default CPU limit for Kubernetes jobs", + "value": "2" + }, + "kubernetes_cpu_request": { + "title": "Default CPU request for Kubernetes jobs", + "value": "1" + }, + 
"kubernetes_max_cpu_limit": { + "title": "Maximum allowed CPU limit for Kubernetes jobs", + "value": "4" + }, + "kubernetes_max_cpu_request": { + "title": "Maximum allowed CPU request for Kubernetes jobs", + "value": "2" + }, "kubernetes_max_memory_limit": { "title": "Maximum allowed memory limit for Kubernetes jobs", - "value": "10Gi" + "value": "10Gi" + }, + "kubernetes_max_memory_request": { + "title": "Maximum allowed memory request for Kubernetes jobs", + "value": "5Gi" }, "kubernetes_memory_limit": { "title": "Default memory limit for Kubernetes jobs", "value": "3Gi" }, + "kubernetes_memory_request": { + "title": "Default memory request for Kubernetes jobs", + "value": "1Gi" + }, "maximum_kubernetes_jobs_timeout": { "title": "Maximum timeout for Kubernetes jobs", "value": "1209600" @@ -408,6 +432,28 @@ }, "type": "object" }, + "default_kubernetes_cpu_limit": { + "properties": { + "title": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "type": "object" + }, + "default_kubernetes_cpu_request": { + "properties": { + "title": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "type": "object" + }, "default_kubernetes_jobs_timeout": { "properties": { "title": { @@ -430,6 +476,17 @@ }, "type": "object" }, + "default_kubernetes_memory_request": { + "properties": { + "title": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "type": "object" + }, "default_workspace": { "properties": { "title": { @@ -441,6 +498,30 @@ }, "type": "object" }, + "kubernetes_max_cpu_limit": { + "properties": { + "title": { + "type": "string" + }, + "value": { + "type": "string", + "x-nullable": true + } + }, + "type": "object" + }, + "kubernetes_max_cpu_request": { + "properties": { + "title": { + "type": "string" + }, + "value": { + "type": "string", + "x-nullable": true + } + }, + "type": "object" + }, "kubernetes_max_memory_limit": { "properties": { "title": { @@ -453,6 +534,18 @@ }, "type": "object" }, + 
"kubernetes_max_memory_request": { + "properties": { + "title": { + "type": "string" + }, + "value": { + "type": "string", + "x-nullable": true + } + }, + "type": "object" + }, "maximum_interactive_session_inactivity_period": { "properties": { "title": { diff --git a/reana_commons/validation/schemas/reana_analysis_schema.json b/reana_commons/validation/schemas/reana_analysis_schema.json index 692659ef..ec273990 100644 --- a/reana_commons/validation/schemas/reana_analysis_schema.json +++ b/reana_commons/validation/schemas/reana_analysis_schema.json @@ -275,6 +275,21 @@ "title": "Kubernetes job timeout", "description": "Maximum time for the step to run (number of seconds)" }, + "kubernetes_cpu_request": { + "type": "string", + "title": "Kubernetes CPU request", + "description": "Kubernetes CPU request (e.g. 1 - read more about the expected CPU values on the official Kubernetes documentation: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-cpu)" + }, + "kubernetes_cpu_limit": { + "type": "string", + "title": "Kubernetes CPU limit", + "description": "Kubernetes CPU limit (e.g. 2 - read more about the expected CPU values on the official Kubernetes documentation: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-cpu)" + }, + "kubernetes_memory_request": { + "type": "string", + "title": "Kubernetes memory request", + "description": "Kubernetes memory request (e.g. 128Mi - read more about the expected memory values on the official Kubernetes documentation: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-memory)" + }, "kubernetes_memory_limit": { "type": "string", "title": "Kubernetes memory limit", diff --git a/tests/test_job_utils.py b/tests/test_job_utils.py index a6ad3b80..d12f6a3b 100644 --- a/tests/test_job_utils.py +++ b/tests/test_job_utils.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of REANA. -# Copyright (C) 2021 CERN. 
+# Copyright (C) 2021, 2025 CERN. # # REANA is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -13,8 +13,10 @@ from reana_commons.job_utils import ( deserialise_job_command, + kubernetes_cpu_to_millicores, kubernetes_memory_to_bytes, serialise_job_command, + validate_kubernetes_cpu, validate_kubernetes_memory, ) @@ -69,6 +71,48 @@ def test_job_serialisation_deserialisation(command_string, expected_output): ).decode("utf-8") +@pytest.mark.parametrize( + "cpu,output", + [ + ("100m", True), + ("250m", True), + ("1", True), + ("2.5", True), + ("0.1", True), + ("0.001", True), + ("1500m", True), + ("0m", False), + ("-100m", False), + ("1.5m", False), + ("2 cores", False), + ("one", False), + ("1000millicores", False), + ], +) +def test_validate_kubernetes_cpu_format(cpu, output): + """Test validation of K8s CPU format.""" + assert validate_kubernetes_cpu(cpu) is output + + +@pytest.mark.parametrize( + "k8s_cpu,millicores", + [ + (100, 100000), + (0.1, 100), + (1, 1000), + (1.5, 1500), + ("100m", 100), + ("250m", 250), + ("0.5", 500), + ("2.25", 2250), + ("1500m", 1500), + ], +) +def test_kubernetes_cpu_to_millicores(k8s_cpu, millicores): + """Test conversion of K8s CPU format to millicores.""" + assert kubernetes_cpu_to_millicores(k8s_cpu) == millicores + + @pytest.mark.parametrize( "memory,output", [