Open
Description
What happened + What you expected to happen
I have a long-running Spring Boot service. Every HTTP request triggers the method below, which creates a new actor on each call and kills it after execution.
The call sometimes succeeds and sometimes fails.
- my code:
private synchronized static void rayTest() {
System.setProperty("ray.address", "xxxx:6379");
System.setProperty("ray.job.code-search-path", "/opt/v1/");
if (!Ray.isInitialized()) {
Ray.init();
log.info("Ray client init success. {}", Ray.getRuntimeContext().getCurrentNodeId());
}
// Define a Python class.
PyActorClass actorClass = PyActorClass.of("ray_demo", "Counter");
// Create a Python actor and call actor method.
PyActorHandle actor = Ray.actor(actorClass).remote();
ObjectRef objRef1 = actor.task(PyActorMethod.of("increment", int.class)).remote();
Assert.assertEquals(objRef1.get(), 1);
System.out.println("increment count by java,result = " + objRef1.g
99C4
et());
ObjectRef objRef2 = actor.task(PyActorMethod.of("increment", int.class)).remote();
Assert.assertEquals(objRef2.get(), 2);
System.out.println("increment count by java,result = " + objRef2.get());
actor.kill();
}
# ray_demo.py
import ray
from typing import List
@ray.remote
class Counter(object):
    """Ray actor that holds an integer counter starting at zero."""

    def __init__(self) -> None:
        # Current count; incremented by one on every increment() call.
        self.value = 0

    def increment(self) -> int:
        """Add one to the counter and return the updated value."""
        self.value += 1
        return self.value
- actor error:
1:job_id:e6050000
2023-07-20 04:59:19,150 ERROR worker.py:861 -- Worker exits with an exit code None. The worker may have exceeded K8s pod memory limits.
Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 1796, in ray._raylet.task_execution_handler
File "python/ray/_raylet.pyx", line 1656, in ray._raylet.execute_task_with_cancellation_handler
File "/home/ray/anaconda3/lib/python3.8/site-packages/ray/_private/function_manager.py", line 559, in load_actor_class
actor_class = self._load_actor_class_from_local(
File "/home/ray/anaconda3/lib/python3.8/site-packages/ray/_private/function_manager.py", line 619, in _load_actor_class_from_local
object = self.load_function_or_class_from_local(module_name, class_name)
File "/home/ray/anaconda3/lib/python3.8/site-packages/ray/_private/function_manager.py", line 139, in load_function_or_class_from_local
module = importlib.import_module(module_name)
File "/home/ray/anaconda3/lib/python3.8/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1014, in _gcd_import
File "<frozen importlib._bootstrap>", line 991, in _find_and_load
File "<frozen importlib._bootstrap>", line 973, in _find_and_load_unlocked
ModuleNotFoundError: No module named 'ray_demo'
An unexpected internal error occurred while the worker was executing a task.
Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 1796, in ray._raylet.task_execution_handler
File "python/ray/_raylet.pyx", line 1656, in ray._raylet.execute_task_with_cancellation_handler
File "/home/ray/anaconda3/lib/python3.8/site-packages/ray/_private/function_manager.py", line 559, in load_actor_class
actor_class = self._load_actor_class_from_local(
File "/home/ray/anaconda3/lib/python3.8/site-packages/ray/_private/function_manager.py", line 619, in _load_actor_class_from_local
object = self.load_function_or_class_from_local(module_name, class_name)
File "/home/ray/anaconda3/lib/python3.8/site-packages/ray/_private/function_manager.py", line 139, in load_function_or_class_from_local
module = importlib.import_module(module_name)
File "/home/ray/anaconda3/lib/python3.8/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1014, in _gcd_import
File "<frozen importlib._bootstrap>", line 991, in _find_and_load
File "<frozen importlib._bootstrap>", line 973, in _find_and_load_unlocked
ModuleNotFoundError: No module named 'ray_demo'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "python/ray/_raylet.pyx", line 1838, in ray._raylet.task_execution_handler
SystemExit
Versions / Dependencies
ray version: 2.5.0
python: 3.8.10
java version "1.8.0_301"
Reproduction script
The failure is intermittent: it does not happen on every request, but it has been reproduced many times with the code above.
Issue Severity
High: It blocks me from completing my task.