@@ -626,7 +626,7 @@ func (h *HatcherySwarm) listAwolWorkers(dockerClientName string, containers []ty
626
626
//Checking workers
627
627
oldContainers := []types.Container {}
628
628
for _ , c := range workers {
629
- if ! strings .Contains (c .Status , "Exited" ) && time .Now ().Add (- 1 * time .Minute ).Unix () < c .Created {
629
+ if ! strings .Contains (c .Status , "Exited" ) && time .Now ().Add (- 3 * time .Minute ).Unix () < c .Created {
630
630
log .Debug ("hatchery> swarm> listAwolWorkers> container %s(status=%s) is too young" , c .Names [0 ], c .Status )
631
631
continue
632
632
}
@@ -681,12 +681,25 @@ func (h *HatcherySwarm) killAwolWorker(ctx context.Context) error {
681
681
}
682
682
}
683
683
684
+ // creating a map of container names
685
+ mContainers := map [string ]struct {}{}
686
+ for i := range containers {
687
+ name := strings .TrimPrefix (containers [i ].Names [0 ], "/" ) // docker returns name prefixed by a /
688
+ mContainers [name ] = struct {}{}
689
+ }
690
+
684
691
// Checking services
685
692
for _ , c := range containers {
686
- if c .Labels ["service_worker" ] == "" || c .Names [0 ] != c .Labels ["service_worker" ] {
693
+ // checks if the container is a service based on its labels
694
+ if c .Labels ["service_worker" ] == "" {
687
695
continue
688
696
}
689
- if ! strings .Contains (c .Status , "Exited" ) && time .Now ().Add (- 1 * time .Minute ).Unix () < c .Created {
697
+ // if the worker associated with this service is still alive, do not kill the service
698
+ if _ , workerStillAlive := mContainers [c .Labels ["service_worker" ]]; workerStillAlive {
699
+ continue
700
+ }
701
+
702
+ if ! strings .Contains (c .Status , "Exited" ) && time .Now ().Add (- 3 * time .Minute ).Unix () < c .Created {
690
703
log .Debug ("hatchery> swarm> killAwolWorker> container %s(status=%s) is too young - service associated to worker %s" , c .Names [0 ], c .Status , c .Labels ["service_worker" ])
691
704
continue
692
705
}
0 commit comments