10000 Make container and image removal/stop idempotent by saschagrunert · Pull Request #8431 · cri-o/cri-o · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Make container an 8000 d image removal/stop idempotent #8431

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions server/container_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"path/filepath"

"github.com/containers/storage"
"github.com/containers/storage/pkg/truncindex"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
types "k8s.io/cri-api/pkg/apis/runtime/v1"
Expand All @@ -26,6 +27,12 @@ func (s *Server) RemoveContainer(ctx context.Context, req *types.RemoveContainer
// save container description to print
c, err := s.GetContainerFromShortID(ctx, req.ContainerId)
if err != nil {
// The RemoveContainer RPC is idempotent, and must not return an error
// if the container has already been removed. Ref:
// https://github.com/kubernetes/cri-api/blob/c20fa40/pkg/apis/runtime/v1/api.proto#L74-L75
if errors.Is(err, truncindex.ErrNotExist) {
return &types.RemoveContainerResponse{}, nil
}
return nil, status.Errorf(codes.NotFound, "could not find container %q: %v", req.ContainerId, err)
}

Expand Down
10 changes: 10 additions & 0 deletions server/container_remove_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ var _ = t.Describe("ContainerRemove", func() {
Expect(err).ToNot(HaveOccurred())
})

It("should succeed if container is not found", func() {
// Given
// When
_, err := sut.RemoveContainer(context.Background(),
&types.RemoveContainerRequest{ContainerId: "id"})

// Then
Expect(err).NotTo(HaveOccurred())
})

It("should fail on container remove error", func() {
// Given
// When
Expand Down
8 changes: 8 additions & 0 deletions server/container_stop.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package server

import (
"errors"
"fmt"

"github.com/containers/storage/pkg/truncindex"
"golang.org/x/net/context"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
Expand All @@ -20,6 +22,12 @@ func (s *Server) StopContainer(ctx context.Context, req *types.StopContainerRequ
log.Infof(ctx, "Stopping container: %s (timeout: %ds)", req.ContainerId, req.Timeout)
c, err := s.GetContainerFromShortID(ctx, req.ContainerId)
if err != nil {
// The StopContainer RPC is idempotent, and must not return an error if
// the container has already been stopped. Ref:
// https://github.com/kubernetes/cri-api/blob/c20fa40/pkg/apis/runtime/v1/api.proto#L67-L68
if errors.Is(err, truncindex.ErrNotExist) {
return &types.StopContainerResponse{}, nil
}
return nil, status.Errorf(codes.NotFound, "could not find container %q: %v", req.ContainerId, err)
}

Expand Down
4 changes: 2 additions & 2 deletions server/container_stop_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ var _ = t.Describe("ContainerStop", func() {
Expect(err).ToNot(HaveOccurred())
})

It("should fail with invalid container id", func() {
It("should succeed with not existing container ID", func() {
// Given
// When
_, err := sut.StopContainer(context.Background(),
&types.StopContainerRequest{ContainerId: "id"})

// Then
Expect(err).To(HaveOccurred())
Expect(err).NotTo(HaveOccurred())
})
})
})
43 changes: 29 additions & 14 deletions server/image_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
"errors"
"fmt"

storagetypes "github.com/containers/storage"
types "k8s.io/cri-api/pkg/apis/runtime/v1"

"github.com/cri-o/cri-o/internal/log"
"github.com/cri-o/cri-o/internal/storage"
)

// RemoveImage removes the image.
Expand All @@ -29,31 +31,36 @@
}

func (s *Server) removeImage(ctx context.Context, imageRef string) (untagErr error) {
var deleted bool
ctx, span := log.StartSpan(ctx)
defer span.End()

// FIXME: The CRI API definition says
// This call is idempotent, and must not return an error if the image has
// already been removed.
// and this code doesn’t seem to conform to that.

// Actually Kubelet is only ever calling this with full image IDs.
// So we don't really need to accept ID prefixes nor short names;
// or is there another user?!

if id := s.StorageImageServer().HeuristicallyTryResolvingStringAsIDPrefix(imageRef); id != nil {
return s.StorageImageServer().DeleteImage(s.config.SystemContext, *id)
if err := s.StorageImageServer().DeleteImage(s.config.SystemContext, *id); err != nil {
if errors.Is(err, storagetypes.ErrImageUnknown) {
// The RemoveImage RPC is idempotent, and must not return an
// error if the image has already been removed. Ref:
// https://github.com/kubernetes/cri-api/blob/c20fa40/pkg/apis/runtime/v1/api.proto#L156-L157
return nil
}

return fmt.Errorf("delete image: %w", err)

Check warning on line 46 in server/image_remove.go

View check run for this annotation

Codecov / codecov/patch

server/image_remove.go#L46

Added line #L46 was not covered by tests
}
return nil
}

var (
deleted bool
statusErr error
)
potentialMatches, err := s.StorageImageServer().CandidatesForPotentiallyShortImageName(s.config.SystemContext, imageRef)
if err != nil {
return err
}
for _, name := range potentialMatches {
status, err := s.StorageImageServer().ImageStatusByName(s.config.SystemContext, name)
if err != nil {
log.Errorf(ctx, "Error getting image status %s: %v", name, err)
var status *storage.ImageResult
status, statusErr = s.StorageImageServer().ImageStatusByName(s.config.SystemContext, name)
if statusErr != nil {
log.Errorf(ctx, "Error getting image status %s: %v", name, statusErr)

Check warning on line 63 in server/image_remove.go

View check run for this annotation

Codecov / codecov/patch

server/image_remove.go#L63

Added line #L63 was not covered by tests
continue
}
if status.MountPoint != "" {
Expand Down Expand Up @@ -82,5 +89,13 @@
if !deleted && untagErr != nil {
return untagErr
}

if errors.Is(statusErr, storagetypes.ErrNotAnImage) {
// The RemoveImage RPC is idempotent, and must not return an
// error if the image has already been removed. Ref:
// https://github.com/kubernetes/cri-api/blob/c20fa40/pkg/apis/runtime/v1/api.proto#L156-L157
return nil

Check warning on line 97 in server/image_remove.go

View check run for this annotation

Codecov / codecov/patch

server/image_remove.go#L97

Added line #L97 was not covered by tests
}

return nil
}
24 changes: 24 additions & 0 deletions server/image_remove_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package server_test

import (
"context"
"fmt"

storagetypes "github.com/containers/storage"
"github.com/golang/mock/gomock"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
Expand Down Expand Up @@ -113,5 +115,27 @@ var _ = t.Describe("ImageRemove", func() {
// Then
Expect(err).To(HaveOccurred())
})

// https://github.com/kubernetes/cri-api/blob/c20fa40/pkg/apis/runtime/v1/api.proto#L156-L157
It("should succeed if image is not found", func() {
// Given
const testSHA256 = "2a03a6059f21e150ae84b0973863609494aad70f0a80eaeb64bddd8d92465812"
parsedTestSHA256, err := storage.ParseStorageImageIDFromOutOfProcessData(testSHA256)
Expect(err).ToNot(HaveOccurred())
gomock.InOrder(
imageServerMock.EXPECT().HeuristicallyTryResolvingStringAsIDPrefix(testSHA256).
Return(&parsedTestSHA256),
imageServerMock.EXPECT().DeleteImage(
gomock.Any(), parsedTestSHA256).
Return(fmt.Errorf("invalid image: %w", storagetypes.ErrImageUnknown)),
)

// When
_, err = sut.RemoveImage(context.Background(),
&types.RemoveImageRequest{Image: &types.ImageSpec{Image: testSHA256}})

// Then
Expect(err).ToNot(HaveOccurred())
})
})
})
3 changes: 1 addition & 2 deletions test/restore.bats
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ function teardown() {

start_crio

run -1 crictl stop "$ctr_id"
[[ "${output}" == *"not found"* ]]
crictl stop "$ctr_id"
}

@test "crio restore with bad state and pod removed" {
Expand Down
Loading
0