Fix handling of file capabilities. by copybara-service[bot] · Pull Request #11704 · google/gvisor · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Fix handling of file capabilities. #11704

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ iptables-tests: load-iptables $(RUNTIME_BIN)
@$(call install_runtime,$(RUNTIME),--net-raw)
@$(call test_runtime,$(RUNTIME),--test_env=TEST_NET_RAW=true //test/iptables:iptables_test)
@$(call install_runtime,$(RUNTIME)-nftables,--net-raw --reproduce-nftables)
@$(call test_runtime,$(RUNTIME)-nftables, --test_output=all //test/iptables:nftables_test --test_arg=$(RUNTIME)-nftables)
@$(call test_runtime,$(RUNTIME)-nftables,--test_env=TEST_NET_RAW=true --test_output=all //test/iptables:nftables_test)
.PHONY: iptables-tests

packetdrill-tests: load-packetdrill $(RUNTIME_BIN)
Expand Down
5 changes: 4 additions & 1 deletion pkg/sentry/kernel/auth/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ go_library(
"user_namespace_mutex.go",
],
marshal = True,
visibility = ["//pkg/sentry:internal"],
visibility = [
"//pkg/sentry:internal",
"//test/e2e:__subpackages__",
],
deps = [
"//pkg/abi/linux",
"//pkg/bits",
Expand Down
110 changes: 77 additions & 33 deletions pkg/sentry/kernel/auth/capability_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ func CapabilitySetOfMany(cps []linux.Capability) CapabilitySet {
return CapabilitySet(cs)
}

// Add sets the bit for cp in the receiver, leaving every other capability
// already present untouched.
func (cs *CapabilitySet) Add(cp linux.Capability) {
	*cs = *cs | CapabilitySetOf(cp)
}

// Clear unsets the bit for cp in the receiver, leaving every other capability
// untouched.
func (cs *CapabilitySet) Clear(cp linux.Capability) {
	// &^= is Go's "and not" assignment: drop exactly the bits set in the mask.
	*cs &^= CapabilitySetOf(cp)
}

// VfsCapDataOf returns a VfsCapData containing the file capabilities for the given slice of bytes.
// For each field of the cap data, which are in the structure of either vfs_cap_data or vfs_ns_cap_data,
// the bytes are ordered in little endian.
Expand All @@ -68,39 +78,28 @@ func VfsCapDataOf(data []byte) (linux.VfsNsCapData, error) {
return capData, nil
}

// CapsFromVfsCaps returns a copy of the given creds with new capability sets
// by applying the file capability that is specified by capData.
func CapsFromVfsCaps(capData linux.VfsNsCapData, creds *Credentials) (*Credentials, error) {
// If the real or effective user ID of the process is root,
// the file inheritable and permitted sets are ignored from
// `Capabilities and execution of programs by root` at capabilities(7).
if root := creds.UserNamespace.MapToKUID(RootUID); creds.EffectiveKUID == root || creds.RealKUID == root {
return creds, nil
}
effective := (capData.MagicEtc & linux.VFS_CAP_FLAGS_EFFECTIVE) > 0
permittedCaps := (CapabilitySet(capData.Permitted()) & creds.BoundingCaps) |
(CapabilitySet(capData.Inheritable()) & creds.InheritableCaps)
// P'(effective) = effective ? P'(permitted) : P'(ambient).
// The ambient capabilities has not supported yet in gVisor,
// set effective capabilities to 0 when effective bit is false.
effectiveCaps := CapabilitySet(0)
if effective {
effectiveCaps = permittedCaps
}
// Insufficient to execute correctly.
if (CapabilitySet(capData.Permitted()) & ^permittedCaps) != 0 {
return nil, linuxerr.EPERM
}
// If the capabilities don't change, it will return the creds'
// original copy.
if creds.PermittedCaps == permittedCaps && creds.EffectiveCaps == effectiveCaps {
return creds, nil
}
// The credentials object is immutable.
newCreds := creds.Fork()
newCreds.PermittedCaps = permittedCaps
newCreds.EffectiveCaps = effectiveCaps
return newCreds, nil
// HandleVfsCaps applies the file capabilities in vfsCaps to creds, modifying
// creds.PermittedCaps in place.
//
// It returns two booleans and an error:
//   - the first boolean reports whether the file's effective flag is set;
//   - the second boolean reports whether the file capabilities were applied;
//   - the error is EPERM when the effective flag is set but some
//     file-permitted capability could not be granted.
func HandleVfsCaps(vfsCaps linux.VfsNsCapData, creds *Credentials) (bool, bool, error) {
	// gVisor does not support ID-mapped mounts and all filesystems are owned
	// by the initial user namespace, so the root ID can be cast directly to a
	// KUID. Linux skips vfs caps when that ID does not own the current userns.
	if !rootIDOwnsCurrentUserns(creds, KUID(vfsCaps.RootID)) {
		return false, false, nil
	}

	filePermitted := CapabilitySet(vfsCaps.Permitted())
	fileInheritable := CapabilitySet(vfsCaps.Inheritable())

	// P'(permitted) = (P(inheritable) & F(inheritable)) | (F(permitted) & P(bounding)) | P'(ambient)
	// Ambient capabilities are not yet supported in gVisor, so that term is
	// omitted.
	creds.PermittedCaps = (filePermitted & creds.BoundingCaps) | (fileInheritable & creds.InheritableCaps)

	effective := (vfsCaps.MagicEtc & linux.VFS_CAP_FLAGS_EFFECTIVE) > 0

	// A file-permitted capability that did not make it into the new permitted
	// set means the program cannot execute correctly. Linux only returns
	// EPERM for this when the effective flag is set.
	if missing := filePermitted &^ creds.PermittedCaps; effective && missing != 0 {
		return effective, true, linuxerr.EPERM
	}
	return effective, true, nil
}

// FixupVfsCapDataOnSet may convert the given value to v3 file capabilities. It
Expand Down Expand Up @@ -174,6 +173,51 @@ func rootIDOwnsCurrentUserns(creds *Credentials, rootID KUID) bool {
return false
}

// HandlePrivilegedRoot adjusts creds for a privileged root user, following
// `Capabilities and execution of programs by root` in capabilities(7). When
// either the real or the effective KUID is root, creds.PermittedCaps is
// overwritten in place.
//
// hasVFSCaps reports whether file capabilities were applied for the
// executable; filename is only used in the warning message.
//
// The return value is true if the file effective bit should be treated as set.
func HandlePrivilegedRoot(creds *Credentials, hasVFSCaps bool, filename string) bool {
	// gVisor currently does not support the SECURE_NOROOT secure bit since
	// PR_SET_SECUREBITS is not supported, so there is nothing to check here.
	rootKUID := creds.UserNamespace.MapToKUID(RootUID)
	realIsRoot := creds.RealKUID == rootKUID
	effectiveIsRoot := creds.EffectiveKUID == rootKUID

	// Set-UID-root binary that also carries file capabilities, run by a
	// non-root user: keep only what the file capabilities granted.
	if hasVFSCaps && !realIsRoot && effectiveIsRoot {
		log.Warningf("File %q has both SUID bit and file capabilities set, not raising all capabilities.", filename)
		return false
	}

	if realIsRoot || effectiveIsRoot {
		// P'(permitted) = P(inheritable) | P(bounding)
		creds.PermittedCaps = creds.BoundingCaps | creds.InheritableCaps
	}

	// Linux only sets the effective bit if the effective KUID is root.
	return effectiveIsRoot
}

// UpdateCredsForNewTask updates creds for a new task as per capabilities(7).
func UpdateCredsForNewTask(creds *Credentials, fileCaps string, filename string) error {
// Clear the permitted capability set. It is initialized below via
// HandleVfsCaps() and HandlePrivilegedRoot().
creds.PermittedCaps = 0
hasVFSCaps := false
setEffective := false
< 8000 span class='blob-code-inner blob-code-marker ' data-code-marker="+"> if len(fileCaps) != 0 {
vfsCaps, err := VfsCapDataOf([]byte(fileCaps))
if err != nil {
return err
}
setEffective, hasVFSCaps, err = HandleVfsCaps(vfsCaps, creds)
if err != nil {
return err
}
}
setEffective = HandlePrivilegedRoot(creds, hasVFSCaps, filename) || setEffective
// P'(effective) = effective ? P'(permitted) : P'(ambient).
creds.EffectiveCaps = 0
if setEffective {
creds.EffectiveCaps = creds.PermittedCaps
}
return nil
}

// TaskCapabilities represents all the capability sets for a task. Each of these
// sets is explained in greater detail in capabilities(7).
type TaskCapabilities struct {
Expand Down
112 changes: 35 additions & 77 deletions pkg/sentry/kernel/auth/capability_set_test.go
5DA8
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,14 @@ import (
"gvisor.dev/gvisor/pkg/errors/linuxerr"
)

// capsEquals reports whether the permitted, inheritable, effective and
// bounding capability sets of creds are all equal to those in caps.
func capsEquals(creds *Credentials, caps TaskCapabilities) bool {
	switch {
	case creds.PermittedCaps != caps.PermittedCaps:
		return false
	case creds.InheritableCaps != caps.InheritableCaps:
		return false
	case creds.EffectiveCaps != caps.EffectiveCaps:
		return false
	default:
		return creds.BoundingCaps == caps.BoundingCaps
	}
}

// credentialsWithCaps returns a copy of creds with the given capabilities.
func credentialsWithCaps(creds *Credentials, permittedCaps, inheritableCaps, effectiveCaps, boundingCaps CapabilitySet) *Credentials {
newCreds := creds.Fork()
newCreds.PermittedCaps = permittedCaps
newCreds.InheritableCaps = inheritableCaps
newCreds.EffectiveCaps = effectiveCaps
newCreds.BoundingCaps = boundingCaps
return newCreds
// credentialsWithCaps builds root credentials in a fresh root user namespace,
// with the inheritable and bounding sets taken from the arguments and the
// permitted and effective sets cleared.
func credentialsWithCaps(inheritable, bounding CapabilitySet) *Credentials {
	c := NewRootCredentials(NewRootUserNamespace())
	c.PermittedCaps, c.EffectiveCaps = 0, 0
	c.InheritableCaps, c.BoundingCaps = inheritable, bounding
	return c
}

func vfsNsCapDataFrom(effective bool, rootid uint32, permitted, inheritable CapabilitySet) linux.VfsNsCapData {
Expand Down Expand Up @@ -64,102 +56,68 @@ func vfsCapDataFrom(effective bool, permitted, inheritable CapabilitySet) linux.

func TestCapsFromVfsCaps(t *testing.T) {
for _, tst := range []struct {
name string
capData linux.VfsNsCapData
creds *Credentials
wantCaps TaskCapabilities
wantErr error
name string
capData linux.VfsNsCapData
creds *Credentials
wantPermitted CapabilitySet
wantEffective bool
wantErr error
}{
{
name: "TestRootCredential",
name: "TestSamePermittedAndInheritableCaps",
capData: vfsCapDataFrom(
true, // effective
CapabilitySetOf(linux.CAP_NET_ADMIN), // permitted
CapabilitySetOf(linux.CAP_NET_ADMIN)), // inheritable
creds: credentialsWithCaps(
NewRootCredentials(NewRootUserNamespace()),
AllCapabilities,
CapabilitySetOf(linux.CAP_NET_RAW),
AllCapabilities,
CapabilitySetOf(linux.CAP_SYSLOG)),
wantCaps: TaskCapabilities{
PermittedCaps: AllCapabilities,
InheritableCaps: CapabilitySetOf(linux.CAP_NET_RAW),
EffectiveCaps: AllCapabilities,
BoundingCaps: CapabilitySetOf(linux.CAP_SYSLOG),
},
creds: credentialsWithCaps(AllCapabilities, AllCapabilities),
wantPermitted: CapabilitySetOf(linux.CAP_NET_ADMIN),
wantEffective: true,
},
{
name: "TestPermittedAndInheritableCaps",
name: "TestDifferentPermittedAndInheritableCaps",
capData: vfsCapDataFrom(
true, // effective
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID}), // permitted
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETGID})), // inheritable
creds: credentialsWithCaps(
NewUserCredentials(123, 321, nil, nil, NewRootUserNamespace()),
AllCapabilities,
AllCapabilities,
AllCapabilities,
AllCapabilities),
wantCaps: TaskCapabilities{
PermittedCaps: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
InheritableCaps: AllCapabilities,
EffectiveCaps: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
BoundingCaps: AllCapabilities,
},
creds: credentialsWithCaps(AllCapabilities, AllCapabilities),
wantPermitted: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
wantEffective: true,
},
{
name: "TestEffectiveBitOff",
capData: vfsCapDataFrom(
false, // effective
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID}), // permitted
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETGID})), // inheritable
creds: credentialsWithCaps(
NewUserCredentials(123, 321, nil, nil, NewRootUserNamespace()),
AllCapabilities,
AllCapabilities,
AllCapabilities,
AllCapabilities),
wantCaps: TaskCapabilities{
PermittedCaps: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
InheritableCaps: AllCapabilities,
EffectiveCaps: 0,
BoundingCaps: AllCapabilities,
},
creds: credentialsWithCaps(AllCapabilities, AllCapabilities),
wantPermitted: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
wantEffective: false,
},
{
name: "TestInsufficientCaps",
capData: vfsCapDataFrom(
true, // effective
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID}), // permitted
CapabilitySetOf(linux.CAP_CHOWN)), // inheritable
creds: credentialsWithCaps(
NewUserCredentials(123, 321, nil, nil, NewRootUserNamespace()),
AllCapabilities,
AllCapabilities,
AllCapabilities,
CapabilitySetOf(linux.CAP_CHOWN)),
creds: credentialsWithCaps(AllCapabilities, CapabilitySetOf(linux.CAP_CHOWN)),
wantErr: linuxerr.EPERM,
},
} {
t.Run(tst.name, func(t *testing.T) {
newCreds, err := CapsFromVfsCaps(tst.capData, tst.creds)
setEff, _, err := HandleVfsCaps(tst.capData, tst.creds)
if err == nil {
if tst.wantErr != nil {
t.Errorf("CapsFromVfsCaps(%v, %v) returned unexpected error %v", tst.capData, tst.creds, tst.wantErr)
t.Errorf("CapsFromVfsCaps(%v) returned unexpected error %v", tst.capData, tst.wantErr)
}
if tst.creds.PermittedCaps != tst.wantPermitted {
t.Errorf("CapsFromVfsCaps(%v) set PermittedCaps to: %#x, want capabilities: %#x",
tst.capData, tst.creds.PermittedCaps, tst.wantPermitted)
}
if !capsEquals(newCreds, tst.wantCaps) {
t.Errorf("CapsFromVfsCaps(%v, %v) returned capabilities: %v, want capabilities: %v",
tst.capData, tst.creds,
TaskCapabilities{
PermittedCaps: newCreds.PermittedCaps,
InheritableCaps: newCreds.InheritableCaps,
EffectiveCaps: newCreds.EffectiveCaps,
BoundingCaps: newCreds.BoundingCaps,
}, tst.wantCaps)
if setEff != tst.wantEffective {
t.Errorf("CapsFromVfsCaps(%v) returned effective=%t, want: %t", tst.capData, setEff, tst.wantEffective)
}
} else if tst.wantErr == nil || tst.wantErr.Error() != err.Error() {
t.Errorf("CapsFromVfsCaps(%v, %v) returned error %v, wantErr: %v", tst.capData, tst.creds, err, tst.wantErr)
t.Errorf("CapsFromVfsCaps(%v) returned error %v, wantErr: %v", tst.capData, err, tst.wantErr)
}
})
}
Expand Down
13 changes: 2 additions & 11 deletions pkg/sentry/kernel/kernel.go
Original file line number Diff line number Diff line change
Expand Up @@ -1088,16 +1088,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
if se != nil {
return nil, 0, errors.New(se.String())
}
var vfsCaps linux.VfsNsCapData
if len(image.FileCaps()) != 0 {
var err error
vfsCaps, err = auth.VfsCapDataOf([]byte(image.FileCaps()))
if err != nil {
return nil, 0, err
}
}
creds, err := auth.CapsFromVfsCaps(vfsCaps, args.Credentials)
if err != nil {
if err := auth.UpdateCredsForNewTask(args.Credentials, image.FileCaps(), args.Filename); err != nil {
return nil, 0, err
}
args.FDTable.IncRef()
Expand All @@ -1109,7 +1100,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
TaskImage: image,
FSContext: fsContext,
FDTable: args.FDTable,
Credentials: creds,
Credentials: args.Credentials,
NetworkNamespace: k.RootNetworkNamespace(),
AllowedCPUMask: sched.NewFullCPUSet(k.applicationCores),
UTSNamespace: args.UTSNamespace,
Expand Down
11 changes: 7 additions & 4 deletions pkg/sentry/loader/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,13 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V
return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux())
}
defer file.DecRef(ctx)
xattr, err := file.GetXattr(ctx, &vfs.GetXattrOptions{Name: linux.XATTR_SECURITY_CAPABILITY, Size: linux.XATTR_CAPS_SZ_3})
fileCaps, err := file.GetXattr(ctx, &vfs.GetXattrOptions{Name: linux.XATTR_SECURITY_CAPABILITY, Size: linux.XATTR_CAPS_SZ_3})
switch {
case linuxerr.Equals(linuxerr.ENODATA, err), linuxerr.Equals(linuxerr.ENOTSUP, err):
xattr = ""
case linuxerr.Equals(linuxerr.ENODATA, err), linuxerr.Equals(linuxerr.EOPNOTSUPP, err):
// Linux converts EOPNOTSUPP to ENODATA in
// security/commoncap.c:get_vfs_caps_from_disk(). We communicate the lack
// of file capabilities by an empty string.
fileCaps = ""
case err != nil:
return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("failed to read file capabilities of %s: %v", args.Filename, err), syserr.FromError(err).ToLinux())
}
Expand Down Expand Up @@ -355,6 +358,6 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V
OS: loaded.os,
Arch: ac,
Name: name,
FileCaps: xattr,
FileCaps: fileCaps,
}, nil
}
1 change: 0 additions & 1 deletion runsc/container/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ go_test(
tags = ["requires-kvm"],
deps = [
"//pkg/abi/linux",
"//pkg/bits",
"//pkg/cleanup",
"//pkg/log",
"//pkg/sentry/control",
Expand Down
3 changes: 1 addition & 2 deletions runsc/container/container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
"gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
Expand Down Expand Up @@ -2482,7 +2481,7 @@ func TestMountSymlink(t *testing.T) {

// Check that --net-raw disables the CAP_NET_RAW capability.
func TestNetRaw(t *testing.T) {
capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10)
capNetRaw := strconv.FormatUint(uint64(auth.CapabilitySetOf(linux.CAP_NET_RAW)), 10)
app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
t.Fatal("error finding test_app:", err)
Expand Down
1 change: 0 additions & 1 deletion runsc/specutils/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/bits",
"//pkg/log",
"//pkg/sentry/devices/nvproxy/nvconf",
"//pkg/sentry/kernel/auth",
Expand Down
Loading
Loading
0