Skip to content

Commit 7d3a75b

Browse files
shailend-ggvisor-bot
authored and committed
Allow netlink sockets to send to RTMGRP_LINK
sendmsg() to the multicast group now works. connect() is still disallowed. This patch also reduces the length of time for which a netlink socket's mu is held. Previously, once it was taken in Socket.sendMsg(), it was held throughout Protocol.Receive() -> Socket.ProcessMessages() -> Socket.SendResponse(), when in fact the only field it was needed for was Socket.portID. PiperOrigin-RevId: 830546353
1 parent cd0901c commit 7d3a75b

File tree

20 files changed

+1221
-173
lines changed

20 files changed

+1221
-173
lines changed

pkg/abi/linux/netlink.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,3 +157,8 @@ type NetlinkErrorMessage struct {
157157
Error int32
158158
Header NetlinkMessageHeader
159159
}
160+
161+
// RTNetlink multicast groups, from uapi/linux/rtnetlink.h.
162+
const (
163+
RTNLGRP_LINK = 1
164+
)

pkg/sentry/inet/BUILD

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@ declare_mutex(
2626
prefix = "abstractSocketNamespace",
2727
)
2828

29+
declare_mutex(
30+
name = "nlmcast_table_mutex",
31+
out = "nlmcast_table_mutex.go",
32+
package = "inet",
33+
prefix = "nlmcastTable",
34+
)
35+
2936
go_library(
3037
name = "inet",
3138
srcs = [
@@ -35,6 +42,8 @@ go_library(
3542
"inet.go",
3643
"namespace.go",
3744
"namespace_refs.go",
45+
"nlmcast.go",
46+
"nlmcast_table_mutex.go",
3847
"test_stack.go",
3948
],
4049
deps = [

pkg/sentry/inet/inet.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ type Stack interface {
3232
Interfaces() map[int32]Interface
3333

3434
// RemoveInterface removes the specified network interface.
35-
RemoveInterface(idx int32) error
35+
RemoveInterface(ctx context.Context, idx int32) error
3636

3737
// InterfaceAddrs returns all network interface addresses as a mapping from
3838
// interface indexes to a slice of associated interface address properties.

pkg/sentry/inet/namespace.go

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,24 @@ type Namespace struct {
4545

4646
// abstractSockets tracks abstract sockets that are in use.
4747
abstractSockets AbstractSocketNamespace
48+
49+
// netlinkMcastTable manages multicast group membership for netlink sockets.
50+
netlinkMcastTable *McastTable
4851
}
4952

5053
// NewRootNamespace creates the root network namespace, with creator
5154
// allowing new network namespaces to be created. If creator is nil, no
5255
// networking will function if the network is namespaced.
5356
func NewRootNamespace(stack Stack, creator NetworkStackCreator, userNS *auth.UserNamespace) *Namespace {
5457
n := &Namespace{
55-
stack: stack,
56-
creator: creator,
57-
isRoot: true,
58-
userNS: userNS,
58+
stack: stack,
59+
creator: creator,
60+
isRoot: true,
61+
userNS: userNS,
62+
netlinkMcastTable: NewNetlinkMcastTable(),
63+
}
64+
if eventPublishingStack, ok := stack.(InterfaceEventPublisher); ok {
65+
eventPublishingStack.AddInterfaceEventSubscriber(n.netlinkMcastTable)
5966
}
6067
n.abstractSockets.init()
6168
return n
@@ -79,8 +86,9 @@ func (n *Namespace) GetInode() *nsfs.Inode {
7986
// NewNamespace creates a new network namespace from the root.
8087
func NewNamespace(root *Namespace, userNS *auth.UserNamespace) *Namespace {
8188
n := &Namespace{
82-
creator: root.creator,
83-
userNS: userNS,
89+
creator: root.creator,
90+
userNS: userNS,
91+
netlinkMcastTable: NewNetlinkMcastTable(),
8492
}
8593
n.init()
8694
return n
@@ -148,6 +156,9 @@ func (n *Namespace) init() {
148156
if err != nil {
149157
panic(err)
150158
}
159+
if eventPublishingStack, ok := n.stack.(InterfaceEventPublisher); ok {
160+
eventPublishingStack.AddInterfaceEventSubscriber(n.netlinkMcastTable)
161+
}
151162
}
152163
n.abstractSockets.init()
153164
}
@@ -162,6 +173,11 @@ func (n *Namespace) AbstractSockets() *AbstractSocketNamespace {
162173
return &n.abstractSockets
163174
}
164175

176+
// NetlinkMcastTable returns the netlink multicast group table.
177+
func (n *Namespace) NetlinkMcastTable() *McastTable {
178+
return n.netlinkMcastTable
179+
}
180+
165181
// NetworkStackCreator allows new instances of a network stack to be created. It
166182
// is used by the kernel to create new network namespaces when requested.
167183
type NetworkStackCreator interface {

pkg/sentry/inet/nlmcast.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
// Copyright 2025 The gVisor Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package inet
16+
17+
import (
18+
"gvisor.dev/gvisor/pkg/abi/linux"
19+
"gvisor.dev/gvisor/pkg/context"
20+
)
21+
22+
const (
23+
routeProtocol = linux.NETLINK_ROUTE
24+
routeLinkMcastGroup = linux.RTNLGRP_LINK
25+
)
26+
27+
// InterfaceEventSubscriber allows clients to subscribe to events published by an inet.Stack.
28+
//
29+
// It is a rough parallel to the objects in Linux that subscribe to netdev
30+
// events by calling register_netdevice_notifier().
31+
type InterfaceEventSubscriber interface {
32+
// OnInterfaceChangeEvent is called by InterfaceEventPublishers when an interface change event takes place.
33+
OnInterfaceChangeEvent(ctx context.Context, idx int32, i Interface)
34+
35+
// OnInterfaceDeleteEvent is called by InterfaceEventPublishers when an interface delete event takes place.
36+
OnInterfaceDeleteEvent(ctx context.Context, idx int32, i Interface)
37+
}
38+
39+
// InterfaceEventPublisher is the interface event publishing aspect of an inet.Stack.
40+
//
41+
// The Linux parallel is the way the kernel notifies subscribers via call_netdev_notifiers().
42+
type InterfaceEventPublisher interface {
43+
AddInterfaceEventSubscriber(sub InterfaceEventSubscriber)
44+
}
45+
46+
// NetlinkSocket corresponds to a netlink socket.
47+
type NetlinkSocket interface {
48+
// Protocol returns the netlink protocol value.
49+
Protocol() int
50+
51+
// Groups returns the bitmap of multicast groups the socket is bound to.
52+
Groups() uint64
53+
54+
// HandleInterfaceChangeEvent is called on NetlinkSockets that are members of the RTNLGRP_LINK
55+
// multicast group when an interface is modified.
56+
HandleInterfaceChangeEvent(context.Context, int32, Interface)
57+
58+
// HandleInterfaceDeleteEvent is called on NetlinkSockets that are members of the RTNLGRP_LINK
59+
// multicast group when an interface is deleted.
60+
HandleInterfaceDeleteEvent(context.Context, int32, Interface)
61+
}
62+
63+
// McastTable holds multicast group membership information for netlink sockets.
64+
// It corresponds roughly to Linux's struct netlink_table.
65+
//
66+
// +stateify savable
67+
type McastTable struct {
68+
mu nlmcastTableMutex `state:"nosave"`
69+
socks map[int]map[NetlinkSocket]struct{}
70+
}
71+
72+
// WithTableLocked runs fn with the table mutex held.
73+
func (m *McastTable) WithTableLocked(fn func()) {
74+
m.mu.Lock()
75+
defer m.mu.Unlock()
76+
fn()
77+
}
78+
79+
// AddSocket adds a NetlinkSocket to the multicast-group table.
80+
//
81+
// Preconditions: the netlink multicast table is locked.
82+
func (m *McastTable) AddSocket(s NetlinkSocket) {
83+
p := s.Protocol()
84+
if _, ok := m.socks[p]; !ok {
85+
m.socks[p] = make(map[NetlinkSocket]struct{})
86+
}
87+
if _, ok := m.socks[p][s]; ok {
88+
return
89+
}
90+
m.socks[p][s] = struct{}{}
91+
}
92+
93+
// RemoveSocket removes a NetlinkSocket from the multicast-group table.
94+
//
95+
// Preconditions: the netlink multicast table is locked.
96+
func (m *McastTable) RemoveSocket(s NetlinkSocket) {
97+
p := s.Protocol()
98+
if _, ok := m.socks[p]; !ok {
99+
return
100+
}
101+
if _, ok := m.socks[p][s]; !ok {
102+
return
103+
}
104+
delete(m.socks[p], s)
105+
}
106+
107+
// ForEachMcastSock calls fn on all netlink sockets of the given protocol that are members of the given multicast group. fn is invoked while the table mutex is held.
108+
func (m *McastTable) ForEachMcastSock(protocol int, mcastGroup int, fn func(s NetlinkSocket)) {
109+
m.mu.Lock()
110+
defer m.mu.Unlock()
111+
if _, ok := m.socks[protocol]; !ok {
112+
return
113+
}
114+
for s := range m.socks[protocol] {
115+
// If the socket is not bound to the multicast group, skip it.
116+
if s.Groups()&(1<<(mcastGroup-1)) == 0 {
117+
continue
118+
}
119+
fn(s)
120+
}
121+
}
122+
123+
// OnInterfaceChangeEvent implements InterfaceEventSubscriber.OnInterfaceChangeEvent.
124+
func (m *McastTable) OnInterfaceChangeEvent(ctx context.Context, idx int32, i Interface) {
125+
// Relay the event to RTNLGRP_LINK subscribers.
126+
m.ForEachMcastSock(routeProtocol, routeLinkMcastGroup, func(s NetlinkSocket) {
127+
s.HandleInterfaceChangeEvent(ctx, idx, i)
128+
})
129+
}
130+
131+
// OnInterfaceDeleteEvent implements InterfaceEventSubscriber.OnInterfaceDeleteEvent.
132+
func (m *McastTable) OnInterfaceDeleteEvent(ctx context.Context, idx int32, i Interface) {
133+
// Relay the event to RTNLGRP_LINK subscribers.
134+
m.ForEachMcastSock(routeProtocol, routeLinkMcastGroup, func(s NetlinkSocket) {
135+
s.HandleInterfaceDeleteEvent(ctx, idx, i)
136+
})
137+
}
138+
139+
// NewNetlinkMcastTable creates a new McastTable.
140+
func NewNetlinkMcastTable() *McastTable {
141+
return &McastTable{
142+
socks: make(map[int]map[NetlinkSocket]struct{}),
143+
}
144+
}

pkg/sentry/inet/test_stack.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ func (s *TestStack) Destroy() {
6161
}
6262

6363
// RemoveInterface implements Stack.
64-
func (s *TestStack) RemoveInterface(idx int32) error {
64+
func (s *TestStack) RemoveInterface(ctx context.Context, idx int32) error {
6565
delete(s.InterfacesMap, idx)
6666
return nil
6767
}

pkg/sentry/socket/hostinet/stack.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ func (s *Stack) Interfaces() map[int32]inet.Interface {
152152
}
153153

154154
// RemoveInterface implements inet.Stack.RemoveInterface.
155-
func (*Stack) RemoveInterface(idx int32) error {
155+
func (*Stack) RemoveInterface(ctx context.Context, idx int32) error {
156156
return removeInterface(idx)
157157
}
158158

pkg/sentry/socket/netlink/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ go_library(
1515
deps = [
1616
"//pkg/abi/linux",
1717
"//pkg/abi/linux/errno",
18+
"//pkg/atomicbitops",
1819
"//pkg/context",
1920
"//pkg/errors/linuxerr",
2021
"//pkg/hostarch",
@@ -27,6 +28,7 @@ go_library(
2728
"//pkg/sentry/kernel/auth",
2829
"//pkg/sentry/ktime",
2930
"//pkg/sentry/socket",
31+
"//pkg/sentry/socket/control",
3032
"//pkg/sentry/socket/netlink/nlmsg",
3133
"//pkg/sentry/socket/netlink/port",
3234
"//pkg/sentry/socket/unix",

pkg/sentry/socket/netlink/netfilter/protocol.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ func (p *Protocol) Receive(ctx context.Context, s *netlink.Socket, buf []byte) *
8282
// TODO: b/434785410 - Support batch messages.
8383
if hdr.Type == linux.NFNL_MSG_BATCH_BEGIN {
8484
ms := nlmsg.NewMessageSet(s.GetPortID(), hdr.Seq)
85-
if err := p.receiveBatchMessage(ctx, s, ms, buf); err != nil {
85+
if err := p.receiveBatchMessage(ctx, ms, buf); err != nil {
8686
log.Debugf("Nftables: Failed to process batch message: %v", err)
8787
netlink.DumpErrorMessage(hdr, ms, err.GetError())
8888
}
@@ -1215,7 +1215,7 @@ func (p *Protocol) ProcessMessage(ctx context.Context, s *netlink.Socket, msg *n
12151215
}
12161216

12171217
// receiveBatchMessage processes a NETFILTER batch message.
1218-
func (p *Protocol) receiveBatchMessage(ctx context.Context, s *netlink.Socket, ms *nlmsg.MessageSet, buf []byte) *syserr.AnnotatedError {
1218+
func (p *Protocol) receiveBatchMessage(ctx context.Context, ms *nlmsg.MessageSet, buf []byte) *syserr.AnnotatedError {
12191219
// Linux ignores messages that are too small.
12201220
// From net/netfilter/nfnetlink.c:nfnetlink_rcv_skb_batch
12211221
if len(buf) < linux.NetlinkMessageHeaderSize+linux.SizeOfNetfilterGenMsg {
@@ -1254,7 +1254,7 @@ func (p *Protocol) receiveBatchMessage(ctx context.Context, s *netlink.Socket, m
12541254
// The resource ID is a 16-bit value that is stored in network byte order.
12551255
// We ensure that it is in host byte order before passing it for processing.
12561256
resID := nlmsg.NetToHostU16(nfGenMsg.ResourceID)
1257-
if err := p.processBatchMessage(ctx, s, buf, ms, hdr, resID); err != nil {
1257+
if err := p.processBatchMessage(ctx, buf, ms, hdr, resID); err != nil {
12581258
log.Debugf("Failed to process batch message: %v", err)
12591259
netlink.DumpErrorMessage(hdr, ms, err.GetError())
12601260
}
@@ -1263,7 +1263,7 @@ func (p *Protocol) receiveBatchMessage(ctx context.Context, s *netlink.Socket, m
12631263
}
12641264

12651265
// processBatchMessage processes a batch message.
1266-
func (p *Protocol) processBatchMessage(ctx context.Context, s *netlink.Socket, buf []byte, ms *nlmsg.MessageSet, batchHdr linux.NetlinkMessageHeader, subsysID uint16) *syserr.AnnotatedError {
1266+
func (p *Protocol) processBatchMessage(ctx context.Context, buf []byte, ms *nlmsg.MessageSet, batchHdr linux.NetlinkMessageHeader, subsysID uint16) *syserr.AnnotatedError {
12671267
if subsysID >= linux.NFNL_SUBSYS_COUNT {
12681268
return syserr.NewAnnotatedError(syserr.ErrInvalidArgument, fmt.Sprintf("Nftables: Unknown subsystem id %d", subsysID))
12691269
}

pkg/sentry/socket/netlink/provider.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"gvisor.dev/gvisor/pkg/abi/linux"
2121
"gvisor.dev/gvisor/pkg/context"
2222
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
23+
"gvisor.dev/gvisor/pkg/sentry/inet"
2324
"gvisor.dev/gvisor/pkg/sentry/kernel"
2425
"gvisor.dev/gvisor/pkg/sentry/socket"
2526
"gvisor.dev/gvisor/pkg/sentry/socket/netlink/nlmsg"
@@ -51,6 +52,18 @@ type Protocol interface {
5152
ProcessMessage(ctx context.Context, s *Socket, msg *nlmsg.Message, ms *nlmsg.MessageSet) *syserr.Error
5253
}
5354

55+
// RouteProtocol corresponds to the NETLINK_ROUTE family.
56+
type RouteProtocol interface {
57+
Protocol
58+
59+
// AddNewLinkMessage is called when an interface is mutated or created by the stack.
60+
// It is the rough equivalent of Linux's rtnetlink_event().
61+
AddNewLinkMessage(ms *nlmsg.MessageSet, idx int32, i inet.Interface)
62+
63+
// AddDelLinkMessage is called when an interface is deleted by the stack.
64+
AddDelLinkMessage(ms *nlmsg.MessageSet, idx int32, i inet.Interface)
65+
}
66+
5467
// Provider is a function that creates a new Protocol for a specific netlink
5568
// protocol.
5669
//

0 commit comments

Comments
 (0)