From 034b40fbb8944cd775d63a58d217f1ec8a862df8 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 10 Dec 2024 13:42:27 +0100 Subject: [PATCH] netdev-tc-offloads: Don't offload header modification on ip fragments. While offloading header modifications to TC, OVS is using {TCA_PEDIT} + {TCA_CSUM} combination as that is the only way to represent header rewrite. However, {TCA_CSUM} is unable to calculate L4 checksums for IP fragments. Since TC already applies fragmentation bit masking, this patch simply needs to prevent these packets from being processed through TC. Reported-at: https://issues.redhat.com/browse/FDP-545 Acked-by: Ilya Maximets Signed-off-by: Eelco Chaudron --- lib/netdev-offload-tc.c | 39 +++++++++++++++++ lib/tc.c | 5 ++- tests/system-traffic.at | 92 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 1 deletion(-) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 3be1c08d24f..44b056535f3 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1490,6 +1490,31 @@ parse_put_flow_ct_action(struct tc_flower *flower, return 0; } +/* This function returns true if the tc layer will add a l4 checksum action + * for this set action. Refer to the csum_update_flag() function for + * detailed logic. Note that even the kernel only supports updating TCP, + * UDP and ICMPv6. 
+ */ +static bool +tc_will_add_l4_checksum(struct tc_flower *flower, int type) +{ + switch (type) { + case OVS_KEY_ATTR_IPV4: + case OVS_KEY_ATTR_IPV6: + case OVS_KEY_ATTR_TCP: + case OVS_KEY_ATTR_UDP: + switch (flower->key.ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_ICMPV6: + case IPPROTO_UDPLITE: + return true; + } + break; + } + return false; +} + static int parse_put_flow_set_masked_action(struct tc_flower *flower, struct tc_action *action, @@ -1522,6 +1547,14 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, return EOPNOTSUPP; } + if (flower->key.flags & TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT + && tc_will_add_l4_checksum(flower, type)) { + VLOG_DBG_RL(&rl, "set action type %d not supported on fragments " + "due to checksum limitation", type); + ofpbuf_uninit(&set_buf); + return EOPNOTSUPP; + } + for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) { struct netlink_field *f = &set_flower_map[type][i]; @@ -2447,6 +2480,12 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, } mask->nw_frag = 0; + } else { + /* This scenario should not occur. Currently, all installed IP DP + * flows perform a fully masked match on the fragmentation bits. + * However, since TC depends on this behavior, we return EOPNOTSUPP + * for now in case this behavior changes in the future. */ + return EOPNOTSUPP; } if (key->nw_proto == IPPROTO_TCP) { diff --git a/lib/tc.c b/lib/tc.c index e55ba3b1bbc..2510d0a3ff2 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -2958,7 +2958,10 @@ csum_update_flag(struct tc_flower *flower, * eth(dst=),eth_type(0x0800) actions=set(ipv4(src=)) * we need to force a more specific flow as this can, for example, * need a recalculation of icmp checksum if the packet that passes - * is ICMPv6 and tcp checksum if its tcp. */ + * is ICMPv6 and tcp checksum if its tcp. + * + * This section of the code must be kept in sync with the pre-check + * function in netdev-offload-tc.c, tc_will_add_l4_checksum(). 
*/ switch (htype) { case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: diff --git a/tests/system-traffic.at b/tests/system-traffic.at index a327702bbdf..3cc46600a71 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -2525,6 +2525,98 @@ AT_CHECK([ovs-appctl revalidator/resume]) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([datapath - mod_nw_src/set_field on IP fragments]) +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", 36:b1:ee:7c:01:03) +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", 36:b1:ee:7c:01:02) + +AT_DATA([flows.txt], [dnl + in_port=ovs-p0,ip,nw_src=10.1.1.1 actions=mod_nw_src=11.1.1.1,ovs-p1 + in_port=ovs-p0,ipv6,ipv6_src=fc00::1 actions=set_field:fc00::100->ipv6_src,ovs-p1 +]) + +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl -Oopenflow13 add-flows br0 flows.txt]) + +NETNS_DAEMONIZE([at_ns1], + [tcpdump -l -nn -xx -U -i p1 -w p1.pcap 2> tcpdump.err], + [tcpdump.pid]) +OVS_WAIT_UNTIL([grep "listening" tcpdump.err]) + +dnl IPv4 Packet content: +dnl Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02 +dnl Type: IPv4 (0x0800) +dnl Internet Protocol Version 4, Src: 10.1.1.1, Dst: 10.1.1.2 +dnl 0100 .... = Version: 4 +dnl .... 0101 = Header Length: 20 bytes (5) +dnl Differentiated Services Field: 0x00 (DSCP: CS0, ECN: Not-ECT) +dnl Total Length: 38 +dnl Identification: 0x0001 (1) +dnl 001. .... = Flags: 0x1, More fragments +dnl 0... .... = Reserved bit: Not set +dnl .0.. .... = Don't fragment: Not set +dnl ..1. .... 
= More fragments: Set +dnl ...0 0000 0000 0000 = Fragment Offset: 0 +dnl Time to Live: 64 +dnl Protocol: UDP (17) +dnl Header Checksum: 0x44c2 +dnl Data (18 bytes) +eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 08 00" +ip="45 00 00 26 00 01 20 00 40 11 44 c2 0a 01 01 01 0a 01 01 02" +data="0b c4 08 84 00 26 e9 64 01 02 03 04 05 06 07 08 09 0a" +packet="${eth} ${ip} ${data}" + +dnl We send each packet multiple times, one for learning, which will go to +dnl ovs-vswitchd, and the others will go through the actual datapath. +for i in 1 2 3 4 5; do + NS_CHECK_EXEC([at_ns0], + [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null]) +done + +dnl Update source address and checksums in original packet for comparison. +packet=$(echo "$packet" | sed -e 's/ //g' \ + -e 's/0a010101/0b010101/g' -e 's/44c2/43c2/g' -e 's/e964/e864/g') +OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5]) + +dnl Repeat similar test with IPv6. +dnl Packet content: +dnl Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02 +dnl Type: IPv6 (0x86dd) +dnl Internet Protocol Version 6, Src: fc00::1, Dst: fc00::2 +dnl Payload Length: 24 +dnl Next Header: Fragment Header for IPv6 (44) +dnl Hop Limit: 64 +dnl Fragment Header for IPv6 +dnl Next header: UDP (17) +dnl Reserved octet: 0x00 +dnl 0000 0000 0000 0... = Offset: 0 (0 bytes) +dnl .... .... .... .00. = Reserved bits: 0 +dnl .... .... .... ...1 = More Fragments: Yes +dnl Identification: 0x2316ab36 +dnl Data (16 bytes) +eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 86 dd" +ip="60 00 00 00 00 18 2c 40 fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ + fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 11 00 00 01 23 16 ab 36" +data="0b c4 08 84 00 26 07 65 01 02 03 04 05 06 07 08" +packet="${eth} ${ip} ${data}" + +for i in 1 2 3 4 5; do + NS_CHECK_EXEC([at_ns0], + [$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null]) +done + +dnl Update checksum and source address in original packet for comparison. 
+packet=$(echo "$packet" | sed -e 's/ //g' -e 's/0765/0666/g' -e \ + 's/fc000000000000000000000000000001/fc000000000000000000000000000100/g') +OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([MPLS]) AT_SETUP([mpls - encap header dp-support])