LVS
lvs-devel
Google
 
Web LinuxVirtualServer.org

Re: [PATCH net-next 3/3] ipvs: strip udp tunnel headers from icmp errors

To: Julian Anastasov <ja@xxxxxx>
Subject: Re: [PATCH net-next 3/3] ipvs: strip udp tunnel headers from icmp errors
Cc: lvs-devel@xxxxxxxxxxxxxxx, Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>, netfilter-devel@xxxxxxxxxxxxxxx, Jacky Hu <hengqing.hu@xxxxxxxxx>, jacky.hu@xxxxxxxxxxx, jason.niesz@xxxxxxxxxxx
From: Simon Horman <horms@xxxxxxxxxxxx>
Date: Wed, 3 Apr 2019 10:15:11 +0200
Hi Julian,

On Sun, Mar 31, 2019 at 01:26:21PM +0300, Julian Anastasov wrote:
> Recognize UDP tunnels in received ICMP errors and
> properly strip the tunnel headers. GUE is what we
> have for now.
> 
> Signed-off-by: Julian Anastasov <ja@xxxxxx>
> ---
>  net/netfilter/ipvs/ip_vs_core.c | 58 +++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
> index 4447ee512b88..0b624e8e7982 100644
> --- a/net/netfilter/ipvs/ip_vs_core.c
> +++ b/net/netfilter/ipvs/ip_vs_core.c
> @@ -39,6 +39,7 @@
>  #include <net/tcp.h>
>  #include <net/udp.h>
>  #include <net/icmp.h>                   /* for icmp_send */
> +#include <net/gue.h>
>  #include <net/route.h>
>  #include <net/ip6_checksum.h>
>  #include <net/netns/generic.h>               /* net_generic() */
> @@ -1579,6 +1580,34 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, 
> struct sk_buff *skb,
>       return 1;
>  }
>  
> +/* Check the UDP tunnel and return its header length */
> +static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
> +                       unsigned int offset, __u16 af, __be16 dport,
> +                       const union nf_inet_addr *daddr, __u8 *proto)
> +{
> +     struct ip_vs_dest *dest = ip_vs_find_tunnel(ipvs, af, daddr, dport);
> +
> +     if (!dest)
> +             goto unk;
> +     if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
> +             struct guehdr _gueh, *gueh;
> +
> +             gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh);
> +             if (!gueh)
> +                     goto unk;
> +             if (gueh->control != 0 || gueh->version != 0)
> +                     goto unk;
> +             /* Later we can support also IPPROTO_IPV6 */
> +             if (gueh->proto_ctype != IPPROTO_IPIP)
> +                     goto unk;
> +             *proto = gueh->proto_ctype;
> +             return sizeof(struct guehdr) + (gueh->hlen << 2);

I think that the gue-specific portions of the above, which is most of
ipvs_udp_decap() should be a separate helper which is part of net/gue.h or
net/ipv4/fou.c - this seems to be a subset of the logic in gue_udp_recv().

> +     }
> +
> +unk:
> +     return -1;
> +}
> +
>  /*
>   *   Handle ICMP messages in the outside-to-inside direction (incoming).
>   *   Find any that might be relevant, check against existing connections,
> @@ -1658,8 +1687,37 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff 
> *skb, int *related,
>               if (cih == NULL)
>                       return NF_ACCEPT; /* The packet looks wrong, ignore */
>               ipip = true;
> +     } else if (cih->protocol == IPPROTO_UDP) {      /* Can be UDP encap */

Can we consider factoring out the logic in this else-if clause
out into a function to avoid the use of goto?

> +             struct udphdr _udph, *udph;
> +             __u8 iproto;
> +             int ulen;
> +
> +             if (unlikely(cih->frag_off & htons(IP_OFFSET)))
> +                     return NF_ACCEPT;

I think a comment is warranted regarding the behaviour implemented
for fragments.

> +             /* Error for our tunnel must arrive at LOCAL_IN */
> +             if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
> +                     return NF_ACCEPT;
> +             offset2 = offset + cih->ihl * 4;
> +             udph = skb_header_pointer(skb, offset2, sizeof(_udph), &_udph);
> +             if (!udph)
> +                     goto check;

> +             offset2 += sizeof(struct udphdr);
> +             ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET, udph->dest,
> +                                   raddr, &iproto);
> +             if (ulen < 0)
> +                     goto check;
> +             /* Skip IP + UDP + ulen */
> +             offset = offset2 + ulen;
> +             /* Now we should be at the original IP header */
> +             cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
> +             if (cih && cih->version == 4 && cih->ihl >= 5 &&
> +                 iproto == IPPROTO_IPIP)
> +                     ipip = true;
> +             else
> +                     return NF_ACCEPT;

Skipping over tunnel headers and determining the inner IP protocol really
feels like something that should be handled code common to other UDP
encapsulation implementations.

>       }
>  
> +check:
>       pd = ip_vs_proto_data_get(ipvs, cih->protocol);
>       if (!pd)
>               return NF_ACCEPT;
> -- 
> 2.17.1
> 

<Prev in Thread] Current Thread [Next in Thread>