linux/net/rxrpc/peer_event.c
/* Peer event handling, typically ICMP messages.
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include "ar-internal.h"

static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);

/*
 * Find the peer associated with an ICMP packet.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
                                                     const struct sk_buff *skb,
                                                     struct sockaddr_rxrpc *srx)
{
        struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);

        _enter("");

        memset(srx, 0, sizeof(*srx));
        srx->transport_type = local->srx.transport_type;
        srx->transport_len = local->srx.transport_len;
        srx->transport.family = local->srx.transport.family;

        /* Can we see an ICMP4 packet on an ICMP6 listening socket, and vice
         * versa?
         */
        switch (srx->transport.family) {
        case AF_INET:
                srx->transport.sin.sin_port = serr->port;
                switch (serr->ee.ee_origin) {
                case SO_EE_ORIGIN_ICMP:
                        _net("Rx ICMP");
                        memcpy(&srx->transport.sin.sin_addr,
                               skb_network_header(skb) + serr->addr_offset,
                               sizeof(struct in_addr));
                        break;
                case SO_EE_ORIGIN_ICMP6:
                        _net("Rx ICMP6 on v4 sock");
                        memcpy(&srx->transport.sin.sin_addr,
                               skb_network_header(skb) + serr->addr_offset + 12,
                               sizeof(struct in_addr));
                        break;
                default:
                        memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
                               sizeof(struct in_addr));
                        break;
                }
                break;

#ifdef CONFIG_AF_RXRPC_IPV6
        case AF_INET6:
                srx->transport.sin6.sin6_port = serr->port;
                switch (serr->ee.ee_origin) {
                case SO_EE_ORIGIN_ICMP6:
                        _net("Rx ICMP6");
                        memcpy(&srx->transport.sin6.sin6_addr,
                               skb_network_header(skb) + serr->addr_offset,
                               sizeof(struct in6_addr));
                        break;
                case SO_EE_ORIGIN_ICMP:
                        _net("Rx ICMP on v6 sock");
                        srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
                        srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
                        srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
                        memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12,
                               skb_network_header(skb) + serr->addr_offset,
                               sizeof(struct in_addr));
                        break;
                default:
                        memcpy(&srx->transport.sin6.sin6_addr,
                               &ipv6_hdr(skb)->saddr,
                               sizeof(struct in6_addr));
                        break;
                }
                break;
#endif

        default:
                BUG();
        }

        return rxrpc_lookup_peer_rcu(local, srx);
}
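
/* A note on the address fix-ups above (added for clarity, not from the
 * original source): an ICMPv6 report quoting a v4-mapped address carries a
 * struct in6_addr of the form ::ffff:a.b.c.d, so the embedded IPv4 address
 * begins 12 bytes into it - hence "addr_offset + 12" when pulling a v4
 * address out of an ICMP6 report, and the explicit 0, 0, htonl(0xffff)
 * words when synthesising a v4-mapped in6_addr from an ICMP report seen on
 * a v6 socket.  A minimal sketch of the same mapping, where v4addr is a
 * hypothetical struct in_addr:
 *
 *      struct in6_addr mapped = { };
 *      mapped.s6_addr32[2] = htonl(0xffff);
 *      memcpy(mapped.s6_addr + 12, &v4addr, sizeof(struct in_addr));
 */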

/*
 * Handle an MTU/fragmentation problem.
 */
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr)
{
        u32 mtu = serr->ee.ee_info;

        _net("Rx ICMP Fragmentation Needed (%u)", mtu);

        /* wind down the local interface MTU */
        if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
                peer->if_mtu = mtu;
                _net("I/F MTU %u", mtu);
        }

        if (mtu == 0) {
                /* they didn't give us a size, estimate one */
                mtu = peer->if_mtu;
                if (mtu > 1500) {
                        mtu >>= 1;
                        if (mtu < 1500)
                                mtu = 1500;
                } else {
                        mtu -= 100;
                        if (mtu < peer->hdrsize)
                                mtu = peer->hdrsize + 4;
                }
        }

        if (mtu < peer->mtu) {
                spin_lock_bh(&peer->lock);
                peer->mtu = mtu;
                peer->maxdata = peer->mtu - peer->hdrsize;
                spin_unlock_bh(&peer->lock);
                _net("Net MTU %u (maxdata %u)",
                     peer->mtu, peer->maxdata);
        }
}
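
/* A worked example of the estimation heuristic above (illustrative): if
 * the report carried no next-hop MTU (ee_info == 0) and peer->if_mtu is
 * 9000, the estimate is halved to 4500; if peer->if_mtu is already 1500,
 * it steps down to 1400 instead; and it is never allowed to drop below
 * peer->hdrsize + 4.  The new value is only ever applied downwards, and
 * mtu/maxdata are updated together under peer->lock so that readers never
 * see a maxdata computed from a stale mtu.
 */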

/*
 * Handle an error received on the local endpoint.
 */
void rxrpc_error_report(struct sock *sk)
{
        struct sock_exterr_skb *serr;
        struct sockaddr_rxrpc srx;
        struct rxrpc_local *local = sk->sk_user_data;
        struct rxrpc_peer *peer;
        struct sk_buff *skb;

        _enter("%p{%d}", sk, local->debug_id);

        skb = sock_dequeue_err_skb(sk);
        if (!skb) {
                _leave("UDP socket errqueue empty");
                return;
        }
        rxrpc_new_skb(skb, rxrpc_skb_rx_received);
        serr = SKB_EXT_ERR(skb);
        if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
                _leave("UDP empty message");
                rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
                return;
        }

        rcu_read_lock();
        peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
        if (peer && !rxrpc_get_peer_maybe(peer))
                peer = NULL;
        if (!peer) {
                rcu_read_unlock();
                rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
                _leave(" [no peer]");
                return;
        }

        trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);

        if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
            serr->ee.ee_type == ICMP_DEST_UNREACH &&
            serr->ee.ee_code == ICMP_FRAG_NEEDED) {
                rxrpc_adjust_mtu(peer, serr);
                rcu_read_unlock();
                rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
                rxrpc_put_peer(peer);
                _leave(" [MTU update]");
                return;
        }

        rxrpc_store_error(peer, serr);
        rcu_read_unlock();
        rxrpc_free_skb(skb, rxrpc_skb_rx_freed);

        /* The ref we obtained is passed off to the work item */
        __rxrpc_queue_peer_error(peer);
        _leave("");
}
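
/* A note on reference flow in rxrpc_error_report() (descriptive, added for
 * clarity): the peer ref taken with rxrpc_get_peer_maybe() is dropped on
 * the MTU-update path, but on the store-error path it is deliberately not
 * dropped - it is handed to the error distributor work item via
 * __rxrpc_queue_peer_error(), which is why that path has no
 * rxrpc_put_peer() call.
 */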

/*
 * Map an error report to error codes on the peer record.
 */
static void rxrpc_store_error(struct rxrpc_peer *peer,
                              struct sock_exterr_skb *serr)
{
        struct sock_extended_err *ee;
        int err;

        _enter("");

        ee = &serr->ee;

        err = ee->ee_errno;

        switch (ee->ee_origin) {
        case SO_EE_ORIGIN_ICMP:
                switch (ee->ee_type) {
                case ICMP_DEST_UNREACH:
                        switch (ee->ee_code) {
                        case ICMP_NET_UNREACH:
                                _net("Rx Received ICMP Network Unreachable");
                                break;
                        case ICMP_HOST_UNREACH:
                                _net("Rx Received ICMP Host Unreachable");
                                break;
                        case ICMP_PORT_UNREACH:
                                _net("Rx Received ICMP Port Unreachable");
                                break;
                        case ICMP_NET_UNKNOWN:
                                _net("Rx Received ICMP Unknown Network");
                                break;
                        case ICMP_HOST_UNKNOWN:
                                _net("Rx Received ICMP Unknown Host");
                                break;
                        default:
                                _net("Rx Received ICMP DestUnreach code=%u",
                                     ee->ee_code);
                                break;
                        }
                        break;

                case ICMP_TIME_EXCEEDED:
                        _net("Rx Received ICMP TTL Exceeded");
                        break;

                default:
                        _proto("Rx Received ICMP error { type=%u code=%u }",
                               ee->ee_type, ee->ee_code);
                        break;
                }
                break;

        case SO_EE_ORIGIN_NONE:
        case SO_EE_ORIGIN_LOCAL:
                _proto("Rx Received local error { error=%d }", err);
                err += RXRPC_LOCAL_ERROR_OFFSET;
                break;

        case SO_EE_ORIGIN_ICMP6:
        default:
                _proto("Rx Received error report { orig=%u }", ee->ee_origin);
                break;
        }

        peer->error_report = err;
}
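
/* How the origin is encoded (descriptive, added for clarity): local errors
 * are biased by RXRPC_LOCAL_ERROR_OFFSET - assumed here to be larger than
 * any errno value - so that a single value in peer->error_report can carry
 * both the errno and its provenance.  The distributor below then decodes:
 *
 *      error_report <  OFFSET: RXRPC_CALL_NETWORK_ERROR, errno used as-is
 *      error_report >= OFFSET: RXRPC_CALL_LOCAL_ERROR, errno = value - OFFSET
 */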

/*
 * Distribute an error that occurred on a peer.
 */
void rxrpc_peer_error_distributor(struct work_struct *work)
{
        struct rxrpc_peer *peer =
                container_of(work, struct rxrpc_peer, error_distributor);
        struct rxrpc_call *call;
        enum rxrpc_call_completion compl;
        int error;

        _enter("");

        error = READ_ONCE(peer->error_report);
        if (error < RXRPC_LOCAL_ERROR_OFFSET) {
                compl = RXRPC_CALL_NETWORK_ERROR;
        } else {
                compl = RXRPC_CALL_LOCAL_ERROR;
                error -= RXRPC_LOCAL_ERROR_OFFSET;
        }

        _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error);

        spin_lock_bh(&peer->lock);

        while (!hlist_empty(&peer->error_targets)) {
                call = hlist_entry(peer->error_targets.first,
                                   struct rxrpc_call, error_link);
                hlist_del_init(&call->error_link);
                rxrpc_see_call(call);

                if (rxrpc_set_call_completion(call, compl, 0, -error))
                        rxrpc_notify_socket(call);
        }

        spin_unlock_bh(&peer->lock);

        rxrpc_put_peer(peer);
        _leave("");
}
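
/* Note (descriptive, added for clarity): the errno is passed to
 * rxrpc_set_call_completion() negated, matching the kernel convention of
 * recording errors as negative values on the call; and the
 * rxrpc_put_peer() here releases the ref that rxrpc_error_report() handed
 * over when it queued this work item.
 */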

/*
 * Add RTT information to cache.  This is called in softirq mode and has
 * exclusive access to the peer RTT data.
 */
void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
                        rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
                        ktime_t send_time, ktime_t resp_time)
{
        struct rxrpc_peer *peer = call->peer;
        s64 rtt;
        u64 sum = peer->rtt_sum, avg;
        u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage;

        rtt = ktime_to_ns(ktime_sub(resp_time, send_time));
        if (rtt < 0)
                return;

        /* Replace the oldest datum in the RTT buffer */
        sum -= peer->rtt_cache[cursor];
        sum += rtt;
        peer->rtt_cache[cursor] = rtt;
        peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1);
        peer->rtt_sum = sum;
        if (usage < RXRPC_RTT_CACHE_SIZE) {
                usage++;
                peer->rtt_usage = usage;
        }

        /* Now recalculate the average */
        if (usage == RXRPC_RTT_CACHE_SIZE) {
                avg = sum / RXRPC_RTT_CACHE_SIZE;
        } else {
                avg = sum;
                do_div(avg, usage);
        }

        peer->rtt = avg;
        trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
                           usage, avg);
}
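
/* The RTT cache is a fixed-size ring with an incrementally maintained sum:
 * each new sample evicts the oldest one, so the running average costs O(1)
 * per update.  Note that the cursor wrap uses "& (RXRPC_RTT_CACHE_SIZE - 1)",
 * which assumes the cache size is a power of two.  An illustrative trace
 * with a hypothetical size of 4 and samples of 10, 20, 30, 40, 50 ns:
 *
 *      after 10, 20, 30:       sum = 60,  usage = 3, avg = 20
 *      after 40:               sum = 100, usage = 4, avg = 25
 *      after 50 (evicts 10):   sum = 140, usage = 4, avg = 35
 */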

/*
 * Perform keep-alive pings with VERSION packets to keep any NAT alive.
 */
void rxrpc_peer_keepalive_worker(struct work_struct *work)
{
        struct rxrpc_net *rxnet =
                container_of(work, struct rxrpc_net, peer_keepalive_work);
        struct rxrpc_peer *peer;
        unsigned long delay;
        ktime_t base, now = ktime_get_real();
        s64 diff;
        u8 cursor, slot;

        base = rxnet->peer_keepalive_base;
        cursor = rxnet->peer_keepalive_cursor;

        _enter("%u,%lld", cursor, ktime_sub(now, base));

next_bucket:
        diff = ktime_to_ns(ktime_sub(now, base));
        if (diff < 0)
                goto resched;

        _debug("at %u", cursor);
        spin_lock_bh(&rxnet->peer_hash_lock);
next_peer:
        if (!rxnet->live) {
                spin_unlock_bh(&rxnet->peer_hash_lock);
                goto out;
        }

        /* Everything in the bucket at the cursor is processed this second; the
         * bucket at cursor + 1 goes now + 1s and so on...
         */
        if (hlist_empty(&rxnet->peer_keepalive[cursor])) {
                if (hlist_empty(&rxnet->peer_keepalive_new)) {
                        spin_unlock_bh(&rxnet->peer_hash_lock);
                        goto emptied_bucket;
                }

                hlist_move_list(&rxnet->peer_keepalive_new,
                                &rxnet->peer_keepalive[cursor]);
        }

        peer = hlist_entry(rxnet->peer_keepalive[cursor].first,
                           struct rxrpc_peer, keepalive_link);
        hlist_del_init(&peer->keepalive_link);
        if (!rxrpc_get_peer_maybe(peer))
                goto next_peer;

        spin_unlock_bh(&rxnet->peer_hash_lock);

        _debug("peer %u {%pISp}", peer->debug_id, &peer->srx.transport);

recalc:
        diff = ktime_divns(ktime_sub(peer->last_tx_at, base), NSEC_PER_SEC);
        if (diff < -30 || diff > 30)
                goto send; /* LSW of 64-bit time probably wrapped on 32-bit */
        diff += RXRPC_KEEPALIVE_TIME - 1;
        if (diff < 0)
                goto send;

        slot = (diff > RXRPC_KEEPALIVE_TIME - 1) ? RXRPC_KEEPALIVE_TIME - 1 : diff;
        if (slot == 0)
                goto send;

        /* A transmission to this peer occurred since last we examined it so
         * put it into the appropriate future bucket.
         */
        slot = (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive);
        spin_lock_bh(&rxnet->peer_hash_lock);
        hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive[slot]);
        rxrpc_put_peer(peer);
        goto next_peer;

send:
        rxrpc_send_keepalive(peer);
        now = ktime_get_real();
        goto recalc;

emptied_bucket:
        cursor++;
        if (cursor >= ARRAY_SIZE(rxnet->peer_keepalive))
                cursor = 0;
        base = ktime_add_ns(base, NSEC_PER_SEC);
        goto next_bucket;

resched:
        rxnet->peer_keepalive_base = base;
        rxnet->peer_keepalive_cursor = cursor;
        delay = nsecs_to_jiffies(-diff) + 1;
        timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
out:
        _leave("");
}
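
/* Overview of the keepalive scheme above (descriptive, added for clarity):
 * the peer_keepalive[] array is a simple timing wheel with one bucket per
 * second of the keepalive interval, plus peer_keepalive_new for peers not
 * yet scheduled.  Each pass drains the bucket under the cursor: a peer
 * that transmitted recently is re-hashed into the bucket "slot" seconds
 * ahead via (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive); one that
 * has gone quiet is sent a VERSION ping and then re-bucketed from its
 * fresh last_tx_at.  When a bucket empties, both the cursor and the base
 * time advance one second; once the worker gets ahead of real time
 * (diff < 0), it saves its position and rearms the timer to fire in
 * roughly -diff nanoseconds.
 */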