linux/fs/afs/fs_probe.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/* AFS fileserver probing
   3 *
   4 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
   5 * Written by David Howells (dhowells@redhat.com)
   6 */
   7
   8#include <linux/sched.h>
   9#include <linux/slab.h>
  10#include "afs_fs.h"
  11#include "internal.h"
  12#include "protocol_afs.h"
  13#include "protocol_yfs.h"
  14
  15static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
  16static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
  17
  18/*
  19 * Start the probe polling timer.  We have to supply it with an inc on the
  20 * outstanding server count.
  21 */
  22static void afs_schedule_fs_probe(struct afs_net *net,
  23                                  struct afs_server *server, bool fast)
  24{
  25        unsigned long atj;
  26
  27        if (!net->live)
  28                return;
  29
  30        atj = server->probed_at;
  31        atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
  32
  33        afs_inc_servers_outstanding(net);
  34        if (timer_reduce(&net->fs_probe_timer, atj))
  35                afs_dec_servers_outstanding(net);
  36}
  37
  38/*
  39 * Handle the completion of a set of probes.
  40 */
  41static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
  42{
  43        bool responded = server->probe.responded;
  44
  45        write_seqlock(&net->fs_lock);
  46        if (responded) {
  47                list_add_tail(&server->probe_link, &net->fs_probe_slow);
  48        } else {
  49                server->rtt = UINT_MAX;
  50                clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
  51                list_add_tail(&server->probe_link, &net->fs_probe_fast);
  52        }
  53        write_sequnlock(&net->fs_lock);
  54
  55        afs_schedule_fs_probe(net, server, !responded);
  56}
  57
  58/*
  59 * Handle the completion of a probe.
  60 */
  61static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
  62{
  63        _enter("");
  64
  65        if (atomic_dec_and_test(&server->probe_outstanding))
  66                afs_finished_fs_probe(net, server);
  67
  68        wake_up_all(&server->probe_wq);
  69}
  70
  71/*
  72 * Handle inability to send a probe due to ENOMEM when trying to allocate a
  73 * call struct.
  74 */
  75static void afs_fs_probe_not_done(struct afs_net *net,
  76                                  struct afs_server *server,
  77                                  struct afs_addr_cursor *ac)
  78{
  79        struct afs_addr_list *alist = ac->alist;
  80        unsigned int index = ac->index;
  81
  82        _enter("");
  83
  84        trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
  85        spin_lock(&server->probe_lock);
  86
  87        server->probe.local_failure = true;
  88        if (server->probe.error == 0)
  89                server->probe.error = -ENOMEM;
  90
  91        set_bit(index, &alist->failed);
  92
  93        spin_unlock(&server->probe_lock);
  94        return afs_done_one_fs_probe(net, server);
  95}
  96
  97/*
  98 * Process the result of probing a fileserver.  This is called after successful
  99 * or failed delivery of an FS.GetCapabilities operation.
 100 */
 101void afs_fileserver_probe_result(struct afs_call *call)
 102{
 103        struct afs_addr_list *alist = call->alist;
 104        struct afs_server *server = call->server;
 105        unsigned int index = call->addr_ix;
 106        unsigned int rtt_us = 0, cap0;
 107        int ret = call->error;
 108
 109        _enter("%pU,%u", &server->uuid, index);
 110
 111        spin_lock(&server->probe_lock);
 112
 113        switch (ret) {
 114        case 0:
 115                server->probe.error = 0;
 116                goto responded;
 117        case -ECONNABORTED:
 118                if (!server->probe.responded) {
 119                        server->probe.abort_code = call->abort_code;
 120                        server->probe.error = ret;
 121                }
 122                goto responded;
 123        case -ENOMEM:
 124        case -ENONET:
 125                clear_bit(index, &alist->responded);
 126                server->probe.local_failure = true;
 127                trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
 128                goto out;
 129        case -ECONNRESET: /* Responded, but call expired. */
 130        case -ERFKILL:
 131        case -EADDRNOTAVAIL:
 132        case -ENETUNREACH:
 133        case -EHOSTUNREACH:
 134        case -EHOSTDOWN:
 135        case -ECONNREFUSED:
 136        case -ETIMEDOUT:
 137        case -ETIME:
 138        default:
 139                clear_bit(index, &alist->responded);
 140                set_bit(index, &alist->failed);
 141                if (!server->probe.responded &&
 142                    (server->probe.error == 0 ||
 143                     server->probe.error == -ETIMEDOUT ||
 144                     server->probe.error == -ETIME))
 145                        server->probe.error = ret;
 146                trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
 147                goto out;
 148        }
 149
 150responded:
 151        clear_bit(index, &alist->failed);
 152
 153        if (call->service_id == YFS_FS_SERVICE) {
 154                server->probe.is_yfs = true;
 155                set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
 156                alist->addrs[index].srx_service = call->service_id;
 157        } else {
 158                server->probe.not_yfs = true;
 159                if (!server->probe.is_yfs) {
 160                        clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
 161                        alist->addrs[index].srx_service = call->service_id;
 162                }
 163                cap0 = ntohl(call->tmp);
 164                if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
 165                        set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
 166                else
 167                        clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
 168        }
 169
 170        if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
 171            rtt_us < server->probe.rtt) {
 172                server->probe.rtt = rtt_us;
 173                server->rtt = rtt_us;
 174                alist->preferred = index;
 175        }
 176
 177        smp_wmb(); /* Set rtt before responded. */
 178        server->probe.responded = true;
 179        set_bit(index, &alist->responded);
 180        set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
 181out:
 182        spin_unlock(&server->probe_lock);
 183
 184        _debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
 185               &server->uuid, index, &alist->addrs[index].transport,
 186               rtt_us, ret);
 187
 188        return afs_done_one_fs_probe(call->net, server);
 189}
 190
 191/*
 192 * Probe one or all of a fileserver's addresses to find out the best route and
 193 * to query its capabilities.
 194 */
 195void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
 196                             struct key *key, bool all)
 197{
 198        struct afs_addr_cursor ac = {
 199                .index = 0,
 200        };
 201
 202        _enter("%pU", &server->uuid);
 203
 204        read_lock(&server->fs_lock);
 205        ac.alist = rcu_dereference_protected(server->addresses,
 206                                             lockdep_is_held(&server->fs_lock));
 207        afs_get_addrlist(ac.alist);
 208        read_unlock(&server->fs_lock);
 209
 210        server->probed_at = jiffies;
 211        atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
 212        memset(&server->probe, 0, sizeof(server->probe));
 213        server->probe.rtt = UINT_MAX;
 214
 215        ac.index = ac.alist->preferred;
 216        if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
 217                all = true;
 218
 219        if (all) {
 220                for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
 221                        if (!afs_fs_get_capabilities(net, server, &ac, key))
 222                                afs_fs_probe_not_done(net, server, &ac);
 223        } else {
 224                if (!afs_fs_get_capabilities(net, server, &ac, key))
 225                        afs_fs_probe_not_done(net, server, &ac);
 226        }
 227
 228        afs_put_addrlist(ac.alist);
 229}
 230
 231/*
 232 * Wait for the first as-yet untried fileserver to respond.
 233 */
 234int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
 235{
 236        struct wait_queue_entry *waits;
 237        struct afs_server *server;
 238        unsigned int rtt = UINT_MAX, rtt_s;
 239        bool have_responders = false;
 240        int pref = -1, i;
 241
 242        _enter("%u,%lx", slist->nr_servers, untried);
 243
 244        /* Only wait for servers that have a probe outstanding. */
 245        for (i = 0; i < slist->nr_servers; i++) {
 246                if (test_bit(i, &untried)) {
 247                        server = slist->servers[i].server;
 248                        if (!atomic_read(&server->probe_outstanding))
 249                                __clear_bit(i, &untried);
 250                        if (server->probe.responded)
 251                                have_responders = true;
 252                }
 253        }
 254        if (have_responders || !untried)
 255                return 0;
 256
 257        waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
 258        if (!waits)
 259                return -ENOMEM;
 260
 261        for (i = 0; i < slist->nr_servers; i++) {
 262                if (test_bit(i, &untried)) {
 263                        server = slist->servers[i].server;
 264                        init_waitqueue_entry(&waits[i], current);
 265                        add_wait_queue(&server->probe_wq, &waits[i]);
 266                }
 267        }
 268
 269        for (;;) {
 270                bool still_probing = false;
 271
 272                set_current_state(TASK_INTERRUPTIBLE);
 273                for (i = 0; i < slist->nr_servers; i++) {
 274                        if (test_bit(i, &untried)) {
 275                                server = slist->servers[i].server;
 276                                if (server->probe.responded)
 277                                        goto stop;
 278                                if (atomic_read(&server->probe_outstanding))
 279                                        still_probing = true;
 280                        }
 281                }
 282
 283                if (!still_probing || signal_pending(current))
 284                        goto stop;
 285                schedule();
 286        }
 287
 288stop:
 289        set_current_state(TASK_RUNNING);
 290
 291        for (i = 0; i < slist->nr_servers; i++) {
 292                if (test_bit(i, &untried)) {
 293                        server = slist->servers[i].server;
 294                        rtt_s = READ_ONCE(server->rtt);
 295                        if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
 296                            rtt_s < rtt) {
 297                                pref = i;
 298                                rtt = rtt_s;
 299                        }
 300
 301                        remove_wait_queue(&server->probe_wq, &waits[i]);
 302                }
 303        }
 304
 305        kfree(waits);
 306
 307        if (pref == -1 && signal_pending(current))
 308                return -ERESTARTSYS;
 309
 310        if (pref >= 0)
 311                slist->preferred = pref;
 312        return 0;
 313}
 314
 315/*
 316 * Probe timer.  We have an increment on fs_outstanding that we need to pass
 317 * along to the work item.
 318 */
 319void afs_fs_probe_timer(struct timer_list *timer)
 320{
 321        struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
 322
 323        if (!net->live || !queue_work(afs_wq, &net->fs_prober))
 324                afs_dec_servers_outstanding(net);
 325}
 326
 327/*
 328 * Dispatch a probe to a server.
 329 */
 330static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
 331        __releases(&net->fs_lock)
 332{
 333        struct key *key = NULL;
 334
 335        /* We remove it from the queues here - it will be added back to
 336         * one of the queues on the completion of the probe.
 337         */
 338        list_del_init(&server->probe_link);
 339
 340        afs_get_server(server, afs_server_trace_get_probe);
 341        write_sequnlock(&net->fs_lock);
 342
 343        afs_fs_probe_fileserver(net, server, key, all);
 344        afs_put_server(net, server, afs_server_trace_put_probe);
 345}
 346
 347/*
 348 * Probe a server immediately without waiting for its due time to come
 349 * round.  This is used when all of the addresses have been tried.
 350 */
 351void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
 352{
 353        write_seqlock(&net->fs_lock);
 354        if (!list_empty(&server->probe_link))
 355                return afs_dispatch_fs_probe(net, server, true);
 356        write_sequnlock(&net->fs_lock);
 357}
 358
 359/*
 360 * Probe dispatcher to regularly dispatch probes to keep NAT alive.
 361 */
 362void afs_fs_probe_dispatcher(struct work_struct *work)
 363{
 364        struct afs_net *net = container_of(work, struct afs_net, fs_prober);
 365        struct afs_server *fast, *slow, *server;
 366        unsigned long nowj, timer_at, poll_at;
 367        bool first_pass = true, set_timer = false;
 368
 369        if (!net->live)
 370                return;
 371
 372        _enter("");
 373
 374        if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
 375                _leave(" [none]");
 376                return;
 377        }
 378
 379again:
 380        write_seqlock(&net->fs_lock);
 381
 382        fast = slow = server = NULL;
 383        nowj = jiffies;
 384        timer_at = nowj + MAX_JIFFY_OFFSET;
 385
 386        if (!list_empty(&net->fs_probe_fast)) {
 387                fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
 388                poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
 389                if (time_before(nowj, poll_at)) {
 390                        timer_at = poll_at;
 391                        set_timer = true;
 392                        fast = NULL;
 393                }
 394        }
 395
 396        if (!list_empty(&net->fs_probe_slow)) {
 397                slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
 398                poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
 399                if (time_before(nowj, poll_at)) {
 400                        if (time_before(poll_at, timer_at))
 401                            timer_at = poll_at;
 402                        set_timer = true;
 403                        slow = NULL;
 404                }
 405        }
 406
 407        server = fast ?: slow;
 408        if (server)
 409                _debug("probe %pU", &server->uuid);
 410
 411        if (server && (first_pass || !need_resched())) {
 412                afs_dispatch_fs_probe(net, server, server == fast);
 413                first_pass = false;
 414                goto again;
 415        }
 416
 417        write_sequnlock(&net->fs_lock);
 418
 419        if (server) {
 420                if (!queue_work(afs_wq, &net->fs_prober))
 421                        afs_dec_servers_outstanding(net);
 422                _leave(" [requeue]");
 423        } else if (set_timer) {
 424                if (timer_reduce(&net->fs_probe_timer, timer_at))
 425                        afs_dec_servers_outstanding(net);
 426                _leave(" [timer]");
 427        } else {
 428                afs_dec_servers_outstanding(net);
 429                _leave(" [quiesce]");
 430        }
 431}
 432
 433/*
 434 * Wait for a probe on a particular fileserver to complete for 2s.
 435 */
 436int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
 437{
 438        struct wait_queue_entry wait;
 439        unsigned long timo = 2 * HZ;
 440
 441        if (atomic_read(&server->probe_outstanding) == 0)
 442                goto dont_wait;
 443
 444        init_wait_entry(&wait, 0);
 445        for (;;) {
 446                prepare_to_wait_event(&server->probe_wq, &wait,
 447                                      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
 448                if (timo == 0 ||
 449                    server->probe.responded ||
 450                    atomic_read(&server->probe_outstanding) == 0 ||
 451                    (is_intr && signal_pending(current)))
 452                        break;
 453                timo = schedule_timeout(timo);
 454        }
 455
 456        finish_wait(&server->probe_wq, &wait);
 457
 458dont_wait:
 459        if (server->probe.responded)
 460                return 0;
 461        if (is_intr && signal_pending(current))
 462                return -ERESTARTSYS;
 463        if (timo == 0)
 464                return -ETIME;
 465        return -EDESTADDRREQ;
 466}
 467
 468/*
 469 * Clean up the probing when the namespace is killed off.
 470 */
 471void afs_fs_probe_cleanup(struct afs_net *net)
 472{
 473        if (del_timer_sync(&net->fs_probe_timer))
 474                afs_dec_servers_outstanding(net);
 475}
 476