linux/drivers/staging/lustre/lustre/ptlrpc/pinger.c
<<
>>
Prefs
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see
  18 * http://www.gnu.org/licenses/gpl-2.0.html
  19 *
  20 * GPL HEADER END
  21 */
  22/*
  23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24 * Use is subject to license terms.
  25 *
  26 * Copyright (c) 2011, 2015, Intel Corporation.
  27 */
  28/*
  29 * This file is part of Lustre, http://www.lustre.org/
  30 * Lustre is a trademark of Sun Microsystems, Inc.
  31 *
  32 * lustre/ptlrpc/pinger.c
  33 *
  34 * Portal-RPC reconnection and replay operations, for use in recovery.
  35 */
  36
  37#define DEBUG_SUBSYSTEM S_RPC
  38
  39#include <obd_support.h>
  40#include <obd_class.h>
  41#include "ptlrpc_internal.h"
  42
  43struct mutex pinger_mutex;
  44static LIST_HEAD(pinger_imports);
  45static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list);
  46
  47struct ptlrpc_request *
  48ptlrpc_prep_ping(struct obd_import *imp)
  49{
  50        struct ptlrpc_request *req;
  51
  52        req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING,
  53                                        LUSTRE_OBD_VERSION, OBD_PING);
  54        if (req) {
  55                ptlrpc_request_set_replen(req);
  56                req->rq_no_resend = 1;
  57                req->rq_no_delay = 1;
  58        }
  59        return req;
  60}
  61
  62int ptlrpc_obd_ping(struct obd_device *obd)
  63{
  64        int rc;
  65        struct ptlrpc_request *req;
  66
  67        req = ptlrpc_prep_ping(obd->u.cli.cl_import);
  68        if (!req)
  69                return -ENOMEM;
  70
  71        req->rq_send_state = LUSTRE_IMP_FULL;
  72
  73        rc = ptlrpc_queue_wait(req);
  74
  75        ptlrpc_req_finished(req);
  76
  77        return rc;
  78}
  79EXPORT_SYMBOL(ptlrpc_obd_ping);
  80
  81static int ptlrpc_ping(struct obd_import *imp)
  82{
  83        struct ptlrpc_request *req;
  84
  85        req = ptlrpc_prep_ping(imp);
  86        if (!req) {
  87                CERROR("OOM trying to ping %s->%s\n",
  88                       imp->imp_obd->obd_uuid.uuid,
  89                       obd2cli_tgt(imp->imp_obd));
  90                return -ENOMEM;
  91        }
  92
  93        DEBUG_REQ(D_INFO, req, "pinging %s->%s",
  94                  imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
  95        ptlrpcd_add_req(req);
  96
  97        return 0;
  98}
  99
 100static void ptlrpc_update_next_ping(struct obd_import *imp, int soon)
 101{
 102        int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL;
 103
 104        if (imp->imp_state == LUSTRE_IMP_DISCON) {
 105                int dtime = max_t(int, CONNECTION_SWITCH_MIN,
 106                                  AT_OFF ? 0 :
 107                                  at_get(&imp->imp_at.iat_net_latency));
 108                time = min(time, dtime);
 109        }
 110        imp->imp_next_ping = cfs_time_shift(time);
 111}
 112
 113static inline int imp_is_deactive(struct obd_import *imp)
 114{
 115        return (imp->imp_deactive ||
 116                OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE));
 117}
 118
 119static inline int ptlrpc_next_reconnect(struct obd_import *imp)
 120{
 121        if (imp->imp_server_timeout)
 122                return cfs_time_shift(obd_timeout / 2);
 123        else
 124                return cfs_time_shift(obd_timeout);
 125}
 126
 127static long pinger_check_timeout(unsigned long time)
 128{
 129        struct timeout_item *item;
 130        unsigned long timeout = PING_INTERVAL;
 131
 132        /* The timeout list is a increase order sorted list */
 133        mutex_lock(&pinger_mutex);
 134        list_for_each_entry(item, &timeout_list, ti_chain) {
 135                int ti_timeout = item->ti_timeout;
 136
 137                if (timeout > ti_timeout)
 138                        timeout = ti_timeout;
 139                break;
 140        }
 141        mutex_unlock(&pinger_mutex);
 142
 143        return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)),
 144                                         cfs_time_current());
 145}
 146
 147static bool ir_up;
 148
 149void ptlrpc_pinger_ir_up(void)
 150{
 151        CDEBUG(D_HA, "IR up\n");
 152        ir_up = true;
 153}
 154EXPORT_SYMBOL(ptlrpc_pinger_ir_up);
 155
 156void ptlrpc_pinger_ir_down(void)
 157{
 158        CDEBUG(D_HA, "IR down\n");
 159        ir_up = false;
 160}
 161EXPORT_SYMBOL(ptlrpc_pinger_ir_down);
 162
 163static void ptlrpc_pinger_process_import(struct obd_import *imp,
 164                                         unsigned long this_ping)
 165{
 166        int level;
 167        int force;
 168        int force_next;
 169        int suppress;
 170
 171        spin_lock(&imp->imp_lock);
 172
 173        level = imp->imp_state;
 174        force = imp->imp_force_verify;
 175        force_next = imp->imp_force_next_verify;
 176        /*
 177         * This will be used below only if the import is "FULL".
 178         */
 179        suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS);
 180
 181        imp->imp_force_verify = 0;
 182
 183        if (cfs_time_aftereq(imp->imp_next_ping - 5 * CFS_TICK, this_ping) &&
 184            !force) {
 185                spin_unlock(&imp->imp_lock);
 186                return;
 187        }
 188
 189        imp->imp_force_next_verify = 0;
 190
 191        spin_unlock(&imp->imp_lock);
 192
 193        CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n",
 194               imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
 195               ptlrpc_import_state_name(level), level, force, force_next,
 196               imp->imp_deactive, imp->imp_pingable, suppress);
 197
 198        if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) {
 199                /* wait for a while before trying recovery again */
 200                imp->imp_next_ping = ptlrpc_next_reconnect(imp);
 201                if (!imp->imp_no_pinger_recover)
 202                        ptlrpc_initiate_recovery(imp);
 203        } else if (level != LUSTRE_IMP_FULL ||
 204                   imp->imp_obd->obd_no_recov ||
 205                   imp_is_deactive(imp)) {
 206                CDEBUG(D_HA, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n",
 207                       imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
 208                       ptlrpc_import_state_name(level));
 209                if (force) {
 210                        spin_lock(&imp->imp_lock);
 211                        imp->imp_force_verify = 1;
 212                        spin_unlock(&imp->imp_lock);
 213                }
 214        } else if ((imp->imp_pingable && !suppress) || force_next || force) {
 215                ptlrpc_ping(imp);
 216        }
 217}
 218
 219static int ptlrpc_pinger_main(void *arg)
 220{
 221        struct ptlrpc_thread *thread = arg;
 222
 223        /* Record that the thread is running */
 224        thread_set_flags(thread, SVC_RUNNING);
 225        wake_up(&thread->t_ctl_waitq);
 226
 227        /* And now, loop forever, pinging as needed. */
 228        while (1) {
 229                unsigned long this_ping = cfs_time_current();
 230                struct l_wait_info lwi;
 231                long time_to_next_wake;
 232                struct timeout_item *item;
 233                struct list_head *iter;
 234
 235                mutex_lock(&pinger_mutex);
 236                list_for_each_entry(item, &timeout_list, ti_chain) {
 237                        item->ti_cb(item, item->ti_cb_data);
 238                }
 239                list_for_each(iter, &pinger_imports) {
 240                        struct obd_import *imp =
 241                                list_entry(iter, struct obd_import,
 242                                           imp_pinger_chain);
 243
 244                        ptlrpc_pinger_process_import(imp, this_ping);
 245                        /* obd_timeout might have changed */
 246                        if (imp->imp_pingable && imp->imp_next_ping &&
 247                            cfs_time_after(imp->imp_next_ping,
 248                                           cfs_time_add(this_ping,
 249                                                        cfs_time_seconds(PING_INTERVAL))))
 250                                ptlrpc_update_next_ping(imp, 0);
 251                }
 252                mutex_unlock(&pinger_mutex);
 253
 254                /* Wait until the next ping time, or until we're stopped. */
 255                time_to_next_wake = pinger_check_timeout(this_ping);
 256                /* The ping sent by ptlrpc_send_rpc may get sent out
 257                 * say .01 second after this.
 258                 * ptlrpc_pinger_sending_on_import will then set the
 259                 * next ping time to next_ping + .01 sec, which means
 260                 * we will SKIP the next ping at next_ping, and the
 261                 * ping will get sent 2 timeouts from now!  Beware.
 262                 */
 263                CDEBUG(D_INFO, "next wakeup in " CFS_DURATION_T " (%ld)\n",
 264                       time_to_next_wake,
 265                       cfs_time_add(this_ping,
 266                                    cfs_time_seconds(PING_INTERVAL)));
 267                if (time_to_next_wake > 0) {
 268                        lwi = LWI_TIMEOUT(max_t(long, time_to_next_wake,
 269                                                cfs_time_seconds(1)),
 270                                          NULL, NULL);
 271                        l_wait_event(thread->t_ctl_waitq,
 272                                     thread_is_stopping(thread) ||
 273                                     thread_is_event(thread),
 274                                     &lwi);
 275                        if (thread_test_and_clear_flags(thread, SVC_STOPPING))
 276                                break;
 277                        /* woken after adding import to reset timer */
 278                        thread_test_and_clear_flags(thread, SVC_EVENT);
 279                }
 280        }
 281
 282        thread_set_flags(thread, SVC_STOPPED);
 283        wake_up(&thread->t_ctl_waitq);
 284
 285        CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid());
 286        return 0;
 287}
 288
 289static struct ptlrpc_thread pinger_thread;
 290
 291int ptlrpc_start_pinger(void)
 292{
 293        struct l_wait_info lwi = { 0 };
 294        struct task_struct *task;
 295        int rc;
 296
 297        if (!thread_is_init(&pinger_thread) &&
 298            !thread_is_stopped(&pinger_thread))
 299                return -EALREADY;
 300
 301        init_waitqueue_head(&pinger_thread.t_ctl_waitq);
 302
 303        strcpy(pinger_thread.t_name, "ll_ping");
 304
 305        task = kthread_run(ptlrpc_pinger_main, &pinger_thread,
 306                           pinger_thread.t_name);
 307        if (IS_ERR(task)) {
 308                rc = PTR_ERR(task);
 309                CERROR("cannot start pinger thread: rc = %d\n", rc);
 310                return rc;
 311        }
 312        l_wait_event(pinger_thread.t_ctl_waitq,
 313                     thread_is_running(&pinger_thread), &lwi);
 314
 315        return 0;
 316}
 317
 318static int ptlrpc_pinger_remove_timeouts(void);
 319
 320int ptlrpc_stop_pinger(void)
 321{
 322        struct l_wait_info lwi = { 0 };
 323        int rc = 0;
 324
 325        if (thread_is_init(&pinger_thread) ||
 326            thread_is_stopped(&pinger_thread))
 327                return -EALREADY;
 328
 329        ptlrpc_pinger_remove_timeouts();
 330        thread_set_flags(&pinger_thread, SVC_STOPPING);
 331        wake_up(&pinger_thread.t_ctl_waitq);
 332
 333        l_wait_event(pinger_thread.t_ctl_waitq,
 334                     thread_is_stopped(&pinger_thread), &lwi);
 335
 336        return rc;
 337}
 338
 339void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 340{
 341        ptlrpc_update_next_ping(imp, 0);
 342}
 343
 344void ptlrpc_pinger_commit_expected(struct obd_import *imp)
 345{
 346        ptlrpc_update_next_ping(imp, 1);
 347        assert_spin_locked(&imp->imp_lock);
 348        /*
 349         * Avoid reading stale imp_connect_data.  When not sure if pings are
 350         * expected or not on next connection, we assume they are not and force
 351         * one anyway to guarantee the chance of updating
 352         * imp_peer_committed_transno.
 353         */
 354        if (imp->imp_state != LUSTRE_IMP_FULL ||
 355            OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS))
 356                imp->imp_force_next_verify = 1;
 357}
 358
 359int ptlrpc_pinger_add_import(struct obd_import *imp)
 360{
 361        if (!list_empty(&imp->imp_pinger_chain))
 362                return -EALREADY;
 363
 364        mutex_lock(&pinger_mutex);
 365        CDEBUG(D_HA, "adding pingable import %s->%s\n",
 366               imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
 367        /* if we add to pinger we want recovery on this import */
 368        imp->imp_obd->obd_no_recov = 0;
 369        ptlrpc_update_next_ping(imp, 0);
 370        /* XXX sort, blah blah */
 371        list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
 372        class_import_get(imp);
 373
 374        ptlrpc_pinger_wake_up();
 375        mutex_unlock(&pinger_mutex);
 376
 377        return 0;
 378}
 379EXPORT_SYMBOL(ptlrpc_pinger_add_import);
 380
 381int ptlrpc_pinger_del_import(struct obd_import *imp)
 382{
 383        if (list_empty(&imp->imp_pinger_chain))
 384                return -ENOENT;
 385
 386        mutex_lock(&pinger_mutex);
 387        list_del_init(&imp->imp_pinger_chain);
 388        CDEBUG(D_HA, "removing pingable import %s->%s\n",
 389               imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
 390        /* if we remove from pinger we don't want recovery on this import */
 391        imp->imp_obd->obd_no_recov = 1;
 392        class_import_put(imp);
 393        mutex_unlock(&pinger_mutex);
 394        return 0;
 395}
 396EXPORT_SYMBOL(ptlrpc_pinger_del_import);
 397
 398/**
 399 * Register a timeout callback to the pinger list, and the callback will
 400 * be called when timeout happens.
 401 */
 402static struct timeout_item *ptlrpc_new_timeout(int time,
 403                                               enum timeout_event event,
 404                                               timeout_cb_t cb, void *data)
 405{
 406        struct timeout_item *ti;
 407
 408        ti = kzalloc(sizeof(*ti), GFP_NOFS);
 409        if (!ti)
 410                return NULL;
 411
 412        INIT_LIST_HEAD(&ti->ti_obd_list);
 413        INIT_LIST_HEAD(&ti->ti_chain);
 414        ti->ti_timeout = time;
 415        ti->ti_event = event;
 416        ti->ti_cb = cb;
 417        ti->ti_cb_data = data;
 418
 419        return ti;
 420}
 421
 422/**
 423 * Register timeout event on the pinger thread.
 424 * Note: the timeout list is an sorted list with increased timeout value.
 425 */
 426static struct timeout_item*
 427ptlrpc_pinger_register_timeout(int time, enum timeout_event event,
 428                               timeout_cb_t cb, void *data)
 429{
 430        struct timeout_item *item, *tmp;
 431
 432        LASSERT(mutex_is_locked(&pinger_mutex));
 433
 434        list_for_each_entry(item, &timeout_list, ti_chain)
 435                if (item->ti_event == event)
 436                        goto out;
 437
 438        item = ptlrpc_new_timeout(time, event, cb, data);
 439        if (item) {
 440                list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) {
 441                        if (tmp->ti_timeout < time) {
 442                                list_add(&item->ti_chain, &tmp->ti_chain);
 443                                goto out;
 444                        }
 445                }
 446                list_add(&item->ti_chain, &timeout_list);
 447        }
 448out:
 449        return item;
 450}
 451
 452/* Add a client_obd to the timeout event list, when timeout(@time)
 453 * happens, the callback(@cb) will be called.
 454 */
 455int ptlrpc_add_timeout_client(int time, enum timeout_event event,
 456                              timeout_cb_t cb, void *data,
 457                              struct list_head *obd_list)
 458{
 459        struct timeout_item *ti;
 460
 461        mutex_lock(&pinger_mutex);
 462        ti = ptlrpc_pinger_register_timeout(time, event, cb, data);
 463        if (!ti) {
 464                mutex_unlock(&pinger_mutex);
 465                return -EINVAL;
 466        }
 467        list_add(obd_list, &ti->ti_obd_list);
 468        mutex_unlock(&pinger_mutex);
 469        return 0;
 470}
 471EXPORT_SYMBOL(ptlrpc_add_timeout_client);
 472
 473int ptlrpc_del_timeout_client(struct list_head *obd_list,
 474                              enum timeout_event event)
 475{
 476        struct timeout_item *ti = NULL, *item;
 477
 478        if (list_empty(obd_list))
 479                return 0;
 480        mutex_lock(&pinger_mutex);
 481        list_del_init(obd_list);
 482        /**
 483         * If there are no obd attached to the timeout event
 484         * list, remove this timeout event from the pinger
 485         */
 486        list_for_each_entry(item, &timeout_list, ti_chain) {
 487                if (item->ti_event == event) {
 488                        ti = item;
 489                        break;
 490                }
 491        }
 492        if (list_empty(&ti->ti_obd_list)) {
 493                list_del(&ti->ti_chain);
 494                kfree(ti);
 495        }
 496        mutex_unlock(&pinger_mutex);
 497        return 0;
 498}
 499EXPORT_SYMBOL(ptlrpc_del_timeout_client);
 500
 501static int ptlrpc_pinger_remove_timeouts(void)
 502{
 503        struct timeout_item *item, *tmp;
 504
 505        mutex_lock(&pinger_mutex);
 506        list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) {
 507                LASSERT(list_empty(&item->ti_obd_list));
 508                list_del(&item->ti_chain);
 509                kfree(item);
 510        }
 511        mutex_unlock(&pinger_mutex);
 512        return 0;
 513}
 514
 515void ptlrpc_pinger_wake_up(void)
 516{
 517        thread_add_flags(&pinger_thread, SVC_EVENT);
 518        wake_up(&pinger_thread.t_ctl_waitq);
 519}
 520