/* linux/drivers/staging/lustre/lnet/libcfs/workitem.c */
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see
  18 * http://www.gnu.org/licenses/gpl-2.0.html
  19 *
  20 * GPL HEADER END
  21 */
  22/*
  23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  24 * Use is subject to license terms.
  25 *
  26 * Copyright (c) 2011, 2012, Intel Corporation.
  27 */
  28/*
  29 * This file is part of Lustre, http://www.lustre.org/
  30 * Lustre is a trademark of Sun Microsystems, Inc.
  31 *
  32 * libcfs/libcfs/workitem.c
  33 *
  34 * Author: Isaac Huang <isaac@clusterfs.com>
  35 *       Liang Zhen  <zhen.liang@sun.com>
  36 */
  37
  38#define DEBUG_SUBSYSTEM S_LNET
  39
  40#include "../../include/linux/libcfs/libcfs.h"
  41
  42#define CFS_WS_NAME_LEN  16
  43
struct cfs_wi_sched {
	/* chain on cfs_wi_data::wi_scheds (protected by wi_glock) */
	struct list_head		ws_list;
	/** serialises ws_runq/ws_rerunq and per-workitem state */
	spinlock_t			ws_lock;
	/** where scheduler threads sleep waiting for work */
	wait_queue_head_t		ws_waitq;
	/** workitems ready to run */
	struct list_head		ws_runq;
	/**
	 * rescheduled running-workitems: a workitem can be rescheduled
	 * while running in wi_action(), but we don't want to execute it
	 * again unless it returns from wi_action(), so we put it on
	 * ws_rerunq while rescheduling, and move it to runq after it
	 * returns from wi_action()
	 */
	struct list_head		ws_rerunq;
	/** CPT-table for this scheduler (NULL when not CPT-affine) */
	struct cfs_cpt_table		*ws_cptab;
	/** CPT id for affinity (may be CFS_CPT_ANY) */
	int				ws_cpt;
	/** number of scheduled workitems (protected by ws_lock) */
	int				ws_nscheduled;
	/** started scheduler thread, protected by cfs_wi_data::wi_glock */
	unsigned int			ws_nthreads:30;
	/** shutting down, protected by cfs_wi_data::wi_glock */
	unsigned int			ws_stopping:1;
	/** serialize starting thread, protected by cfs_wi_data::wi_glock */
	unsigned int			ws_starting:1;
	/** scheduler name, NUL-terminated */
	char				ws_name[CFS_WS_NAME_LEN];
};
  76
/* module-global state shared by all workitem schedulers */
static struct cfs_workitem_data {
	/** serialize access to wi_scheds and per-sched thread counters */
	spinlock_t		wi_glock;
	/** list of all schedulers */
	struct list_head	wi_scheds;
	/** WI module is initialized */
	int			wi_init;
	/** shutting down the whole WI module */
	int			wi_stopping;
} cfs_wi_data;
  87
  88static inline int
  89cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
  90{
  91        spin_lock(&sched->ws_lock);
  92        if (sched->ws_stopping) {
  93                spin_unlock(&sched->ws_lock);
  94                return 0;
  95        }
  96
  97        if (!list_empty(&sched->ws_runq)) {
  98                spin_unlock(&sched->ws_lock);
  99                return 0;
 100        }
 101        spin_unlock(&sched->ws_lock);
 102        return 1;
 103}
 104
/* XXX:
 * 0. it only works when called from wi->wi_action.
 * 1. when it returns no one shall try to schedule the workitem.
 */
void
cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
	LASSERT(!in_interrupt()); /* because we use plain spinlock */
	LASSERT(!sched->ws_stopping);

	spin_lock(&sched->ws_lock);

	LASSERT(wi->wi_running);
	if (wi->wi_scheduled) { /* cancel pending schedules */
		LASSERT(!list_empty(&wi->wi_list));
		list_del_init(&wi->wi_list);

		LASSERT(sched->ws_nscheduled > 0);
		sched->ws_nscheduled--;
	}

	LASSERT(list_empty(&wi->wi_list));

	/* poison: wi_scheduled set while wi_list is empty makes any
	 * later cfs_wi_schedule() trip its final !list_empty() LASSERT
	 */
	wi->wi_scheduled = 1; /* LBUG future schedule attempts */
	spin_unlock(&sched->ws_lock);
}
EXPORT_SYMBOL(cfs_wi_exit);
 132
 133/**
 134 * cancel schedule request of workitem \a wi
 135 */
 136int
 137cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
 138{
 139        int rc;
 140
 141        LASSERT(!in_interrupt()); /* because we use plain spinlock */
 142        LASSERT(!sched->ws_stopping);
 143
 144        /*
 145         * return 0 if it's running already, otherwise return 1, which
 146         * means the workitem will not be scheduled and will not have
 147         * any race with wi_action.
 148         */
 149        spin_lock(&sched->ws_lock);
 150
 151        rc = !(wi->wi_running);
 152
 153        if (wi->wi_scheduled) { /* cancel pending schedules */
 154                LASSERT(!list_empty(&wi->wi_list));
 155                list_del_init(&wi->wi_list);
 156
 157                LASSERT(sched->ws_nscheduled > 0);
 158                sched->ws_nscheduled--;
 159
 160                wi->wi_scheduled = 0;
 161        }
 162
 163        LASSERT(list_empty(&wi->wi_list));
 164
 165        spin_unlock(&sched->ws_lock);
 166        return rc;
 167}
 168EXPORT_SYMBOL(cfs_wi_deschedule);
 169
 170/*
 171 * Workitem scheduled with (serial == 1) is strictly serialised not only with
 172 * itself, but also with others scheduled this way.
 173 *
 174 * Now there's only one static serialised queue, but in the future more might
 175 * be added, and even dynamic creation of serialised queues might be supported.
 176 */
 177void
 178cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
 179{
 180        LASSERT(!in_interrupt()); /* because we use plain spinlock */
 181        LASSERT(!sched->ws_stopping);
 182
 183        spin_lock(&sched->ws_lock);
 184
 185        if (!wi->wi_scheduled) {
 186                LASSERT(list_empty(&wi->wi_list));
 187
 188                wi->wi_scheduled = 1;
 189                sched->ws_nscheduled++;
 190                if (!wi->wi_running) {
 191                        list_add_tail(&wi->wi_list, &sched->ws_runq);
 192                        wake_up(&sched->ws_waitq);
 193                } else {
 194                        list_add(&wi->wi_list, &sched->ws_rerunq);
 195                }
 196        }
 197
 198        LASSERT(!list_empty(&wi->wi_list));
 199        spin_unlock(&sched->ws_lock);
 200}
 201EXPORT_SYMBOL(cfs_wi_schedule);
 202
/* scheduler thread body: repeatedly pop workitems off ws_runq and run
 * their wi_action() until the scheduler is stopped
 */
static int cfs_wi_scheduler(void *arg)
{
	struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;

	cfs_block_allsigs();

	/* CPT affinity scheduler? */
	if (sched->ws_cptab)
		if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt))
			CWARN("Unable to bind %s on CPU partition %d\n",
			      sched->ws_name, sched->ws_cpt);

	/* announce startup: ws_starting serialises thread creation
	 * with cfs_wi_sched_create()
	 */
	spin_lock(&cfs_wi_data.wi_glock);

	LASSERT(sched->ws_starting == 1);
	sched->ws_starting--;
	sched->ws_nthreads++;

	spin_unlock(&cfs_wi_data.wi_glock);

	spin_lock(&sched->ws_lock);

	while (!sched->ws_stopping) {
		int nloops = 0;
		int rc;
		struct cfs_workitem *wi;

		/* run at most CFS_WI_RESCHED items before yielding */
		while (!list_empty(&sched->ws_runq) &&
		       nloops < CFS_WI_RESCHED) {
			wi = list_entry(sched->ws_runq.next,
					struct cfs_workitem, wi_list);
			LASSERT(wi->wi_scheduled && !wi->wi_running);

			list_del_init(&wi->wi_list);

			LASSERT(sched->ws_nscheduled > 0);
			sched->ws_nscheduled--;

			wi->wi_running = 1;
			wi->wi_scheduled = 0;

			/* drop ws_lock while running the action; wi may
			 * be rescheduled (onto ws_rerunq) meanwhile
			 */
			spin_unlock(&sched->ws_lock);
			nloops++;

			rc = (*wi->wi_action)(wi);

			spin_lock(&sched->ws_lock);
			if (rc) /* WI should be dead, even be freed! */
				continue;

			wi->wi_running = 0;
			if (list_empty(&wi->wi_list))
				continue;

			LASSERT(wi->wi_scheduled);
			/* wi is rescheduled, should be on rerunq now, we
			 * move it to runq so it can run action now
			 */
			list_move_tail(&wi->wi_list, &sched->ws_runq);
		}

		if (!list_empty(&sched->ws_runq)) {
			spin_unlock(&sched->ws_lock);
			/* don't sleep because some workitems still
			 * expect me to come back soon
			 */
			cond_resched();
			spin_lock(&sched->ws_lock);
			continue;
		}

		/* runq drained: sleep until work arrives or we stop */
		spin_unlock(&sched->ws_lock);
		rc = wait_event_interruptible_exclusive(sched->ws_waitq,
							!cfs_wi_sched_cansleep(sched));
		spin_lock(&sched->ws_lock);
	}

	spin_unlock(&sched->ws_lock);

	/* let cfs_wi_sched_destroy()/cfs_wi_shutdown() see us exit */
	spin_lock(&cfs_wi_data.wi_glock);
	sched->ws_nthreads--;
	spin_unlock(&cfs_wi_data.wi_glock);

	return 0;
}
 288
/* stop all threads of scheduler \a sched, unlink it from the global
 * list and free it; safe against concurrent destroy attempts
 */
void
cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
	int i;

	LASSERT(cfs_wi_data.wi_init);
	LASSERT(!cfs_wi_data.wi_stopping);

	spin_lock(&cfs_wi_data.wi_glock);
	if (sched->ws_stopping) {
		/* someone else already started the teardown */
		CDEBUG(D_INFO, "%s is in progress of stopping\n",
		       sched->ws_name);
		spin_unlock(&cfs_wi_data.wi_glock);
		return;
	}

	LASSERT(!list_empty(&sched->ws_list));
	sched->ws_stopping = 1;

	spin_unlock(&cfs_wi_data.wi_glock);

	i = 2;
	wake_up_all(&sched->ws_waitq);

	/* poll (50ms naps) until every scheduler thread has exited;
	 * escalate log level at power-of-2 iterations
	 */
	spin_lock(&cfs_wi_data.wi_glock);
	while (sched->ws_nthreads > 0) {
		CDEBUG(is_power_of_2(++i) ? D_WARNING : D_NET,
		       "waiting for %d threads of WI sched[%s] to terminate\n",
		       sched->ws_nthreads, sched->ws_name);

		spin_unlock(&cfs_wi_data.wi_glock);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1) / 20);
		spin_lock(&cfs_wi_data.wi_glock);
	}

	list_del(&sched->ws_list);

	spin_unlock(&cfs_wi_data.wi_glock);
	LASSERT(!sched->ws_nscheduled);

	LIBCFS_FREE(sched, sizeof(*sched));
}
EXPORT_SYMBOL(cfs_wi_sched_destroy);
 333
 334int
 335cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
 336                    int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
 337{
 338        struct cfs_wi_sched *sched;
 339        int rc;
 340
 341        LASSERT(cfs_wi_data.wi_init);
 342        LASSERT(!cfs_wi_data.wi_stopping);
 343        LASSERT(!cptab || cpt == CFS_CPT_ANY ||
 344                (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
 345
 346        LIBCFS_ALLOC(sched, sizeof(*sched));
 347        if (!sched)
 348                return -ENOMEM;
 349
 350        if (strlen(name) > sizeof(sched->ws_name) - 1) {
 351                LIBCFS_FREE(sched, sizeof(*sched));
 352                return -E2BIG;
 353        }
 354        strncpy(sched->ws_name, name, sizeof(sched->ws_name));
 355
 356        sched->ws_cptab = cptab;
 357        sched->ws_cpt = cpt;
 358
 359        spin_lock_init(&sched->ws_lock);
 360        init_waitqueue_head(&sched->ws_waitq);
 361        INIT_LIST_HEAD(&sched->ws_runq);
 362        INIT_LIST_HEAD(&sched->ws_rerunq);
 363        INIT_LIST_HEAD(&sched->ws_list);
 364
 365        rc = 0;
 366        while (nthrs > 0)  {
 367                char name[16];
 368                struct task_struct *task;
 369
 370                spin_lock(&cfs_wi_data.wi_glock);
 371                while (sched->ws_starting > 0) {
 372                        spin_unlock(&cfs_wi_data.wi_glock);
 373                        schedule();
 374                        spin_lock(&cfs_wi_data.wi_glock);
 375                }
 376
 377                sched->ws_starting++;
 378                spin_unlock(&cfs_wi_data.wi_glock);
 379
 380                if (sched->ws_cptab && sched->ws_cpt >= 0) {
 381                        snprintf(name, sizeof(name), "%s_%02d_%02u",
 382                                 sched->ws_name, sched->ws_cpt,
 383                                 sched->ws_nthreads);
 384                } else {
 385                        snprintf(name, sizeof(name), "%s_%02u",
 386                                 sched->ws_name, sched->ws_nthreads);
 387                }
 388
 389                task = kthread_run(cfs_wi_scheduler, sched, "%s", name);
 390                if (!IS_ERR(task)) {
 391                        nthrs--;
 392                        continue;
 393                }
 394                rc = PTR_ERR(task);
 395
 396                CERROR("Failed to create thread for WI scheduler %s: %d\n",
 397                       name, rc);
 398
 399                spin_lock(&cfs_wi_data.wi_glock);
 400
 401                /* make up for cfs_wi_sched_destroy */
 402                list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
 403                sched->ws_starting--;
 404
 405                spin_unlock(&cfs_wi_data.wi_glock);
 406
 407                cfs_wi_sched_destroy(sched);
 408                return rc;
 409        }
 410        spin_lock(&cfs_wi_data.wi_glock);
 411        list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
 412        spin_unlock(&cfs_wi_data.wi_glock);
 413
 414        *sched_pp = sched;
 415        return 0;
 416}
 417EXPORT_SYMBOL(cfs_wi_sched_create);
 418
 419int
 420cfs_wi_startup(void)
 421{
 422        memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
 423
 424        spin_lock_init(&cfs_wi_data.wi_glock);
 425        INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
 426        cfs_wi_data.wi_init = 1;
 427
 428        return 0;
 429}
 430
/* stop every scheduler, wait for all their threads to exit, then free
 * them; called once at module unload, after which no scheduler exists
 */
void
cfs_wi_shutdown(void)
{
	struct cfs_wi_sched *sched;
	struct cfs_wi_sched *temp;

	spin_lock(&cfs_wi_data.wi_glock);
	cfs_wi_data.wi_stopping = 1;
	spin_unlock(&cfs_wi_data.wi_glock);

	/* nobody should contend on this list */
	list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
		sched->ws_stopping = 1;
		wake_up_all(&sched->ws_waitq);
	}

	/* poll (50ms naps) until each scheduler's threads have exited */
	list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
		spin_lock(&cfs_wi_data.wi_glock);

		while (sched->ws_nthreads) {
			spin_unlock(&cfs_wi_data.wi_glock);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(cfs_time_seconds(1) / 20);
			spin_lock(&cfs_wi_data.wi_glock);
		}
		spin_unlock(&cfs_wi_data.wi_glock);
	}
	/* _safe variant: entries are freed while walking the list */
	list_for_each_entry_safe(sched, temp, &cfs_wi_data.wi_scheds, ws_list) {
		list_del(&sched->ws_list);
		LIBCFS_FREE(sched, sizeof(*sched));
	}

	cfs_wi_data.wi_stopping = 0;
	cfs_wi_data.wi_init = 0;
}
 466