linux/drivers/staging/lustre/lustre/libcfs/workitem.c
<<
>>
Prefs
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see
  18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  19 *
  20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  21 * CA 95054 USA or visit www.sun.com if you need additional information or
  22 * have any questions.
  23 *
  24 * GPL HEADER END
  25 */
  26/*
  27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  28 * Use is subject to license terms.
  29 *
  30 * Copyright (c) 2011, 2012, Intel Corporation.
  31 */
  32/*
  33 * This file is part of Lustre, http://www.lustre.org/
  34 * Lustre is a trademark of Sun Microsystems, Inc.
  35 *
  36 * libcfs/libcfs/workitem.c
  37 *
  38 * Author: Isaac Huang <isaac@clusterfs.com>
  39 *       Liang Zhen  <zhen.liang@sun.com>
  40 */
  41
  42#define DEBUG_SUBSYSTEM S_LNET
  43
  44#include "../../include/linux/libcfs/libcfs.h"
  45
  46#define CFS_WS_NAME_LEN  16
  47
/*
 * Per-scheduler state: the queues of pending workitems and the bookkeeping
 * for the threads (see cfs_wi_scheduler()) that service them.
 */
typedef struct cfs_wi_sched {
        struct list_head                ws_list;        /* chain on global list */
        /** serialised workitems; taken in this file around every update of
         * the queues below and of a workitem's scheduled/running flags */
        spinlock_t              ws_lock;
        /** where schedulers sleep */
        wait_queue_head_t               ws_waitq;
        /** concurrent workitems, i.e. workitems waiting to be run */
        struct list_head                ws_runq;
        /** rescheduled running-workitems: a workitem can be rescheduled
         * while running in wi_action(), but we don't want to execute it
         * again until it returns from wi_action(), so we put it on
         * ws_rerunq while rescheduling, and move it to ws_runq after it
         * returns from wi_action() */
        struct list_head                ws_rerunq;
        /** CPT-table for this scheduler */
        struct cfs_cpt_table    *ws_cptab;
        /** CPT id for affinity */
        int                     ws_cpt;
        /** number of scheduled workitems (queued on ws_runq or ws_rerunq) */
        int                     ws_nscheduled;
        /*
         * NB: the three bitfields below are declared back to back, so they
         * may share one storage unit; update any of them only while holding
         * cfs_wi_data::wi_glock.
         */
        /** started scheduler thread, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_nthreads:30;
        /** shutting down, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_stopping:1;
        /** serialize starting thread, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_starting:1;
        /** scheduler name */
        char                    ws_name[CFS_WS_NAME_LEN];
} cfs_wi_sched_t;
  77
/* Module-wide workitem state shared by every scheduler in this file. */
static struct cfs_workitem_data {
        /** serialize: guards wi_scheds and the per-scheduler
         * ws_nthreads / ws_stopping / ws_starting fields */
        spinlock_t              wi_glock;
        /** list of all schedulers */
        struct list_head                wi_scheds;
        /** WI module is initialized: set by cfs_wi_startup(), cleared by
         * cfs_wi_shutdown() */
        int                     wi_init;
        /** shutting down the whole WI module */
        int                     wi_stopping;
} cfs_wi_data;
  88
  89static inline void
  90cfs_wi_sched_lock(cfs_wi_sched_t *sched)
  91{
  92        spin_lock(&sched->ws_lock);
  93}
  94
  95static inline void
  96cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
  97{
  98        spin_unlock(&sched->ws_lock);
  99}
 100
 101static inline int
 102cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
 103{
 104        cfs_wi_sched_lock(sched);
 105        if (sched->ws_stopping) {
 106                cfs_wi_sched_unlock(sched);
 107                return 0;
 108        }
 109
 110        if (!list_empty(&sched->ws_runq)) {
 111                cfs_wi_sched_unlock(sched);
 112                return 0;
 113        }
 114        cfs_wi_sched_unlock(sched);
 115        return 1;
 116}
 117
 118
 119/* XXX:
 120 * 0. it only works when called from wi->wi_action.
 121 * 1. when it returns no one shall try to schedule the workitem.
 122 */
 123void
 124cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
 125{
 126        LASSERT(!in_interrupt()); /* because we use plain spinlock */
 127        LASSERT(!sched->ws_stopping);
 128
 129        cfs_wi_sched_lock(sched);
 130
 131        LASSERT(wi->wi_running);
 132        if (wi->wi_scheduled) { /* cancel pending schedules */
 133                LASSERT(!list_empty(&wi->wi_list));
 134                list_del_init(&wi->wi_list);
 135
 136                LASSERT(sched->ws_nscheduled > 0);
 137                sched->ws_nscheduled--;
 138        }
 139
 140        LASSERT(list_empty(&wi->wi_list));
 141
 142        wi->wi_scheduled = 1; /* LBUG future schedule attempts */
 143        cfs_wi_sched_unlock(sched);
 144
 145        return;
 146}
 147EXPORT_SYMBOL(cfs_wi_exit);
 148
 149/**
 150 * cancel schedule request of workitem \a wi
 151 */
 152int
 153cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
 154{
 155        int     rc;
 156
 157        LASSERT(!in_interrupt()); /* because we use plain spinlock */
 158        LASSERT(!sched->ws_stopping);
 159
 160        /*
 161         * return 0 if it's running already, otherwise return 1, which
 162         * means the workitem will not be scheduled and will not have
 163         * any race with wi_action.
 164         */
 165        cfs_wi_sched_lock(sched);
 166
 167        rc = !(wi->wi_running);
 168
 169        if (wi->wi_scheduled) { /* cancel pending schedules */
 170                LASSERT(!list_empty(&wi->wi_list));
 171                list_del_init(&wi->wi_list);
 172
 173                LASSERT(sched->ws_nscheduled > 0);
 174                sched->ws_nscheduled--;
 175
 176                wi->wi_scheduled = 0;
 177        }
 178
 179        LASSERT (list_empty(&wi->wi_list));
 180
 181        cfs_wi_sched_unlock(sched);
 182        return rc;
 183}
 184EXPORT_SYMBOL(cfs_wi_deschedule);
 185
 186/*
 187 * Workitem scheduled with (serial == 1) is strictly serialised not only with
 188 * itself, but also with others scheduled this way.
 189 *
 190 * Now there's only one static serialised queue, but in the future more might
 191 * be added, and even dynamic creation of serialised queues might be supported.
 192 */
 193void
 194cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
 195{
 196        LASSERT(!in_interrupt()); /* because we use plain spinlock */
 197        LASSERT(!sched->ws_stopping);
 198
 199        cfs_wi_sched_lock(sched);
 200
 201        if (!wi->wi_scheduled) {
 202                LASSERT (list_empty(&wi->wi_list));
 203
 204                wi->wi_scheduled = 1;
 205                sched->ws_nscheduled++;
 206                if (!wi->wi_running) {
 207                        list_add_tail(&wi->wi_list, &sched->ws_runq);
 208                        wake_up(&sched->ws_waitq);
 209                } else {
 210                        list_add(&wi->wi_list, &sched->ws_rerunq);
 211                }
 212        }
 213
 214        LASSERT (!list_empty(&wi->wi_list));
 215        cfs_wi_sched_unlock(sched);
 216        return;
 217}
 218EXPORT_SYMBOL(cfs_wi_schedule);
 219
 220
 221static int
 222cfs_wi_scheduler (void *arg)
 223{
 224        struct cfs_wi_sched     *sched = (cfs_wi_sched_t *)arg;
 225
 226        cfs_block_allsigs();
 227
 228        /* CPT affinity scheduler? */
 229        if (sched->ws_cptab != NULL)
 230                cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt);
 231
 232        spin_lock(&cfs_wi_data.wi_glock);
 233
 234        LASSERT(sched->ws_starting == 1);
 235        sched->ws_starting--;
 236        sched->ws_nthreads++;
 237
 238        spin_unlock(&cfs_wi_data.wi_glock);
 239
 240        cfs_wi_sched_lock(sched);
 241
 242        while (!sched->ws_stopping) {
 243                int          nloops = 0;
 244                int          rc;
 245                cfs_workitem_t *wi;
 246
 247                while (!list_empty(&sched->ws_runq) &&
 248                       nloops < CFS_WI_RESCHED) {
 249                        wi = list_entry(sched->ws_runq.next,
 250                                            cfs_workitem_t, wi_list);
 251                        LASSERT(wi->wi_scheduled && !wi->wi_running);
 252
 253                        list_del_init(&wi->wi_list);
 254
 255                        LASSERT(sched->ws_nscheduled > 0);
 256                        sched->ws_nscheduled--;
 257
 258                        wi->wi_running   = 1;
 259                        wi->wi_scheduled = 0;
 260
 261
 262                        cfs_wi_sched_unlock(sched);
 263                        nloops++;
 264
 265                        rc = (*wi->wi_action) (wi);
 266
 267                        cfs_wi_sched_lock(sched);
 268                        if (rc != 0) /* WI should be dead, even be freed! */
 269                                continue;
 270
 271                        wi->wi_running = 0;
 272                        if (list_empty(&wi->wi_list))
 273                                continue;
 274
 275                        LASSERT(wi->wi_scheduled);
 276                        /* wi is rescheduled, should be on rerunq now, we
 277                         * move it to runq so it can run action now */
 278                        list_move_tail(&wi->wi_list, &sched->ws_runq);
 279                }
 280
 281                if (!list_empty(&sched->ws_runq)) {
 282                        cfs_wi_sched_unlock(sched);
 283                        /* don't sleep because some workitems still
 284                         * expect me to come back soon */
 285                        cond_resched();
 286                        cfs_wi_sched_lock(sched);
 287                        continue;
 288                }
 289
 290                cfs_wi_sched_unlock(sched);
 291                cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
 292                                !cfs_wi_sched_cansleep(sched), rc);
 293                cfs_wi_sched_lock(sched);
 294        }
 295
 296        cfs_wi_sched_unlock(sched);
 297
 298        spin_lock(&cfs_wi_data.wi_glock);
 299        sched->ws_nthreads--;
 300        spin_unlock(&cfs_wi_data.wi_glock);
 301
 302        return 0;
 303}
 304
 305
 306void
 307cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
 308{
 309        int     i;
 310
 311        LASSERT(cfs_wi_data.wi_init);
 312        LASSERT(!cfs_wi_data.wi_stopping);
 313
 314        spin_lock(&cfs_wi_data.wi_glock);
 315        if (sched->ws_stopping) {
 316                CDEBUG(D_INFO, "%s is in progress of stopping\n",
 317                       sched->ws_name);
 318                spin_unlock(&cfs_wi_data.wi_glock);
 319                return;
 320        }
 321
 322        LASSERT(!list_empty(&sched->ws_list));
 323        sched->ws_stopping = 1;
 324
 325        spin_unlock(&cfs_wi_data.wi_glock);
 326
 327        i = 2;
 328        wake_up_all(&sched->ws_waitq);
 329
 330        spin_lock(&cfs_wi_data.wi_glock);
 331        while (sched->ws_nthreads > 0) {
 332                CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
 333                       "waiting for %d threads of WI sched[%s] to terminate\n",
 334                       sched->ws_nthreads, sched->ws_name);
 335
 336                spin_unlock(&cfs_wi_data.wi_glock);
 337                set_current_state(TASK_UNINTERRUPTIBLE);
 338                schedule_timeout(cfs_time_seconds(1) / 20);
 339                spin_lock(&cfs_wi_data.wi_glock);
 340        }
 341
 342        list_del(&sched->ws_list);
 343
 344        spin_unlock(&cfs_wi_data.wi_glock);
 345        LASSERT(sched->ws_nscheduled == 0);
 346
 347        LIBCFS_FREE(sched, sizeof(*sched));
 348}
 349EXPORT_SYMBOL(cfs_wi_sched_destroy);
 350
 351int
 352cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
 353                    int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
 354{
 355        struct cfs_wi_sched     *sched;
 356        int                     rc;
 357
 358        LASSERT(cfs_wi_data.wi_init);
 359        LASSERT(!cfs_wi_data.wi_stopping);
 360        LASSERT(cptab == NULL || cpt == CFS_CPT_ANY ||
 361                (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
 362
 363        LIBCFS_ALLOC(sched, sizeof(*sched));
 364        if (sched == NULL)
 365                return -ENOMEM;
 366
 367        strncpy(sched->ws_name, name, CFS_WS_NAME_LEN);
 368        sched->ws_name[CFS_WS_NAME_LEN - 1] = '\0';
 369        sched->ws_cptab = cptab;
 370        sched->ws_cpt = cpt;
 371
 372        spin_lock_init(&sched->ws_lock);
 373        init_waitqueue_head(&sched->ws_waitq);
 374        INIT_LIST_HEAD(&sched->ws_runq);
 375        INIT_LIST_HEAD(&sched->ws_rerunq);
 376        INIT_LIST_HEAD(&sched->ws_list);
 377
 378        rc = 0;
 379        while (nthrs > 0)  {
 380                char    name[16];
 381                struct task_struct *task;
 382
 383                spin_lock(&cfs_wi_data.wi_glock);
 384                while (sched->ws_starting > 0) {
 385                        spin_unlock(&cfs_wi_data.wi_glock);
 386                        schedule();
 387                        spin_lock(&cfs_wi_data.wi_glock);
 388                }
 389
 390                sched->ws_starting++;
 391                spin_unlock(&cfs_wi_data.wi_glock);
 392
 393                if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
 394                        snprintf(name, sizeof(name), "%s_%02d_%02u",
 395                                 sched->ws_name, sched->ws_cpt,
 396                                 sched->ws_nthreads);
 397                } else {
 398                        snprintf(name, sizeof(name), "%s_%02u",
 399                                 sched->ws_name, sched->ws_nthreads);
 400                }
 401
 402                task = kthread_run(cfs_wi_scheduler, sched, "%s", name);
 403                if (!IS_ERR(task)) {
 404                        nthrs--;
 405                        continue;
 406                }
 407                rc = PTR_ERR(task);
 408
 409                CERROR("Failed to create thread for WI scheduler %s: %d\n",
 410                       name, rc);
 411
 412                spin_lock(&cfs_wi_data.wi_glock);
 413
 414                /* make up for cfs_wi_sched_destroy */
 415                list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
 416                sched->ws_starting--;
 417
 418                spin_unlock(&cfs_wi_data.wi_glock);
 419
 420                cfs_wi_sched_destroy(sched);
 421                return rc;
 422        }
 423        spin_lock(&cfs_wi_data.wi_glock);
 424        list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
 425        spin_unlock(&cfs_wi_data.wi_glock);
 426
 427        *sched_pp = sched;
 428        return 0;
 429}
 430EXPORT_SYMBOL(cfs_wi_sched_create);
 431
 432int
 433cfs_wi_startup(void)
 434{
 435        memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
 436
 437        spin_lock_init(&cfs_wi_data.wi_glock);
 438        INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
 439        cfs_wi_data.wi_init = 1;
 440
 441        return 0;
 442}
 443
 444void
 445cfs_wi_shutdown (void)
 446{
 447        struct cfs_wi_sched     *sched;
 448
 449        spin_lock(&cfs_wi_data.wi_glock);
 450        cfs_wi_data.wi_stopping = 1;
 451        spin_unlock(&cfs_wi_data.wi_glock);
 452
 453        /* nobody should contend on this list */
 454        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
 455                sched->ws_stopping = 1;
 456                wake_up_all(&sched->ws_waitq);
 457        }
 458
 459        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
 460                spin_lock(&cfs_wi_data.wi_glock);
 461
 462                while (sched->ws_nthreads != 0) {
 463                        spin_unlock(&cfs_wi_data.wi_glock);
 464                        set_current_state(TASK_UNINTERRUPTIBLE);
 465                        schedule_timeout(cfs_time_seconds(1) / 20);
 466                        spin_lock(&cfs_wi_data.wi_glock);
 467                }
 468                spin_unlock(&cfs_wi_data.wi_glock);
 469        }
 470        while (!list_empty(&cfs_wi_data.wi_scheds)) {
 471                sched = list_entry(cfs_wi_data.wi_scheds.next,
 472                                       struct cfs_wi_sched, ws_list);
 473                list_del(&sched->ws_list);
 474                LIBCFS_FREE(sched, sizeof(*sched));
 475        }
 476
 477        cfs_wi_data.wi_stopping = 0;
 478        cfs_wi_data.wi_init = 0;
 479}
 480