linux/fs/ubifs/io.c
<<
>>
Prefs
   1/*
   2 * This file is part of UBIFS.
   3 *
   4 * Copyright (C) 2006-2008 Nokia Corporation.
   5 * Copyright (C) 2006, 2007 University of Szeged, Hungary
   6 *
   7 * This program is free software; you can redistribute it and/or modify it
   8 * under the terms of the GNU General Public License version 2 as published by
   9 * the Free Software Foundation.
  10 *
  11 * This program is distributed in the hope that it will be useful, but WITHOUT
  12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14 * more details.
  15 *
  16 * You should have received a copy of the GNU General Public License along with
  17 * this program; if not, write to the Free Software Foundation, Inc., 51
  18 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  19 *
  20 * Authors: Artem Bityutskiy (Битюцкий Артём)
  21 *          Adrian Hunter
  22 *          Zoltan Sogor
  23 */
  24
  25/*
  26 * This file implements UBIFS I/O subsystem which provides various I/O-related
  27 * helper functions (reading/writing/checking/validating nodes) and implements
  28 * write-buffering support. Write buffers help to save space which otherwise
  29 * would have been wasted for padding to the nearest minimal I/O unit boundary.
  30 * Instead, data first goes to the write-buffer and is flushed when the
  31 * buffer is full or when it is not used for some time (by timer). This is
   32 * similar to the mechanism used by JFFS2.
  33 *
  34 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
  35 * mutexes defined inside these objects. Since sometimes upper-level code
  36 * has to lock the write-buffer (e.g. journal space reservation code), many
  37 * functions related to write-buffers have "nolock" suffix which means that the
  38 * caller has to lock the write-buffer before calling this function.
  39 *
  40 * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not
  41 * aligned, UBIFS starts the next node from the aligned address, and the padded
  42 * bytes may contain any rubbish. In other words, UBIFS does not put padding
  43 * bytes in those small gaps. Common headers of nodes store real node lengths,
  44 * not aligned lengths. Indexing nodes also store real lengths in branches.
  45 *
  46 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
  47 * uses padding nodes or padding bytes, if the padding node does not fit.
  48 *
  49 * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes
  50 * every time they are read from the flash media.
  51 */
  52
  53#include <linux/crc32.h>
  54#include "ubifs.h"
  55
  56/**
  57 * ubifs_ro_mode - switch UBIFS to read read-only mode.
  58 * @c: UBIFS file-system description object
  59 * @err: error code which is the reason of switching to R/O mode
  60 */
  61void ubifs_ro_mode(struct ubifs_info *c, int err)
  62{
  63        if (!c->ro_media) {
  64                c->ro_media = 1;
  65                c->no_chk_data_crc = 0;
  66                ubifs_warn("switched to read-only mode, error %d", err);
  67                dbg_dump_stack();
  68        }
  69}
  70
  71/**
  72 * ubifs_check_node - check node.
  73 * @c: UBIFS file-system description object
  74 * @buf: node to check
  75 * @lnum: logical eraseblock number
  76 * @offs: offset within the logical eraseblock
  77 * @quiet: print no messages
  78 * @must_chk_crc: indicates whether to always check the CRC
  79 *
  80 * This function checks node magic number and CRC checksum. This function also
  81 * validates node length to prevent UBIFS from becoming crazy when an attacker
  82 * feeds it a file-system image with incorrect nodes. For example, too large
  83 * node length in the common header could cause UBIFS to read memory outside of
  84 * allocated buffer when checking the CRC checksum.
  85 *
  86 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
  87 * true, which is controlled by corresponding UBIFS mount option. However, if
  88 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
  89 * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is
  90 * ignored and CRC is checked.
  91 *
  92 * This function returns zero in case of success and %-EUCLEAN in case of bad
  93 * CRC or magic.
  94 */
  95int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
  96                     int offs, int quiet, int must_chk_crc)
  97{
  98        int err = -EINVAL, type, node_len;
  99        uint32_t crc, node_crc, magic;
 100        const struct ubifs_ch *ch = buf;
 101
 102        ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
 103        ubifs_assert(!(offs & 7) && offs < c->leb_size);
 104
 105        magic = le32_to_cpu(ch->magic);
 106        if (magic != UBIFS_NODE_MAGIC) {
 107                if (!quiet)
 108                        ubifs_err("bad magic %#08x, expected %#08x",
 109                                  magic, UBIFS_NODE_MAGIC);
 110                err = -EUCLEAN;
 111                goto out;
 112        }
 113
 114        type = ch->node_type;
 115        if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
 116                if (!quiet)
 117                        ubifs_err("bad node type %d", type);
 118                goto out;
 119        }
 120
 121        node_len = le32_to_cpu(ch->len);
 122        if (node_len + offs > c->leb_size)
 123                goto out_len;
 124
 125        if (c->ranges[type].max_len == 0) {
 126                if (node_len != c->ranges[type].len)
 127                        goto out_len;
 128        } else if (node_len < c->ranges[type].min_len ||
 129                   node_len > c->ranges[type].max_len)
 130                goto out_len;
 131
 132        if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc &&
 133             c->no_chk_data_crc)
 134                return 0;
 135
 136        crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
 137        node_crc = le32_to_cpu(ch->crc);
 138        if (crc != node_crc) {
 139                if (!quiet)
 140                        ubifs_err("bad CRC: calculated %#08x, read %#08x",
 141                                  crc, node_crc);
 142                err = -EUCLEAN;
 143                goto out;
 144        }
 145
 146        return 0;
 147
 148out_len:
 149        if (!quiet)
 150                ubifs_err("bad node length %d", node_len);
 151out:
 152        if (!quiet) {
 153                ubifs_err("bad node at LEB %d:%d", lnum, offs);
 154                dbg_dump_node(c, buf);
 155                dbg_dump_stack();
 156        }
 157        return err;
 158}
 159
 160/**
 161 * ubifs_pad - pad flash space.
 162 * @c: UBIFS file-system description object
 163 * @buf: buffer to put padding to
 164 * @pad: how many bytes to pad
 165 *
 166 * The flash media obliges us to write only in chunks of %c->min_io_size and
 167 * when we have to write less data we add padding node to the write-buffer and
 168 * pad it to the next minimal I/O unit's boundary. Padding nodes help when the
 169 * media is being scanned. If the amount of wasted space is not enough to fit a
 170 * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes
 171 * pattern (%UBIFS_PADDING_BYTE).
 172 *
 173 * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is
 174 * used.
 175 */
 176void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
 177{
 178        uint32_t crc;
 179
 180        ubifs_assert(pad >= 0 && !(pad & 7));
 181
 182        if (pad >= UBIFS_PAD_NODE_SZ) {
 183                struct ubifs_ch *ch = buf;
 184                struct ubifs_pad_node *pad_node = buf;
 185
 186                ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
 187                ch->node_type = UBIFS_PAD_NODE;
 188                ch->group_type = UBIFS_NO_NODE_GROUP;
 189                ch->padding[0] = ch->padding[1] = 0;
 190                ch->sqnum = 0;
 191                ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ);
 192                pad -= UBIFS_PAD_NODE_SZ;
 193                pad_node->pad_len = cpu_to_le32(pad);
 194                crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8);
 195                ch->crc = cpu_to_le32(crc);
 196                memset(buf + UBIFS_PAD_NODE_SZ, 0, pad);
 197        } else if (pad > 0)
 198                /* Too little space, padding node won't fit */
 199                memset(buf, UBIFS_PADDING_BYTE, pad);
 200}
 201
 202/**
 203 * next_sqnum - get next sequence number.
 204 * @c: UBIFS file-system description object
 205 */
 206static unsigned long long next_sqnum(struct ubifs_info *c)
 207{
 208        unsigned long long sqnum;
 209
 210        spin_lock(&c->cnt_lock);
 211        sqnum = ++c->max_sqnum;
 212        spin_unlock(&c->cnt_lock);
 213
 214        if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
 215                if (sqnum >= SQNUM_WATERMARK) {
 216                        ubifs_err("sequence number overflow %llu, end of life",
 217                                  sqnum);
 218                        ubifs_ro_mode(c, -EINVAL);
 219                }
 220                ubifs_warn("running out of sequence numbers, end of life soon");
 221        }
 222
 223        return sqnum;
 224}
 225
 226/**
 227 * ubifs_prepare_node - prepare node to be written to flash.
 228 * @c: UBIFS file-system description object
 229 * @node: the node to pad
 230 * @len: node length
 231 * @pad: if the buffer has to be padded
 232 *
 233 * This function prepares node at @node to be written to the media - it
 234 * calculates node CRC, fills the common header, and adds proper padding up to
 235 * the next minimum I/O unit if @pad is not zero.
 236 */
 237void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
 238{
 239        uint32_t crc;
 240        struct ubifs_ch *ch = node;
 241        unsigned long long sqnum = next_sqnum(c);
 242
 243        ubifs_assert(len >= UBIFS_CH_SZ);
 244
 245        ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
 246        ch->len = cpu_to_le32(len);
 247        ch->group_type = UBIFS_NO_NODE_GROUP;
 248        ch->sqnum = cpu_to_le64(sqnum);
 249        ch->padding[0] = ch->padding[1] = 0;
 250        crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
 251        ch->crc = cpu_to_le32(crc);
 252
 253        if (pad) {
 254                len = ALIGN(len, 8);
 255                pad = ALIGN(len, c->min_io_size) - len;
 256                ubifs_pad(c, node + len, pad);
 257        }
 258}
 259
 260/**
 261 * ubifs_prep_grp_node - prepare node of a group to be written to flash.
 262 * @c: UBIFS file-system description object
 263 * @node: the node to pad
 264 * @len: node length
 265 * @last: indicates the last node of the group
 266 *
 267 * This function prepares node at @node to be written to the media - it
 268 * calculates node CRC and fills the common header.
 269 */
 270void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
 271{
 272        uint32_t crc;
 273        struct ubifs_ch *ch = node;
 274        unsigned long long sqnum = next_sqnum(c);
 275
 276        ubifs_assert(len >= UBIFS_CH_SZ);
 277
 278        ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
 279        ch->len = cpu_to_le32(len);
 280        if (last)
 281                ch->group_type = UBIFS_LAST_OF_NODE_GROUP;
 282        else
 283                ch->group_type = UBIFS_IN_NODE_GROUP;
 284        ch->sqnum = cpu_to_le64(sqnum);
 285        ch->padding[0] = ch->padding[1] = 0;
 286        crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
 287        ch->crc = cpu_to_le32(crc);
 288}
 289
 290/**
 291 * wbuf_timer_callback - write-buffer timer callback function.
 292 * @data: timer data (write-buffer descriptor)
 293 *
 294 * This function is called when the write-buffer timer expires.
 295 */
 296static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
 297{
 298        struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
 299
 300        dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
 301        wbuf->need_sync = 1;
 302        wbuf->c->need_wbuf_sync = 1;
 303        ubifs_wake_up_bgt(wbuf->c);
 304        return HRTIMER_NORESTART;
 305}
 306
 307/**
 308 * new_wbuf_timer - start new write-buffer timer.
 309 * @wbuf: write-buffer descriptor
 310 */
 311static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
 312{
 313        ubifs_assert(!hrtimer_active(&wbuf->timer));
 314
 315        if (wbuf->no_timer)
 316                return;
 317        dbg_io("set timer for jhead %s, %llu-%llu millisecs",
 318               dbg_jhead(wbuf->jhead),
 319               div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
 320               div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
 321                       USEC_PER_SEC));
 322        hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
 323                               HRTIMER_MODE_REL);
 324}
 325
 326/**
 327 * cancel_wbuf_timer - cancel write-buffer timer.
 328 * @wbuf: write-buffer descriptor
 329 */
 330static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
 331{
 332        if (wbuf->no_timer)
 333                return;
 334        wbuf->need_sync = 0;
 335        hrtimer_cancel(&wbuf->timer);
 336}
 337
 338/**
 339 * ubifs_wbuf_sync_nolock - synchronize write-buffer.
 340 * @wbuf: write-buffer to synchronize
 341 *
 342 * This function synchronizes write-buffer @buf and returns zero in case of
 343 * success or a negative error code in case of failure.
 344 */
 345int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
 346{
 347        struct ubifs_info *c = wbuf->c;
 348        int err, dirt;
 349
 350        cancel_wbuf_timer_nolock(wbuf);
 351        if (!wbuf->used || wbuf->lnum == -1)
 352                /* Write-buffer is empty or not seeked */
 353                return 0;
 354
 355        dbg_io("LEB %d:%d, %d bytes, jhead %s",
 356               wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
 357        ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
 358        ubifs_assert(!(wbuf->avail & 7));
 359        ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
 360
 361        if (c->ro_media)
 362                return -EROFS;
 363
 364        ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
 365        err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
 366                            c->min_io_size, wbuf->dtype);
 367        if (err) {
 368                ubifs_err("cannot write %d bytes to LEB %d:%d",
 369                          c->min_io_size, wbuf->lnum, wbuf->offs);
 370                dbg_dump_stack();
 371                return err;
 372        }
 373
 374        dirt = wbuf->avail;
 375
 376        spin_lock(&wbuf->lock);
 377        wbuf->offs += c->min_io_size;
 378        wbuf->avail = c->min_io_size;
 379        wbuf->used = 0;
 380        wbuf->next_ino = 0;
 381        spin_unlock(&wbuf->lock);
 382
 383        if (wbuf->sync_callback)
 384                err = wbuf->sync_callback(c, wbuf->lnum,
 385                                          c->leb_size - wbuf->offs, dirt);
 386        return err;
 387}
 388
 389/**
 390 * ubifs_wbuf_seek_nolock - seek write-buffer.
 391 * @wbuf: write-buffer
 392 * @lnum: logical eraseblock number to seek to
 393 * @offs: logical eraseblock offset to seek to
 394 * @dtype: data type
 395 *
 396 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
 397 * The write-buffer is synchronized if it is not empty. Returns zero in case of
 398 * success and a negative error code in case of failure.
 399 */
 400int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
 401                           int dtype)
 402{
 403        const struct ubifs_info *c = wbuf->c;
 404
 405        dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
 406        ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
 407        ubifs_assert(offs >= 0 && offs <= c->leb_size);
 408        ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
 409        ubifs_assert(lnum != wbuf->lnum);
 410
 411        if (wbuf->used > 0) {
 412                int err = ubifs_wbuf_sync_nolock(wbuf);
 413
 414                if (err)
 415                        return err;
 416        }
 417
 418        spin_lock(&wbuf->lock);
 419        wbuf->lnum = lnum;
 420        wbuf->offs = offs;
 421        wbuf->avail = c->min_io_size;
 422        wbuf->used = 0;
 423        spin_unlock(&wbuf->lock);
 424        wbuf->dtype = dtype;
 425
 426        return 0;
 427}
 428
 429/**
 430 * ubifs_bg_wbufs_sync - synchronize write-buffers.
 431 * @c: UBIFS file-system description object
 432 *
 433 * This function is called by background thread to synchronize write-buffers.
 434 * Returns zero in case of success and a negative error code in case of
 435 * failure.
 436 */
 437int ubifs_bg_wbufs_sync(struct ubifs_info *c)
 438{
 439        int err, i;
 440
 441        if (!c->need_wbuf_sync)
 442                return 0;
 443        c->need_wbuf_sync = 0;
 444
 445        if (c->ro_media) {
 446                err = -EROFS;
 447                goto out_timers;
 448        }
 449
 450        dbg_io("synchronize");
 451        for (i = 0; i < c->jhead_cnt; i++) {
 452                struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
 453
 454                cond_resched();
 455
 456                /*
 457                 * If the mutex is locked then wbuf is being changed, so
 458                 * synchronization is not necessary.
 459                 */
 460                if (mutex_is_locked(&wbuf->io_mutex))
 461                        continue;
 462
 463                mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
 464                if (!wbuf->need_sync) {
 465                        mutex_unlock(&wbuf->io_mutex);
 466                        continue;
 467                }
 468
 469                err = ubifs_wbuf_sync_nolock(wbuf);
 470                mutex_unlock(&wbuf->io_mutex);
 471                if (err) {
 472                        ubifs_err("cannot sync write-buffer, error %d", err);
 473                        ubifs_ro_mode(c, err);
 474                        goto out_timers;
 475                }
 476        }
 477
 478        return 0;
 479
 480out_timers:
 481        /* Cancel all timers to prevent repeated errors */
 482        for (i = 0; i < c->jhead_cnt; i++) {
 483                struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
 484
 485                mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
 486                cancel_wbuf_timer_nolock(wbuf);
 487                mutex_unlock(&wbuf->io_mutex);
 488        }
 489        return err;
 490}
 491
 492/**
 493 * ubifs_wbuf_write_nolock - write data to flash via write-buffer.
 494 * @wbuf: write-buffer
 495 * @buf: node to write
 496 * @len: node length
 497 *
 498 * This function writes data to flash via write-buffer @wbuf. This means that
 499 * the last piece of the node won't reach the flash media immediately if it
 500 * does not take whole minimal I/O unit. Instead, the node will sit in RAM
 501 * until the write-buffer is synchronized (e.g., by timer).
 502 *
 503 * This function returns zero in case of success and a negative error code in
 504 * case of failure. If the node cannot be written because there is no more
 505 * space in this logical eraseblock, %-ENOSPC is returned.
 506 */
 507int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 508{
 509        struct ubifs_info *c = wbuf->c;
 510        int err, written, n, aligned_len = ALIGN(len, 8), offs;
 511
 512        dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
 513               dbg_ntype(((struct ubifs_ch *)buf)->node_type),
 514               dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
 515        ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
 516        ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
 517        ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
 518        ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
 519        ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
 520
 521        if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
 522                err = -ENOSPC;
 523                goto out;
 524        }
 525
 526        cancel_wbuf_timer_nolock(wbuf);
 527
 528        if (c->ro_media)
 529                return -EROFS;
 530
 531        if (aligned_len <= wbuf->avail) {
 532                /*
 533                 * The node is not very large and fits entirely within
 534                 * write-buffer.
 535                 */
 536                memcpy(wbuf->buf + wbuf->used, buf, len);
 537
 538                if (aligned_len == wbuf->avail) {
 539                        dbg_io("flush jhead %s wbuf to LEB %d:%d",
 540                               dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
 541                        err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
 542                                            wbuf->offs, c->min_io_size,
 543                                            wbuf->dtype);
 544                        if (err)
 545                                goto out;
 546
 547                        spin_lock(&wbuf->lock);
 548                        wbuf->offs += c->min_io_size;
 549                        wbuf->avail = c->min_io_size;
 550                        wbuf->used = 0;
 551                        wbuf->next_ino = 0;
 552                        spin_unlock(&wbuf->lock);
 553                } else {
 554                        spin_lock(&wbuf->lock);
 555                        wbuf->avail -= aligned_len;
 556                        wbuf->used += aligned_len;
 557                        spin_unlock(&wbuf->lock);
 558                }
 559
 560                goto exit;
 561        }
 562
 563        /*
 564         * The node is large enough and does not fit entirely within current
 565         * minimal I/O unit. We have to fill and flush write-buffer and switch
 566         * to the next min. I/O unit.
 567         */
 568        dbg_io("flush jhead %s wbuf to LEB %d:%d",
 569               dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
 570        memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
 571        err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
 572                            c->min_io_size, wbuf->dtype);
 573        if (err)
 574                goto out;
 575
 576        offs = wbuf->offs + c->min_io_size;
 577        len -= wbuf->avail;
 578        aligned_len -= wbuf->avail;
 579        written = wbuf->avail;
 580
 581        /*
 582         * The remaining data may take more whole min. I/O units, so write the
 583         * remains multiple to min. I/O unit size directly to the flash media.
 584         * We align node length to 8-byte boundary because we anyway flash wbuf
 585         * if the remaining space is less than 8 bytes.
 586         */
 587        n = aligned_len >> c->min_io_shift;
 588        if (n) {
 589                n <<= c->min_io_shift;
 590                dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
 591                err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
 592                                    wbuf->dtype);
 593                if (err)
 594                        goto out;
 595                offs += n;
 596                aligned_len -= n;
 597                len -= n;
 598                written += n;
 599        }
 600
 601        spin_lock(&wbuf->lock);
 602        if (aligned_len)
 603                /*
 604                 * And now we have what's left and what does not take whole
 605                 * min. I/O unit, so write it to the write-buffer and we are
 606                 * done.
 607                 */
 608                memcpy(wbuf->buf, buf + written, len);
 609
 610        wbuf->offs = offs;
 611        wbuf->used = aligned_len;
 612        wbuf->avail = c->min_io_size - aligned_len;
 613        wbuf->next_ino = 0;
 614        spin_unlock(&wbuf->lock);
 615
 616exit:
 617        if (wbuf->sync_callback) {
 618                int free = c->leb_size - wbuf->offs - wbuf->used;
 619
 620                err = wbuf->sync_callback(c, wbuf->lnum, free, 0);
 621                if (err)
 622                        goto out;
 623        }
 624
 625        if (wbuf->used)
 626                new_wbuf_timer_nolock(wbuf);
 627
 628        return 0;
 629
 630out:
 631        ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
 632                  len, wbuf->lnum, wbuf->offs, err);
 633        dbg_dump_node(c, buf);
 634        dbg_dump_stack();
 635        dbg_dump_leb(c, wbuf->lnum);
 636        return err;
 637}
 638
 639/**
 640 * ubifs_write_node - write node to the media.
 641 * @c: UBIFS file-system description object
 642 * @buf: the node to write
 643 * @len: node length
 644 * @lnum: logical eraseblock number
 645 * @offs: offset within the logical eraseblock
 646 * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
 647 *
 648 * This function automatically fills node magic number, assigns sequence
 649 * number, and calculates node CRC checksum. The length of the @buf buffer has
 650 * to be aligned to the minimal I/O unit size. This function automatically
 651 * appends padding node and padding bytes if needed. Returns zero in case of
 652 * success and a negative error code in case of failure.
 653 */
 654int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
 655                     int offs, int dtype)
 656{
 657        int err, buf_len = ALIGN(len, c->min_io_size);
 658
 659        dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
 660               lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len,
 661               buf_len);
 662        ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
 663        ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
 664
 665        if (c->ro_media)
 666                return -EROFS;
 667
 668        ubifs_prepare_node(c, buf, len, 1);
 669        err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype);
 670        if (err) {
 671                ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
 672                          buf_len, lnum, offs, err);
 673                dbg_dump_node(c, buf);
 674                dbg_dump_stack();
 675        }
 676
 677        return err;
 678}
 679
 680/**
 681 * ubifs_read_node_wbuf - read node from the media or write-buffer.
 682 * @wbuf: wbuf to check for un-written data
 683 * @buf: buffer to read to
 684 * @type: node type
 685 * @len: node length
 686 * @lnum: logical eraseblock number
 687 * @offs: offset within the logical eraseblock
 688 *
 689 * This function reads a node of known type and length, checks it and stores
 690 * in @buf. If the node partially or fully sits in the write-buffer, this
 691 * function takes data from the buffer, otherwise it reads the flash media.
 692 * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative
 693 * error code in case of failure.
 694 */
 695int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 696                         int lnum, int offs)
 697{
 698        const struct ubifs_info *c = wbuf->c;
 699        int err, rlen, overlap;
 700        struct ubifs_ch *ch = buf;
 701
 702        dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
 703               dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
 704        ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
 705        ubifs_assert(!(offs & 7) && offs < c->leb_size);
 706        ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
 707
 708        spin_lock(&wbuf->lock);
 709        overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
 710        if (!overlap) {
 711                /* We may safely unlock the write-buffer and read the data */
 712                spin_unlock(&wbuf->lock);
 713                return ubifs_read_node(c, buf, type, len, lnum, offs);
 714        }
 715
 716        /* Don't read under wbuf */
 717        rlen = wbuf->offs - offs;
 718        if (rlen < 0)
 719                rlen = 0;
 720
 721        /* Copy the rest from the write-buffer */
 722        memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
 723        spin_unlock(&wbuf->lock);
 724
 725        if (rlen > 0) {
 726                /* Read everything that goes before write-buffer */
 727                err = ubi_read(c->ubi, lnum, buf, offs, rlen);
 728                if (err && err != -EBADMSG) {
 729                        ubifs_err("failed to read node %d from LEB %d:%d, "
 730                                  "error %d", type, lnum, offs, err);
 731                        dbg_dump_stack();
 732                        return err;
 733                }
 734        }
 735
 736        if (type != ch->node_type) {
 737                ubifs_err("bad node type (%d but expected %d)",
 738                          ch->node_type, type);
 739                goto out;
 740        }
 741
 742        err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
 743        if (err) {
 744                ubifs_err("expected node type %d", type);
 745                return err;
 746        }
 747
 748        rlen = le32_to_cpu(ch->len);
 749        if (rlen != len) {
 750                ubifs_err("bad node length %d, expected %d", rlen, len);
 751                goto out;
 752        }
 753
 754        return 0;
 755
 756out:
 757        ubifs_err("bad node at LEB %d:%d", lnum, offs);
 758        dbg_dump_node(c, buf);
 759        dbg_dump_stack();
 760        return -EINVAL;
 761}
 762
 763/**
 764 * ubifs_read_node - read node.
 765 * @c: UBIFS file-system description object
 766 * @buf: buffer to read to
 767 * @type: node type
 768 * @len: node length (not aligned)
 769 * @lnum: logical eraseblock number
 770 * @offs: offset within the logical eraseblock
 771 *
 772 * This function reads a node of known type and and length, checks it and
 773 * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched
 774 * and a negative error code in case of failure.
 775 */
 776int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
 777                    int lnum, int offs)
 778{
 779        int err, l;
 780        struct ubifs_ch *ch = buf;
 781
 782        dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
 783        ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
 784        ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size);
 785        ubifs_assert(!(offs & 7) && offs < c->leb_size);
 786        ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
 787
 788        err = ubi_read(c->ubi, lnum, buf, offs, len);
 789        if (err && err != -EBADMSG) {
 790                ubifs_err("cannot read node %d from LEB %d:%d, error %d",
 791                          type, lnum, offs, err);
 792                return err;
 793        }
 794
 795        if (type != ch->node_type) {
 796                ubifs_err("bad node type (%d but expected %d)",
 797                          ch->node_type, type);
 798                goto out;
 799        }
 800
 801        err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
 802        if (err) {
 803                ubifs_err("expected node type %d", type);
 804                return err;
 805        }
 806
 807        l = le32_to_cpu(ch->len);
 808        if (l != len) {
 809                ubifs_err("bad node length %d, expected %d", l, len);
 810                goto out;
 811        }
 812
 813        return 0;
 814
 815out:
 816        ubifs_err("bad node at LEB %d:%d", lnum, offs);
 817        dbg_dump_node(c, buf);
 818        dbg_dump_stack();
 819        return -EINVAL;
 820}
 821
 822/**
 823 * ubifs_wbuf_init - initialize write-buffer.
 824 * @c: UBIFS file-system description object
 825 * @wbuf: write-buffer to initialize
 826 *
 827 * This function initializes write-buffer. Returns zero in case of success
 828 * %-ENOMEM in case of failure.
 829 */
 830int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
 831{
 832        size_t size;
 833
 834        wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
 835        if (!wbuf->buf)
 836                return -ENOMEM;
 837
 838        size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
 839        wbuf->inodes = kmalloc(size, GFP_KERNEL);
 840        if (!wbuf->inodes) {
 841                kfree(wbuf->buf);
 842                wbuf->buf = NULL;
 843                return -ENOMEM;
 844        }
 845
 846        wbuf->used = 0;
 847        wbuf->lnum = wbuf->offs = -1;
 848        wbuf->avail = c->min_io_size;
 849        wbuf->dtype = UBI_UNKNOWN;
 850        wbuf->sync_callback = NULL;
 851        mutex_init(&wbuf->io_mutex);
 852        spin_lock_init(&wbuf->lock);
 853        wbuf->c = c;
 854        wbuf->next_ino = 0;
 855
 856        hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 857        wbuf->timer.function = wbuf_timer_callback_nolock;
 858        wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0);
 859        wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT;
 860        wbuf->delta *= 1000000000ULL;
 861        ubifs_assert(wbuf->delta <= ULONG_MAX);
 862        return 0;
 863}
 864
 865/**
 866 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
 867 * @wbuf: the write-buffer where to add
 868 * @inum: the inode number
 869 *
 870 * This function adds an inode number to the inode array of the write-buffer.
 871 */
 872void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum)
 873{
 874        if (!wbuf->buf)
 875                /* NOR flash or something similar */
 876                return;
 877
 878        spin_lock(&wbuf->lock);
 879        if (wbuf->used)
 880                wbuf->inodes[wbuf->next_ino++] = inum;
 881        spin_unlock(&wbuf->lock);
 882}
 883
 884/**
 885 * wbuf_has_ino - returns if the wbuf contains data from the inode.
 886 * @wbuf: the write-buffer
 887 * @inum: the inode number
 888 *
 889 * This function returns with %1 if the write-buffer contains some data from the
 890 * given inode otherwise it returns with %0.
 891 */
 892static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum)
 893{
 894        int i, ret = 0;
 895
 896        spin_lock(&wbuf->lock);
 897        for (i = 0; i < wbuf->next_ino; i++)
 898                if (inum == wbuf->inodes[i]) {
 899                        ret = 1;
 900                        break;
 901                }
 902        spin_unlock(&wbuf->lock);
 903
 904        return ret;
 905}
 906
 907/**
 908 * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode.
 909 * @c: UBIFS file-system description object
 910 * @inode: inode to synchronize
 911 *
 912 * This function synchronizes write-buffers which contain nodes belonging to
 913 * @inode. Returns zero in case of success and a negative error code in case of
 914 * failure.
 915 */
 916int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode)
 917{
 918        int i, err = 0;
 919
 920        for (i = 0; i < c->jhead_cnt; i++) {
 921                struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
 922
 923                if (i == GCHD)
 924                        /*
 925                         * GC head is special, do not look at it. Even if the
 926                         * head contains something related to this inode, it is
 927                         * a _copy_ of corresponding on-flash node which sits
 928                         * somewhere else.
 929                         */
 930                        continue;
 931
 932                if (!wbuf_has_ino(wbuf, inode->i_ino))
 933                        continue;
 934
 935                mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
 936                if (wbuf_has_ino(wbuf, inode->i_ino))
 937                        err = ubifs_wbuf_sync_nolock(wbuf);
 938                mutex_unlock(&wbuf->io_mutex);
 939
 940                if (err) {
 941                        ubifs_ro_mode(c, err);
 942                        return err;
 943                }
 944        }
 945        return 0;
 946}
 947