linux/fs/jffs2/gc.c
   1/*
   2 * JFFS2 -- Journalling Flash File System, Version 2.
   3 *
   4 * Copyright © 2001-2007 Red Hat, Inc.
   5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
   6 *
   7 * Created by David Woodhouse <dwmw2@infradead.org>
   8 *
   9 * For licensing information, see the file 'LICENCE' in this directory.
  10 *
  11 */
  12
  13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  14
  15#include <linux/kernel.h>
  16#include <linux/mtd/mtd.h>
  17#include <linux/slab.h>
  18#include <linux/pagemap.h>
  19#include <linux/crc32.h>
  20#include <linux/compiler.h>
  21#include <linux/stat.h>
  22#include "nodelist.h"
  23#include "compr.h"
  24
  25static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
  26                                          struct jffs2_inode_cache *ic,
  27                                          struct jffs2_raw_node_ref *raw);
  28static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  29                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
  30static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  31                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  32static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  33                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  34static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  35                                      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  36                                      uint32_t start, uint32_t end);
  37static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  38                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  39                                       uint32_t start, uint32_t end);
  40static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
  41                               struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
  42
  43/* Called with erase_completion_lock held */
  44static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
  45{
  46        struct jffs2_eraseblock *ret;
  47        struct list_head *nextlist = NULL;
  48        int n = jiffies % 128;
  49
  50        /* Pick an eraseblock to garbage collect next. This is where we'll
  51           put the clever wear-levelling algorithms. Eventually.  */
  52        /* We possibly want to favour the dirtier blocks more when the
  53           number of free blocks is low. */
  54again:
  55        if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
  56                jffs2_dbg(1, "Picking block from bad_used_list to GC next\n");
  57                nextlist = &c->bad_used_list;
  58        } else if (n < 50 && !list_empty(&c->erasable_list)) {
  59                /* Note that most of them will have gone directly to be erased.
  60                   So don't favour the erasable_list _too_ much. */
  61                jffs2_dbg(1, "Picking block from erasable_list to GC next\n");
  62                nextlist = &c->erasable_list;
  63        } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
  64                /* Most of the time, pick one off the very_dirty list */
  65                jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n");
  66                nextlist = &c->very_dirty_list;
  67        } else if (n < 126 && !list_empty(&c->dirty_list)) {
  68                jffs2_dbg(1, "Picking block from dirty_list to GC next\n");
  69                nextlist = &c->dirty_list;
  70        } else if (!list_empty(&c->clean_list)) {
  71                jffs2_dbg(1, "Picking block from clean_list to GC next\n");
  72                nextlist = &c->clean_list;
  73        } else if (!list_empty(&c->dirty_list)) {
  74                jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n");
  75
  76                nextlist = &c->dirty_list;
  77        } else if (!list_empty(&c->very_dirty_list)) {
  78                jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n");
  79                nextlist = &c->very_dirty_list;
  80        } else if (!list_empty(&c->erasable_list)) {
  81                jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n");
  82
  83                nextlist = &c->erasable_list;
  84        } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
   85                /* There are blocks waiting for the wbuf sync */
  86                jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n");
  87                spin_unlock(&c->erase_completion_lock);
  88                jffs2_flush_wbuf_pad(c);
  89                spin_lock(&c->erase_completion_lock);
  90                goto again;
  91        } else {
  92                /* Eep. All were empty */
  93                jffs2_dbg(1, "No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
  94                return NULL;
  95        }
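             /* With n drawn from jiffies % 128, no reclaimable bad blocks
                pending and all of the other lists non-empty, the thresholds
                above work out to roughly 50/128 erasable, 60/128 very_dirty,
                16/128 dirty and 2/128 clean: mostly-dirty blocks are strongly
                favoured, but clean blocks are still picked occasionally as a
                crude form of wear levelling. */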
  96
  97        ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
  98        list_del(&ret->list);
  99        c->gcblock = ret;
 100        ret->gc_node = ret->first_node;
 101        if (!ret->gc_node) {
 102                pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n",
 103                        ret->offset);
 104                BUG();
 105        }
 106
  107        /* Have we accidentally picked a clean block with wasted space? */
 108        if (ret->wasted_size) {
 109                jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n",
 110                          ret->wasted_size);
 111                ret->dirty_size += ret->wasted_size;
 112                c->wasted_size -= ret->wasted_size;
 113                c->dirty_size += ret->wasted_size;
 114                ret->wasted_size = 0;
 115        }
 116
 117        return ret;
 118}
 119
 120/* jffs2_garbage_collect_pass
 121 * Make a single attempt to progress GC. Move one node, and possibly
 122 * start erasing one eraseblock.
 123 */
 124int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
 125{
 126        struct jffs2_inode_info *f;
 127        struct jffs2_inode_cache *ic;
 128        struct jffs2_eraseblock *jeb;
 129        struct jffs2_raw_node_ref *raw;
 130        uint32_t gcblock_dirty;
 131        int ret = 0, inum, nlink;
 132        int xattr = 0;
 133
 134        if (mutex_lock_interruptible(&c->alloc_sem))
 135                return -EINTR;
 136
 137
 138        for (;;) {
 139                /* We can't start doing GC until we've finished checking
 140                   the node CRCs etc. */
 141                int bucket, want_ino;
 142
 143                spin_lock(&c->erase_completion_lock);
 144                if (!c->unchecked_size)
 145                        break;
 146                spin_unlock(&c->erase_completion_lock);
 147
 148                if (!xattr)
 149                        xattr = jffs2_verify_xattr(c);
 150
 151                spin_lock(&c->inocache_lock);
  152        /* Instead of processing the inodes in numeric order, doing a
  153         * hash lookup for each possible number, just walk the hash
  154         * buckets of *existing* inodes. This means that we process
  155         * them out-of-order, but it can be a lot faster if there's
  156         * a sparse inode# space. Which there often is. */
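             /* c->check_ino encodes where to resume: the bucket is
                check_ino % inocache_hashsize, and inodes numbered below
                check_ino in that bucket have already been handled. That is
                why got_next below advances it by inocache_hashsize rather
                than by one. */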
 157                want_ino = c->check_ino;
 158                for (bucket = c->check_ino % c->inocache_hashsize ; bucket < c->inocache_hashsize; bucket++) {
 159                        for (ic = c->inocache_list[bucket]; ic; ic = ic->next) {
 160                                if (ic->ino < want_ino)
 161                                        continue;
 162
 163                                if (ic->state != INO_STATE_CHECKEDABSENT &&
 164                                    ic->state != INO_STATE_PRESENT)
 165                                        goto got_next; /* with inocache_lock held */
 166
 167                                jffs2_dbg(1, "Skipping ino #%u already checked\n",
 168                                          ic->ino);
 169                        }
 170                        want_ino = 0;
 171                }
 172
 173                /* Point c->check_ino past the end of the last bucket. */
 174                c->check_ino = ((c->highest_ino + c->inocache_hashsize + 1) &
 175                                ~c->inocache_hashsize) - 1;
 176
 177                spin_unlock(&c->inocache_lock);
 178
 179                pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
 180                        c->unchecked_size);
 181                jffs2_dbg_dump_block_lists_nolock(c);
 182                mutex_unlock(&c->alloc_sem);
 183                return -ENOSPC;
 184
 185        got_next:
  186                /* For next time round the loop, we want c->check_ino to indicate
 187                 * the *next* one we want to check. And since we're walking the
 188                 * buckets rather than doing it sequentially, it's: */
 189                c->check_ino = ic->ino + c->inocache_hashsize;
 190
 191                if (!ic->pino_nlink) {
 192                        jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
 193                                  ic->ino);
 194                        spin_unlock(&c->inocache_lock);
 195                        jffs2_xattr_delete_inode(c, ic);
 196                        continue;
 197                }
 198                switch(ic->state) {
 199                case INO_STATE_CHECKEDABSENT:
 200                case INO_STATE_PRESENT:
 201                        spin_unlock(&c->inocache_lock);
 202                        continue;
 203
 204                case INO_STATE_GC:
 205                case INO_STATE_CHECKING:
 206                        pr_warn("Inode #%u is in state %d during CRC check phase!\n",
 207                                ic->ino, ic->state);
 208                        spin_unlock(&c->inocache_lock);
 209                        BUG();
 210
 211                case INO_STATE_READING:
 212                        /* We need to wait for it to finish, lest we move on
 213                           and trigger the BUG() above while we haven't yet
 214                           finished checking all its nodes */
 215                        jffs2_dbg(1, "Waiting for ino #%u to finish reading\n",
 216                                  ic->ino);
 217                        /* We need to come back again for the _same_ inode. We've
 218                         made no progress in this case, but that should be OK */
 219                        c->check_ino = ic->ino;
 220
 221                        mutex_unlock(&c->alloc_sem);
 222                        sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 223                        return 0;
 224
 225                default:
 226                        BUG();
 227
 228                case INO_STATE_UNCHECKED:
 229                        ;
 230                }
 231                ic->state = INO_STATE_CHECKING;
 232                spin_unlock(&c->inocache_lock);
 233
 234                jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n",
 235                          __func__, ic->ino);
 236
 237                ret = jffs2_do_crccheck_inode(c, ic);
 238                if (ret)
 239                        pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n",
 240                                ic->ino);
 241
 242                jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
 243                mutex_unlock(&c->alloc_sem);
 244                return ret;
 245        }
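             /* We only get here via the break above, i.e. with
                c->unchecked_size == 0 and the erase_completion_lock still
                held. */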
 246
 247        /* If there are any blocks which need erasing, erase them now */
 248        if (!list_empty(&c->erase_complete_list) ||
 249            !list_empty(&c->erase_pending_list)) {
 250                spin_unlock(&c->erase_completion_lock);
 251                mutex_unlock(&c->alloc_sem);
 252                jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__);
 253                if (jffs2_erase_pending_blocks(c, 1))
 254                        return 0;
 255
 256                jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
 257                mutex_lock(&c->alloc_sem);
 258                spin_lock(&c->erase_completion_lock);
 259        }
 260
 261        /* First, work out which block we're garbage-collecting */
 262        jeb = c->gcblock;
 263
 264        if (!jeb)
 265                jeb = jffs2_find_gc_block(c);
 266
 267        if (!jeb) {
 268                /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
 269                if (c->nr_erasing_blocks) {
 270                        spin_unlock(&c->erase_completion_lock);
 271                        mutex_unlock(&c->alloc_sem);
 272                        return -EAGAIN;
 273                }
 274                jffs2_dbg(1, "Couldn't find erase block to garbage collect!\n");
 275                spin_unlock(&c->erase_completion_lock);
 276                mutex_unlock(&c->alloc_sem);
 277                return -EIO;
 278        }
 279
 280        jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n",
 281                  jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size);
 282        D1(if (c->nextblock)
 283           printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
 284
 285        if (!jeb->used_size) {
 286                mutex_unlock(&c->alloc_sem);
 287                goto eraseit;
 288        }
 289
 290        raw = jeb->gc_node;
 291        gcblock_dirty = jeb->dirty_size;
 292
 293        while(ref_obsolete(raw)) {
 294                jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n",
 295                          ref_offset(raw));
 296                raw = ref_next(raw);
 297                if (unlikely(!raw)) {
  298                        pr_warn("Eep. End of raw list while there are supposedly still nodes to GC\n");
 299                        pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
 300                                jeb->offset, jeb->free_size,
 301                                jeb->dirty_size, jeb->used_size);
 302                        jeb->gc_node = raw;
 303                        spin_unlock(&c->erase_completion_lock);
 304                        mutex_unlock(&c->alloc_sem);
 305                        BUG();
 306                }
 307        }
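             /* Record how far we got, so the next pass resumes from this
                node instead of rescanning the ones we just skipped. */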
 308        jeb->gc_node = raw;
 309
 310        jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n",
 311                  ref_offset(raw));
 312
 313        if (!raw->next_in_ino) {
 314                /* Inode-less node. Clean marker, snapshot or something like that */
 315                spin_unlock(&c->erase_completion_lock);
 316                if (ref_flags(raw) == REF_PRISTINE) {
 317                        /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
 318                        jffs2_garbage_collect_pristine(c, NULL, raw);
 319                } else {
 320                        /* Just mark it obsolete */
 321                        jffs2_mark_node_obsolete(c, raw);
 322                }
 323                mutex_unlock(&c->alloc_sem);
 324                goto eraseit_lock;
 325        }
 326
 327        ic = jffs2_raw_ref_to_ic(raw);
 328
 329#ifdef CONFIG_JFFS2_FS_XATTR
  330        /* When 'ic' refers to an xattr_datum/xattr_ref, this node is GCed as an
  331         * xattr. We can tell whether it is an inode or an xattr from ic->class. */
 332        if (ic->class == RAWNODE_CLASS_XATTR_DATUM
 333            || ic->class == RAWNODE_CLASS_XATTR_REF) {
 334                spin_unlock(&c->erase_completion_lock);
 335
 336                if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
 337                        ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
 338                } else {
 339                        ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
 340                }
 341                goto test_gcnode;
 342        }
 343#endif
 344
 345        /* We need to hold the inocache. Either the erase_completion_lock or
  346           the inocache_lock is sufficient; we trade down since the inocache_lock
 347           causes less contention. */
 348        spin_lock(&c->inocache_lock);
 349
 350        spin_unlock(&c->erase_completion_lock);
 351
 352        jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n",
 353                  __func__, jeb->offset, ref_offset(raw), ref_flags(raw),
 354                  ic->ino);
 355
 356        /* Three possibilities:
 357           1. Inode is already in-core. We must iget it and do proper
 358              updating to its fragtree, etc.
 359           2. Inode is not in-core, node is REF_PRISTINE. We lock the
 360              inocache to prevent a read_inode(), copy the node intact.
 361           3. Inode is not in-core, node is not pristine. We must iget()
 362              and take the slow path.
 363        */
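             /* The switch below implements exactly that: only
                INO_STATE_CHECKEDABSENT with a REF_PRISTINE node takes the
                verbatim-copy route (via INO_STATE_GC); anything else that
                survives the switch goes through jffs2_gc_fetch_inode() and
                the full fragtree update. */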
 364
 365        switch(ic->state) {
 366        case INO_STATE_CHECKEDABSENT:
 367                /* It's been checked, but it's not currently in-core.
 368                   We can just copy any pristine nodes, but have
 369                   to prevent anyone else from doing read_inode() while
 370                   we're at it, so we set the state accordingly */
 371                if (ref_flags(raw) == REF_PRISTINE)
 372                        ic->state = INO_STATE_GC;
 373                else {
 374                        jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
 375                                  ic->ino);
 376                }
 377                break;
 378
 379        case INO_STATE_PRESENT:
 380                /* It's in-core. GC must iget() it. */
 381                break;
 382
 383        case INO_STATE_UNCHECKED:
 384        case INO_STATE_CHECKING:
 385        case INO_STATE_GC:
 386                /* Should never happen. We should have finished checking
 387                   by the time we actually start doing any GC, and since
 388                   we're holding the alloc_sem, no other garbage collection
 389                   can happen.
 390                */
 391                pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
 392                        ic->ino, ic->state);
 393                mutex_unlock(&c->alloc_sem);
 394                spin_unlock(&c->inocache_lock);
 395                BUG();
 396
 397        case INO_STATE_READING:
 398                /* Someone's currently trying to read it. We must wait for
 399                   them to finish and then go through the full iget() route
 400                   to do the GC. However, sometimes read_inode() needs to get
  401                   the alloc_sem (for marking nodes invalid) so we must
 402                   drop the alloc_sem before sleeping. */
 403
 404                mutex_unlock(&c->alloc_sem);
 405                jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n",
 406                          __func__, ic->ino, ic->state);
 407                sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 408                /* And because we dropped the alloc_sem we must start again from the
 409                   beginning. Ponder chance of livelock here -- we're returning success
 410                   without actually making any progress.
 411
 412                   Q: What are the chances that the inode is back in INO_STATE_READING
 413                   again by the time we next enter this function? And that this happens
 414                   enough times to cause a real delay?
 415
 416                   A: Small enough that I don't care :)
 417                */
 418                return 0;
 419        }
 420
 421        /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
 422           node intact, and we don't have to muck about with the fragtree etc.
 423           because we know it's not in-core. If it _was_ in-core, we go through
 424           all the iget() crap anyway */
 425
 426        if (ic->state == INO_STATE_GC) {
 427                spin_unlock(&c->inocache_lock);
 428
 429                ret = jffs2_garbage_collect_pristine(c, ic, raw);
 430
 431                spin_lock(&c->inocache_lock);
 432                ic->state = INO_STATE_CHECKEDABSENT;
 433                wake_up(&c->inocache_wq);
 434
 435                if (ret != -EBADFD) {
 436                        spin_unlock(&c->inocache_lock);
 437                        goto test_gcnode;
 438                }
 439
 440                /* Fall through if it wanted us to, with inocache_lock held */
 441        }
 442
 443        /* Prevent the fairly unlikely race where the gcblock is
 444           entirely obsoleted by the final close of a file which had
 445           the only valid nodes in the block, followed by erasure,
 446           followed by freeing of the ic because the erased block(s)
 447           held _all_ the nodes of that inode.... never been seen but
 448           it's vaguely possible. */
 449
 450        inum = ic->ino;
 451        nlink = ic->pino_nlink;
 452        spin_unlock(&c->inocache_lock);
 453
 454        f = jffs2_gc_fetch_inode(c, inum, !nlink);
 455        if (IS_ERR(f)) {
 456                ret = PTR_ERR(f);
 457                goto release_sem;
 458        }
 459        if (!f) {
 460                ret = 0;
 461                goto release_sem;
 462        }
 463
 464        ret = jffs2_garbage_collect_live(c, jeb, raw, f);
 465
 466        jffs2_gc_release_inode(c, f);
 467
 468 test_gcnode:
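             /* Successfully GCing a node must either mark it obsolete or
                grow the block's dirty_size; if neither happened since we
                snapshotted gcblock_dirty, nothing was reclaimed. */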
 469        if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
 470                /* Eep. This really should never happen. GC is broken */
 471                pr_err("Error garbage collecting node at %08x!\n",
 472                       ref_offset(jeb->gc_node));
 473                ret = -ENOSPC;
 474        }
 475 release_sem:
 476        mutex_unlock(&c->alloc_sem);
 477
 478 eraseit_lock:
 479        /* If we've finished this block, start it erasing */
 480        spin_lock(&c->erase_completion_lock);
 481
 482 eraseit:
 483        if (c->gcblock && !c->gcblock->used_size) {
 484                jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n",
 485                          c->gcblock->offset);
 486                /* We're GC'ing an empty block? */
 487                list_add_tail(&c->gcblock->list, &c->erase_pending_list);
 488                c->gcblock = NULL;
 489                c->nr_erasing_blocks++;
 490                jffs2_garbage_collect_trigger(c);
 491        }
 492        spin_unlock(&c->erase_completion_lock);
 493
 494        return ret;
 495}
 496
 497static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
 498                                      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
 499{
 500        struct jffs2_node_frag *frag;
 501        struct jffs2_full_dnode *fn = NULL;
 502        struct jffs2_full_dirent *fd;
 503        uint32_t start = 0, end = 0, nrfrags = 0;
 504        int ret = 0;
 505
 506        mutex_lock(&f->sem);
 507
 508        /* Now we have the lock for this inode. Check that it's still the one at the head
 509           of the list. */
 510
 511        spin_lock(&c->erase_completion_lock);
 512
 513        if (c->gcblock != jeb) {
 514                spin_unlock(&c->erase_completion_lock);
 515                jffs2_dbg(1, "GC block is no longer gcblock. Restart\n");
 516                goto upnout;
 517        }
 518        if (ref_obsolete(raw)) {
 519                spin_unlock(&c->erase_completion_lock);
 520                jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n");
 521                /* They'll call again */
 522                goto upnout;
 523        }
 524        spin_unlock(&c->erase_completion_lock);
 525
 526        /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
 527        if (f->metadata && f->metadata->raw == raw) {
 528                fn = f->metadata;
 529                ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
 530                goto upnout;
 531        }
 532
 533        /* FIXME. Read node and do lookup? */
 534        for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
 535                if (frag->node && frag->node->raw == raw) {
 536                        fn = frag->node;
 537                        end = frag->ofs + frag->size;
 538                        if (!nrfrags++)
 539                                start = frag->ofs;
 540                        if (nrfrags == frag->node->frags)
 541                                break; /* We've found them all */
 542                }
 543        }
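             /* start..end now spans every frag still backed by this node.
                Data nodes are written so that they never cover more than
                one page, so if the range crosses a page boundary the node
                can only be a hole node, and it is GC'd as such below. */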
 544        if (fn) {
 545                if (ref_flags(raw) == REF_PRISTINE) {
 546                        ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
 547                        if (!ret) {
 548                                /* Urgh. Return it sensibly. */
 549                                frag->node->raw = f->inocache->nodes;
 550                        }
 551                        if (ret != -EBADFD)
 552                                goto upnout;
 553                }
 554                /* We found a datanode. Do the GC */
 555                if((start >> PAGE_SHIFT) < ((end-1) >> PAGE_SHIFT)) {
 556                        /* It crosses a page boundary. Therefore, it must be a hole. */
 557                        ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
 558                } else {
 559                        /* It could still be a hole. But we GC the page this way anyway */
 560                        ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
 561                }
 562                goto upnout;
 563        }
 564
 565        /* Wasn't a dnode. Try dirent */
 566        for (fd = f->dents; fd; fd=fd->next) {
 567                if (fd->raw == raw)
 568                        break;
 569        }
 570
 571        if (fd && fd->ino) {
 572                ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
 573        } else if (fd) {
 574                ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
 575        } else {
 576                pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n",
 577                        ref_offset(raw), f->inocache->ino);
 578                if (ref_obsolete(raw)) {
 579                        pr_warn("But it's obsolete so we don't mind too much\n");
 580                } else {
 581                        jffs2_dbg_dump_node(c, ref_offset(raw));
 582                        BUG();
 583                }
 584        }
 585 upnout:
 586        mutex_unlock(&f->sem);
 587
 588        return ret;
 589}
 590
 591static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
 592                                          struct jffs2_inode_cache *ic,
 593                                          struct jffs2_raw_node_ref *raw)
 594{
 595        union jffs2_node_union *node;
 596        size_t retlen;
 597        int ret;
 598        uint32_t phys_ofs, alloclen;
 599        uint32_t crc, rawlen;
 600        int retried = 0;
 601
 602        jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n",
 603                  ref_offset(raw));
 604
 605        alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
 606
 607        /* Ask for a small amount of space (or the totlen if smaller) because we
 608           don't want to force wastage of the end of a block if splitting would
 609           work. */
 610        if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
 611                alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
 612
 613        ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
 614        /* 'rawlen' is not the exact summary size; it is only an upper estimation */
 615
 616        if (ret)
 617                return ret;
 618
 619        if (alloclen < rawlen) {
 620                /* Doesn't fit untouched. We'll go the old route and split it */
 621                return -EBADFD;
 622        }
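             /* -EBADFD is used here (and at the CRC checks below) purely as
                an internal "can't copy this node verbatim" signal; callers
                respond by falling back to the ordinary split-and-rewrite GC
                path. */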
 623
 624        node = kmalloc(rawlen, GFP_KERNEL);
 625        if (!node)
 626                return -ENOMEM;
 627
 628        ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
 629        if (!ret && retlen != rawlen)
 630                ret = -EIO;
 631        if (ret)
 632                goto out_node;
 633
 634        crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
 635        if (je32_to_cpu(node->u.hdr_crc) != crc) {
 636                pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 637                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
 638                goto bail;
 639        }
 640
 641        switch(je16_to_cpu(node->u.nodetype)) {
 642        case JFFS2_NODETYPE_INODE:
 643                crc = crc32(0, node, sizeof(node->i)-8);
 644                if (je32_to_cpu(node->i.node_crc) != crc) {
 645                        pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 646                                ref_offset(raw), je32_to_cpu(node->i.node_crc),
 647                                crc);
 648                        goto bail;
 649                }
 650
 651                if (je32_to_cpu(node->i.dsize)) {
 652                        crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
 653                        if (je32_to_cpu(node->i.data_crc) != crc) {
 654                                pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 655                                        ref_offset(raw),
 656                                        je32_to_cpu(node->i.data_crc), crc);
 657                                goto bail;
 658                        }
 659                }
 660                break;
 661
 662        case JFFS2_NODETYPE_DIRENT:
 663                crc = crc32(0, node, sizeof(node->d)-8);
 664                if (je32_to_cpu(node->d.node_crc) != crc) {
 665                        pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 666                                ref_offset(raw),
 667                                je32_to_cpu(node->d.node_crc), crc);
 668                        goto bail;
 669                }
 670
 671                if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
 672                        pr_warn("Name in dirent node at 0x%08x contains zeroes\n",
 673                                ref_offset(raw));
 674                        goto bail;
 675                }
 676
 677                if (node->d.nsize) {
 678                        crc = crc32(0, node->d.name, node->d.nsize);
 679                        if (je32_to_cpu(node->d.name_crc) != crc) {
 680                                pr_warn("Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 681                                        ref_offset(raw),
 682                                        je32_to_cpu(node->d.name_crc), crc);
 683                                goto bail;
 684                        }
 685                }
 686                break;
 687        default:
 688                /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
 689                if (ic) {
 690                        pr_warn("Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
 691                                ref_offset(raw), je16_to_cpu(node->u.nodetype));
 692                        goto bail;
 693                }
 694        }
 695
 696        /* OK, all the CRCs are good; this node can just be copied as-is. */
 697 retry:
 698        phys_ofs = write_ofs(c);
 699
 700        ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
 701
 702        if (ret || (retlen != rawlen)) {
 703                pr_notice("Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
 704                          rawlen, phys_ofs, ret, retlen);
 705                if (retlen) {
 706                        jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
 707                } else {
 708                        pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
 709                                  phys_ofs);
 710                }
 711                if (!retried) {
 712                        /* Try to reallocate space and retry */
 713                        uint32_t dummy;
 714                        struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
 715
 716                        retried = 1;
 717
 718                        jffs2_dbg(1, "Retrying failed write of REF_PRISTINE node.\n");
 719
 720                        jffs2_dbg_acct_sanity_check(c,jeb);
 721                        jffs2_dbg_acct_paranoia_check(c, jeb);
 722
 723                        ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
  724                                                /* rawlen is not the exact summary size;
  725                                                   it is only an upper estimate */
 726
 727                        if (!ret) {
 728                                jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
 729                                          phys_ofs);
 730
 731                                jffs2_dbg_acct_sanity_check(c,jeb);
 732                                jffs2_dbg_acct_paranoia_check(c, jeb);
 733
 734                                goto retry;
 735                        }
 736                        jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
 737                                  ret);
 738                }
 739
 740                if (!ret)
 741                        ret = -EIO;
 742                goto out_node;
 743        }
 744        jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
 745
 746        jffs2_mark_node_obsolete(c, raw);
 747        jffs2_dbg(1, "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n",
 748                  ref_offset(raw));
 749
 750 out_node:
 751        kfree(node);
 752        return ret;
 753 bail:
 754        ret = -EBADFD;
 755        goto out_node;
 756}
 757
 758static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 759                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
 760{
 761        struct jffs2_full_dnode *new_fn;
 762        struct jffs2_raw_inode ri;
 763        struct jffs2_node_frag *last_frag;
 764        union jffs2_device_node dev;
 765        char *mdata = NULL;
 766        int mdatalen = 0;
 767        uint32_t alloclen, ilen;
 768        int ret;
 769
 770        if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
 771            S_ISCHR(JFFS2_F_I_MODE(f)) ) {
 772                /* For these, we don't actually need to read the old node */
 773                mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
 774                mdata = (char *)&dev;
 775                jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
 776                          __func__, mdatalen);
 777        } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
 778                mdatalen = fn->size;
 779                mdata = kmalloc(fn->size, GFP_KERNEL);
 780                if (!mdata) {
 781                        pr_warn("kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
 782                        return -ENOMEM;
 783                }
 784                ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
 785                if (ret) {
 786                        pr_warn("read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n",
 787                                ret);
 788                        kfree(mdata);
 789                        return ret;
 790                }
  791                jffs2_dbg(1, "%s(): Writing %d bytes of symlink target\n",
 792                          __func__, mdatalen);
 793
 794        }
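             /* For anything other than a device node or symlink the
                metadata node carries no payload, so mdata stays NULL and
                mdatalen zero; we simply write out a fresh bare inode node
                below. */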
 795
 796        ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
 797                                JFFS2_SUMMARY_INODE_SIZE);
 798        if (ret) {
 799                pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
 800                        sizeof(ri) + mdatalen, ret);
 801                goto out;
 802        }
 803
 804        last_frag = frag_last(&f->fragtree);
 805        if (last_frag)
  806                /* Fetch the inode length from the fragtree rather than
  807                 * from i_size since i_size may not have been updated yet */
 808                ilen = last_frag->ofs + last_frag->size;
 809        else
 810                ilen = JFFS2_F_I_SIZE(f);
 811
 812        memset(&ri, 0, sizeof(ri));
 813        ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 814        ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
 815        ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
 816        ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
 817
 818        ri.ino = cpu_to_je32(f->inocache->ino);
 819        ri.version = cpu_to_je32(++f->highest_version);
 820        ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
 821        ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
 822        ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
 823        ri.isize = cpu_to_je32(ilen);
 824        ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
 825        ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
 826        ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 827        ri.offset = cpu_to_je32(0);
 828        ri.csize = cpu_to_je32(mdatalen);
 829        ri.dsize = cpu_to_je32(mdatalen);
 830        ri.compr = JFFS2_COMPR_NONE;
 831        ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
 832        ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
 833
 834        new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
 835
 836        if (IS_ERR(new_fn)) {
 837                pr_warn("Error writing new dnode: %ld\n", PTR_ERR(new_fn));
 838                ret = PTR_ERR(new_fn);
 839                goto out;
 840        }
 841        jffs2_mark_node_obsolete(c, fn->raw);
 842        jffs2_free_full_dnode(fn);
 843        f->metadata = new_fn;
 844 out:
 845        if (S_ISLNK(JFFS2_F_I_MODE(f)))
 846                kfree(mdata);
 847        return ret;
 848}
 849
 850static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 851                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 852{
 853        struct jffs2_full_dirent *new_fd;
 854        struct jffs2_raw_dirent rd;
 855        uint32_t alloclen;
 856        int ret;
 857
 858        rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 859        rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
 860        rd.nsize = strlen(fd->name);
 861        rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
 862        rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
 863
 864        rd.pino = cpu_to_je32(f->inocache->ino);
 865        rd.version = cpu_to_je32(++f->highest_version);
 866        rd.ino = cpu_to_je32(fd->ino);
 867        /* If the times on this inode were set by explicit utime() they can be different,
 868           so refrain from splatting them. */
 869        if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
 870                rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 871        else
 872                rd.mctime = cpu_to_je32(0);
 873        rd.type = fd->type;
 874        rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
 875        rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
 876
 877        ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
 878                                JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
 879        if (ret) {
 880                pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
 881                        sizeof(rd)+rd.nsize, ret);
 882                return ret;
 883        }
 884        new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
 885
 886        if (IS_ERR(new_fd)) {
 887                pr_warn("jffs2_write_dirent in garbage_collect_dirent failed: %ld\n",
 888                        PTR_ERR(new_fd));
 889                return PTR_ERR(new_fd);
 890        }
 891        jffs2_add_fd_to_list(c, new_fd, &f->dents);
 892        return 0;
 893}
 894
 895static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 896                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 897{
 898        struct jffs2_full_dirent **fdp = &f->dents;
 899        int found = 0;
 900
 901        /* On a medium where we can't actually mark nodes obsolete
  902           permanently, such as NAND flash, we need to work out
 903           whether this deletion dirent is still needed to actively
 904           delete a 'real' dirent with the same name that's still
 905           somewhere else on the flash. */
 906        if (!jffs2_can_mark_obsolete(c)) {
 907                struct jffs2_raw_dirent *rd;
 908                struct jffs2_raw_node_ref *raw;
 909                int ret;
 910                size_t retlen;
 911                int name_len = strlen(fd->name);
 912                uint32_t name_crc = crc32(0, fd->name, name_len);
 913                uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
 914
 915                rd = kmalloc(rawlen, GFP_KERNEL);
 916                if (!rd)
 917                        return -ENOMEM;
 918
 919                /* Prevent the erase code from nicking the obsolete node refs while
 920                   we're looking at them. I really don't like this extra lock but
 921                   can't see any alternative. Suggestions on a postcard to... */
 922                mutex_lock(&c->erase_free_sem);
 923
 924                for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
 925
 926                        cond_resched();
 927
 928                        /* We only care about obsolete ones */
 929                        if (!(ref_obsolete(raw)))
 930                                continue;
 931
 932                        /* Any dirent with the same name is going to have the same length... */
 933                        if (ref_totlen(c, NULL, raw) != rawlen)
 934                                continue;
 935
 936                        /* Doesn't matter if there's one in the same erase block. We're going to
 937                           delete it too at the same time. */
 938                        if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
 939                                continue;
 940
 941                        jffs2_dbg(1, "Check potential deletion dirent at %08x\n",
 942                                  ref_offset(raw));
 943
 944                        /* This is an obsolete node belonging to the same directory, and it's of the right
 945                           length. We need to take a closer look...*/
 946                        ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
 947                        if (ret) {
 948                                pr_warn("%s(): Read error (%d) reading obsolete node at %08x\n",
 949                                        __func__, ret, ref_offset(raw));
  950                                /* If we can't read it, we don't need to keep our deletion dirent for it. Continue */
 951                                continue;
 952                        }
 953                        if (retlen != rawlen) {
 954                                pr_warn("%s(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
 955                                        __func__, retlen, rawlen,
 956                                        ref_offset(raw));
 957                                continue;
 958                        }
 959
 960                        if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
 961                                continue;
 962
 963                        /* If the name CRC doesn't match, skip */
 964                        if (je32_to_cpu(rd->name_crc) != name_crc)
 965                                continue;
 966
 967                        /* If the name length doesn't match, or it's another deletion dirent, skip */
 968                        if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
 969                                continue;
 970
 971                        /* OK, check the actual name now */
 972                        if (memcmp(rd->name, fd->name, name_len))
 973                                continue;
 974
 975                        /* OK. The name really does match. There really is still an older node on
 976                           the flash which our deletion dirent obsoletes. So we have to write out
 977                           a new deletion dirent to replace it */
 978                        mutex_unlock(&c->erase_free_sem);
 979
 980                        jffs2_dbg(1, "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
 981                                  ref_offset(fd->raw), fd->name,
 982                                  ref_offset(raw), je32_to_cpu(rd->ino));
 983                        kfree(rd);
 984
 985                        return jffs2_garbage_collect_dirent(c, jeb, f, fd);
 986                }
 987
 988                mutex_unlock(&c->erase_free_sem);
 989                kfree(rd);
 990        }
 991
 992        /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
 993           we should update the metadata node with those times accordingly */
 994
 995        /* No need for it any more. Just mark it obsolete and remove it from the list */
 996        while (*fdp) {
 997                if ((*fdp) == fd) {
 998                        found = 1;
 999                        *fdp = fd->next;
1000                        break;
1001                }
1002                fdp = &(*fdp)->next;
1003        }
1004        if (!found) {
1005                pr_warn("Deletion dirent \"%s\" not found in list for ino #%u\n",
1006                        fd->name, f->inocache->ino);
1007        }
1008        jffs2_mark_node_obsolete(c, fd->raw);
1009        jffs2_free_full_dirent(fd);
1010        return 0;
1011}
1012
1013static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1014                                      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1015                                      uint32_t start, uint32_t end)
1016{
1017        struct jffs2_raw_inode ri;
1018        struct jffs2_node_frag *frag;
1019        struct jffs2_full_dnode *new_fn;
1020        uint32_t alloclen, ilen;
1021        int ret;
1022
1023        jffs2_dbg(1, "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
1024                  f->inocache->ino, start, end);
1025
1026        memset(&ri, 0, sizeof(ri));
1027
1028        if(fn->frags > 1) {
1029                size_t readlen;
1030                uint32_t crc;
1031                /* It's partially obsoleted by a later write. So we have to
1032                   write it out again with the _same_ version as before */
1033                ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
1034                if (readlen != sizeof(ri) || ret) {
1035                        pr_warn("Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n",
1036                                ret, readlen);
1037                        goto fill;
1038                }
1039                if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
1040                        pr_warn("%s(): Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
1041                                __func__, ref_offset(fn->raw),
1042                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
1043                        return -EIO;
1044                }
1045                if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
1046                        pr_warn("%s(): Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
1047                                __func__, ref_offset(fn->raw),
1048                                je32_to_cpu(ri.totlen), sizeof(ri));
1049                        return -EIO;
1050                }
1051                crc = crc32(0, &ri, sizeof(ri)-8);
1052                if (crc != je32_to_cpu(ri.node_crc)) {
1053                        pr_warn("%s: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
1054                                __func__, ref_offset(fn->raw),
1055                                je32_to_cpu(ri.node_crc), crc);
1056                        /* FIXME: We could possibly deal with this by writing new holes for each frag */
1057                        pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1058                                start, end, f->inocache->ino);
1059                        goto fill;
1060                }
1061                if (ri.compr != JFFS2_COMPR_ZERO) {
1062                        pr_warn("%s(): Node 0x%08x wasn't a hole node!\n",
1063                                __func__, ref_offset(fn->raw));
1064                        pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1065                                start, end, f->inocache->ino);
1066                        goto fill;
1067                }
1068        } else {
1069        fill:
1070                ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1071                ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1072                ri.totlen = cpu_to_je32(sizeof(ri));
1073                ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1074
1075                ri.ino = cpu_to_je32(f->inocache->ino);
1076                ri.version = cpu_to_je32(++f->highest_version);
1077                ri.offset = cpu_to_je32(start);
1078                ri.dsize = cpu_to_je32(end - start);
1079                ri.csize = cpu_to_je32(0);
1080                ri.compr = JFFS2_COMPR_ZERO;
1081        }
1082
1083        frag = frag_last(&f->fragtree);
1084        if (frag)
 1085                /* Fetch the inode length from the fragtree rather than
 1086                 * from i_size since i_size may not have been updated yet */
1087                ilen = frag->ofs + frag->size;
1088        else
1089                ilen = JFFS2_F_I_SIZE(f);
1090
1091        ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1092        ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1093        ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1094        ri.isize = cpu_to_je32(ilen);
1095        ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1096        ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1097        ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1098        ri.data_crc = cpu_to_je32(0);
1099        ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1100
1101        ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1102                                     JFFS2_SUMMARY_INODE_SIZE);
1103        if (ret) {
1104                pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1105                        sizeof(ri), ret);
1106                return ret;
1107        }
1108        new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1109
1110        if (IS_ERR(new_fn)) {
1111                pr_warn("Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1112                return PTR_ERR(new_fn);
1113        }
1114        if (je32_to_cpu(ri.version) == f->highest_version) {
1115                jffs2_add_full_dnode_to_inode(c, f, new_fn);
1116                if (f->metadata) {
1117                        jffs2_mark_node_obsolete(c, f->metadata->raw);
1118                        jffs2_free_full_dnode(f->metadata);
1119                        f->metadata = NULL;
1120                }
1121                return 0;
1122        }
1123
1124        /*
1125         * We should only get here in the case where the node we are
1126         * replacing had more than one frag, so we kept the same version
1127         * number as before. (Except in case of error -- see 'goto fill;'
1128         * above.)
1129         */
1130        D1(if(unlikely(fn->frags <= 1)) {
1131                        pr_warn("%s(): Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1132                                __func__, fn->frags, je32_to_cpu(ri.version),
1133                                f->highest_version, je32_to_cpu(ri.ino));
1134        });
1135
1136        /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1137        mark_ref_normal(new_fn->raw);
1138
1139        for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1140             frag; frag = frag_next(frag)) {
1141                if (frag->ofs > fn->size + fn->ofs)
1142                        break;
1143                if (frag->node == fn) {
1144                        frag->node = new_fn;
1145                        new_fn->frags++;
1146                        fn->frags--;
1147                }
1148        }
1149        if (fn->frags) {
1150                pr_warn("%s(): Old node still has frags!\n", __func__);
1151                BUG();
1152        }
1153        if (!new_fn->frags) {
1154                pr_warn("%s(): New node has no frags!\n", __func__);
1155                BUG();
1156        }
1157
1158        jffs2_mark_node_obsolete(c, fn->raw);
1159        jffs2_free_full_dnode(fn);
1160
1161        return 0;
1162}
1163
1164static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1165                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1166                                       uint32_t start, uint32_t end)
1167{
1168        struct jffs2_full_dnode *new_fn;
1169        struct jffs2_raw_inode ri;
1170        uint32_t alloclen, offset, orig_end, orig_start;
1171        int ret = 0;
1172        unsigned char *comprbuf = NULL, *writebuf;
1173        unsigned long pg;
1174        unsigned char *pg_ptr;
1175
1176        memset(&ri, 0, sizeof(ri));
1177
1178        jffs2_dbg(1, "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1179                  f->inocache->ino, start, end);
1180
1181        orig_end = end;
1182        orig_start = start;
1183
1184        if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1185                /* Attempt to do some merging. But only expand to cover logically
1186                   adjacent frags if the block containing them is already considered
1187                   to be dirty. Otherwise we end up with GC just going round in
1188                   circles dirtying the nodes it already wrote out, especially
1189                   on NAND where we have small eraseblocks and hence a much higher
1190                   chance of nodes having to be split to cross boundaries. */
1191
1192                struct jffs2_node_frag *frag;
1193                uint32_t min, max;
1194
1195                min = start & ~(PAGE_SIZE-1);
1196                max = min + PAGE_SIZE;
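                     /* min/max restrict the merge window to the single page
                        containing 'start'; frags outside that page are never
                        pulled in, whatever their state. */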
1197
1198                frag = jffs2_lookup_node_frag(&f->fragtree, start);
1199
1200                /* We could BUG_ON(!frag), but the frag->ofs dereference just below will oops anyway if the lookup failed. */
1201
1202                BUG_ON(frag->ofs != start);
1203
1204                /* First grow down... */
1205                while((frag = frag_prev(frag)) && frag->ofs >= min) {
1206
1207                        /* If the previous frag doesn't even reach back to the beginning
1208                           of the page, there's excessive fragmentation. Just merge. */
1209                        if (frag->ofs > min) {
1210                                jffs2_dbg(1, "Expanding down to cover partial frag (0x%x-0x%x)\n",
1211                                          frag->ofs, frag->ofs+frag->size);
1212                                start = frag->ofs;
1213                                continue;
1214                        }
1215                        /* OK. This frag holds the first byte of the page. */
1216                        if (!frag->node || !frag->node->raw) {
1217                                jffs2_dbg(1, "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1218                                          frag->ofs, frag->ofs+frag->size);
1219                                break;
1220                        } else {
1221
1222                                /* OK, it's a frag which extends to the beginning of the page. Does it live
1223                                   in a block which is still considered clean? If so, don't obsolete it.
1224                                   If not, cover it anyway. */
1225
1226                                struct jffs2_raw_node_ref *raw = frag->node->raw;
1227                                struct jffs2_eraseblock *jeb;
1228
1229                                jeb = &c->blocks[raw->flash_offset / c->sector_size];
1230
1231                                if (jeb == c->gcblock) {
1232                                        jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1233                                                  frag->ofs,
1234                                                  frag->ofs + frag->size,
1235                                                  ref_offset(raw));
1236                                        start = frag->ofs;
1237                                        break;
1238                                }
1239                                if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1240                                        jffs2_dbg(1, "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1241                                                  frag->ofs,
1242                                                  frag->ofs + frag->size,
1243                                                  jeb->offset);
1244                                        break;
1245                                }
1246
1247                                jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1248                                          frag->ofs,
1249                                          frag->ofs + frag->size,
1250                                          jeb->offset);
1251                                start = frag->ofs;
1252                                break;
1253                        }
1254                }
1255
1256                /* ... then up */
1257
1258                /* Find last frag which is actually part of the node we're to GC. */
1259                frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1260
1261                while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1262
1263                        /* If the next frag doesn't even reach the end of the page, there's
1264                           lots of fragmentation. Just merge. */
1265                        if (frag->ofs+frag->size < max) {
1266                                jffs2_dbg(1, "Expanding up to cover partial frag (0x%x-0x%x)\n",
1267                                          frag->ofs, frag->ofs+frag->size);
1268                                end = frag->ofs + frag->size;
1269                                continue;
1270                        }
1271
1272                        if (!frag->node || !frag->node->raw) {
1273                                jffs2_dbg(1, "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1274                                          frag->ofs, frag->ofs+frag->size);
1275                                break;
1276                        } else {
1277
1278                                /* OK, it's a frag which extends to the end of the page. Does it live
1279                                   in a block which is still considered clean? If so, don't obsolete it.
1280                                   If not, cover it anyway. */
1281
1282                                struct jffs2_raw_node_ref *raw = frag->node->raw;
1283                                struct jffs2_eraseblock *jeb;
1284
1285                                jeb = &c->blocks[raw->flash_offset / c->sector_size];
1286
1287                                if (jeb == c->gcblock) {
1288                                        jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1289                                                  frag->ofs,
1290                                                  frag->ofs + frag->size,
1291                                                  ref_offset(raw));
1292                                        end = frag->ofs + frag->size;
1293                                        break;
1294                                }
1295                                if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1296                                        jffs2_dbg(1, "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1297                                                  frag->ofs,
1298                                                  frag->ofs + frag->size,
1299                                                  jeb->offset);
1300                                        break;
1301                                }
1302
1303                                jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1304                                          frag->ofs,
1305                                          frag->ofs + frag->size,
1306                                          jeb->offset);
1307                                end = frag->ofs + frag->size;
1308                                break;
1309                        }
1310                }
1311                jffs2_dbg(1, "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1312                          orig_start, orig_end, start, end);
1313
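                    /* Sanity: the expansion may only grow the range, and must not
                       run past the last frag in the file. */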
1314                D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1315                BUG_ON(end < orig_end);
1316                BUG_ON(start > orig_start);
1317        }
1318
1319        /* The rules state that we must obtain the page lock *before* f->sem, so
1320         * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
1321         * actually going to *change* so we're safe; we only allow reading.
1322         *
1323         * It is important to note that jffs2_write_begin() will ensure that its
1324         * page is marked Uptodate before allocating space. That means that if we
1325         * end up here trying to GC the *same* page that jffs2_write_begin() is
1326         * trying to write out, read_cache_page() will not deadlock. */
1327        mutex_unlock(&f->sem);
1328        pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1329        mutex_lock(&f->sem);
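            /* pg_ptr now points at the page's data as mapped by jffs2_gc_fetch_page()
               (the page comes through the page cache -- hence the read_cache_page()
               reference in the error message below); 'pg' is the cookie handed back
               to jffs2_gc_release_page() once the copy is finished. */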
1330
1331        if (IS_ERR(pg_ptr)) {
1332                pr_warn("read_cache_page() returned error: %ld\n",
1333                        PTR_ERR(pg_ptr));
1334                return PTR_ERR(pg_ptr);
1335        }
1336
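            /* Copy out the (possibly expanded) range in one or more passes. Each
               pass reserves at least a node header plus JFFS2_MIN_DATA_LEN of
               space, lets the compressor consume as much of the remaining data as
               fits, and writes one replacement dnode; 'offset' then advances by
               the uncompressed bytes that node actually covers, so a single page
               may end up split across several nodes when space is tight. */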
1337        offset = start;
1338        while(offset < orig_end) {
1339                uint32_t datalen;
1340                uint32_t cdatalen;
1341                uint16_t comprtype = JFFS2_COMPR_NONE;
1342
1343                ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1344                                        &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1345
1346                if (ret) {
1347                        pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1348                                sizeof(ri) + JFFS2_MIN_DATA_LEN, ret);
1349                        break;
1350                }
1351                cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1352                datalen = end - offset;
1353
1354                writebuf = pg_ptr + (offset & (PAGE_SIZE -1));
1355
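                    /* datalen and cdatalen are in/out parameters to jffs2_compress()
                       (see compr.c): on entry, the uncompressed bytes we'd like to
                       cover and the room reserved for output; on return, the bytes
                       actually consumed and the size of the resulting (possibly
                       uncompressed) output, with the chosen compressor type as the
                       return value. */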
1356                comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1357
1358                ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1359                ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1360                ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1361                ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1362
1363                ri.ino = cpu_to_je32(f->inocache->ino);
1364                ri.version = cpu_to_je32(++f->highest_version);
1365                ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1366                ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1367                ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1368                ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1369                ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1370                ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1371                ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1372                ri.offset = cpu_to_je32(offset);
1373                ri.csize = cpu_to_je32(cdatalen);
1374                ri.dsize = cpu_to_je32(datalen);
1375                ri.compr = comprtype & 0xff;
1376                ri.usercompr = (comprtype >> 8) & 0xff;
1377                ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1378                ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1379
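                    /* The three CRCs cover, in turn, the common node header minus
                       the hdr_crc field itself, the raw inode minus its two trailing
                       CRC fields, and the compressed payload. ALLOC_GC marks the
                       write as being done on behalf of the garbage collector. */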
1380                new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1381
1382                jffs2_free_comprbuf(comprbuf, writebuf);
1383
1384                if (IS_ERR(new_fn)) {
1385                        pr_warn("Error writing new dnode: %ld\n",
1386                                PTR_ERR(new_fn));
1387                        ret = PTR_ERR(new_fn);
1388                        break;
1389                }
1390                ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1391                offset += datalen;
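                    /* A data node carries the current inode metadata too, so if the
                       inode previously had a bare metadata-only node it is now
                       redundant and can be obsoleted (just as in the exact-overlap
                       path of the hole GC above). */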
1392                if (f->metadata) {
1393                        jffs2_mark_node_obsolete(c, f->metadata->raw);
1394                        jffs2_free_full_dnode(f->metadata);
1395                        f->metadata = NULL;
1396                }
1397        }
1398
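            /* Hand the page back; jffs2_gc_release_page() undoes whatever
               reference/mapping jffs2_gc_fetch_page() took, whether or not every
               pass above succeeded. */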
1399        jffs2_gc_release_page(c, pg_ptr, &pg);
1400        return ret;
1401}
1402