linux/fs/jffs2/gc.c
<<
>>
Prefs
   1/*
   2 * JFFS2 -- Journalling Flash File System, Version 2.
   3 *
   4 * Copyright © 2001-2007 Red Hat, Inc.
   5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
   6 *
   7 * Created by David Woodhouse <dwmw2@infradead.org>
   8 *
   9 * For licensing information, see the file 'LICENCE' in this directory.
  10 *
  11 */
  12
  13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  14
  15#include <linux/kernel.h>
  16#include <linux/mtd/mtd.h>
  17#include <linux/slab.h>
  18#include <linux/pagemap.h>
  19#include <linux/crc32.h>
  20#include <linux/compiler.h>
  21#include <linux/stat.h>
  22#include "nodelist.h"
  23#include "compr.h"
  24
  25static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
  26                                          struct jffs2_inode_cache *ic,
  27                                          struct jffs2_raw_node_ref *raw);
  28static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  29                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
  30static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  31                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  32static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  33                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  34static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  35                                      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  36                                      uint32_t start, uint32_t end);
  37static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  38                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  39                                       uint32_t start, uint32_t end);
  40static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
  41                               struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
  42
  43/* Called with erase_completion_lock held */
  44static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
  45{
  46        struct jffs2_eraseblock *ret;
  47        struct list_head *nextlist = NULL;
  48        int n = jiffies % 128;
  49
  50        /* Pick an eraseblock to garbage collect next. This is where we'll
  51           put the clever wear-levelling algorithms. Eventually.  */
  52        /* We possibly want to favour the dirtier blocks more when the
  53           number of free blocks is low. */
  54again:
  55        if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
  56                jffs2_dbg(1, "Picking block from bad_used_list to GC next\n");
  57                nextlist = &c->bad_used_list;
  58        } else if (n < 50 && !list_empty(&c->erasable_list)) {
  59                /* Note that most of them will have gone directly to be erased.
  60                   So don't favour the erasable_list _too_ much. */
  61                jffs2_dbg(1, "Picking block from erasable_list to GC next\n");
  62                nextlist = &c->erasable_list;
  63        } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
  64                /* Most of the time, pick one off the very_dirty list */
  65                jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n");
  66                nextlist = &c->very_dirty_list;
  67        } else if (n < 126 && !list_empty(&c->dirty_list)) {
  68                jffs2_dbg(1, "Picking block from dirty_list to GC next\n");
  69                nextlist = &c->dirty_list;
  70        } else if (!list_empty(&c->clean_list)) {
  71                jffs2_dbg(1, "Picking block from clean_list to GC next\n");
  72                nextlist = &c->clean_list;
  73        } else if (!list_empty(&c->dirty_list)) {
  74                jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n");
  75
  76                nextlist = &c->dirty_list;
  77        } else if (!list_empty(&c->very_dirty_list)) {
  78                jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n");
  79                nextlist = &c->very_dirty_list;
  80        } else if (!list_empty(&c->erasable_list)) {
  81                jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n");
  82
  83                nextlist = &c->erasable_list;
  84        } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
  85                /* There are blocks are wating for the wbuf sync */
  86                jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n");
  87                spin_unlock(&c->erase_completion_lock);
  88                jffs2_flush_wbuf_pad(c);
  89                spin_lock(&c->erase_completion_lock);
  90                goto again;
  91        } else {
  92                /* Eep. All were empty */
  93                jffs2_dbg(1, "No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
  94                return NULL;
  95        }
  96
  97        ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
  98        list_del(&ret->list);
  99        c->gcblock = ret;
 100        ret->gc_node = ret->first_node;
 101        if (!ret->gc_node) {
 102                pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n",
 103                        ret->offset);
 104                BUG();
 105        }
 106
 107        /* Have we accidentally picked a clean block with wasted space ? */
 108        if (ret->wasted_size) {
 109                jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n",
 110                          ret->wasted_size);
 111                ret->dirty_size += ret->wasted_size;
 112                c->wasted_size -= ret->wasted_size;
 113                c->dirty_size += ret->wasted_size;
 114                ret->wasted_size = 0;
 115        }
 116
 117        return ret;
 118}
 119
 120/* jffs2_garbage_collect_pass
 121 * Make a single attempt to progress GC. Move one node, and possibly
 122 * start erasing one eraseblock.
 123 */
 124int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
 125{
 126        struct jffs2_inode_info *f;
 127        struct jffs2_inode_cache *ic;
 128        struct jffs2_eraseblock *jeb;
 129        struct jffs2_raw_node_ref *raw;
 130        uint32_t gcblock_dirty;
 131        int ret = 0, inum, nlink;
 132        int xattr = 0;
 133
 134        if (mutex_lock_interruptible(&c->alloc_sem))
 135                return -EINTR;
 136
 137        for (;;) {
 138                spin_lock(&c->erase_completion_lock);
 139                if (!c->unchecked_size)
 140                        break;
 141
 142                /* We can't start doing GC yet. We haven't finished checking
 143                   the node CRCs etc. Do it now. */
 144
 145                /* checked_ino is protected by the alloc_sem */
 146                if (c->checked_ino > c->highest_ino && xattr) {
 147                        pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
 148                                c->unchecked_size);
 149                        jffs2_dbg_dump_block_lists_nolock(c);
 150                        spin_unlock(&c->erase_completion_lock);
 151                        mutex_unlock(&c->alloc_sem);
 152                        return -ENOSPC;
 153                }
 154
 155                spin_unlock(&c->erase_completion_lock);
 156
 157                if (!xattr)
 158                        xattr = jffs2_verify_xattr(c);
 159
 160                spin_lock(&c->inocache_lock);
 161
 162                ic = jffs2_get_ino_cache(c, c->checked_ino++);
 163
 164                if (!ic) {
 165                        spin_unlock(&c->inocache_lock);
 166                        continue;
 167                }
 168
 169                if (!ic->pino_nlink) {
 170                        jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
 171                                  ic->ino);
 172                        spin_unlock(&c->inocache_lock);
 173                        jffs2_xattr_delete_inode(c, ic);
 174                        continue;
 175                }
 176                switch(ic->state) {
 177                case INO_STATE_CHECKEDABSENT:
 178                case INO_STATE_PRESENT:
 179                        jffs2_dbg(1, "Skipping ino #%u already checked\n",
 180                                  ic->ino);
 181                        spin_unlock(&c->inocache_lock);
 182                        continue;
 183
 184                case INO_STATE_GC:
 185                case INO_STATE_CHECKING:
 186                        pr_warn("Inode #%u is in state %d during CRC check phase!\n",
 187                                ic->ino, ic->state);
 188                        spin_unlock(&c->inocache_lock);
 189                        BUG();
 190
 191                case INO_STATE_READING:
 192                        /* We need to wait for it to finish, lest we move on
 193                           and trigger the BUG() above while we haven't yet
 194                           finished checking all its nodes */
 195                        jffs2_dbg(1, "Waiting for ino #%u to finish reading\n",
 196                                  ic->ino);
 197                        /* We need to come back again for the _same_ inode. We've
 198                         made no progress in this case, but that should be OK */
 199                        c->checked_ino--;
 200
 201                        mutex_unlock(&c->alloc_sem);
 202                        sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 203                        return 0;
 204
 205                default:
 206                        BUG();
 207
 208                case INO_STATE_UNCHECKED:
 209                        ;
 210                }
 211                ic->state = INO_STATE_CHECKING;
 212                spin_unlock(&c->inocache_lock);
 213
 214                jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n",
 215                          __func__, ic->ino);
 216
 217                ret = jffs2_do_crccheck_inode(c, ic);
 218                if (ret)
 219                        pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n",
 220                                ic->ino);
 221
 222                jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
 223                mutex_unlock(&c->alloc_sem);
 224                return ret;
 225        }
 226
 227        /* If there are any blocks which need erasing, erase them now */
 228        if (!list_empty(&c->erase_complete_list) ||
 229            !list_empty(&c->erase_pending_list)) {
 230                spin_unlock(&c->erase_completion_lock);
 231                mutex_unlock(&c->alloc_sem);
 232                jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__);
 233                if (jffs2_erase_pending_blocks(c, 1))
 234                        return 0;
 235
 236                jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
 237                mutex_lock(&c->alloc_sem);
 238                spin_lock(&c->erase_completion_lock);
 239        }
 240
 241        /* First, work out which block we're garbage-collecting */
 242        jeb = c->gcblock;
 243
 244        if (!jeb)
 245                jeb = jffs2_find_gc_block(c);
 246
 247        if (!jeb) {
 248                /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
 249                if (c->nr_erasing_blocks) {
 250                        spin_unlock(&c->erase_completion_lock);
 251                        mutex_unlock(&c->alloc_sem);
 252                        return -EAGAIN;
 253                }
 254                jffs2_dbg(1, "Couldn't find erase block to garbage collect!\n");
 255                spin_unlock(&c->erase_completion_lock);
 256                mutex_unlock(&c->alloc_sem);
 257                return -EIO;
 258        }
 259
 260        jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n",
 261                  jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size);
 262        D1(if (c->nextblock)
 263           printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
 264
 265        if (!jeb->used_size) {
 266                mutex_unlock(&c->alloc_sem);
 267                goto eraseit;
 268        }
 269
 270        raw = jeb->gc_node;
 271        gcblock_dirty = jeb->dirty_size;
 272
 273        while(ref_obsolete(raw)) {
 274                jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n",
 275                          ref_offset(raw));
 276                raw = ref_next(raw);
 277                if (unlikely(!raw)) {
 278                        pr_warn("eep. End of raw list while still supposedly nodes to GC\n");
 279                        pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
 280                                jeb->offset, jeb->free_size,
 281                                jeb->dirty_size, jeb->used_size);
 282                        jeb->gc_node = raw;
 283                        spin_unlock(&c->erase_completion_lock);
 284                        mutex_unlock(&c->alloc_sem);
 285                        BUG();
 286                }
 287        }
 288        jeb->gc_node = raw;
 289
 290        jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n",
 291                  ref_offset(raw));
 292
 293        if (!raw->next_in_ino) {
 294                /* Inode-less node. Clean marker, snapshot or something like that */
 295                spin_unlock(&c->erase_completion_lock);
 296                if (ref_flags(raw) == REF_PRISTINE) {
 297                        /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
 298                        jffs2_garbage_collect_pristine(c, NULL, raw);
 299                } else {
 300                        /* Just mark it obsolete */
 301                        jffs2_mark_node_obsolete(c, raw);
 302                }
 303                mutex_unlock(&c->alloc_sem);
 304                goto eraseit_lock;
 305        }
 306
 307        ic = jffs2_raw_ref_to_ic(raw);
 308
 309#ifdef CONFIG_JFFS2_FS_XATTR
 310        /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
 311         * We can decide whether this node is inode or xattr by ic->class.     */
 312        if (ic->class == RAWNODE_CLASS_XATTR_DATUM
 313            || ic->class == RAWNODE_CLASS_XATTR_REF) {
 314                spin_unlock(&c->erase_completion_lock);
 315
 316                if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
 317                        ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
 318                } else {
 319                        ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
 320                }
 321                goto test_gcnode;
 322        }
 323#endif
 324
 325        /* We need to hold the inocache. Either the erase_completion_lock or
 326           the inocache_lock are sufficient; we trade down since the inocache_lock
 327           causes less contention. */
 328        spin_lock(&c->inocache_lock);
 329
 330        spin_unlock(&c->erase_completion_lock);
 331
 332        jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n",
 333                  __func__, jeb->offset, ref_offset(raw), ref_flags(raw),
 334                  ic->ino);
 335
 336        /* Three possibilities:
 337           1. Inode is already in-core. We must iget it and do proper
 338              updating to its fragtree, etc.
 339           2. Inode is not in-core, node is REF_PRISTINE. We lock the
 340              inocache to prevent a read_inode(), copy the node intact.
 341           3. Inode is not in-core, node is not pristine. We must iget()
 342              and take the slow path.
 343        */
 344
 345        switch(ic->state) {
 346        case INO_STATE_CHECKEDABSENT:
 347                /* It's been checked, but it's not currently in-core.
 348                   We can just copy any pristine nodes, but have
 349                   to prevent anyone else from doing read_inode() while
 350                   we're at it, so we set the state accordingly */
 351                if (ref_flags(raw) == REF_PRISTINE)
 352                        ic->state = INO_STATE_GC;
 353                else {
 354                        jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
 355                                  ic->ino);
 356                }
 357                break;
 358
 359        case INO_STATE_PRESENT:
 360                /* It's in-core. GC must iget() it. */
 361                break;
 362
 363        case INO_STATE_UNCHECKED:
 364        case INO_STATE_CHECKING:
 365        case INO_STATE_GC:
 366                /* Should never happen. We should have finished checking
 367                   by the time we actually start doing any GC, and since
 368                   we're holding the alloc_sem, no other garbage collection
 369                   can happen.
 370                */
 371                pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
 372                        ic->ino, ic->state);
 373                mutex_unlock(&c->alloc_sem);
 374                spin_unlock(&c->inocache_lock);
 375                BUG();
 376
 377        case INO_STATE_READING:
 378                /* Someone's currently trying to read it. We must wait for
 379                   them to finish and then go through the full iget() route
 380                   to do the GC. However, sometimes read_inode() needs to get
 381                   the alloc_sem() (for marking nodes invalid) so we must
 382                   drop the alloc_sem before sleeping. */
 383
 384                mutex_unlock(&c->alloc_sem);
 385                jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n",
 386                          __func__, ic->ino, ic->state);
 387                sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 388                /* And because we dropped the alloc_sem we must start again from the
 389                   beginning. Ponder chance of livelock here -- we're returning success
 390                   without actually making any progress.
 391
 392                   Q: What are the chances that the inode is back in INO_STATE_READING
 393                   again by the time we next enter this function? And that this happens
 394                   enough times to cause a real delay?
 395
 396                   A: Small enough that I don't care :)
 397                */
 398                return 0;
 399        }
 400
 401        /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
 402           node intact, and we don't have to muck about with the fragtree etc.
 403           because we know it's not in-core. If it _was_ in-core, we go through
 404           all the iget() crap anyway */
 405
 406        if (ic->state == INO_STATE_GC) {
 407                spin_unlock(&c->inocache_lock);
 408
 409                ret = jffs2_garbage_collect_pristine(c, ic, raw);
 410
 411                spin_lock(&c->inocache_lock);
 412                ic->state = INO_STATE_CHECKEDABSENT;
 413                wake_up(&c->inocache_wq);
 414
 415                if (ret != -EBADFD) {
 416                        spin_unlock(&c->inocache_lock);
 417                        goto test_gcnode;
 418                }
 419
 420                /* Fall through if it wanted us to, with inocache_lock held */
 421        }
 422
 423        /* Prevent the fairly unlikely race where the gcblock is
 424           entirely obsoleted by the final close of a file which had
 425           the only valid nodes in the block, followed by erasure,
 426           followed by freeing of the ic because the erased block(s)
 427           held _all_ the nodes of that inode.... never been seen but
 428           it's vaguely possible. */
 429
 430        inum = ic->ino;
 431        nlink = ic->pino_nlink;
 432        spin_unlock(&c->inocache_lock);
 433
 434        f = jffs2_gc_fetch_inode(c, inum, !nlink);
 435        if (IS_ERR(f)) {
 436                ret = PTR_ERR(f);
 437                goto release_sem;
 438        }
 439        if (!f) {
 440                ret = 0;
 441                goto release_sem;
 442        }
 443
 444        ret = jffs2_garbage_collect_live(c, jeb, raw, f);
 445
 446        jffs2_gc_release_inode(c, f);
 447
 448 test_gcnode:
 449        if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
 450                /* Eep. This really should never happen. GC is broken */
 451                pr_err("Error garbage collecting node at %08x!\n",
 452                       ref_offset(jeb->gc_node));
 453                ret = -ENOSPC;
 454        }
 455 release_sem:
 456        mutex_unlock(&c->alloc_sem);
 457
 458 eraseit_lock:
 459        /* If we've finished this block, start it erasing */
 460        spin_lock(&c->erase_completion_lock);
 461
 462 eraseit:
 463        if (c->gcblock && !c->gcblock->used_size) {
 464                jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n",
 465                          c->gcblock->offset);
 466                /* We're GC'ing an empty block? */
 467                list_add_tail(&c->gcblock->list, &c->erase_pending_list);
 468                c->gcblock = NULL;
 469                c->nr_erasing_blocks++;
 470                jffs2_garbage_collect_trigger(c);
 471        }
 472        spin_unlock(&c->erase_completion_lock);
 473
 474        return ret;
 475}
 476
 477static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
 478                                      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
 479{
 480        struct jffs2_node_frag *frag;
 481        struct jffs2_full_dnode *fn = NULL;
 482        struct jffs2_full_dirent *fd;
 483        uint32_t start = 0, end = 0, nrfrags = 0;
 484        int ret = 0;
 485
 486        mutex_lock(&f->sem);
 487
 488        /* Now we have the lock for this inode. Check that it's still the one at the head
 489           of the list. */
 490
 491        spin_lock(&c->erase_completion_lock);
 492
 493        if (c->gcblock != jeb) {
 494                spin_unlock(&c->erase_completion_lock);
 495                jffs2_dbg(1, "GC block is no longer gcblock. Restart\n");
 496                goto upnout;
 497        }
 498        if (ref_obsolete(raw)) {
 499                spin_unlock(&c->erase_completion_lock);
 500                jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n");
 501                /* They'll call again */
 502                goto upnout;
 503        }
 504        spin_unlock(&c->erase_completion_lock);
 505
 506        /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
 507        if (f->metadata && f->metadata->raw == raw) {
 508                fn = f->metadata;
 509                ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
 510                goto upnout;
 511        }
 512
 513        /* FIXME. Read node and do lookup? */
 514        for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
 515                if (frag->node && frag->node->raw == raw) {
 516                        fn = frag->node;
 517                        end = frag->ofs + frag->size;
 518                        if (!nrfrags++)
 519                                start = frag->ofs;
 520                        if (nrfrags == frag->node->frags)
 521                                break; /* We've found them all */
 522                }
 523        }
 524        if (fn) {
 525                if (ref_flags(raw) == REF_PRISTINE) {
 526                        ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
 527                        if (!ret) {
 528                                /* Urgh. Return it sensibly. */
 529                                frag->node->raw = f->inocache->nodes;
 530                        }
 531                        if (ret != -EBADFD)
 532                                goto upnout;
 533                }
 534                /* We found a datanode. Do the GC */
 535                if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
 536                        /* It crosses a page boundary. Therefore, it must be a hole. */
 537                        ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
 538                } else {
 539                        /* It could still be a hole. But we GC the page this way anyway */
 540                        ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
 541                }
 542                goto upnout;
 543        }
 544
 545        /* Wasn't a dnode. Try dirent */
 546        for (fd = f->dents; fd; fd=fd->next) {
 547                if (fd->raw == raw)
 548                        break;
 549        }
 550
 551        if (fd && fd->ino) {
 552                ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
 553        } else if (fd) {
 554                ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
 555        } else {
 556                pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n",
 557                        ref_offset(raw), f->inocache->ino);
 558                if (ref_obsolete(raw)) {
 559                        pr_warn("But it's obsolete so we don't mind too much\n");
 560                } else {
 561                        jffs2_dbg_dump_node(c, ref_offset(raw));
 562                        BUG();
 563                }
 564        }
 565 upnout:
 566        mutex_unlock(&f->sem);
 567
 568        return ret;
 569}
 570
 571static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
 572                                          struct jffs2_inode_cache *ic,
 573                                          struct jffs2_raw_node_ref *raw)
 574{
 575        union jffs2_node_union *node;
 576        size_t retlen;
 577        int ret;
 578        uint32_t phys_ofs, alloclen;
 579        uint32_t crc, rawlen;
 580        int retried = 0;
 581
 582        jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n",
 583                  ref_offset(raw));
 584
 585        alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
 586
 587        /* Ask for a small amount of space (or the totlen if smaller) because we
 588           don't want to force wastage of the end of a block if splitting would
 589           work. */
 590        if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
 591                alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
 592
 593        ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
 594        /* 'rawlen' is not the exact summary size; it is only an upper estimation */
 595
 596        if (ret)
 597                return ret;
 598
 599        if (alloclen < rawlen) {
 600                /* Doesn't fit untouched. We'll go the old route and split it */
 601                return -EBADFD;
 602        }
 603
 604        node = kmalloc(rawlen, GFP_KERNEL);
 605        if (!node)
 606                return -ENOMEM;
 607
 608        ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
 609        if (!ret && retlen != rawlen)
 610                ret = -EIO;
 611        if (ret)
 612                goto out_node;
 613
 614        crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
 615        if (je32_to_cpu(node->u.hdr_crc) != crc) {
 616                pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 617                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
 618                goto bail;
 619        }
 620
 621        switch(je16_to_cpu(node->u.nodetype)) {
 622        case JFFS2_NODETYPE_INODE:
 623                crc = crc32(0, node, sizeof(node->i)-8);
 624                if (je32_to_cpu(node->i.node_crc) != crc) {
 625                        pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 626                                ref_offset(raw), je32_to_cpu(node->i.node_crc),
 627                                crc);
 628                        goto bail;
 629                }
 630
 631                if (je32_to_cpu(node->i.dsize)) {
 632                        crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
 633                        if (je32_to_cpu(node->i.data_crc) != crc) {
 634                                pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 635                                        ref_offset(raw),
 636                                        je32_to_cpu(node->i.data_crc), crc);
 637                                goto bail;
 638                        }
 639                }
 640                break;
 641
 642        case JFFS2_NODETYPE_DIRENT:
 643                crc = crc32(0, node, sizeof(node->d)-8);
 644                if (je32_to_cpu(node->d.node_crc) != crc) {
 645                        pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 646                                ref_offset(raw),
 647                                je32_to_cpu(node->d.node_crc), crc);
 648                        goto bail;
 649                }
 650
 651                if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
 652                        pr_warn("Name in dirent node at 0x%08x contains zeroes\n",
 653                                ref_offset(raw));
 654                        goto bail;
 655                }
 656
 657                if (node->d.nsize) {
 658                        crc = crc32(0, node->d.name, node->d.nsize);
 659                        if (je32_to_cpu(node->d.name_crc) != crc) {
 660                                pr_warn("Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 661                                        ref_offset(raw),
 662                                        je32_to_cpu(node->d.name_crc), crc);
 663                                goto bail;
 664                        }
 665                }
 666                break;
 667        default:
 668                /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
 669                if (ic) {
 670                        pr_warn("Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
 671                                ref_offset(raw), je16_to_cpu(node->u.nodetype));
 672                        goto bail;
 673                }
 674        }
 675
 676        /* OK, all the CRCs are good; this node can just be copied as-is. */
 677 retry:
 678        phys_ofs = write_ofs(c);
 679
 680        ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
 681
 682        if (ret || (retlen != rawlen)) {
 683                pr_notice("Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
 684                          rawlen, phys_ofs, ret, retlen);
 685                if (retlen) {
 686                        jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
 687                } else {
 688                        pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
 689                                  phys_ofs);
 690                }
 691                if (!retried) {
 692                        /* Try to reallocate space and retry */
 693                        uint32_t dummy;
 694                        struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
 695
 696                        retried = 1;
 697
 698                        jffs2_dbg(1, "Retrying failed write of REF_PRISTINE node.\n");
 699
 700                        jffs2_dbg_acct_sanity_check(c,jeb);
 701                        jffs2_dbg_acct_paranoia_check(c, jeb);
 702
 703                        ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
 704                                                /* this is not the exact summary size of it,
 705                                                        it is only an upper estimation */
 706
 707                        if (!ret) {
 708                                jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
 709                                          phys_ofs);
 710
 711                                jffs2_dbg_acct_sanity_check(c,jeb);
 712                                jffs2_dbg_acct_paranoia_check(c, jeb);
 713
 714                                goto retry;
 715                        }
 716                        jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
 717                                  ret);
 718                }
 719
 720                if (!ret)
 721                        ret = -EIO;
 722                goto out_node;
 723        }
 724        jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
 725
 726        jffs2_mark_node_obsolete(c, raw);
 727        jffs2_dbg(1, "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n",
 728                  ref_offset(raw));
 729
 730 out_node:
 731        kfree(node);
 732        return ret;
 733 bail:
 734        ret = -EBADFD;
 735        goto out_node;
 736}
 737
 738static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 739                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
 740{
 741        struct jffs2_full_dnode *new_fn;
 742        struct jffs2_raw_inode ri;
 743        struct jffs2_node_frag *last_frag;
 744        union jffs2_device_node dev;
 745        char *mdata = NULL;
 746        int mdatalen = 0;
 747        uint32_t alloclen, ilen;
 748        int ret;
 749
 750        if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
 751            S_ISCHR(JFFS2_F_I_MODE(f)) ) {
 752                /* For these, we don't actually need to read the old node */
 753                mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
 754                mdata = (char *)&dev;
 755                jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
 756                          __func__, mdatalen);
 757        } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
 758                mdatalen = fn->size;
 759                mdata = kmalloc(fn->size, GFP_KERNEL);
 760                if (!mdata) {
 761                        pr_warn("kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
 762                        return -ENOMEM;
 763                }
 764                ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
 765                if (ret) {
 766                        pr_warn("read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n",
 767                                ret);
 768                        kfree(mdata);
 769                        return ret;
 770                }
 771                jffs2_dbg(1, "%s(): Writing %d bites of symlink target\n",
 772                          __func__, mdatalen);
 773
 774        }
 775
 776        ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
 777                                JFFS2_SUMMARY_INODE_SIZE);
 778        if (ret) {
 779                pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
 780                        sizeof(ri) + mdatalen, ret);
 781                goto out;
 782        }
 783
 784        last_frag = frag_last(&f->fragtree);
 785        if (last_frag)
 786                /* Fetch the inode length from the fragtree rather then
 787                 * from i_size since i_size may have not been updated yet */
 788                ilen = last_frag->ofs + last_frag->size;
 789        else
 790                ilen = JFFS2_F_I_SIZE(f);
 791
 792        memset(&ri, 0, sizeof(ri));
 793        ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 794        ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
 795        ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
 796        ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
 797
 798        ri.ino = cpu_to_je32(f->inocache->ino);
 799        ri.version = cpu_to_je32(++f->highest_version);
 800        ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
 801        ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
 802        ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
 803        ri.isize = cpu_to_je32(ilen);
 804        ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
 805        ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
 806        ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 807        ri.offset = cpu_to_je32(0);
 808        ri.csize = cpu_to_je32(mdatalen);
 809        ri.dsize = cpu_to_je32(mdatalen);
 810        ri.compr = JFFS2_COMPR_NONE;
 811        ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
 812        ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
 813
 814        new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
 815
 816        if (IS_ERR(new_fn)) {
 817                pr_warn("Error writing new dnode: %ld\n", PTR_ERR(new_fn));
 818                ret = PTR_ERR(new_fn);
 819                goto out;
 820        }
 821        jffs2_mark_node_obsolete(c, fn->raw);
 822        jffs2_free_full_dnode(fn);
 823        f->metadata = new_fn;
 824 out:
 825        if (S_ISLNK(JFFS2_F_I_MODE(f)))
 826                kfree(mdata);
 827        return ret;
 828}
 829
 830static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 831                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 832{
 833        struct jffs2_full_dirent *new_fd;
 834        struct jffs2_raw_dirent rd;
 835        uint32_t alloclen;
 836        int ret;
 837
 838        rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 839        rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
 840        rd.nsize = strlen(fd->name);
 841        rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
 842        rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
 843
 844        rd.pino = cpu_to_je32(f->inocache->ino);
 845        rd.version = cpu_to_je32(++f->highest_version);
 846        rd.ino = cpu_to_je32(fd->ino);
 847        /* If the times on this inode were set by explicit utime() they can be different,
 848           so refrain from splatting them. */
 849        if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
 850                rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 851        else
 852                rd.mctime = cpu_to_je32(0);
 853        rd.type = fd->type;
 854        rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
 855        rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
 856
 857        ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
 858                                JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
 859        if (ret) {
 860                pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
 861                        sizeof(rd)+rd.nsize, ret);
 862                return ret;
 863        }
 864        new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
 865
 866        if (IS_ERR(new_fd)) {
 867                pr_warn("jffs2_write_dirent in garbage_collect_dirent failed: %ld\n",
 868                        PTR_ERR(new_fd));
 869                return PTR_ERR(new_fd);
 870        }
 871        jffs2_add_fd_to_list(c, new_fd, &f->dents);
 872        return 0;
 873}
 874
 875static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 876                                        struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 877{
 878        struct jffs2_full_dirent **fdp = &f->dents;
 879        int found = 0;
 880
 881        /* On a medium where we can't actually mark nodes obsolete
 882           pernamently, such as NAND flash, we need to work out
 883           whether this deletion dirent is still needed to actively
 884           delete a 'real' dirent with the same name that's still
 885           somewhere else on the flash. */
 886        if (!jffs2_can_mark_obsolete(c)) {
 887                struct jffs2_raw_dirent *rd;
 888                struct jffs2_raw_node_ref *raw;
 889                int ret;
 890                size_t retlen;
 891                int name_len = strlen(fd->name);
 892                uint32_t name_crc = crc32(0, fd->name, name_len);
 893                uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
 894
 895                rd = kmalloc(rawlen, GFP_KERNEL);
 896                if (!rd)
 897                        return -ENOMEM;
 898
 899                /* Prevent the erase code from nicking the obsolete node refs while
 900                   we're looking at them. I really don't like this extra lock but
 901                   can't see any alternative. Suggestions on a postcard to... */
 902                mutex_lock(&c->erase_free_sem);
 903
 904                for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
 905
 906                        cond_resched();
 907
 908                        /* We only care about obsolete ones */
 909                        if (!(ref_obsolete(raw)))
 910                                continue;
 911
 912                        /* Any dirent with the same name is going to have the same length... */
 913                        if (ref_totlen(c, NULL, raw) != rawlen)
 914                                continue;
 915
 916                        /* Doesn't matter if there's one in the same erase block. We're going to
 917                           delete it too at the same time. */
 918                        if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
 919                                continue;
 920
 921                        jffs2_dbg(1, "Check potential deletion dirent at %08x\n",
 922                                  ref_offset(raw));
 923
 924                        /* This is an obsolete node belonging to the same directory, and it's of the right
 925                           length. We need to take a closer look...*/
 926                        ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
 927                        if (ret) {
 928                                pr_warn("%s(): Read error (%d) reading obsolete node at %08x\n",
 929                                        __func__, ret, ref_offset(raw));
 930                                /* If we can't read it, we don't need to continue to obsolete it. Continue */
 931                                continue;
 932                        }
 933                        if (retlen != rawlen) {
 934                                pr_warn("%s(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
 935                                        __func__, retlen, rawlen,
 936                                        ref_offset(raw));
 937                                continue;
 938                        }
 939
 940                        if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
 941                                continue;
 942
 943                        /* If the name CRC doesn't match, skip */
 944                        if (je32_to_cpu(rd->name_crc) != name_crc)
 945                                continue;
 946
 947                        /* If the name length doesn't match, or it's another deletion dirent, skip */
 948                        if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
 949                                continue;
 950
 951                        /* OK, check the actual name now */
 952                        if (memcmp(rd->name, fd->name, name_len))
 953                                continue;
 954
 955                        /* OK. The name really does match. There really is still an older node on
 956                           the flash which our deletion dirent obsoletes. So we have to write out
 957                           a new deletion dirent to replace it */
 958                        mutex_unlock(&c->erase_free_sem);
 959
 960                        jffs2_dbg(1, "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
 961                                  ref_offset(fd->raw), fd->name,
 962                                  ref_offset(raw), je32_to_cpu(rd->ino));
 963                        kfree(rd);
 964
 965                        return jffs2_garbage_collect_dirent(c, jeb, f, fd);
 966                }
 967
 968                mutex_unlock(&c->erase_free_sem);
 969                kfree(rd);
 970        }
 971
 972        /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
 973           we should update the metadata node with those times accordingly */
 974
 975        /* No need for it any more. Just mark it obsolete and remove it from the list */
 976        while (*fdp) {
 977                if ((*fdp) == fd) {
 978                        found = 1;
 979                        *fdp = fd->next;
 980                        break;
 981                }
 982                fdp = &(*fdp)->next;
 983        }
 984        if (!found) {
 985                pr_warn("Deletion dirent \"%s\" not found in list for ino #%u\n",
 986                        fd->name, f->inocache->ino);
 987        }
 988        jffs2_mark_node_obsolete(c, fd->raw);
 989        jffs2_free_full_dirent(fd);
 990        return 0;
 991}
 992
 993static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 994                                      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
 995                                      uint32_t start, uint32_t end)
 996{
 997        struct jffs2_raw_inode ri;
 998        struct jffs2_node_frag *frag;
 999        struct jffs2_full_dnode *new_fn;
1000        uint32_t alloclen, ilen;
1001        int ret;
1002
1003        jffs2_dbg(1, "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
1004                  f->inocache->ino, start, end);
1005
1006        memset(&ri, 0, sizeof(ri));
1007
1008        if(fn->frags > 1) {
1009                size_t readlen;
1010                uint32_t crc;
1011                /* It's partially obsoleted by a later write. So we have to
1012                   write it out again with the _same_ version as before */
1013                ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
1014                if (readlen != sizeof(ri) || ret) {
1015                        pr_warn("Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n",
1016                                ret, readlen);
1017                        goto fill;
1018                }
1019                if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
1020                        pr_warn("%s(): Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
1021                                __func__, ref_offset(fn->raw),
1022                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
1023                        return -EIO;
1024                }
1025                if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
1026                        pr_warn("%s(): Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
1027                                __func__, ref_offset(fn->raw),
1028                                je32_to_cpu(ri.totlen), sizeof(ri));
1029                        return -EIO;
1030                }
1031                crc = crc32(0, &ri, sizeof(ri)-8);
1032                if (crc != je32_to_cpu(ri.node_crc)) {
1033                        pr_warn("%s: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
1034                                __func__, ref_offset(fn->raw),
1035                                je32_to_cpu(ri.node_crc), crc);
1036                        /* FIXME: We could possibly deal with this by writing new holes for each frag */
1037                        pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1038                                start, end, f->inocache->ino);
1039                        goto fill;
1040                }
1041                if (ri.compr != JFFS2_COMPR_ZERO) {
1042                        pr_warn("%s(): Node 0x%08x wasn't a hole node!\n",
1043                                __func__, ref_offset(fn->raw));
1044                        pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1045                                start, end, f->inocache->ino);
1046                        goto fill;
1047                }
1048        } else {
1049        fill:
1050                ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1051                ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1052                ri.totlen = cpu_to_je32(sizeof(ri));
1053                ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1054
1055                ri.ino = cpu_to_je32(f->inocache->ino);
1056                ri.version = cpu_to_je32(++f->highest_version);
1057                ri.offset = cpu_to_je32(start);
1058                ri.dsize = cpu_to_je32(end - start);
1059                ri.csize = cpu_to_je32(0);
1060                ri.compr = JFFS2_COMPR_ZERO;
1061        }
1062
1063        frag = frag_last(&f->fragtree);
1064        if (frag)
1065                /* Fetch the inode length from the fragtree rather then
1066                 * from i_size since i_size may have not been updated yet */
1067                ilen = frag->ofs + frag->size;
1068        else
1069                ilen = JFFS2_F_I_SIZE(f);
1070
1071        ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1072        ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1073        ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1074        ri.isize = cpu_to_je32(ilen);
1075        ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1076        ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1077        ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1078        ri.data_crc = cpu_to_je32(0);
1079        ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1080
1081        ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1082                                     JFFS2_SUMMARY_INODE_SIZE);
1083        if (ret) {
1084                pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1085                        sizeof(ri), ret);
1086                return ret;
1087        }
1088        new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1089
1090        if (IS_ERR(new_fn)) {
1091                pr_warn("Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1092                return PTR_ERR(new_fn);
1093        }
1094        if (je32_to_cpu(ri.version) == f->highest_version) {
1095                jffs2_add_full_dnode_to_inode(c, f, new_fn);
1096                if (f->metadata) {
1097                        jffs2_mark_node_obsolete(c, f->metadata->raw);
1098                        jffs2_free_full_dnode(f->metadata);
1099                        f->metadata = NULL;
1100                }
1101                return 0;
1102        }
1103
1104        /*
1105         * We should only get here in the case where the node we are
1106         * replacing had more than one frag, so we kept the same version
1107         * number as before. (Except in case of error -- see 'goto fill;'
1108         * above.)
1109         */
1110        D1(if(unlikely(fn->frags <= 1)) {
1111                        pr_warn("%s(): Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1112                                __func__, fn->frags, je32_to_cpu(ri.version),
1113                                f->highest_version, je32_to_cpu(ri.ino));
1114        });
1115
1116        /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1117        mark_ref_normal(new_fn->raw);
1118
1119        for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1120             frag; frag = frag_next(frag)) {
1121                if (frag->ofs > fn->size + fn->ofs)
1122                        break;
1123                if (frag->node == fn) {
1124                        frag->node = new_fn;
1125                        new_fn->frags++;
1126                        fn->frags--;
1127                }
1128        }
1129        if (fn->frags) {
1130                pr_warn("%s(): Old node still has frags!\n", __func__);
1131                BUG();
1132        }
1133        if (!new_fn->frags) {
1134                pr_warn("%s(): New node has no frags!\n", __func__);
1135                BUG();
1136        }
1137
1138        jffs2_mark_node_obsolete(c, fn->raw);
1139        jffs2_free_full_dnode(fn);
1140
1141        return 0;
1142}
1143
1144static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1145                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1146                                       uint32_t start, uint32_t end)
1147{
1148        struct jffs2_full_dnode *new_fn;
1149        struct jffs2_raw_inode ri;
1150        uint32_t alloclen, offset, orig_end, orig_start;
1151        int ret = 0;
1152        unsigned char *comprbuf = NULL, *writebuf;
1153        unsigned long pg;
1154        unsigned char *pg_ptr;
1155
1156        memset(&ri, 0, sizeof(ri));
1157
1158        jffs2_dbg(1, "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1159                  f->inocache->ino, start, end);
1160
1161        orig_end = end;
1162        orig_start = start;
1163
1164        if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1165                /* Attempt to do some merging. But only expand to cover logically
1166                   adjacent frags if the block containing them is already considered
1167                   to be dirty. Otherwise we end up with GC just going round in
1168                   circles dirtying the nodes it already wrote out, especially
1169                   on NAND where we have small eraseblocks and hence a much higher
1170                   chance of nodes having to be split to cross boundaries. */
1171
1172                struct jffs2_node_frag *frag;
1173                uint32_t min, max;
1174
1175                min = start & ~(PAGE_CACHE_SIZE-1);
1176                max = min + PAGE_CACHE_SIZE;
1177
1178                frag = jffs2_lookup_node_frag(&f->fragtree, start);
1179
1180                /* BUG_ON(!frag) but that'll happen anyway... */
1181
1182                BUG_ON(frag->ofs != start);
1183
1184                /* First grow down... */
1185                while((frag = frag_prev(frag)) && frag->ofs >= min) {
1186
1187                        /* If the previous frag doesn't even reach the beginning, there's
1188                           excessive fragmentation. Just merge. */
1189                        if (frag->ofs > min) {
1190                                jffs2_dbg(1, "Expanding down to cover partial frag (0x%x-0x%x)\n",
1191                                          frag->ofs, frag->ofs+frag->size);
1192                                start = frag->ofs;
1193                                continue;
1194                        }
1195                        /* OK. This frag holds the first byte of the page. */
1196                        if (!frag->node || !frag->node->raw) {
1197                                jffs2_dbg(1, "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1198                                          frag->ofs, frag->ofs+frag->size);
1199                                break;
1200                        } else {
1201
1202                                /* OK, it's a frag which extends to the beginning of the page. Does it live
1203                                   in a block which is still considered clean? If so, don't obsolete it.
1204                                   If not, cover it anyway. */
1205
1206                                struct jffs2_raw_node_ref *raw = frag->node->raw;
1207                                struct jffs2_eraseblock *jeb;
1208
1209                                jeb = &c->blocks[raw->flash_offset / c->sector_size];
1210
1211                                if (jeb == c->gcblock) {
1212                                        jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1213                                                  frag->ofs,
1214                                                  frag->ofs + frag->size,
1215                                                  ref_offset(raw));
1216                                        start = frag->ofs;
1217                                        break;
1218                                }
1219                                if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1220                                        jffs2_dbg(1, "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1221                                                  frag->ofs,
1222                                                  frag->ofs + frag->size,
1223                                                  jeb->offset);
1224                                        break;
1225                                }
1226
1227                                jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1228                                          frag->ofs,
1229                                          frag->ofs + frag->size,
1230                                          jeb->offset);
1231                                start = frag->ofs;
1232                                break;
1233                        }
1234                }
1235
1236                /* ... then up */
1237
1238                /* Find last frag which is actually part of the node we're to GC. */
1239                frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1240
1241                while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1242
1243                        /* If the previous frag doesn't even reach the beginning, there's lots
1244                           of fragmentation. Just merge. */
1245                        if (frag->ofs+frag->size < max) {
1246                                jffs2_dbg(1, "Expanding up to cover partial frag (0x%x-0x%x)\n",
1247                                          frag->ofs, frag->ofs+frag->size);
1248                                end = frag->ofs + frag->size;
1249                                continue;
1250                        }
1251
1252                        if (!frag->node || !frag->node->raw) {
1253                                jffs2_dbg(1, "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1254                                          frag->ofs, frag->ofs+frag->size);
1255                                break;
1256                        } else {
1257
1258                                /* OK, it's a frag which extends to the beginning of the page. Does it live
1259                                   in a block which is still considered clean? If so, don't obsolete it.
1260                                   If not, cover it anyway. */
1261
1262                                struct jffs2_raw_node_ref *raw = frag->node->raw;
1263                                struct jffs2_eraseblock *jeb;
1264
1265                                jeb = &c->blocks[raw->flash_offset / c->sector_size];
1266
1267                                if (jeb == c->gcblock) {
1268                                        jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1269                                                  frag->ofs,
1270                                                  frag->ofs + frag->size,
1271                                                  ref_offset(raw));
1272                                        end = frag->ofs + frag->size;
1273                                        break;
1274                                }
1275                                if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1276                                        jffs2_dbg(1, "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1277                                                  frag->ofs,
1278                                                  frag->ofs + frag->size,
1279                                                  jeb->offset);
1280                                        break;
1281                                }
1282
1283                                jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1284                                          frag->ofs,
1285                                          frag->ofs + frag->size,
1286                                          jeb->offset);
1287                                end = frag->ofs + frag->size;
1288                                break;
1289                        }
1290                }
1291                jffs2_dbg(1, "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1292                          orig_start, orig_end, start, end);
1293
1294                D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1295                BUG_ON(end < orig_end);
1296                BUG_ON(start > orig_start);
1297        }
1298
1299        /* First, use readpage() to read the appropriate page into the page cache */
1300        /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1301         *    triggered garbage collection in the first place?
1302         * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1303         *    page OK. We'll actually write it out again in commit_write, which is a little
1304         *    suboptimal, but at least we're correct.
1305         */
1306        pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1307
1308        if (IS_ERR(pg_ptr)) {
1309                pr_warn("read_cache_page() returned error: %ld\n",
1310                        PTR_ERR(pg_ptr));
1311                return PTR_ERR(pg_ptr);
1312        }
1313
1314        offset = start;
1315        while(offset < orig_end) {
1316                uint32_t datalen;
1317                uint32_t cdatalen;
1318                uint16_t comprtype = JFFS2_COMPR_NONE;
1319
1320                ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1321                                        &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1322
1323                if (ret) {
1324                        pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1325                                sizeof(ri) + JFFS2_MIN_DATA_LEN, ret);
1326                        break;
1327                }
1328                cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1329                datalen = end - offset;
1330
1331                writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1332
1333                comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1334
1335                ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1336                ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1337                ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1338                ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1339
1340                ri.ino = cpu_to_je32(f->inocache->ino);
1341                ri.version = cpu_to_je32(++f->highest_version);
1342                ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1343                ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1344                ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1345                ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1346                ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1347                ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1348                ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1349                ri.offset = cpu_to_je32(offset);
1350                ri.csize = cpu_to_je32(cdatalen);
1351                ri.dsize = cpu_to_je32(datalen);
1352                ri.compr = comprtype & 0xff;
1353                ri.usercompr = (comprtype >> 8) & 0xff;
1354                ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1355                ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1356
1357                new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1358
1359                jffs2_free_comprbuf(comprbuf, writebuf);
1360
1361                if (IS_ERR(new_fn)) {
1362                        pr_warn("Error writing new dnode: %ld\n",
1363                                PTR_ERR(new_fn));
1364                        ret = PTR_ERR(new_fn);
1365                        break;
1366                }
1367                ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1368                offset += datalen;
1369                if (f->metadata) {
1370                        jffs2_mark_node_obsolete(c, f->metadata->raw);
1371                        jffs2_free_full_dnode(f->metadata);
1372                        f->metadata = NULL;
1373                }
1374        }
1375
1376        jffs2_gc_release_page(c, pg_ptr, &pg);
1377        return ret;
1378}
1379