linux/fs/jfs/jfs_dtree.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2004
   4 */
   5
   6/*
   7 *      jfs_dtree.c: directory B+-tree manager
   8 *
   9 * B+-tree with variable length key directory:
  10 *
  11 * each directory page is structured as an array of 32-byte
  12 * directory entry slots initialized as a freelist
  13 * to avoid search/compaction of free space at insertion.
  14 * when an entry is inserted, a number of slots are allocated
  15 * from the freelist as required to store variable length data
  16 * of the entry; when the entry is deleted, slots of the entry
  17 * are returned to freelist.
  18 *
  19 * leaf entry stores full name as key and file serial number
  20 * (aka inode number) as data.
  21 * internal/router entry stores sufffix compressed name
  22 * as key and simple extent descriptor as data.
  23 *
  24 * each directory page maintains a sorted entry index table
  25 * which stores the start slot index of sorted entries
  26 * to allow binary search on the table.
  27 *
  28 * directory starts as a root/leaf page in on-disk inode
  29 * inline data area.
  30 * when it becomes full, it starts a leaf of a external extent
  31 * of length of 1 block. each time the first leaf becomes full,
  32 * it is extended rather than split (its size is doubled),
  33 * until its length becoms 4 KBytes, from then the extent is split
  34 * with new 4 Kbyte extent when it becomes full
  35 * to reduce external fragmentation of small directories.
  36 *
  37 * blah, blah, blah, for linear scan of directory in pieces by
  38 * readdir().
  39 *
  40 *
  41 *      case-insensitive directory file system
  42 *
  43 * names are stored in case-sensitive way in leaf entry.
  44 * but stored, searched and compared in case-insensitive (uppercase) order
  45 * (i.e., both search key and entry key are folded for search/compare):
  46 * (note that case-sensitive order is BROKEN in storage, e.g.,
  47 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
  48 *
  49 *  entries which folds to the same key makes up a equivalent class
  50 *  whose members are stored as contiguous cluster (may cross page boundary)
  51 *  but whose order is arbitrary and acts as duplicate, e.g.,
  52 *  abc, Abc, aBc, abC)
  53 *
  54 * once match is found at leaf, requires scan forward/backward
  55 * either for, in case-insensitive search, duplicate
  56 * or for, in case-sensitive search, for exact match
  57 *
  58 * router entry must be created/stored in case-insensitive way
  59 * in internal entry:
  60 * (right most key of left page and left most key of right page
  61 * are folded, and its suffix compression is propagated as router
  62 * key in parent)
  63 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
  64 * should be made the router key for the split)
  65 *
  66 * case-insensitive search:
  67 *
  68 *      fold search key;
  69 *
  70 *      case-insensitive search of B-tree:
  71 *      for internal entry, router key is already folded;
  72 *      for leaf entry, fold the entry key before comparison.
  73 *
  74 *      if (leaf entry case-insensitive match found)
  75 *              if (next entry satisfies case-insensitive match)
  76 *                      return EDUPLICATE;
  77 *              if (prev entry satisfies case-insensitive match)
  78 *                      return EDUPLICATE;
  79 *              return match;
  80 *      else
  81 *              return no match;
  82 *
  83 *      serialization:
  84 * target directory inode lock is being held on entry/exit
  85 * of all main directory service routines.
  86 *
  87 *      log based recovery:
  88 */
  89
  90#include <linux/fs.h>
  91#include <linux/quotaops.h>
  92#include <linux/slab.h>
  93#include "jfs_incore.h"
  94#include "jfs_superblock.h"
  95#include "jfs_filsys.h"
  96#include "jfs_metapage.h"
  97#include "jfs_dmap.h"
  98#include "jfs_unicode.h"
  99#include "jfs_debug.h"
 100
 101/* dtree split parameter */
 102struct dtsplit {
 103        struct metapage *mp;
 104        s16 index;
 105        s16 nslot;
 106        struct component_name *key;
 107        ddata_t *data;
 108        struct pxdlist *pxdlist;
 109};
 110
 111#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
 112
 113/* get page buffer for specified block address */
 114#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)                             \
 115do {                                                                    \
 116        BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot);        \
 117        if (!(RC)) {                                                    \
 118                if (((P)->header.nextindex >                            \
 119                     (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \
 120                    ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) {  \
 121                        BT_PUTPAGE(MP);                                 \
 122                        jfs_error((IP)->i_sb,                           \
 123                                  "DT_GETPAGE: dtree page corrupt\n");  \
 124                        MP = NULL;                                      \
 125                        RC = -EIO;                                      \
 126                }                                                       \
 127        }                                                               \
 128} while (0)
 129
 130/* for consistency */
 131#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
 132
 133#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
 134        BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
 135
 136/*
 137 * forward references
 138 */
 139static int dtSplitUp(tid_t tid, struct inode *ip,
 140                     struct dtsplit * split, struct btstack * btstack);
 141
 142static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 143                       struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
 144
 145static int dtExtendPage(tid_t tid, struct inode *ip,
 146                        struct dtsplit * split, struct btstack * btstack);
 147
 148static int dtSplitRoot(tid_t tid, struct inode *ip,
 149                       struct dtsplit * split, struct metapage ** rmpp);
 150
 151static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
 152                      dtpage_t * fp, struct btstack * btstack);
 153
 154static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
 155
 156static int dtReadFirst(struct inode *ip, struct btstack * btstack);
 157
 158static int dtReadNext(struct inode *ip,
 159                      loff_t * offset, struct btstack * btstack);
 160
 161static int dtCompare(struct component_name * key, dtpage_t * p, int si);
 162
 163static int ciCompare(struct component_name * key, dtpage_t * p, int si,
 164                     int flag);
 165
 166static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
 167                     int flag);
 168
 169static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
 170                              int ri, struct component_name * key, int flag);
 171
 172static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
 173                          ddata_t * data, struct dt_lock **);
 174
 175static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
 176                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
 177                        int do_index);
 178
 179static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
 180
 181static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
 182
 183static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
 184
 185#define ciToUpper(c)    UniStrupr((c)->name)
 186
 187/*
 188 *      read_index_page()
 189 *
 190 *      Reads a page of a directory's index table.
 191 *      Having metadata mapped into the directory inode's address space
 192 *      presents a multitude of problems.  We avoid this by mapping to
 193 *      the absolute address space outside of the *_metapage routines
 194 */
 195static struct metapage *read_index_page(struct inode *inode, s64 blkno)
 196{
 197        int rc;
 198        s64 xaddr;
 199        int xflag;
 200        s32 xlen;
 201
 202        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
 203        if (rc || (xaddr == 0))
 204                return NULL;
 205
 206        return read_metapage(inode, xaddr, PSIZE, 1);
 207}
 208
 209/*
 210 *      get_index_page()
 211 *
 212 *      Same as get_index_page(), but get's a new page without reading
 213 */
 214static struct metapage *get_index_page(struct inode *inode, s64 blkno)
 215{
 216        int rc;
 217        s64 xaddr;
 218        int xflag;
 219        s32 xlen;
 220
 221        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
 222        if (rc || (xaddr == 0))
 223                return NULL;
 224
 225        return get_metapage(inode, xaddr, PSIZE, 1);
 226}
 227
 228/*
 229 *      find_index()
 230 *
 231 *      Returns dtree page containing directory table entry for specified
 232 *      index and pointer to its entry.
 233 *
 234 *      mp must be released by caller.
 235 */
 236static struct dir_table_slot *find_index(struct inode *ip, u32 index,
 237                                         struct metapage ** mp, s64 *lblock)
 238{
 239        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 240        s64 blkno;
 241        s64 offset;
 242        int page_offset;
 243        struct dir_table_slot *slot;
 244        static int maxWarnings = 10;
 245
 246        if (index < 2) {
 247                if (maxWarnings) {
 248                        jfs_warn("find_entry called with index = %d", index);
 249                        maxWarnings--;
 250                }
 251                return NULL;
 252        }
 253
 254        if (index >= jfs_ip->next_index) {
 255                jfs_warn("find_entry called with index >= next_index");
 256                return NULL;
 257        }
 258
 259        if (jfs_dirtable_inline(ip)) {
 260                /*
 261                 * Inline directory table
 262                 */
 263                *mp = NULL;
 264                slot = &jfs_ip->i_dirtable[index - 2];
 265        } else {
 266                offset = (index - 2) * sizeof(struct dir_table_slot);
 267                page_offset = offset & (PSIZE - 1);
 268                blkno = ((offset + 1) >> L2PSIZE) <<
 269                    JFS_SBI(ip->i_sb)->l2nbperpage;
 270
 271                if (*mp && (*lblock != blkno)) {
 272                        release_metapage(*mp);
 273                        *mp = NULL;
 274                }
 275                if (!(*mp)) {
 276                        *lblock = blkno;
 277                        *mp = read_index_page(ip, blkno);
 278                }
 279                if (!(*mp)) {
 280                        jfs_err("free_index: error reading directory table");
 281                        return NULL;
 282                }
 283
 284                slot =
 285                    (struct dir_table_slot *) ((char *) (*mp)->data +
 286                                               page_offset);
 287        }
 288        return slot;
 289}
 290
 291static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
 292                              u32 index)
 293{
 294        struct tlock *tlck;
 295        struct linelock *llck;
 296        struct lv *lv;
 297
 298        tlck = txLock(tid, ip, mp, tlckDATA);
 299        llck = (struct linelock *) tlck->lock;
 300
 301        if (llck->index >= llck->maxcnt)
 302                llck = txLinelock(llck);
 303        lv = &llck->lv[llck->index];
 304
 305        /*
 306         *      Linelock slot size is twice the size of directory table
 307         *      slot size.  512 entries per page.
 308         */
 309        lv->offset = ((index - 2) & 511) >> 1;
 310        lv->length = 1;
 311        llck->index++;
 312}
 313
 314/*
 315 *      add_index()
 316 *
 317 *      Adds an entry to the directory index table.  This is used to provide
 318 *      each directory entry with a persistent index in which to resume
 319 *      directory traversals
 320 */
 321static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
 322{
 323        struct super_block *sb = ip->i_sb;
 324        struct jfs_sb_info *sbi = JFS_SBI(sb);
 325        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 326        u64 blkno;
 327        struct dir_table_slot *dirtab_slot;
 328        u32 index;
 329        struct linelock *llck;
 330        struct lv *lv;
 331        struct metapage *mp;
 332        s64 offset;
 333        uint page_offset;
 334        struct tlock *tlck;
 335        s64 xaddr;
 336
 337        ASSERT(DO_INDEX(ip));
 338
 339        if (jfs_ip->next_index < 2) {
 340                jfs_warn("add_index: next_index = %d.  Resetting!",
 341                           jfs_ip->next_index);
 342                jfs_ip->next_index = 2;
 343        }
 344
 345        index = jfs_ip->next_index++;
 346
 347        if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
 348                /*
 349                 * i_size reflects size of index table, or 8 bytes per entry.
 350                 */
 351                ip->i_size = (loff_t) (index - 1) << 3;
 352
 353                /*
 354                 * dir table fits inline within inode
 355                 */
 356                dirtab_slot = &jfs_ip->i_dirtable[index-2];
 357                dirtab_slot->flag = DIR_INDEX_VALID;
 358                dirtab_slot->slot = slot;
 359                DTSaddress(dirtab_slot, bn);
 360
 361                set_cflag(COMMIT_Dirtable, ip);
 362
 363                return index;
 364        }
 365        if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
 366                struct dir_table_slot temp_table[12];
 367
 368                /*
 369                 * It's time to move the inline table to an external
 370                 * page and begin to build the xtree
 371                 */
 372                if (dquot_alloc_block(ip, sbi->nbperpage))
 373                        goto clean_up;
 374                if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
 375                        dquot_free_block(ip, sbi->nbperpage);
 376                        goto clean_up;
 377                }
 378
 379                /*
 380                 * Save the table, we're going to overwrite it with the
 381                 * xtree root
 382                 */
 383                memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
 384
 385                /*
 386                 * Initialize empty x-tree
 387                 */
 388                xtInitRoot(tid, ip);
 389
 390                /*
 391                 * Add the first block to the xtree
 392                 */
 393                if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
 394                        /* This really shouldn't fail */
 395                        jfs_warn("add_index: xtInsert failed!");
 396                        memcpy(&jfs_ip->i_dirtable, temp_table,
 397                               sizeof (temp_table));
 398                        dbFree(ip, xaddr, sbi->nbperpage);
 399                        dquot_free_block(ip, sbi->nbperpage);
 400                        goto clean_up;
 401                }
 402                ip->i_size = PSIZE;
 403
 404                mp = get_index_page(ip, 0);
 405                if (!mp) {
 406                        jfs_err("add_index: get_metapage failed!");
 407                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
 408                        memcpy(&jfs_ip->i_dirtable, temp_table,
 409                               sizeof (temp_table));
 410                        goto clean_up;
 411                }
 412                tlck = txLock(tid, ip, mp, tlckDATA);
 413                llck = (struct linelock *) & tlck->lock;
 414                ASSERT(llck->index == 0);
 415                lv = &llck->lv[0];
 416
 417                lv->offset = 0;
 418                lv->length = 6; /* tlckDATA slot size is 16 bytes */
 419                llck->index++;
 420
 421                memcpy(mp->data, temp_table, sizeof(temp_table));
 422
 423                mark_metapage_dirty(mp);
 424                release_metapage(mp);
 425
 426                /*
 427                 * Logging is now directed by xtree tlocks
 428                 */
 429                clear_cflag(COMMIT_Dirtable, ip);
 430        }
 431
 432        offset = (index - 2) * sizeof(struct dir_table_slot);
 433        page_offset = offset & (PSIZE - 1);
 434        blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
 435        if (page_offset == 0) {
 436                /*
 437                 * This will be the beginning of a new page
 438                 */
 439                xaddr = 0;
 440                if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
 441                        jfs_warn("add_index: xtInsert failed!");
 442                        goto clean_up;
 443                }
 444                ip->i_size += PSIZE;
 445
 446                if ((mp = get_index_page(ip, blkno)))
 447                        memset(mp->data, 0, PSIZE);     /* Just looks better */
 448                else
 449                        xtTruncate(tid, ip, offset, COMMIT_PWMAP);
 450        } else
 451                mp = read_index_page(ip, blkno);
 452
 453        if (!mp) {
 454                jfs_err("add_index: get/read_metapage failed!");
 455                goto clean_up;
 456        }
 457
 458        lock_index(tid, ip, mp, index);
 459
 460        dirtab_slot =
 461            (struct dir_table_slot *) ((char *) mp->data + page_offset);
 462        dirtab_slot->flag = DIR_INDEX_VALID;
 463        dirtab_slot->slot = slot;
 464        DTSaddress(dirtab_slot, bn);
 465
 466        mark_metapage_dirty(mp);
 467        release_metapage(mp);
 468
 469        return index;
 470
 471      clean_up:
 472
 473        jfs_ip->next_index--;
 474
 475        return 0;
 476}
 477
 478/*
 479 *      free_index()
 480 *
 481 *      Marks an entry to the directory index table as free.
 482 */
 483static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
 484{
 485        struct dir_table_slot *dirtab_slot;
 486        s64 lblock;
 487        struct metapage *mp = NULL;
 488
 489        dirtab_slot = find_index(ip, index, &mp, &lblock);
 490
 491        if (!dirtab_slot)
 492                return;
 493
 494        dirtab_slot->flag = DIR_INDEX_FREE;
 495        dirtab_slot->slot = dirtab_slot->addr1 = 0;
 496        dirtab_slot->addr2 = cpu_to_le32(next);
 497
 498        if (mp) {
 499                lock_index(tid, ip, mp, index);
 500                mark_metapage_dirty(mp);
 501                release_metapage(mp);
 502        } else
 503                set_cflag(COMMIT_Dirtable, ip);
 504}
 505
 506/*
 507 *      modify_index()
 508 *
 509 *      Changes an entry in the directory index table
 510 */
 511static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
 512                         int slot, struct metapage ** mp, s64 *lblock)
 513{
 514        struct dir_table_slot *dirtab_slot;
 515
 516        dirtab_slot = find_index(ip, index, mp, lblock);
 517
 518        if (!dirtab_slot)
 519                return;
 520
 521        DTSaddress(dirtab_slot, bn);
 522        dirtab_slot->slot = slot;
 523
 524        if (*mp) {
 525                lock_index(tid, ip, *mp, index);
 526                mark_metapage_dirty(*mp);
 527        } else
 528                set_cflag(COMMIT_Dirtable, ip);
 529}
 530
 531/*
 532 *      read_index()
 533 *
 534 *      reads a directory table slot
 535 */
 536static int read_index(struct inode *ip, u32 index,
 537                     struct dir_table_slot * dirtab_slot)
 538{
 539        s64 lblock;
 540        struct metapage *mp = NULL;
 541        struct dir_table_slot *slot;
 542
 543        slot = find_index(ip, index, &mp, &lblock);
 544        if (!slot) {
 545                return -EIO;
 546        }
 547
 548        memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));
 549
 550        if (mp)
 551                release_metapage(mp);
 552
 553        return 0;
 554}
 555
 556/*
 557 *      dtSearch()
 558 *
 559 * function:
 560 *      Search for the entry with specified key
 561 *
 562 * parameter:
 563 *
 564 * return: 0 - search result on stack, leaf page pinned;
 565 *         errno - I/O error
 566 */
 567int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
 568             struct btstack * btstack, int flag)
 569{
 570        int rc = 0;
 571        int cmp = 1;            /* init for empty page */
 572        s64 bn;
 573        struct metapage *mp;
 574        dtpage_t *p;
 575        s8 *stbl;
 576        int base, index, lim;
 577        struct btframe *btsp;
 578        pxd_t *pxd;
 579        int psize = 288;        /* initial in-line directory */
 580        ino_t inumber;
 581        struct component_name ciKey;
 582        struct super_block *sb = ip->i_sb;
 583
 584        ciKey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
 585                                   GFP_NOFS);
 586        if (!ciKey.name) {
 587                rc = -ENOMEM;
 588                goto dtSearch_Exit2;
 589        }
 590
 591
 592        /* uppercase search key for c-i directory */
 593        UniStrcpy(ciKey.name, key->name);
 594        ciKey.namlen = key->namlen;
 595
 596        /* only uppercase if case-insensitive support is on */
 597        if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
 598                ciToUpper(&ciKey);
 599        }
 600        BT_CLR(btstack);        /* reset stack */
 601
 602        /* init level count for max pages to split */
 603        btstack->nsplit = 1;
 604
 605        /*
 606         *      search down tree from root:
 607         *
 608         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 609         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
 610         *
 611         * if entry with search key K is not found
 612         * internal page search find the entry with largest key Ki
 613         * less than K which point to the child page to search;
 614         * leaf page search find the entry with smallest key Kj
 615         * greater than K so that the returned index is the position of
 616         * the entry to be shifted right for insertion of new entry.
 617         * for empty tree, search key is greater than any key of the tree.
 618         *
 619         * by convention, root bn = 0.
 620         */
 621        for (bn = 0;;) {
 622                /* get/pin the page to search */
 623                DT_GETPAGE(ip, bn, mp, psize, p, rc);
 624                if (rc)
 625                        goto dtSearch_Exit1;
 626
 627                /* get sorted entry table of the page */
 628                stbl = DT_GETSTBL(p);
 629
 630                /*
 631                 * binary search with search key K on the current page.
 632                 */
 633                for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
 634                        index = base + (lim >> 1);
 635
 636                        if (p->header.flag & BT_LEAF) {
 637                                /* uppercase leaf name to compare */
 638                                cmp =
 639                                    ciCompare(&ciKey, p, stbl[index],
 640                                              JFS_SBI(sb)->mntflag);
 641                        } else {
 642                                /* router key is in uppercase */
 643
 644                                cmp = dtCompare(&ciKey, p, stbl[index]);
 645
 646
 647                        }
 648                        if (cmp == 0) {
 649                                /*
 650                                 *      search hit
 651                                 */
 652                                /* search hit - leaf page:
 653                                 * return the entry found
 654                                 */
 655                                if (p->header.flag & BT_LEAF) {
 656                                        inumber = le32_to_cpu(
 657                        ((struct ldtentry *) & p->slot[stbl[index]])->inumber);
 658
 659                                        /*
 660                                         * search for JFS_LOOKUP
 661                                         */
 662                                        if (flag == JFS_LOOKUP) {
 663                                                *data = inumber;
 664                                                rc = 0;
 665                                                goto out;
 666                                        }
 667
 668                                        /*
 669                                         * search for JFS_CREATE
 670                                         */
 671                                        if (flag == JFS_CREATE) {
 672                                                *data = inumber;
 673                                                rc = -EEXIST;
 674                                                goto out;
 675                                        }
 676
 677                                        /*
 678                                         * search for JFS_REMOVE or JFS_RENAME
 679                                         */
 680                                        if ((flag == JFS_REMOVE ||
 681                                             flag == JFS_RENAME) &&
 682                                            *data != inumber) {
 683                                                rc = -ESTALE;
 684                                                goto out;
 685                                        }
 686
 687                                        /*
 688                                         * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
 689                                         */
 690                                        /* save search result */
 691                                        *data = inumber;
 692                                        btsp = btstack->top;
 693                                        btsp->bn = bn;
 694                                        btsp->index = index;
 695                                        btsp->mp = mp;
 696
 697                                        rc = 0;
 698                                        goto dtSearch_Exit1;
 699                                }
 700
 701                                /* search hit - internal page:
 702                                 * descend/search its child page
 703                                 */
 704                                goto getChild;
 705                        }
 706
 707                        if (cmp > 0) {
 708                                base = index + 1;
 709                                --lim;
 710                        }
 711                }
 712
 713                /*
 714                 *      search miss
 715                 *
 716                 * base is the smallest index with key (Kj) greater than
 717                 * search key (K) and may be zero or (maxindex + 1) index.
 718                 */
 719                /*
 720                 * search miss - leaf page
 721                 *
 722                 * return location of entry (base) where new entry with
 723                 * search key K is to be inserted.
 724                 */
 725                if (p->header.flag & BT_LEAF) {
 726                        /*
 727                         * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
 728                         */
 729                        if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
 730                            flag == JFS_RENAME) {
 731                                rc = -ENOENT;
 732                                goto out;
 733                        }
 734
 735                        /*
 736                         * search for JFS_CREATE|JFS_FINDDIR:
 737                         *
 738                         * save search result
 739                         */
 740                        *data = 0;
 741                        btsp = btstack->top;
 742                        btsp->bn = bn;
 743                        btsp->index = base;
 744                        btsp->mp = mp;
 745
 746                        rc = 0;
 747                        goto dtSearch_Exit1;
 748                }
 749
 750                /*
 751                 * search miss - internal page
 752                 *
 753                 * if base is non-zero, decrement base by one to get the parent
 754                 * entry of the child page to search.
 755                 */
 756                index = base ? base - 1 : base;
 757
 758                /*
 759                 * go down to child page
 760                 */
 761              getChild:
 762                /* update max. number of pages to split */
 763                if (BT_STACK_FULL(btstack)) {
 764                        /* Something's corrupted, mark filesystem dirty so
 765                         * chkdsk will fix it.
 766                         */
 767                        jfs_error(sb, "stack overrun!\n");
 768                        BT_STACK_DUMP(btstack);
 769                        rc = -EIO;
 770                        goto out;
 771                }
 772                btstack->nsplit++;
 773
 774                /* push (bn, index) of the parent page/entry */
 775                BT_PUSH(btstack, bn, index);
 776
 777                /* get the child page block number */
 778                pxd = (pxd_t *) & p->slot[stbl[index]];
 779                bn = addressPXD(pxd);
 780                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
 781
 782                /* unpin the parent page */
 783                DT_PUTPAGE(mp);
 784        }
 785
 786      out:
 787        DT_PUTPAGE(mp);
 788
 789      dtSearch_Exit1:
 790
 791        kfree(ciKey.name);
 792
 793      dtSearch_Exit2:
 794
 795        return rc;
 796}
 797
 798
 799/*
 800 *      dtInsert()
 801 *
 802 * function: insert an entry to directory tree
 803 *
 804 * parameter:
 805 *
 806 * return: 0 - success;
 807 *         errno - failure;
 808 */
 809int dtInsert(tid_t tid, struct inode *ip,
 810         struct component_name * name, ino_t * fsn, struct btstack * btstack)
 811{
 812        int rc = 0;
 813        struct metapage *mp;    /* meta-page buffer */
 814        dtpage_t *p;            /* base B+-tree index page */
 815        s64 bn;
 816        int index;
 817        struct dtsplit split;   /* split information */
 818        ddata_t data;
 819        struct dt_lock *dtlck;
 820        int n;
 821        struct tlock *tlck;
 822        struct lv *lv;
 823
 824        /*
 825         *      retrieve search result
 826         *
 827         * dtSearch() returns (leaf page pinned, index at which to insert).
 828         * n.b. dtSearch() may return index of (maxindex + 1) of
 829         * the full page.
 830         */
 831        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
 832
 833        /*
 834         *      insert entry for new key
 835         */
 836        if (DO_INDEX(ip)) {
 837                if (JFS_IP(ip)->next_index == DIREND) {
 838                        DT_PUTPAGE(mp);
 839                        return -EMLINK;
 840                }
 841                n = NDTLEAF(name->namlen);
 842                data.leaf.tid = tid;
 843                data.leaf.ip = ip;
 844        } else {
 845                n = NDTLEAF_LEGACY(name->namlen);
 846                data.leaf.ip = NULL;    /* signifies legacy directory format */
 847        }
 848        data.leaf.ino = *fsn;
 849
 850        /*
 851         *      leaf page does not have enough room for new entry:
 852         *
 853         *      extend/split the leaf page;
 854         *
 855         * dtSplitUp() will insert the entry and unpin the leaf page.
 856         */
 857        if (n > p->header.freecnt) {
 858                split.mp = mp;
 859                split.index = index;
 860                split.nslot = n;
 861                split.key = name;
 862                split.data = &data;
 863                rc = dtSplitUp(tid, ip, &split, btstack);
 864                return rc;
 865        }
 866
 867        /*
 868         *      leaf page does have enough room for new entry:
 869         *
 870         *      insert the new data entry into the leaf page;
 871         */
 872        BT_MARK_DIRTY(mp, ip);
 873        /*
 874         * acquire a transaction lock on the leaf page
 875         */
 876        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
 877        dtlck = (struct dt_lock *) & tlck->lock;
 878        ASSERT(dtlck->index == 0);
 879        lv = & dtlck->lv[0];
 880
 881        /* linelock header */
 882        lv->offset = 0;
 883        lv->length = 1;
 884        dtlck->index++;
 885
 886        dtInsertEntry(p, index, name, &data, &dtlck);
 887
 888        /* linelock stbl of non-root leaf page */
 889        if (!(p->header.flag & BT_ROOT)) {
 890                if (dtlck->index >= dtlck->maxcnt)
 891                        dtlck = (struct dt_lock *) txLinelock(dtlck);
 892                lv = & dtlck->lv[dtlck->index];
 893                n = index >> L2DTSLOTSIZE;
 894                lv->offset = p->header.stblindex + n;
 895                lv->length =
 896                    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
 897                dtlck->index++;
 898        }
 899
 900        /* unpin the leaf page */
 901        DT_PUTPAGE(mp);
 902
 903        return 0;
 904}
 905
 906
 907/*
 908 *      dtSplitUp()
 909 *
 910 * function: propagate insertion bottom up;
 911 *
 912 * parameter:
 913 *
 914 * return: 0 - success;
 915 *         errno - failure;
 916 *      leaf page unpinned;
 917 */
 918static int dtSplitUp(tid_t tid,
 919          struct inode *ip, struct dtsplit * split, struct btstack * btstack)
 920{
 921        struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
 922        int rc = 0;
 923        struct metapage *smp;
 924        dtpage_t *sp;           /* split page */
 925        struct metapage *rmp;
 926        dtpage_t *rp;           /* new right page split from sp */
 927        pxd_t rpxd;             /* new right page extent descriptor */
 928        struct metapage *lmp;
 929        dtpage_t *lp;           /* left child page */
 930        int skip;               /* index of entry of insertion */
 931        struct btframe *parent; /* parent page entry on traverse stack */
 932        s64 xaddr, nxaddr;
 933        int xlen, xsize;
 934        struct pxdlist pxdlist;
 935        pxd_t *pxd;
 936        struct component_name key = { 0, NULL };
 937        ddata_t *data = split->data;
 938        int n;
 939        struct dt_lock *dtlck;
 940        struct tlock *tlck;
 941        struct lv *lv;
 942        int quota_allocation = 0;
 943
 944        /* get split page */
 945        smp = split->mp;
 946        sp = DT_PAGE(ip, smp);
 947
 948        key.name = kmalloc_array(JFS_NAME_MAX + 2, sizeof(wchar_t), GFP_NOFS);
 949        if (!key.name) {
 950                DT_PUTPAGE(smp);
 951                rc = -ENOMEM;
 952                goto dtSplitUp_Exit;
 953        }
 954
 955        /*
 956         *      split leaf page
 957         *
 958         * The split routines insert the new entry, and
 959         * acquire txLock as appropriate.
 960         */
 961        /*
 962         *      split root leaf page:
 963         */
 964        if (sp->header.flag & BT_ROOT) {
 965                /*
 966                 * allocate a single extent child page
 967                 */
 968                xlen = 1;
 969                n = sbi->bsize >> L2DTSLOTSIZE;
 970                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
 971                n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
 972                if (n <= split->nslot)
 973                        xlen++;
 974                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
 975                        DT_PUTPAGE(smp);
 976                        goto freeKeyName;
 977                }
 978
 979                pxdlist.maxnpxd = 1;
 980                pxdlist.npxd = 0;
 981                pxd = &pxdlist.pxd[0];
 982                PXDaddress(pxd, xaddr);
 983                PXDlength(pxd, xlen);
 984                split->pxdlist = &pxdlist;
 985                rc = dtSplitRoot(tid, ip, split, &rmp);
 986
 987                if (rc)
 988                        dbFree(ip, xaddr, xlen);
 989                else
 990                        DT_PUTPAGE(rmp);
 991
 992                DT_PUTPAGE(smp);
 993
 994                if (!DO_INDEX(ip))
 995                        ip->i_size = xlen << sbi->l2bsize;
 996
 997                goto freeKeyName;
 998        }
 999
1000        /*
1001         *      extend first leaf page
1002         *
1003         * extend the 1st extent if less than buffer page size
1004         * (dtExtendPage() reurns leaf page unpinned)
1005         */
1006        pxd = &sp->header.self;
1007        xlen = lengthPXD(pxd);
1008        xsize = xlen << sbi->l2bsize;
1009        if (xsize < PSIZE) {
1010                xaddr = addressPXD(pxd);
1011                n = xsize >> L2DTSLOTSIZE;
1012                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
1013                if ((n + sp->header.freecnt) <= split->nslot)
1014                        n = xlen + (xlen << 1);
1015                else
1016                        n = xlen;
1017
1018                /* Allocate blocks to quota. */
1019                rc = dquot_alloc_block(ip, n);
1020                if (rc)
1021                        goto extendOut;
1022                quota_allocation += n;
1023
1024                if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
1025                                    (s64) n, &nxaddr)))
1026                        goto extendOut;
1027
1028                pxdlist.maxnpxd = 1;
1029                pxdlist.npxd = 0;
1030                pxd = &pxdlist.pxd[0];
1031                PXDaddress(pxd, nxaddr);
1032                PXDlength(pxd, xlen + n);
1033                split->pxdlist = &pxdlist;
1034                if ((rc = dtExtendPage(tid, ip, split, btstack))) {
1035                        nxaddr = addressPXD(pxd);
1036                        if (xaddr != nxaddr) {
1037                                /* free relocated extent */
1038                                xlen = lengthPXD(pxd);
1039                                dbFree(ip, nxaddr, (s64) xlen);
1040                        } else {
1041                                /* free extended delta */
1042                                xlen = lengthPXD(pxd) - n;
1043                                xaddr = addressPXD(pxd) + xlen;
1044                                dbFree(ip, xaddr, (s64) n);
1045                        }
1046                } else if (!DO_INDEX(ip))
1047                        ip->i_size = lengthPXD(pxd) << sbi->l2bsize;
1048
1049
1050              extendOut:
1051                DT_PUTPAGE(smp);
1052                goto freeKeyName;
1053        }
1054
1055        /*
1056         *      split leaf page <sp> into <sp> and a new right page <rp>.
1057         *
1058         * return <rp> pinned and its extent descriptor <rpxd>
1059         */
1060        /*
1061         * allocate new directory page extent and
1062         * new index page(s) to cover page split(s)
1063         *
1064         * allocation hint: ?
1065         */
1066        n = btstack->nsplit;
1067        pxdlist.maxnpxd = pxdlist.npxd = 0;
1068        xlen = sbi->nbperpage;
1069        for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
1070                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
1071                        PXDaddress(pxd, xaddr);
1072                        PXDlength(pxd, xlen);
1073                        pxdlist.maxnpxd++;
1074                        continue;
1075                }
1076
1077                DT_PUTPAGE(smp);
1078
1079                /* undo allocation */
1080                goto splitOut;
1081        }
1082
1083        split->pxdlist = &pxdlist;
1084        if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
1085                DT_PUTPAGE(smp);
1086
1087                /* undo allocation */
1088                goto splitOut;
1089        }
1090
1091        if (!DO_INDEX(ip))
1092                ip->i_size += PSIZE;
1093
1094        /*
1095         * propagate up the router entry for the leaf page just split
1096         *
1097         * insert a router entry for the new page into the parent page,
1098         * propagate the insert/split up the tree by walking back the stack
1099         * of (bn of parent page, index of child page entry in parent page)
1100         * that were traversed during the search for the page that split.
1101         *
1102         * the propagation of insert/split up the tree stops if the root
1103         * splits or the page inserted into doesn't have to split to hold
1104         * the new entry.
1105         *
1106         * the parent entry for the split page remains the same, and
1107         * a new entry is inserted at its right with the first key and
1108         * block number of the new right page.
1109         *
1110         * There are a maximum of 4 pages pinned at any time:
1111         * two children, left parent and right parent (when the parent splits).
1112         * keep the child pages pinned while working on the parent.
1113         * make sure that all pins are released at exit.
1114         */
1115        while ((parent = BT_POP(btstack)) != NULL) {
1116                /* parent page specified by stack frame <parent> */
1117
1118                /* keep current child pages (<lp>, <rp>) pinned */
1119                lmp = smp;
1120                lp = sp;
1121
1122                /*
1123                 * insert router entry in parent for new right child page <rp>
1124                 */
1125                /* get the parent page <sp> */
1126                DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1127                if (rc) {
1128                        DT_PUTPAGE(lmp);
1129                        DT_PUTPAGE(rmp);
1130                        goto splitOut;
1131                }
1132
1133                /*
1134                 * The new key entry goes ONE AFTER the index of parent entry,
1135                 * because the split was to the right.
1136                 */
1137                skip = parent->index + 1;
1138
1139                /*
1140                 * compute the key for the router entry
1141                 *
1142                 * key suffix compression:
1143                 * for internal pages that have leaf pages as children,
1144                 * retain only what's needed to distinguish between
1145                 * the new entry and the entry on the page to its left.
1146                 * If the keys compare equal, retain the entire key.
1147                 *
1148                 * note that compression is performed only at computing
1149                 * router key at the lowest internal level.
1150                 * further compression of the key between pairs of higher
1151                 * level internal pages loses too much information and
1152                 * the search may fail.
1153                 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
1154                 * results in two adjacent parent entries (a)(xx).
1155                 * if split occurs between these two entries, and
1156                 * if compression is applied, the router key of parent entry
1157                 * of right page (x) will divert search for x into right
1158                 * subtree and miss x in the left subtree.)
1159                 *
1160                 * the entire key must be retained for the next-to-leftmost
1161                 * internal key at any level of the tree, or search may fail
1162                 * (e.g., ?)
1163                 */
1164                switch (rp->header.flag & BT_TYPE) {
1165                case BT_LEAF:
1166                        /*
1167                         * compute the length of prefix for suffix compression
1168                         * between last entry of left page and first entry
1169                         * of right page
1170                         */
1171                        if ((sp->header.flag & BT_ROOT && skip > 1) ||
1172                            sp->header.prev != 0 || skip > 1) {
1173                                /* compute uppercase router prefix key */
1174                                rc = ciGetLeafPrefixKey(lp,
1175                                                        lp->header.nextindex-1,
1176                                                        rp, 0, &key,
1177                                                        sbi->mntflag);
1178                                if (rc) {
1179                                        DT_PUTPAGE(lmp);
1180                                        DT_PUTPAGE(rmp);
1181                                        DT_PUTPAGE(smp);
1182                                        goto splitOut;
1183                                }
1184                        } else {
1185                                /* next to leftmost entry of
1186                                   lowest internal level */
1187
1188                                /* compute uppercase router key */
1189                                dtGetKey(rp, 0, &key, sbi->mntflag);
1190                                key.name[key.namlen] = 0;
1191
1192                                if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
1193                                        ciToUpper(&key);
1194                        }
1195
1196                        n = NDTINTERNAL(key.namlen);
1197                        break;
1198
1199                case BT_INTERNAL:
1200                        dtGetKey(rp, 0, &key, sbi->mntflag);
1201                        n = NDTINTERNAL(key.namlen);
1202                        break;
1203
1204                default:
1205                        jfs_err("dtSplitUp(): UFO!");
1206                        break;
1207                }
1208
1209                /* unpin left child page */
1210                DT_PUTPAGE(lmp);
1211
1212                /*
1213                 * compute the data for the router entry
1214                 */
1215                data->xd = rpxd;        /* child page xd */
1216
1217                /*
1218                 * parent page is full - split the parent page
1219                 */
1220                if (n > sp->header.freecnt) {
1221                        /* init for parent page split */
1222                        split->mp = smp;
1223                        split->index = skip;    /* index at insert */
1224                        split->nslot = n;
1225                        split->key = &key;
1226                        /* split->data = data; */
1227
1228                        /* unpin right child page */
1229                        DT_PUTPAGE(rmp);
1230
1231                        /* The split routines insert the new entry,
1232                         * acquire txLock as appropriate.
1233                         * return <rp> pinned and its block number <rbn>.
1234                         */
1235                        rc = (sp->header.flag & BT_ROOT) ?
1236                            dtSplitRoot(tid, ip, split, &rmp) :
1237                            dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
1238                        if (rc) {
1239                                DT_PUTPAGE(smp);
1240                                goto splitOut;
1241                        }
1242
1243                        /* smp and rmp are pinned */
1244                }
1245                /*
1246                 * parent page is not full - insert router entry in parent page
1247                 */
1248                else {
1249                        BT_MARK_DIRTY(smp, ip);
1250                        /*
1251                         * acquire a transaction lock on the parent page
1252                         */
1253                        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1254                        dtlck = (struct dt_lock *) & tlck->lock;
1255                        ASSERT(dtlck->index == 0);
1256                        lv = & dtlck->lv[0];
1257
1258                        /* linelock header */
1259                        lv->offset = 0;
1260                        lv->length = 1;
1261                        dtlck->index++;
1262
1263                        /* linelock stbl of non-root parent page */
1264                        if (!(sp->header.flag & BT_ROOT)) {
1265                                lv++;
1266                                n = skip >> L2DTSLOTSIZE;
1267                                lv->offset = sp->header.stblindex + n;
1268                                lv->length =
1269                                    ((sp->header.nextindex -
1270                                      1) >> L2DTSLOTSIZE) - n + 1;
1271                                dtlck->index++;
1272                        }
1273
1274                        dtInsertEntry(sp, skip, &key, data, &dtlck);
1275
1276                        /* exit propagate up */
1277                        break;
1278                }
1279        }
1280
1281        /* unpin current split and its right page */
1282        DT_PUTPAGE(smp);
1283        DT_PUTPAGE(rmp);
1284
1285        /*
1286         * free remaining extents allocated for split
1287         */
1288      splitOut:
1289        n = pxdlist.npxd;
1290        pxd = &pxdlist.pxd[n];
1291        for (; n < pxdlist.maxnpxd; n++, pxd++)
1292                dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));
1293
1294      freeKeyName:
1295        kfree(key.name);
1296
1297        /* Rollback quota allocation */
1298        if (rc && quota_allocation)
1299                dquot_free_block(ip, quota_allocation);
1300
1301      dtSplitUp_Exit:
1302
1303        return rc;
1304}
1305
1306
1307/*
1308 *      dtSplitPage()
1309 *
1310 * function: Split a non-root page of a btree.
1311 *
1312 * parameter:
1313 *
1314 * return: 0 - success;
1315 *         errno - failure;
1316 *      return split and new page pinned;
1317 */
1318static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1319            struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
1320{
1321        int rc = 0;
1322        struct metapage *smp;
1323        dtpage_t *sp;
1324        struct metapage *rmp;
1325        dtpage_t *rp;           /* new right page allocated */
1326        s64 rbn;                /* new right page block number */
1327        struct metapage *mp;
1328        dtpage_t *p;
1329        s64 nextbn;
1330        struct pxdlist *pxdlist;
1331        pxd_t *pxd;
1332        int skip, nextindex, half, left, nxt, off, si;
1333        struct ldtentry *ldtentry;
1334        struct idtentry *idtentry;
1335        u8 *stbl;
1336        struct dtslot *f;
1337        int fsi, stblsize;
1338        int n;
1339        struct dt_lock *sdtlck, *rdtlck;
1340        struct tlock *tlck;
1341        struct dt_lock *dtlck;
1342        struct lv *slv, *rlv, *lv;
1343
1344        /* get split page */
1345        smp = split->mp;
1346        sp = DT_PAGE(ip, smp);
1347
1348        /*
1349         * allocate the new right page for the split
1350         */
1351        pxdlist = split->pxdlist;
1352        pxd = &pxdlist->pxd[pxdlist->npxd];
1353        pxdlist->npxd++;
1354        rbn = addressPXD(pxd);
1355        rmp = get_metapage(ip, rbn, PSIZE, 1);
1356        if (rmp == NULL)
1357                return -EIO;
1358
1359        /* Allocate blocks to quota. */
1360        rc = dquot_alloc_block(ip, lengthPXD(pxd));
1361        if (rc) {
1362                release_metapage(rmp);
1363                return rc;
1364        }
1365
1366        jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1367
1368        BT_MARK_DIRTY(rmp, ip);
1369        /*
1370         * acquire a transaction lock on the new right page
1371         */
1372        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1373        rdtlck = (struct dt_lock *) & tlck->lock;
1374
1375        rp = (dtpage_t *) rmp->data;
1376        *rpp = rp;
1377        rp->header.self = *pxd;
1378
1379        BT_MARK_DIRTY(smp, ip);
1380        /*
1381         * acquire a transaction lock on the split page
1382         *
1383         * action:
1384         */
1385        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1386        sdtlck = (struct dt_lock *) & tlck->lock;
1387
1388        /* linelock header of split page */
1389        ASSERT(sdtlck->index == 0);
1390        slv = & sdtlck->lv[0];
1391        slv->offset = 0;
1392        slv->length = 1;
1393        sdtlck->index++;
1394
1395        /*
1396         * initialize/update sibling pointers between sp and rp
1397         */
1398        nextbn = le64_to_cpu(sp->header.next);
1399        rp->header.next = cpu_to_le64(nextbn);
1400        rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1401        sp->header.next = cpu_to_le64(rbn);
1402
1403        /*
1404         * initialize new right page
1405         */
1406        rp->header.flag = sp->header.flag;
1407
1408        /* compute sorted entry table at start of extent data area */
1409        rp->header.nextindex = 0;
1410        rp->header.stblindex = 1;
1411
1412        n = PSIZE >> L2DTSLOTSIZE;
1413        rp->header.maxslot = n;
1414        stblsize = (n + 31) >> L2DTSLOTSIZE;    /* in unit of slot */
1415
1416        /* init freelist */
1417        fsi = rp->header.stblindex + stblsize;
1418        rp->header.freelist = fsi;
1419        rp->header.freecnt = rp->header.maxslot - fsi;
1420
1421        /*
1422         *      sequential append at tail: append without split
1423         *
1424         * If splitting the last page on a level because of appending
1425         * a entry to it (skip is maxentry), it's likely that the access is
1426         * sequential. Adding an empty page on the side of the level is less
1427         * work and can push the fill factor much higher than normal.
1428         * If we're wrong it's no big deal, we'll just do the split the right
1429         * way next time.
1430         * (It may look like it's equally easy to do a similar hack for
1431         * reverse sorted data, that is, split the tree left,
1432         * but it's not. Be my guest.)
1433         */
1434        if (nextbn == 0 && split->index == sp->header.nextindex) {
1435                /* linelock header + stbl (first slot) of new page */
1436                rlv = & rdtlck->lv[rdtlck->index];
1437                rlv->offset = 0;
1438                rlv->length = 2;
1439                rdtlck->index++;
1440
1441                /*
1442                 * initialize freelist of new right page
1443                 */
1444                f = &rp->slot[fsi];
1445                for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1446                        f->next = fsi;
1447                f->next = -1;
1448
1449                /* insert entry at the first entry of the new right page */
1450                dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
1451
1452                goto out;
1453        }
1454
1455        /*
1456         *      non-sequential insert (at possibly middle page)
1457         */
1458
1459        /*
1460         * update prev pointer of previous right sibling page;
1461         */
1462        if (nextbn != 0) {
1463                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1464                if (rc) {
1465                        discard_metapage(rmp);
1466                        return rc;
1467                }
1468
1469                BT_MARK_DIRTY(mp, ip);
1470                /*
1471                 * acquire a transaction lock on the next page
1472                 */
1473                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
1474                jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
1475                        tlck, ip, mp);
1476                dtlck = (struct dt_lock *) & tlck->lock;
1477
1478                /* linelock header of previous right sibling page */
1479                lv = & dtlck->lv[dtlck->index];
1480                lv->offset = 0;
1481                lv->length = 1;
1482                dtlck->index++;
1483
1484                p->header.prev = cpu_to_le64(rbn);
1485
1486                DT_PUTPAGE(mp);
1487        }
1488
1489        /*
1490         * split the data between the split and right pages.
1491         */
1492        skip = split->index;
1493        half = (PSIZE >> L2DTSLOTSIZE) >> 1;    /* swag */
1494        left = 0;
1495
1496        /*
1497         *      compute fill factor for split pages
1498         *
1499         * <nxt> traces the next entry to move to rp
1500         * <off> traces the next entry to stay in sp
1501         */
1502        stbl = (u8 *) & sp->slot[sp->header.stblindex];
1503        nextindex = sp->header.nextindex;
1504        for (nxt = off = 0; nxt < nextindex; ++off) {
1505                if (off == skip)
1506                        /* check for fill factor with new entry size */
1507                        n = split->nslot;
1508                else {
1509                        si = stbl[nxt];
1510                        switch (sp->header.flag & BT_TYPE) {
1511                        case BT_LEAF:
1512                                ldtentry = (struct ldtentry *) & sp->slot[si];
1513                                if (DO_INDEX(ip))
1514                                        n = NDTLEAF(ldtentry->namlen);
1515                                else
1516                                        n = NDTLEAF_LEGACY(ldtentry->
1517                                                           namlen);
1518                                break;
1519
1520                        case BT_INTERNAL:
1521                                idtentry = (struct idtentry *) & sp->slot[si];
1522                                n = NDTINTERNAL(idtentry->namlen);
1523                                break;
1524
1525                        default:
1526                                break;
1527                        }
1528
1529                        ++nxt;  /* advance to next entry to move in sp */
1530                }
1531
1532                left += n;
1533                if (left >= half)
1534                        break;
1535        }
1536
1537        /* <nxt> poins to the 1st entry to move */
1538
1539        /*
1540         *      move entries to right page
1541         *
1542         * dtMoveEntry() initializes rp and reserves entry for insertion
1543         *
1544         * split page moved out entries are linelocked;
1545         * new/right page moved in entries are linelocked;
1546         */
1547        /* linelock header + stbl of new right page */
1548        rlv = & rdtlck->lv[rdtlck->index];
1549        rlv->offset = 0;
1550        rlv->length = 5;
1551        rdtlck->index++;
1552
1553        dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
1554
1555        sp->header.nextindex = nxt;
1556
1557        /*
1558         * finalize freelist of new right page
1559         */
1560        fsi = rp->header.freelist;
1561        f = &rp->slot[fsi];
1562        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1563                f->next = fsi;
1564        f->next = -1;
1565
1566        /*
1567         * Update directory index table for entries now in right page
1568         */
1569        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1570                s64 lblock;
1571
1572                mp = NULL;
1573                stbl = DT_GETSTBL(rp);
1574                for (n = 0; n < rp->header.nextindex; n++) {
1575                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1576                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1577                                     rbn, n, &mp, &lblock);
1578                }
1579                if (mp)
1580                        release_metapage(mp);
1581        }
1582
1583        /*
1584         * the skipped index was on the left page,
1585         */
1586        if (skip <= off) {
1587                /* insert the new entry in the split page */
1588                dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
1589
1590                /* linelock stbl of split page */
1591                if (sdtlck->index >= sdtlck->maxcnt)
1592                        sdtlck = (struct dt_lock *) txLinelock(sdtlck);
1593                slv = & sdtlck->lv[sdtlck->index];
1594                n = skip >> L2DTSLOTSIZE;
1595                slv->offset = sp->header.stblindex + n;
1596                slv->length =
1597                    ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
1598                sdtlck->index++;
1599        }
1600        /*
1601         * the skipped index was on the right page,
1602         */
1603        else {
1604                /* adjust the skip index to reflect the new position */
1605                skip -= nxt;
1606
1607                /* insert the new entry in the right page */
1608                dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
1609        }
1610
1611      out:
1612        *rmpp = rmp;
1613        *rpxdp = *pxd;
1614
1615        return rc;
1616}
1617
1618
1619/*
1620 *      dtExtendPage()
1621 *
1622 * function: extend 1st/only directory leaf page
1623 *
1624 * parameter:
1625 *
1626 * return: 0 - success;
1627 *         errno - failure;
1628 *      return extended page pinned;
1629 */
1630static int dtExtendPage(tid_t tid,
1631             struct inode *ip, struct dtsplit * split, struct btstack * btstack)
1632{
1633        struct super_block *sb = ip->i_sb;
1634        int rc;
1635        struct metapage *smp, *pmp, *mp;
1636        dtpage_t *sp, *pp;
1637        struct pxdlist *pxdlist;
1638        pxd_t *pxd, *tpxd;
1639        int xlen, xsize;
1640        int newstblindex, newstblsize;
1641        int oldstblindex, oldstblsize;
1642        int fsi, last;
1643        struct dtslot *f;
1644        struct btframe *parent;
1645        int n;
1646        struct dt_lock *dtlck;
1647        s64 xaddr, txaddr;
1648        struct tlock *tlck;
1649        struct pxd_lock *pxdlock;
1650        struct lv *lv;
1651        uint type;
1652        struct ldtentry *ldtentry;
1653        u8 *stbl;
1654
1655        /* get page to extend */
1656        smp = split->mp;
1657        sp = DT_PAGE(ip, smp);
1658
1659        /* get parent/root page */
1660        parent = BT_POP(btstack);
1661        DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
1662        if (rc)
1663                return (rc);
1664
1665        /*
1666         *      extend the extent
1667         */
1668        pxdlist = split->pxdlist;
1669        pxd = &pxdlist->pxd[pxdlist->npxd];
1670        pxdlist->npxd++;
1671
1672        xaddr = addressPXD(pxd);
1673        tpxd = &sp->header.self;
1674        txaddr = addressPXD(tpxd);
1675        /* in-place extension */
1676        if (xaddr == txaddr) {
1677                type = tlckEXTEND;
1678        }
1679        /* relocation */
1680        else {
1681                type = tlckNEW;
1682
1683                /* save moved extent descriptor for later free */
1684                tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
1685                pxdlock = (struct pxd_lock *) & tlck->lock;
1686                pxdlock->flag = mlckFREEPXD;
1687                pxdlock->pxd = sp->header.self;
1688                pxdlock->index = 1;
1689
1690                /*
1691                 * Update directory index table to reflect new page address
1692                 */
1693                if (DO_INDEX(ip)) {
1694                        s64 lblock;
1695
1696                        mp = NULL;
1697                        stbl = DT_GETSTBL(sp);
1698                        for (n = 0; n < sp->header.nextindex; n++) {
1699                                ldtentry =
1700                                    (struct ldtentry *) & sp->slot[stbl[n]];
1701                                modify_index(tid, ip,
1702                                             le32_to_cpu(ldtentry->index),
1703                                             xaddr, n, &mp, &lblock);
1704                        }
1705                        if (mp)
1706                                release_metapage(mp);
1707                }
1708        }
1709
1710        /*
1711         *      extend the page
1712         */
1713        sp->header.self = *pxd;
1714
1715        jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
1716
1717        BT_MARK_DIRTY(smp, ip);
1718        /*
1719         * acquire a transaction lock on the extended/leaf page
1720         */
1721        tlck = txLock(tid, ip, smp, tlckDTREE | type);
1722        dtlck = (struct dt_lock *) & tlck->lock;
1723        lv = & dtlck->lv[0];
1724
1725        /* update buffer extent descriptor of extended page */
1726        xlen = lengthPXD(pxd);
1727        xsize = xlen << JFS_SBI(sb)->l2bsize;
1728
1729        /*
1730         * copy old stbl to new stbl at start of extended area
1731         */
1732        oldstblindex = sp->header.stblindex;
1733        oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
1734        newstblindex = sp->header.maxslot;
1735        n = xsize >> L2DTSLOTSIZE;
1736        newstblsize = (n + 31) >> L2DTSLOTSIZE;
1737        memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
1738               sp->header.nextindex);
1739
1740        /*
1741         * in-line extension: linelock old area of extended page
1742         */
1743        if (type == tlckEXTEND) {
1744                /* linelock header */
1745                lv->offset = 0;
1746                lv->length = 1;
1747                dtlck->index++;
1748                lv++;
1749
1750                /* linelock new stbl of extended page */
1751                lv->offset = newstblindex;
1752                lv->length = newstblsize;
1753        }
1754        /*
1755         * relocation: linelock whole relocated area
1756         */
1757        else {
1758                lv->offset = 0;
1759                lv->length = sp->header.maxslot + newstblsize;
1760        }
1761
1762        dtlck->index++;
1763
1764        sp->header.maxslot = n;
1765        sp->header.stblindex = newstblindex;
1766        /* sp->header.nextindex remains the same */
1767
1768        /*
1769         * add old stbl region at head of freelist
1770         */
1771        fsi = oldstblindex;
1772        f = &sp->slot[fsi];
1773        last = sp->header.freelist;
1774        for (n = 0; n < oldstblsize; n++, fsi++, f++) {
1775                f->next = last;
1776                last = fsi;
1777        }
1778        sp->header.freelist = last;
1779        sp->header.freecnt += oldstblsize;
1780
1781        /*
1782         * append free region of newly extended area at tail of freelist
1783         */
1784        /* init free region of newly extended area */
1785        fsi = n = newstblindex + newstblsize;
1786        f = &sp->slot[fsi];
1787        for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
1788                f->next = fsi;
1789        f->next = -1;
1790
1791        /* append new free region at tail of old freelist */
1792        fsi = sp->header.freelist;
1793        if (fsi == -1)
1794                sp->header.freelist = n;
1795        else {
1796                do {
1797                        f = &sp->slot[fsi];
1798                        fsi = f->next;
1799                } while (fsi != -1);
1800
1801                f->next = n;
1802        }
1803
1804        sp->header.freecnt += sp->header.maxslot - n;
1805
1806        /*
1807         * insert the new entry
1808         */
1809        dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
1810
1811        BT_MARK_DIRTY(pmp, ip);
1812        /*
1813         * linelock any freeslots residing in old extent
1814         */
1815        if (type == tlckEXTEND) {
1816                n = sp->header.maxslot >> 2;
1817                if (sp->header.freelist < n)
1818                        dtLinelockFreelist(sp, n, &dtlck);
1819        }
1820
1821        /*
1822         *      update parent entry on the parent/root page
1823         */
1824        /*
1825         * acquire a transaction lock on the parent/root page
1826         */
1827        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
1828        dtlck = (struct dt_lock *) & tlck->lock;
1829        lv = & dtlck->lv[dtlck->index];
1830
1831        /* linelock parent entry - 1st slot */
1832        lv->offset = 1;
1833        lv->length = 1;
1834        dtlck->index++;
1835
1836        /* update the parent pxd for page extension */
1837        tpxd = (pxd_t *) & pp->slot[1];
1838        *tpxd = *pxd;
1839
1840        DT_PUTPAGE(pmp);
1841        return 0;
1842}
1843
1844
1845/*
1846 *      dtSplitRoot()
1847 *
1848 * function:
1849 *      split the full root page into
1850 *      original/root/split page and new right page
1851 *      i.e., root remains fixed in tree anchor (inode) and
1852 *      the root is copied to a single new right child page
1853 *      since root page << non-root page, and
1854 *      the split root page contains a single entry for the
1855 *      new right child page.
1856 *
1857 * parameter:
1858 *
1859 * return: 0 - success;
1860 *         errno - failure;
1861 *      return new page pinned;
1862 */
1863static int dtSplitRoot(tid_t tid,
1864            struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
1865{
1866        struct super_block *sb = ip->i_sb;
1867        struct metapage *smp;
1868        dtroot_t *sp;
1869        struct metapage *rmp;
1870        dtpage_t *rp;
1871        s64 rbn;
1872        int xlen;
1873        int xsize;
1874        struct dtslot *f;
1875        s8 *stbl;
1876        int fsi, stblsize, n;
1877        struct idtentry *s;
1878        pxd_t *ppxd;
1879        struct pxdlist *pxdlist;
1880        pxd_t *pxd;
1881        struct dt_lock *dtlck;
1882        struct tlock *tlck;
1883        struct lv *lv;
1884        int rc;
1885
1886        /* get split root page */
1887        smp = split->mp;
1888        sp = &JFS_IP(ip)->i_dtroot;
1889
1890        /*
1891         *      allocate/initialize a single (right) child page
1892         *
1893         * N.B. at first split, a one (or two) block to fit new entry
1894         * is allocated; at subsequent split, a full page is allocated;
1895         */
1896        pxdlist = split->pxdlist;
1897        pxd = &pxdlist->pxd[pxdlist->npxd];
1898        pxdlist->npxd++;
1899        rbn = addressPXD(pxd);
1900        xlen = lengthPXD(pxd);
1901        xsize = xlen << JFS_SBI(sb)->l2bsize;
1902        rmp = get_metapage(ip, rbn, xsize, 1);
1903        if (!rmp)
1904                return -EIO;
1905
1906        rp = rmp->data;
1907
1908        /* Allocate blocks to quota. */
1909        rc = dquot_alloc_block(ip, lengthPXD(pxd));
1910        if (rc) {
1911                release_metapage(rmp);
1912                return rc;
1913        }
1914
1915        BT_MARK_DIRTY(rmp, ip);
1916        /*
1917         * acquire a transaction lock on the new right page
1918         */
1919        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1920        dtlck = (struct dt_lock *) & tlck->lock;
1921
1922        rp->header.flag =
1923            (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1924        rp->header.self = *pxd;
1925
1926        /* initialize sibling pointers */
1927        rp->header.next = 0;
1928        rp->header.prev = 0;
1929
1930        /*
1931         *      move in-line root page into new right page extent
1932         */
1933        /* linelock header + copied entries + new stbl (1st slot) in new page */
1934        ASSERT(dtlck->index == 0);
1935        lv = & dtlck->lv[0];
1936        lv->offset = 0;
1937        lv->length = 10;        /* 1 + 8 + 1 */
1938        dtlck->index++;
1939
1940        n = xsize >> L2DTSLOTSIZE;
1941        rp->header.maxslot = n;
1942        stblsize = (n + 31) >> L2DTSLOTSIZE;
1943
1944        /* copy old stbl to new stbl at start of extended area */
1945        rp->header.stblindex = DTROOTMAXSLOT;
1946        stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
1947        memcpy(stbl, sp->header.stbl, sp->header.nextindex);
1948        rp->header.nextindex = sp->header.nextindex;
1949
1950        /* copy old data area to start of new data area */
1951        memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
1952
1953        /*
1954         * append free region of newly extended area at tail of freelist
1955         */
1956        /* init free region of newly extended area */
1957        fsi = n = DTROOTMAXSLOT + stblsize;
1958        f = &rp->slot[fsi];
1959        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1960                f->next = fsi;
1961        f->next = -1;
1962
1963        /* append new free region at tail of old freelist */
1964        fsi = sp->header.freelist;
1965        if (fsi == -1)
1966                rp->header.freelist = n;
1967        else {
1968                rp->header.freelist = fsi;
1969
1970                do {
1971                        f = &rp->slot[fsi];
1972                        fsi = f->next;
1973                } while (fsi != -1);
1974
1975                f->next = n;
1976        }
1977
1978        rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
1979
1980        /*
1981         * Update directory index table for entries now in right page
1982         */
1983        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1984                s64 lblock;
1985                struct metapage *mp = NULL;
1986                struct ldtentry *ldtentry;
1987
1988                stbl = DT_GETSTBL(rp);
1989                for (n = 0; n < rp->header.nextindex; n++) {
1990                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1991                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1992                                     rbn, n, &mp, &lblock);
1993                }
1994                if (mp)
1995                        release_metapage(mp);
1996        }
1997        /*
1998         * insert the new entry into the new right/child page
1999         * (skip index in the new right page will not change)
2000         */
2001        dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
2002
2003        /*
2004         *      reset parent/root page
2005         *
2006         * set the 1st entry offset to 0, which force the left-most key
2007         * at any level of the tree to be less than any search key.
2008         *
2009         * The btree comparison code guarantees that the left-most key on any
2010         * level of the tree is never used, so it doesn't need to be filled in.
2011         */
2012        BT_MARK_DIRTY(smp, ip);
2013        /*
2014         * acquire a transaction lock on the root page (in-memory inode)
2015         */
2016        tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
2017        dtlck = (struct dt_lock *) & tlck->lock;
2018
2019        /* linelock root */
2020        ASSERT(dtlck->index == 0);
2021        lv = & dtlck->lv[0];
2022        lv->offset = 0;
2023        lv->length = DTROOTMAXSLOT;
2024        dtlck->index++;
2025
2026        /* update page header of root */
2027        if (sp->header.flag & BT_LEAF) {
2028                sp->header.flag &= ~BT_LEAF;
2029                sp->header.flag |= BT_INTERNAL;
2030        }
2031
2032        /* init the first entry */
2033        s = (struct idtentry *) & sp->slot[DTENTRYSTART];
2034        ppxd = (pxd_t *) s;
2035        *ppxd = *pxd;
2036        s->next = -1;
2037        s->namlen = 0;
2038
2039        stbl = sp->header.stbl;
2040        stbl[0] = DTENTRYSTART;
2041        sp->header.nextindex = 1;
2042
2043        /* init freelist */
2044        fsi = DTENTRYSTART + 1;
2045        f = &sp->slot[fsi];
2046
2047        /* init free region of remaining area */
2048        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2049                f->next = fsi;
2050        f->next = -1;
2051
2052        sp->header.freelist = DTENTRYSTART + 1;
2053        sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
2054
2055        *rmpp = rmp;
2056
2057        return 0;
2058}
2059
2060
2061/*
2062 *      dtDelete()
2063 *
2064 * function: delete the entry(s) referenced by a key.
2065 *
2066 * parameter:
2067 *
2068 * return:
2069 */
2070int dtDelete(tid_t tid,
2071         struct inode *ip, struct component_name * key, ino_t * ino, int flag)
2072{
2073        int rc = 0;
2074        s64 bn;
2075        struct metapage *mp, *imp;
2076        dtpage_t *p;
2077        int index;
2078        struct btstack btstack;
2079        struct dt_lock *dtlck;
2080        struct tlock *tlck;
2081        struct lv *lv;
2082        int i;
2083        struct ldtentry *ldtentry;
2084        u8 *stbl;
2085        u32 table_index, next_index;
2086        struct metapage *nmp;
2087        dtpage_t *np;
2088
2089        /*
2090         *      search for the entry to delete:
2091         *
2092         * dtSearch() returns (leaf page pinned, index at which to delete).
2093         */
2094        if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
2095                return rc;
2096
2097        /* retrieve search result */
2098        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2099
2100        /*
2101         * We need to find put the index of the next entry into the
2102         * directory index table in order to resume a readdir from this
2103         * entry.
2104         */
2105        if (DO_INDEX(ip)) {
2106                stbl = DT_GETSTBL(p);
2107                ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
2108                table_index = le32_to_cpu(ldtentry->index);
2109                if (index == (p->header.nextindex - 1)) {
2110                        /*
2111                         * Last entry in this leaf page
2112                         */
2113                        if ((p->header.flag & BT_ROOT)
2114                            || (p->header.next == 0))
2115                                next_index = -1;
2116                        else {
2117                                /* Read next leaf page */
2118                                DT_GETPAGE(ip, le64_to_cpu(p->header.next),
2119                                           nmp, PSIZE, np, rc);
2120                                if (rc)
2121                                        next_index = -1;
2122                                else {
2123                                        stbl = DT_GETSTBL(np);
2124                                        ldtentry =
2125                                            (struct ldtentry *) & np->
2126                                            slot[stbl[0]];
2127                                        next_index =
2128                                            le32_to_cpu(ldtentry->index);
2129                                        DT_PUTPAGE(nmp);
2130                                }
2131                        }
2132                } else {
2133                        ldtentry =
2134                            (struct ldtentry *) & p->slot[stbl[index + 1]];
2135                        next_index = le32_to_cpu(ldtentry->index);
2136                }
2137                free_index(tid, ip, table_index, next_index);
2138        }
2139        /*
2140         * the leaf page becomes empty, delete the page
2141         */
2142        if (p->header.nextindex == 1) {
2143                /* delete empty page */
2144                rc = dtDeleteUp(tid, ip, mp, p, &btstack);
2145        }
2146        /*
2147         * the leaf page has other entries remaining:
2148         *
2149         * delete the entry from the leaf page.
2150         */
2151        else {
2152                BT_MARK_DIRTY(mp, ip);
2153                /*
2154                 * acquire a transaction lock on the leaf page
2155                 */
2156                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2157                dtlck = (struct dt_lock *) & tlck->lock;
2158
2159                /*
2160                 * Do not assume that dtlck->index will be zero.  During a
2161                 * rename within a directory, this transaction may have
2162                 * modified this page already when adding the new entry.
2163                 */
2164
2165                /* linelock header */
2166                if (dtlck->index >= dtlck->maxcnt)
2167                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2168                lv = & dtlck->lv[dtlck->index];
2169                lv->offset = 0;
2170                lv->length = 1;
2171                dtlck->index++;
2172
2173                /* linelock stbl of non-root leaf page */
2174                if (!(p->header.flag & BT_ROOT)) {
2175                        if (dtlck->index >= dtlck->maxcnt)
2176                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2177                        lv = & dtlck->lv[dtlck->index];
2178                        i = index >> L2DTSLOTSIZE;
2179                        lv->offset = p->header.stblindex + i;
2180                        lv->length =
2181                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2182                            i + 1;
2183                        dtlck->index++;
2184                }
2185
2186                /* free the leaf entry */
2187                dtDeleteEntry(p, index, &dtlck);
2188
2189                /*
2190                 * Update directory index table for entries moved in stbl
2191                 */
2192                if (DO_INDEX(ip) && index < p->header.nextindex) {
2193                        s64 lblock;
2194
2195                        imp = NULL;
2196                        stbl = DT_GETSTBL(p);
2197                        for (i = index; i < p->header.nextindex; i++) {
2198                                ldtentry =
2199                                    (struct ldtentry *) & p->slot[stbl[i]];
2200                                modify_index(tid, ip,
2201                                             le32_to_cpu(ldtentry->index),
2202                                             bn, i, &imp, &lblock);
2203                        }
2204                        if (imp)
2205                                release_metapage(imp);
2206                }
2207
2208                DT_PUTPAGE(mp);
2209        }
2210
2211        return rc;
2212}
2213
2214
2215/*
2216 *      dtDeleteUp()
2217 *
2218 * function:
2219 *      free empty pages as propagating deletion up the tree
2220 *
2221 * parameter:
2222 *
2223 * return:
2224 */
2225static int dtDeleteUp(tid_t tid, struct inode *ip,
2226           struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
2227{
2228        int rc = 0;
2229        struct metapage *mp;
2230        dtpage_t *p;
2231        int index, nextindex;
2232        int xlen;
2233        struct btframe *parent;
2234        struct dt_lock *dtlck;
2235        struct tlock *tlck;
2236        struct lv *lv;
2237        struct pxd_lock *pxdlock;
2238        int i;
2239
2240        /*
2241         *      keep the root leaf page which has become empty
2242         */
2243        if (BT_IS_ROOT(fmp)) {
2244                /*
2245                 * reset the root
2246                 *
2247                 * dtInitRoot() acquires txlock on the root
2248                 */
2249                dtInitRoot(tid, ip, PARENT(ip));
2250
2251                DT_PUTPAGE(fmp);
2252
2253                return 0;
2254        }
2255
2256        /*
2257         *      free the non-root leaf page
2258         */
2259        /*
2260         * acquire a transaction lock on the page
2261         *
2262         * write FREEXTENT|NOREDOPAGE log record
2263         * N.B. linelock is overlaid as freed extent descriptor, and
2264         * the buffer page is freed;
2265         */
2266        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2267        pxdlock = (struct pxd_lock *) & tlck->lock;
2268        pxdlock->flag = mlckFREEPXD;
2269        pxdlock->pxd = fp->header.self;
2270        pxdlock->index = 1;
2271
2272        /* update sibling pointers */
2273        if ((rc = dtRelink(tid, ip, fp))) {
2274                BT_PUTPAGE(fmp);
2275                return rc;
2276        }
2277
2278        xlen = lengthPXD(&fp->header.self);
2279
2280        /* Free quota allocation. */
2281        dquot_free_block(ip, xlen);
2282
2283        /* free/invalidate its buffer page */
2284        discard_metapage(fmp);
2285
2286        /*
2287         *      propagate page deletion up the directory tree
2288         *
2289         * If the delete from the parent page makes it empty,
2290         * continue all the way up the tree.
2291         * stop if the root page is reached (which is never deleted) or
2292         * if the entry deletion does not empty the page.
2293         */
2294        while ((parent = BT_POP(btstack)) != NULL) {
2295                /* pin the parent page <sp> */
2296                DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2297                if (rc)
2298                        return rc;
2299
2300                /*
2301                 * free the extent of the child page deleted
2302                 */
2303                index = parent->index;
2304
2305                /*
2306                 * delete the entry for the child page from parent
2307                 */
2308                nextindex = p->header.nextindex;
2309
2310                /*
2311                 * the parent has the single entry being deleted:
2312                 *
2313                 * free the parent page which has become empty.
2314                 */
2315                if (nextindex == 1) {
2316                        /*
2317                         * keep the root internal page which has become empty
2318                         */
2319                        if (p->header.flag & BT_ROOT) {
2320                                /*
2321                                 * reset the root
2322                                 *
2323                                 * dtInitRoot() acquires txlock on the root
2324                                 */
2325                                dtInitRoot(tid, ip, PARENT(ip));
2326
2327                                DT_PUTPAGE(mp);
2328
2329                                return 0;
2330                        }
2331                        /*
2332                         * free the parent page
2333                         */
2334                        else {
2335                                /*
2336                                 * acquire a transaction lock on the page
2337                                 *
2338                                 * write FREEXTENT|NOREDOPAGE log record
2339                                 */
2340                                tlck =
2341                                    txMaplock(tid, ip,
2342                                              tlckDTREE | tlckFREE);
2343                                pxdlock = (struct pxd_lock *) & tlck->lock;
2344                                pxdlock->flag = mlckFREEPXD;
2345                                pxdlock->pxd = p->header.self;
2346                                pxdlock->index = 1;
2347
2348                                /* update sibling pointers */
2349                                if ((rc = dtRelink(tid, ip, p))) {
2350                                        DT_PUTPAGE(mp);
2351                                        return rc;
2352                                }
2353
2354                                xlen = lengthPXD(&p->header.self);
2355
2356                                /* Free quota allocation */
2357                                dquot_free_block(ip, xlen);
2358
2359                                /* free/invalidate its buffer page */
2360                                discard_metapage(mp);
2361
2362                                /* propagate up */
2363                                continue;
2364                        }
2365                }
2366
2367                /*
2368                 * the parent has other entries remaining:
2369                 *
2370                 * delete the router entry from the parent page.
2371                 */
2372                BT_MARK_DIRTY(mp, ip);
2373                /*
2374                 * acquire a transaction lock on the page
2375                 *
2376                 * action: router entry deletion
2377                 */
2378                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2379                dtlck = (struct dt_lock *) & tlck->lock;
2380
2381                /* linelock header */
2382                if (dtlck->index >= dtlck->maxcnt)
2383                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2384                lv = & dtlck->lv[dtlck->index];
2385                lv->offset = 0;
2386                lv->length = 1;
2387                dtlck->index++;
2388
2389                /* linelock stbl of non-root leaf page */
2390                if (!(p->header.flag & BT_ROOT)) {
2391                        if (dtlck->index < dtlck->maxcnt)
2392                                lv++;
2393                        else {
2394                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2395                                lv = & dtlck->lv[0];
2396                        }
2397                        i = index >> L2DTSLOTSIZE;
2398                        lv->offset = p->header.stblindex + i;
2399                        lv->length =
2400                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2401                            i + 1;
2402                        dtlck->index++;
2403                }
2404
2405                /* free the router entry */
2406                dtDeleteEntry(p, index, &dtlck);
2407
2408                /* reset key of new leftmost entry of level (for consistency) */
2409                if (index == 0 &&
2410                    ((p->header.flag & BT_ROOT) || p->header.prev == 0))
2411                        dtTruncateEntry(p, 0, &dtlck);
2412
2413                /* unpin the parent page */
2414                DT_PUTPAGE(mp);
2415
2416                /* exit propagation up */
2417                break;
2418        }
2419
2420        if (!DO_INDEX(ip))
2421                ip->i_size -= PSIZE;
2422
2423        return 0;
2424}
2425
2426#ifdef _NOTYET
2427/*
2428 * NAME:        dtRelocate()
2429 *
2430 * FUNCTION:    relocate dtpage (internal or leaf) of directory;
2431 *              This function is mainly used by defragfs utility.
2432 */
2433int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2434               s64 nxaddr)
2435{
2436        int rc = 0;
2437        struct metapage *mp, *pmp, *lmp, *rmp;
2438        dtpage_t *p, *pp, *rp = 0, *lp= 0;
2439        s64 bn;
2440        int index;
2441        struct btstack btstack;
2442        pxd_t *pxd;
2443        s64 oxaddr, nextbn, prevbn;
2444        int xlen, xsize;
2445        struct tlock *tlck;
2446        struct dt_lock *dtlck;
2447        struct pxd_lock *pxdlock;
2448        s8 *stbl;
2449        struct lv *lv;
2450
2451        oxaddr = addressPXD(opxd);
2452        xlen = lengthPXD(opxd);
2453
2454        jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d",
2455                   (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr,
2456                   xlen);
2457
2458        /*
2459         *      1. get the internal parent dtpage covering
2460         *      router entry for the tartget page to be relocated;
2461         */
2462        rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
2463        if (rc)
2464                return rc;
2465
2466        /* retrieve search result */
2467        DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2468        jfs_info("dtRelocate: parent router entry validated.");
2469
2470        /*
2471         *      2. relocate the target dtpage
2472         */
2473        /* read in the target page from src extent */
2474        DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2475        if (rc) {
2476                /* release the pinned parent page */
2477                DT_PUTPAGE(pmp);
2478                return rc;
2479        }
2480
2481        /*
2482         * read in sibling pages if any to update sibling pointers;
2483         */
2484        rmp = NULL;
2485        if (p->header.next) {
2486                nextbn = le64_to_cpu(p->header.next);
2487                DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2488                if (rc) {
2489                        DT_PUTPAGE(mp);
2490                        DT_PUTPAGE(pmp);
2491                        return (rc);
2492                }
2493        }
2494
2495        lmp = NULL;
2496        if (p->header.prev) {
2497                prevbn = le64_to_cpu(p->header.prev);
2498                DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2499                if (rc) {
2500                        DT_PUTPAGE(mp);
2501                        DT_PUTPAGE(pmp);
2502                        if (rmp)
2503                                DT_PUTPAGE(rmp);
2504                        return (rc);
2505                }
2506        }
2507
2508        /* at this point, all xtpages to be updated are in memory */
2509
2510        /*
2511         * update sibling pointers of sibling dtpages if any;
2512         */
2513        if (lmp) {
2514                tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
2515                dtlck = (struct dt_lock *) & tlck->lock;
2516                /* linelock header */
2517                ASSERT(dtlck->index == 0);
2518                lv = & dtlck->lv[0];
2519                lv->offset = 0;
2520                lv->length = 1;
2521                dtlck->index++;
2522
2523                lp->header.next = cpu_to_le64(nxaddr);
2524                DT_PUTPAGE(lmp);
2525        }
2526
2527        if (rmp) {
2528                tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
2529                dtlck = (struct dt_lock *) & tlck->lock;
2530                /* linelock header */
2531                ASSERT(dtlck->index == 0);
2532                lv = & dtlck->lv[0];
2533                lv->offset = 0;
2534                lv->length = 1;
2535                dtlck->index++;
2536
2537                rp->header.prev = cpu_to_le64(nxaddr);
2538                DT_PUTPAGE(rmp);
2539        }
2540
2541        /*
2542         * update the target dtpage to be relocated
2543         *
2544         * write LOG_REDOPAGE of LOG_NEW type for dst page
2545         * for the whole target page (logredo() will apply
2546         * after image and update bmap for allocation of the
2547         * dst extent), and update bmap for allocation of
2548         * the dst extent;
2549         */
2550        tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
2551        dtlck = (struct dt_lock *) & tlck->lock;
2552        /* linelock header */
2553        ASSERT(dtlck->index == 0);
2554        lv = & dtlck->lv[0];
2555
2556        /* update the self address in the dtpage header */
2557        pxd = &p->header.self;
2558        PXDaddress(pxd, nxaddr);
2559
2560        /* the dst page is the same as the src page, i.e.,
2561         * linelock for afterimage of the whole page;
2562         */
2563        lv->offset = 0;
2564        lv->length = p->header.maxslot;
2565        dtlck->index++;
2566
2567        /* update the buffer extent descriptor of the dtpage */
2568        xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2569
2570        /* unpin the relocated page */
2571        DT_PUTPAGE(mp);
2572        jfs_info("dtRelocate: target dtpage relocated.");
2573
2574        /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec
2575         * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
2576         * will also force a bmap update ).
2577         */
2578
2579        /*
2580         *      3. acquire maplock for the source extent to be freed;
2581         */
2582        /* for dtpage relocation, write a LOG_NOREDOPAGE record
2583         * for the source dtpage (logredo() will init NoRedoPage
2584         * filter and will also update bmap for free of the source
2585         * dtpage), and upadte bmap for free of the source dtpage;
2586         */
2587        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2588        pxdlock = (struct pxd_lock *) & tlck->lock;
2589        pxdlock->flag = mlckFREEPXD;
2590        PXDaddress(&pxdlock->pxd, oxaddr);
2591        PXDlength(&pxdlock->pxd, xlen);
2592        pxdlock->index = 1;
2593
2594        /*
2595         *      4. update the parent router entry for relocation;
2596         *
2597         * acquire tlck for the parent entry covering the target dtpage;
2598         * write LOG_REDOPAGE to apply after image only;
2599         */
2600        jfs_info("dtRelocate: update parent router entry.");
2601        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
2602        dtlck = (struct dt_lock *) & tlck->lock;
2603        lv = & dtlck->lv[dtlck->index];
2604
2605        /* update the PXD with the new address */
2606        stbl = DT_GETSTBL(pp);
2607        pxd = (pxd_t *) & pp->slot[stbl[index]];
2608        PXDaddress(pxd, nxaddr);
2609        lv->offset = stbl[index];
2610        lv->length = 1;
2611        dtlck->index++;
2612
2613        /* unpin the parent dtpage */
2614        DT_PUTPAGE(pmp);
2615
2616        return rc;
2617}
2618
2619/*
2620 * NAME:        dtSearchNode()
2621 *
2622 * FUNCTION:    Search for an dtpage containing a specified address
2623 *              This function is mainly used by defragfs utility.
2624 *
2625 * NOTE:        Search result on stack, the found page is pinned at exit.
2626 *              The result page must be an internal dtpage.
2627 *              lmxaddr give the address of the left most page of the
2628 *              dtree level, in which the required dtpage resides.
2629 */
2630static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2631                        struct btstack * btstack)
2632{
2633        int rc = 0;
2634        s64 bn;
2635        struct metapage *mp;
2636        dtpage_t *p;
2637        int psize = 288;        /* initial in-line directory */
2638        s8 *stbl;
2639        int i;
2640        pxd_t *pxd;
2641        struct btframe *btsp;
2642
2643        BT_CLR(btstack);        /* reset stack */
2644
2645        /*
2646         *      descend tree to the level with specified leftmost page
2647         *
2648         *  by convention, root bn = 0.
2649         */
2650        for (bn = 0;;) {
2651                /* get/pin the page to search */
2652                DT_GETPAGE(ip, bn, mp, psize, p, rc);
2653                if (rc)
2654                        return rc;
2655
2656                /* does the xaddr of leftmost page of the levevl
2657                 * matches levevl search key ?
2658                 */
2659                if (p->header.flag & BT_ROOT) {
2660                        if (lmxaddr == 0)
2661                                break;
2662                } else if (addressPXD(&p->header.self) == lmxaddr)
2663                        break;
2664
2665                /*
2666                 * descend down to leftmost child page
2667                 */
2668                if (p->header.flag & BT_LEAF) {
2669                        DT_PUTPAGE(mp);
2670                        return -ESTALE;
2671                }
2672
2673                /* get the leftmost entry */
2674                stbl = DT_GETSTBL(p);
2675                pxd = (pxd_t *) & p->slot[stbl[0]];
2676
2677                /* get the child page block address */
2678                bn = addressPXD(pxd);
2679                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
2680                /* unpin the parent page */
2681                DT_PUTPAGE(mp);
2682        }
2683
2684        /*
2685         *      search each page at the current levevl
2686         */
2687      loop:
2688        stbl = DT_GETSTBL(p);
2689        for (i = 0; i < p->header.nextindex; i++) {
2690                pxd = (pxd_t *) & p->slot[stbl[i]];
2691
2692                /* found the specified router entry */
2693                if (addressPXD(pxd) == addressPXD(kpxd) &&
2694                    lengthPXD(pxd) == lengthPXD(kpxd)) {
2695                        btsp = btstack->top;
2696                        btsp->bn = bn;
2697                        btsp->index = i;
2698                        btsp->mp = mp;
2699
2700                        return 0;
2701                }
2702        }
2703
2704        /* get the right sibling page if any */
2705        if (p->header.next)
2706                bn = le64_to_cpu(p->header.next);
2707        else {
2708                DT_PUTPAGE(mp);
2709                return -ESTALE;
2710        }
2711
2712        /* unpin current page */
2713        DT_PUTPAGE(mp);
2714
2715        /* get the right sibling page */
2716        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2717        if (rc)
2718                return rc;
2719
2720        goto loop;
2721}
2722#endif /* _NOTYET */
2723
2724/*
2725 *      dtRelink()
2726 *
2727 * function:
2728 *      link around a freed page.
2729 *
2730 * parameter:
2731 *      fp:     page to be freed
2732 *
2733 * return:
2734 */
2735static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
2736{
2737        int rc;
2738        struct metapage *mp;
2739        s64 nextbn, prevbn;
2740        struct tlock *tlck;
2741        struct dt_lock *dtlck;
2742        struct lv *lv;
2743
2744        nextbn = le64_to_cpu(p->header.next);
2745        prevbn = le64_to_cpu(p->header.prev);
2746
2747        /* update prev pointer of the next page */
2748        if (nextbn != 0) {
2749                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
2750                if (rc)
2751                        return rc;
2752
2753                BT_MARK_DIRTY(mp, ip);
2754                /*
2755                 * acquire a transaction lock on the next page
2756                 *
2757                 * action: update prev pointer;
2758                 */
2759                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2760                jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2761                        tlck, ip, mp);
2762                dtlck = (struct dt_lock *) & tlck->lock;
2763
2764                /* linelock header */
2765                if (dtlck->index >= dtlck->maxcnt)
2766                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2767                lv = & dtlck->lv[dtlck->index];
2768                lv->offset = 0;
2769                lv->length = 1;
2770                dtlck->index++;
2771
2772                p->header.prev = cpu_to_le64(prevbn);
2773                DT_PUTPAGE(mp);
2774        }
2775
2776        /* update next pointer of the previous page */
2777        if (prevbn != 0) {
2778                DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
2779                if (rc)
2780                        return rc;
2781
2782                BT_MARK_DIRTY(mp, ip);
2783                /*
2784                 * acquire a transaction lock on the prev page
2785                 *
2786                 * action: update next pointer;
2787                 */
2788                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2789                jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2790                        tlck, ip, mp);
2791                dtlck = (struct dt_lock *) & tlck->lock;
2792
2793                /* linelock header */
2794                if (dtlck->index >= dtlck->maxcnt)
2795                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2796                lv = & dtlck->lv[dtlck->index];
2797                lv->offset = 0;
2798                lv->length = 1;
2799                dtlck->index++;
2800
2801                p->header.next = cpu_to_le64(nextbn);
2802                DT_PUTPAGE(mp);
2803        }
2804
2805        return 0;
2806}
2807
2808
2809/*
2810 *      dtInitRoot()
2811 *
2812 * initialize directory root (inline in inode)
2813 */
2814void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
2815{
2816        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2817        dtroot_t *p;
2818        int fsi;
2819        struct dtslot *f;
2820        struct tlock *tlck;
2821        struct dt_lock *dtlck;
2822        struct lv *lv;
2823        u16 xflag_save;
2824
2825        /*
2826         * If this was previously an non-empty directory, we need to remove
2827         * the old directory table.
2828         */
2829        if (DO_INDEX(ip)) {
2830                if (!jfs_dirtable_inline(ip)) {
2831                        struct tblock *tblk = tid_to_tblock(tid);
2832                        /*
2833                         * We're playing games with the tid's xflag.  If
2834                         * we're removing a regular file, the file's xtree
2835                         * is committed with COMMIT_PMAP, but we always
2836                         * commit the directories xtree with COMMIT_PWMAP.
2837                         */
2838                        xflag_save = tblk->xflag;
2839                        tblk->xflag = 0;
2840                        /*
2841                         * xtTruncate isn't guaranteed to fully truncate
2842                         * the xtree.  The caller needs to check i_size
2843                         * after committing the transaction to see if
2844                         * additional truncation is needed.  The
2845                         * COMMIT_Stale flag tells caller that we
2846                         * initiated the truncation.
2847                         */
2848                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
2849                        set_cflag(COMMIT_Stale, ip);
2850
2851                        tblk->xflag = xflag_save;
2852                } else
2853                        ip->i_size = 1;
2854
2855                jfs_ip->next_index = 2;
2856        } else
2857                ip->i_size = IDATASIZE;
2858
2859        /*
2860         * acquire a transaction lock on the root
2861         *
2862         * action: directory initialization;
2863         */
2864        tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
2865                      tlckDTREE | tlckENTRY | tlckBTROOT);
2866        dtlck = (struct dt_lock *) & tlck->lock;
2867
2868        /* linelock root */
2869        ASSERT(dtlck->index == 0);
2870        lv = & dtlck->lv[0];
2871        lv->offset = 0;
2872        lv->length = DTROOTMAXSLOT;
2873        dtlck->index++;
2874
2875        p = &jfs_ip->i_dtroot;
2876
2877        p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2878
2879        p->header.nextindex = 0;
2880
2881        /* init freelist */
2882        fsi = 1;
2883        f = &p->slot[fsi];
2884
2885        /* init data area of root */
2886        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2887                f->next = fsi;
2888        f->next = -1;
2889
2890        p->header.freelist = 1;
2891        p->header.freecnt = 8;
2892
2893        /* init '..' entry */
2894        p->header.idotdot = cpu_to_le32(idotdot);
2895
2896        return;
2897}
2898
2899/*
2900 *      add_missing_indices()
2901 *
2902 * function: Fix dtree page in which one or more entries has an invalid index.
2903 *           fsck.jfs should really fix this, but it currently does not.
2904 *           Called from jfs_readdir when bad index is detected.
2905 */
2906static void add_missing_indices(struct inode *inode, s64 bn)
2907{
2908        struct ldtentry *d;
2909        struct dt_lock *dtlck;
2910        int i;
2911        uint index;
2912        struct lv *lv;
2913        struct metapage *mp;
2914        dtpage_t *p;
2915        int rc;
2916        s8 *stbl;
2917        tid_t tid;
2918        struct tlock *tlck;
2919
2920        tid = txBegin(inode->i_sb, 0);
2921
2922        DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);
2923
2924        if (rc) {
2925                printk(KERN_ERR "DT_GETPAGE failed!\n");
2926                goto end;
2927        }
2928        BT_MARK_DIRTY(mp, inode);
2929
2930        ASSERT(p->header.flag & BT_LEAF);
2931
2932        tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
2933        if (BT_IS_ROOT(mp))
2934                tlck->type |= tlckBTROOT;
2935
2936        dtlck = (struct dt_lock *) &tlck->lock;
2937
2938        stbl = DT_GETSTBL(p);
2939        for (i = 0; i < p->header.nextindex; i++) {
2940                d = (struct ldtentry *) &p->slot[stbl[i]];
2941                index = le32_to_cpu(d->index);
2942                if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
2943                        d->index = cpu_to_le32(add_index(tid, inode, bn, i));
2944                        if (dtlck->index >= dtlck->maxcnt)
2945                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2946                        lv = &dtlck->lv[dtlck->index];
2947                        lv->offset = stbl[i];
2948                        lv->length = 1;
2949                        dtlck->index++;
2950                }
2951        }
2952
2953        DT_PUTPAGE(mp);
2954        (void) txCommit(tid, 1, &inode, 0);
2955end:
2956        txEnd(tid);
2957}
2958
2959/*
2960 * Buffer to hold directory entry info while traversing a dtree page
2961 * before being fed to the filldir function
2962 */
2963struct jfs_dirent {
2964        loff_t position;
2965        int ino;
2966        u16 name_len;
2967        char name[];
2968};
2969
2970/*
2971 * function to determine next variable-sized jfs_dirent in buffer
2972 */
2973static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
2974{
2975        return (struct jfs_dirent *)
2976                ((char *)dirent +
2977                 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
2978                   sizeof (loff_t) - 1) &
2979                  ~(sizeof (loff_t) - 1)));
2980}
2981
2982/*
2983 *      jfs_readdir()
2984 *
2985 * function: read directory entries sequentially
2986 *      from the specified entry offset
2987 *
2988 * parameter:
2989 *
2990 * return: offset = (pn, index) of start entry
2991 *      of next jfs_readdir()/dtRead()
2992 */
2993int jfs_readdir(struct file *file, struct dir_context *ctx)
2994{
2995        struct inode *ip = file_inode(file);
2996        struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
2997        int rc = 0;
2998        loff_t dtpos;   /* legacy OS/2 style position */
2999        struct dtoffset {
3000                s16 pn;
3001                s16 index;
3002                s32 unused;
3003        } *dtoffset = (struct dtoffset *) &dtpos;
3004        s64 bn;
3005        struct metapage *mp;
3006        dtpage_t *p;
3007        int index;
3008        s8 *stbl;
3009        struct btstack btstack;
3010        int i, next;
3011        struct ldtentry *d;
3012        struct dtslot *t;
3013        int d_namleft, len, outlen;
3014        unsigned long dirent_buf;
3015        char *name_ptr;
3016        u32 dir_index;
3017        int do_index = 0;
3018        uint loop_count = 0;
3019        struct jfs_dirent *jfs_dirent;
3020        int jfs_dirents;
3021        int overflow, fix_page, page_fixed = 0;
3022        static int unique_pos = 2;      /* If we can't fix broken index */
3023
3024        if (ctx->pos == DIREND)
3025                return 0;
3026
3027        if (DO_INDEX(ip)) {
3028                /*
3029                 * persistent index is stored in directory entries.
3030                 * Special cases:        0 = .
3031                 *                       1 = ..
3032                 *                      -1 = End of directory
3033                 */
3034                do_index = 1;
3035
3036                dir_index = (u32) ctx->pos;
3037
3038                /*
3039                 * NFSv4 reserves cookies 1 and 2 for . and .. so the value
3040                 * we return to the vfs is one greater than the one we use
3041                 * internally.
3042                 */
3043                if (dir_index)
3044                        dir_index--;
3045
3046                if (dir_index > 1) {
3047                        struct dir_table_slot dirtab_slot;
3048
3049                        if (dtEmpty(ip) ||
3050                            (dir_index >= JFS_IP(ip)->next_index)) {
3051                                /* Stale position.  Directory has shrunk */
3052                                ctx->pos = DIREND;
3053                                return 0;
3054                        }
3055                      repeat:
3056                        rc = read_index(ip, dir_index, &dirtab_slot);
3057                        if (rc) {
3058                                ctx->pos = DIREND;
3059                                return rc;
3060                        }
3061                        if (dirtab_slot.flag == DIR_INDEX_FREE) {
3062                                if (loop_count++ > JFS_IP(ip)->next_index) {
3063                                        jfs_err("jfs_readdir detected infinite loop!");
3064                                        ctx->pos = DIREND;
3065                                        return 0;
3066                                }
3067                                dir_index = le32_to_cpu(dirtab_slot.addr2);
3068                                if (dir_index == -1) {
3069                                        ctx->pos = DIREND;
3070                                        return 0;
3071                                }
3072                                goto repeat;
3073                        }
3074                        bn = addressDTS(&dirtab_slot);
3075                        index = dirtab_slot.slot;
3076                        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3077                        if (rc) {
3078                                ctx->pos = DIREND;
3079                                return 0;
3080                        }
3081                        if (p->header.flag & BT_INTERNAL) {
3082                                jfs_err("jfs_readdir: bad index table");
3083                                DT_PUTPAGE(mp);
3084                                ctx->pos = DIREND;
3085                                return 0;
3086                        }
3087                } else {
3088                        if (dir_index == 0) {
3089                                /*
3090                                 * self "."
3091                                 */
3092                                ctx->pos = 1;
3093                                if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
3094                                        return 0;
3095                        }
3096                        /*
3097                         * parent ".."
3098                         */
3099                        ctx->pos = 2;
3100                        if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
3101                                return 0;
3102
3103                        /*
3104                         * Find first entry of left-most leaf
3105                         */
3106                        if (dtEmpty(ip)) {
3107                                ctx->pos = DIREND;
3108                                return 0;
3109                        }
3110
3111                        if ((rc = dtReadFirst(ip, &btstack)))
3112                                return rc;
3113
3114                        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3115                }
3116        } else {
3117                /*
3118                 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
3119                 *
3120                 * pn = 0; index = 1:   First entry "."
3121                 * pn = 0; index = 2:   Second entry ".."
3122                 * pn > 0:              Real entries, pn=1 -> leftmost page
3123                 * pn = index = -1:     No more entries
3124                 */
3125                dtpos = ctx->pos;
3126                if (dtpos < 2) {
3127                        /* build "." entry */
3128                        ctx->pos = 1;
3129                        if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
3130                                return 0;
3131                        dtoffset->index = 2;
3132                        ctx->pos = dtpos;
3133                }
3134
3135                if (dtoffset->pn == 0) {
3136                        if (dtoffset->index == 2) {
3137                                /* build ".." entry */
3138                                if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
3139                                        return 0;
3140                        } else {
3141                                jfs_err("jfs_readdir called with invalid offset!");
3142                        }
3143                        dtoffset->pn = 1;
3144                        dtoffset->index = 0;
3145                        ctx->pos = dtpos;
3146                }
3147
3148                if (dtEmpty(ip)) {
3149                        ctx->pos = DIREND;
3150                        return 0;
3151                }
3152
3153                if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) {
3154                        jfs_err("jfs_readdir: unexpected rc = %d from dtReadNext",
3155                                rc);
3156                        ctx->pos = DIREND;
3157                        return 0;
3158                }
3159                /* get start leaf page and index */
3160                DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3161
3162                /* offset beyond directory eof ? */
3163                if (bn < 0) {
3164                        ctx->pos = DIREND;
3165                        return 0;
3166                }
3167        }
3168
3169        dirent_buf = __get_free_page(GFP_KERNEL);
3170        if (dirent_buf == 0) {
3171                DT_PUTPAGE(mp);
3172                jfs_warn("jfs_readdir: __get_free_page failed!");
3173                ctx->pos = DIREND;
3174                return -ENOMEM;
3175        }
3176
3177        while (1) {
3178                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3179                jfs_dirents = 0;
3180                overflow = fix_page = 0;
3181
3182                stbl = DT_GETSTBL(p);
3183
3184                for (i = index; i < p->header.nextindex; i++) {
3185                        d = (struct ldtentry *) & p->slot[stbl[i]];
3186
3187                        if (((long) jfs_dirent + d->namlen + 1) >
3188                            (dirent_buf + PAGE_SIZE)) {
3189                                /* DBCS codepages could overrun dirent_buf */
3190                                index = i;
3191                                overflow = 1;
3192                                break;
3193                        }
3194
3195                        d_namleft = d->namlen;
3196                        name_ptr = jfs_dirent->name;
3197                        jfs_dirent->ino = le32_to_cpu(d->inumber);
3198
3199                        if (do_index) {
3200                                len = min(d_namleft, DTLHDRDATALEN);
3201                                jfs_dirent->position = le32_to_cpu(d->index);
3202                                /*
3203                                 * d->index should always be valid, but it
3204                                 * isn't.  fsck.jfs doesn't create the
3205                                 * directory index for the lost+found
3206                                 * directory.  Rather than let it go,
3207                                 * we can try to fix it.
3208                                 */
3209                                if ((jfs_dirent->position < 2) ||
3210                                    (jfs_dirent->position >=
3211                                     JFS_IP(ip)->next_index)) {
3212                                        if (!page_fixed && !isReadOnly(ip)) {
3213                                                fix_page = 1;
3214                                                /*
3215                                                 * setting overflow and setting
3216                                                 * index to i will cause the
3217                                                 * same page to be processed
3218                                                 * again starting here
3219                                                 */
3220                                                overflow = 1;
3221                                                index = i;
3222                                                break;
3223                                        }
3224                                        jfs_dirent->position = unique_pos++;
3225                                }
3226                                /*
3227                                 * We add 1 to the index because we may
3228                                 * use a value of 2 internally, and NFSv4
3229                                 * doesn't like that.
3230                                 */
3231                                jfs_dirent->position++;
3232                        } else {
3233                                jfs_dirent->position = dtpos;
3234                                len = min(d_namleft, DTLHDRDATALEN_LEGACY);
3235                        }
3236
3237                        /* copy the name of head/only segment */
3238                        outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
3239                                                   codepage);
3240                        jfs_dirent->name_len = outlen;
3241
3242                        /* copy name in the additional segment(s) */
3243                        next = d->next;
3244                        while (next >= 0) {
3245                                t = (struct dtslot *) & p->slot[next];
3246                                name_ptr += outlen;
3247                                d_namleft -= len;
3248                                /* Sanity Check */
3249                                if (d_namleft == 0) {
3250                                        jfs_error(ip->i_sb,
3251                                                  "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n",
3252                                                  (long)ip->i_ino,
3253                                                  (long long)bn,
3254                                                  i);
3255                                        goto skip_one;
3256                                }
3257                                len = min(d_namleft, DTSLOTDATALEN);
3258                                outlen = jfs_strfromUCS_le(name_ptr, t->name,
3259                                                           len, codepage);
3260                                jfs_dirent->name_len += outlen;
3261
3262                                next = t->next;
3263                        }
3264
3265                        jfs_dirents++;
3266                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3267skip_one:
3268                        if (!do_index)
3269                                dtoffset->index++;
3270                }
3271
3272                if (!overflow) {
3273                        /* Point to next leaf page */
3274                        if (p->header.flag & BT_ROOT)
3275                                bn = 0;
3276                        else {
3277                                bn = le64_to_cpu(p->header.next);
3278                                index = 0;
3279                                /* update offset (pn:index) for new page */
3280                                if (!do_index) {
3281                                        dtoffset->pn++;
3282                                        dtoffset->index = 0;
3283                                }
3284                        }
3285                        page_fixed = 0;
3286                }
3287
3288                /* unpin previous leaf page */
3289                DT_PUTPAGE(mp);
3290
3291                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3292                while (jfs_dirents--) {
3293                        ctx->pos = jfs_dirent->position;
3294                        if (!dir_emit(ctx, jfs_dirent->name,
3295                                    jfs_dirent->name_len,
3296                                    jfs_dirent->ino, DT_UNKNOWN))
3297                                goto out;
3298                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3299                }
3300
3301                if (fix_page) {
3302                        add_missing_indices(ip, bn);
3303                        page_fixed = 1;
3304                }
3305
3306                if (!overflow && (bn == 0)) {
3307                        ctx->pos = DIREND;
3308                        break;
3309                }
3310
3311                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3312                if (rc) {
3313                        free_page(dirent_buf);
3314                        return rc;
3315                }
3316        }
3317
3318      out:
3319        free_page(dirent_buf);
3320
3321        return rc;
3322}
3323
3324
3325/*
3326 *      dtReadFirst()
3327 *
3328 * function: get the leftmost page of the directory
3329 */
3330static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3331{
3332        int rc = 0;
3333        s64 bn;
3334        int psize = 288;        /* initial in-line directory */
3335        struct metapage *mp;
3336        dtpage_t *p;
3337        s8 *stbl;
3338        struct btframe *btsp;
3339        pxd_t *xd;
3340
3341        BT_CLR(btstack);        /* reset stack */
3342
3343        /*
3344         *      descend leftmost path of the tree
3345         *
3346         * by convention, root bn = 0.
3347         */
3348        for (bn = 0;;) {
3349                DT_GETPAGE(ip, bn, mp, psize, p, rc);
3350                if (rc)
3351                        return rc;
3352
3353                /*
3354                 * leftmost leaf page
3355                 */
3356                if (p->header.flag & BT_LEAF) {
3357                        /* return leftmost entry */
3358                        btsp = btstack->top;
3359                        btsp->bn = bn;
3360                        btsp->index = 0;
3361                        btsp->mp = mp;
3362
3363                        return 0;
3364                }
3365
3366                /*
3367                 * descend down to leftmost child page
3368                 */
3369                if (BT_STACK_FULL(btstack)) {
3370                        DT_PUTPAGE(mp);
3371                        jfs_error(ip->i_sb, "btstack overrun\n");
3372                        BT_STACK_DUMP(btstack);
3373                        return -EIO;
3374                }
3375                /* push (bn, index) of the parent page/entry */
3376                BT_PUSH(btstack, bn, 0);
3377
3378                /* get the leftmost entry */
3379                stbl = DT_GETSTBL(p);
3380                xd = (pxd_t *) & p->slot[stbl[0]];
3381
3382                /* get the child page block address */
3383                bn = addressPXD(xd);
3384                psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;
3385
3386                /* unpin the parent page */
3387                DT_PUTPAGE(mp);
3388        }
3389}
3390
3391
3392/*
3393 *      dtReadNext()
3394 *
3395 * function: get the page of the specified offset (pn:index)
3396 *
3397 * return: if (offset > eof), bn = -1;
3398 *
3399 * note: if index > nextindex of the target leaf page,
3400 * start with 1st entry of next leaf page;
3401 */
3402static int dtReadNext(struct inode *ip, loff_t * offset,
3403                      struct btstack * btstack)
3404{
3405        int rc = 0;
3406        struct dtoffset {
3407                s16 pn;
3408                s16 index;
3409                s32 unused;
3410        } *dtoffset = (struct dtoffset *) offset;
3411        s64 bn;
3412        struct metapage *mp;
3413        dtpage_t *p;
3414        int index;
3415        int pn;
3416        s8 *stbl;
3417        struct btframe *btsp, *parent;
3418        pxd_t *xd;
3419
3420        /*
3421         * get leftmost leaf page pinned
3422         */
3423        if ((rc = dtReadFirst(ip, btstack)))
3424                return rc;
3425
3426        /* get leaf page */
3427        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
3428
3429        /* get the start offset (pn:index) */
3430        pn = dtoffset->pn - 1;  /* Now pn = 0 represents leftmost leaf */
3431        index = dtoffset->index;
3432
3433        /* start at leftmost page ? */
3434        if (pn == 0) {
3435                /* offset beyond eof ? */
3436                if (index < p->header.nextindex)
3437                        goto out;
3438
3439                if (p->header.flag & BT_ROOT) {
3440                        bn = -1;
3441                        goto out;
3442                }
3443
3444                /* start with 1st entry of next leaf page */
3445                dtoffset->pn++;
3446                dtoffset->index = index = 0;
3447                goto a;
3448        }
3449
3450        /* start at non-leftmost page: scan parent pages for large pn */
3451        if (p->header.flag & BT_ROOT) {
3452                bn = -1;
3453                goto out;
3454        }
3455
3456        /* start after next leaf page ? */
3457        if (pn > 1)
3458                goto b;
3459
3460        /* get leaf page pn = 1 */
3461      a:
3462        bn = le64_to_cpu(p->header.next);
3463
3464        /* unpin leaf page */
3465        DT_PUTPAGE(mp);
3466
3467        /* offset beyond eof ? */
3468        if (bn == 0) {
3469                bn = -1;
3470                goto out;
3471        }
3472
3473        goto c;
3474
3475        /*
3476         * scan last internal page level to get target leaf page
3477         */
3478      b:
3479        /* unpin leftmost leaf page */
3480        DT_PUTPAGE(mp);
3481
3482        /* get left most parent page */
3483        btsp = btstack->top;
3484        parent = btsp - 1;
3485        bn = parent->bn;
3486        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3487        if (rc)
3488                return rc;
3489
3490        /* scan parent pages at last internal page level */
3491        while (pn >= p->header.nextindex) {
3492                pn -= p->header.nextindex;
3493
3494                /* get next parent page address */
3495                bn = le64_to_cpu(p->header.next);
3496
3497                /* unpin current parent page */
3498                DT_PUTPAGE(mp);
3499
3500                /* offset beyond eof ? */
3501                if (bn == 0) {
3502                        bn = -1;
3503                        goto out;
3504                }
3505
3506                /* get next parent page */
3507                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3508                if (rc)
3509                        return rc;
3510
3511                /* update parent page stack frame */
3512                parent->bn = bn;
3513        }
3514
3515        /* get leaf page address */
3516        stbl = DT_GETSTBL(p);
3517        xd = (pxd_t *) & p->slot[stbl[pn]];
3518        bn = addressPXD(xd);
3519
3520        /* unpin parent page */
3521        DT_PUTPAGE(mp);
3522
3523        /*
3524         * get target leaf page
3525         */
3526      c:
3527        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3528        if (rc)
3529                return rc;
3530
3531        /*
3532         * leaf page has been completed:
3533         * start with 1st entry of next leaf page
3534         */
3535        if (index >= p->header.nextindex) {
3536                bn = le64_to_cpu(p->header.next);
3537
3538                /* unpin leaf page */
3539                DT_PUTPAGE(mp);
3540
3541                /* offset beyond eof ? */
3542                if (bn == 0) {
3543                        bn = -1;
3544                        goto out;
3545                }
3546
3547                /* get next leaf page */
3548                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3549                if (rc)
3550                        return rc;
3551
3552                /* start with 1st entry of next leaf page */
3553                dtoffset->pn++;
3554                dtoffset->index = 0;
3555        }
3556
3557      out:
3558        /* return target leaf page pinned */
3559        btsp = btstack->top;
3560        btsp->bn = bn;
3561        btsp->index = dtoffset->index;
3562        btsp->mp = mp;
3563
3564        return 0;
3565}
3566
3567
3568/*
3569 *      dtCompare()
3570 *
3571 * function: compare search key with an internal entry
3572 *
3573 * return:
3574 *      < 0 if k is < record
3575 *      = 0 if k is = record
3576 *      > 0 if k is > record
3577 */
3578static int dtCompare(struct component_name * key,       /* search key */
3579                     dtpage_t * p,      /* directory page */
3580                     int si)
3581{                               /* entry slot index */
3582        wchar_t *kname;
3583        __le16 *name;
3584        int klen, namlen, len, rc;
3585        struct idtentry *ih;
3586        struct dtslot *t;
3587
3588        /*
3589         * force the left-most key on internal pages, at any level of
3590         * the tree, to be less than any search key.
3591         * this obviates having to update the leftmost key on an internal
3592         * page when the user inserts a new key in the tree smaller than
3593         * anything that has been stored.
3594         *
3595         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3596         * at any internal page at any level of the tree,
3597         * it descends to child of the entry anyway -
3598         * ? make the entry as min size dummy entry)
3599         *
3600         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3601         * return (1);
3602         */
3603
3604        kname = key->name;
3605        klen = key->namlen;
3606
3607        ih = (struct idtentry *) & p->slot[si];
3608        si = ih->next;
3609        name = ih->name;
3610        namlen = ih->namlen;
3611        len = min(namlen, DTIHDRDATALEN);
3612
3613        /* compare with head/only segment */
3614        len = min(klen, len);
3615        if ((rc = UniStrncmp_le(kname, name, len)))
3616                return rc;
3617
3618        klen -= len;
3619        namlen -= len;
3620
3621        /* compare with additional segment(s) */
3622        kname += len;
3623        while (klen > 0 && namlen > 0) {
3624                /* compare with next name segment */
3625                t = (struct dtslot *) & p->slot[si];
3626                len = min(namlen, DTSLOTDATALEN);
3627                len = min(klen, len);
3628                name = t->name;
3629                if ((rc = UniStrncmp_le(kname, name, len)))
3630                        return rc;
3631
3632                klen -= len;
3633                namlen -= len;
3634                kname += len;
3635                si = t->next;
3636        }
3637
3638        return (klen - namlen);
3639}
3640
3641
3642
3643
3644/*
3645 *      ciCompare()
3646 *
3647 * function: compare search key with an (leaf/internal) entry
3648 *
3649 * return:
3650 *      < 0 if k is < record
3651 *      = 0 if k is = record
3652 *      > 0 if k is > record
3653 */
3654static int ciCompare(struct component_name * key,       /* search key */
3655                     dtpage_t * p,      /* directory page */
3656                     int si,    /* entry slot index */
3657                     int flag)
3658{
3659        wchar_t *kname, x;
3660        __le16 *name;
3661        int klen, namlen, len, rc;
3662        struct ldtentry *lh;
3663        struct idtentry *ih;
3664        struct dtslot *t;
3665        int i;
3666
3667        /*
3668         * force the left-most key on internal pages, at any level of
3669         * the tree, to be less than any search key.
3670         * this obviates having to update the leftmost key on an internal
3671         * page when the user inserts a new key in the tree smaller than
3672         * anything that has been stored.
3673         *
3674         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3675         * at any internal page at any level of the tree,
3676         * it descends to child of the entry anyway -
3677         * ? make the entry as min size dummy entry)
3678         *
3679         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3680         * return (1);
3681         */
3682
3683        kname = key->name;
3684        klen = key->namlen;
3685
3686        /*
3687         * leaf page entry
3688         */
3689        if (p->header.flag & BT_LEAF) {
3690                lh = (struct ldtentry *) & p->slot[si];
3691                si = lh->next;
3692                name = lh->name;
3693                namlen = lh->namlen;
3694                if (flag & JFS_DIR_INDEX)
3695                        len = min(namlen, DTLHDRDATALEN);
3696                else
3697                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3698        }
3699        /*
3700         * internal page entry
3701         */
3702        else {
3703                ih = (struct idtentry *) & p->slot[si];
3704                si = ih->next;
3705                name = ih->name;
3706                namlen = ih->namlen;
3707                len = min(namlen, DTIHDRDATALEN);
3708        }
3709
3710        /* compare with head/only segment */
3711        len = min(klen, len);
3712        for (i = 0; i < len; i++, kname++, name++) {
3713                /* only uppercase if case-insensitive support is on */
3714                if ((flag & JFS_OS2) == JFS_OS2)
3715                        x = UniToupper(le16_to_cpu(*name));
3716                else
3717                        x = le16_to_cpu(*name);
3718                if ((rc = *kname - x))
3719                        return rc;
3720        }
3721
3722        klen -= len;
3723        namlen -= len;
3724
3725        /* compare with additional segment(s) */
3726        while (klen > 0 && namlen > 0) {
3727                /* compare with next name segment */
3728                t = (struct dtslot *) & p->slot[si];
3729                len = min(namlen, DTSLOTDATALEN);
3730                len = min(klen, len);
3731                name = t->name;
3732                for (i = 0; i < len; i++, kname++, name++) {
3733                        /* only uppercase if case-insensitive support is on */
3734                        if ((flag & JFS_OS2) == JFS_OS2)
3735                                x = UniToupper(le16_to_cpu(*name));
3736                        else
3737                                x = le16_to_cpu(*name);
3738
3739                        if ((rc = *kname - x))
3740                                return rc;
3741                }
3742
3743                klen -= len;
3744                namlen -= len;
3745                si = t->next;
3746        }
3747
3748        return (klen - namlen);
3749}
3750
3751
3752/*
3753 *      ciGetLeafPrefixKey()
3754 *
3755 * function: compute prefix of suffix compression
3756 *           from two adjacent leaf entries
3757 *           across page boundary
3758 *
3759 * return: non-zero on error
3760 *
3761 */
3762static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3763                               int ri, struct component_name * key, int flag)
3764{
3765        int klen, namlen;
3766        wchar_t *pl, *pr, *kname;
3767        struct component_name lkey;
3768        struct component_name rkey;
3769
3770        lkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
3771                                        GFP_KERNEL);
3772        if (lkey.name == NULL)
3773                return -ENOMEM;
3774
3775        rkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
3776                                        GFP_KERNEL);
3777        if (rkey.name == NULL) {
3778                kfree(lkey.name);
3779                return -ENOMEM;
3780        }
3781
3782        /* get left and right key */
3783        dtGetKey(lp, li, &lkey, flag);
3784        lkey.name[lkey.namlen] = 0;
3785
3786        if ((flag & JFS_OS2) == JFS_OS2)
3787                ciToUpper(&lkey);
3788
3789        dtGetKey(rp, ri, &rkey, flag);
3790        rkey.name[rkey.namlen] = 0;
3791
3792
3793        if ((flag & JFS_OS2) == JFS_OS2)
3794                ciToUpper(&rkey);
3795
3796        /* compute prefix */
3797        klen = 0;
3798        kname = key->name;
3799        namlen = min(lkey.namlen, rkey.namlen);
3800        for (pl = lkey.name, pr = rkey.name;
3801             namlen; pl++, pr++, namlen--, klen++, kname++) {
3802                *kname = *pr;
3803                if (*pl != *pr) {
3804                        key->namlen = klen + 1;
3805                        goto free_names;
3806                }
3807        }
3808
3809        /* l->namlen <= r->namlen since l <= r */
3810        if (lkey.namlen < rkey.namlen) {
3811                *kname = *pr;
3812                key->namlen = klen + 1;
3813        } else                  /* l->namelen == r->namelen */
3814                key->namlen = klen;
3815
3816free_names:
3817        kfree(lkey.name);
3818        kfree(rkey.name);
3819        return 0;
3820}
3821
3822
3823
3824/*
3825 *      dtGetKey()
3826 *
3827 * function: get key of the entry
3828 */
3829static void dtGetKey(dtpage_t * p, int i,       /* entry index */
3830                     struct component_name * key, int flag)
3831{
3832        int si;
3833        s8 *stbl;
3834        struct ldtentry *lh;
3835        struct idtentry *ih;
3836        struct dtslot *t;
3837        int namlen, len;
3838        wchar_t *kname;
3839        __le16 *name;
3840
3841        /* get entry */
3842        stbl = DT_GETSTBL(p);
3843        si = stbl[i];
3844        if (p->header.flag & BT_LEAF) {
3845                lh = (struct ldtentry *) & p->slot[si];
3846                si = lh->next;
3847                namlen = lh->namlen;
3848                name = lh->name;
3849                if (flag & JFS_DIR_INDEX)
3850                        len = min(namlen, DTLHDRDATALEN);
3851                else
3852                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3853        } else {
3854                ih = (struct idtentry *) & p->slot[si];
3855                si = ih->next;
3856                namlen = ih->namlen;
3857                name = ih->name;
3858                len = min(namlen, DTIHDRDATALEN);
3859        }
3860
3861        key->namlen = namlen;
3862        kname = key->name;
3863
3864        /*
3865         * move head/only segment
3866         */
3867        UniStrncpy_from_le(kname, name, len);
3868
3869        /*
3870         * move additional segment(s)
3871         */
3872        while (si >= 0) {
3873                /* get next segment */
3874                t = &p->slot[si];
3875                kname += len;
3876                namlen -= len;
3877                len = min(namlen, DTSLOTDATALEN);
3878                UniStrncpy_from_le(kname, t->name, len);
3879
3880                si = t->next;
3881        }
3882}
3883
3884
3885/*
3886 *      dtInsertEntry()
3887 *
3888 * function: allocate free slot(s) and
3889 *           write a leaf/internal entry
3890 *
3891 * return: entry slot index
3892 */
3893static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
3894                          ddata_t * data, struct dt_lock ** dtlock)
3895{
3896        struct dtslot *h, *t;
3897        struct ldtentry *lh = NULL;
3898        struct idtentry *ih = NULL;
3899        int hsi, fsi, klen, len, nextindex;
3900        wchar_t *kname;
3901        __le16 *name;
3902        s8 *stbl;
3903        pxd_t *xd;
3904        struct dt_lock *dtlck = *dtlock;
3905        struct lv *lv;
3906        int xsi, n;
3907        s64 bn = 0;
3908        struct metapage *mp = NULL;
3909
3910        klen = key->namlen;
3911        kname = key->name;
3912
3913        /* allocate a free slot */
3914        hsi = fsi = p->header.freelist;
3915        h = &p->slot[fsi];
3916        p->header.freelist = h->next;
3917        --p->header.freecnt;
3918
3919        /* open new linelock */
3920        if (dtlck->index >= dtlck->maxcnt)
3921                dtlck = (struct dt_lock *) txLinelock(dtlck);
3922
3923        lv = & dtlck->lv[dtlck->index];
3924        lv->offset = hsi;
3925
3926        /* write head/only segment */
3927        if (p->header.flag & BT_LEAF) {
3928                lh = (struct ldtentry *) h;
3929                lh->next = h->next;
3930                lh->inumber = cpu_to_le32(data->leaf.ino);
3931                lh->namlen = klen;
3932                name = lh->name;
3933                if (data->leaf.ip) {
3934                        len = min(klen, DTLHDRDATALEN);
3935                        if (!(p->header.flag & BT_ROOT))
3936                                bn = addressPXD(&p->header.self);
3937                        lh->index = cpu_to_le32(add_index(data->leaf.tid,
3938                                                          data->leaf.ip,
3939                                                          bn, index));
3940                } else
3941                        len = min(klen, DTLHDRDATALEN_LEGACY);
3942        } else {
3943                ih = (struct idtentry *) h;
3944                ih->next = h->next;
3945                xd = (pxd_t *) ih;
3946                *xd = data->xd;
3947                ih->namlen = klen;
3948                name = ih->name;
3949                len = min(klen, DTIHDRDATALEN);
3950        }
3951
3952        UniStrncpy_to_le(name, kname, len);
3953
3954        n = 1;
3955        xsi = hsi;
3956
3957        /* write additional segment(s) */
3958        t = h;
3959        klen -= len;
3960        while (klen) {
3961                /* get free slot */
3962                fsi = p->header.freelist;
3963                t = &p->slot[fsi];
3964                p->header.freelist = t->next;
3965                --p->header.freecnt;
3966
3967                /* is next slot contiguous ? */
3968                if (fsi != xsi + 1) {
3969                        /* close current linelock */
3970                        lv->length = n;
3971                        dtlck->index++;
3972
3973                        /* open new linelock */
3974                        if (dtlck->index < dtlck->maxcnt)
3975                                lv++;
3976                        else {
3977                                dtlck = (struct dt_lock *) txLinelock(dtlck);
3978                                lv = & dtlck->lv[0];
3979                        }
3980
3981                        lv->offset = fsi;
3982                        n = 0;
3983                }
3984
3985                kname += len;
3986                len = min(klen, DTSLOTDATALEN);
3987                UniStrncpy_to_le(t->name, kname, len);
3988
3989                n++;
3990                xsi = fsi;
3991                klen -= len;
3992        }
3993
3994        /* close current linelock */
3995        lv->length = n;
3996        dtlck->index++;
3997
3998        *dtlock = dtlck;
3999
4000        /* terminate last/only segment */
4001        if (h == t) {
4002                /* single segment entry */
4003                if (p->header.flag & BT_LEAF)
4004                        lh->next = -1;
4005                else
4006                        ih->next = -1;
4007        } else
4008                /* multi-segment entry */
4009                t->next = -1;
4010
4011        /* if insert into middle, shift right succeeding entries in stbl */
4012        stbl = DT_GETSTBL(p);
4013        nextindex = p->header.nextindex;
4014        if (index < nextindex) {
4015                memmove(stbl + index + 1, stbl + index, nextindex - index);
4016
4017                if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
4018                        s64 lblock;
4019
4020                        /*
4021                         * Need to update slot number for entries that moved
4022                         * in the stbl
4023                         */
4024                        mp = NULL;
4025                        for (n = index + 1; n <= nextindex; n++) {
4026                                lh = (struct ldtentry *) & (p->slot[stbl[n]]);
4027                                modify_index(data->leaf.tid, data->leaf.ip,
4028                                             le32_to_cpu(lh->index), bn, n,
4029                                             &mp, &lblock);
4030                        }
4031                        if (mp)
4032                                release_metapage(mp);
4033                }
4034        }
4035
4036        stbl[index] = hsi;
4037
4038        /* advance next available entry index of stbl */
4039        ++p->header.nextindex;
4040}
4041
4042
4043/*
4044 *      dtMoveEntry()
4045 *
4046 * function: move entries from split/left page to new/right page
4047 *
4048 *      nextindex of dst page and freelist/freecnt of both pages
4049 *      are updated.
4050 */
4051static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
4052                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
4053                        int do_index)
4054{
4055        int ssi, next;          /* src slot index */
4056        int di;                 /* dst entry index */
4057        int dsi;                /* dst slot index */
4058        s8 *sstbl, *dstbl;      /* sorted entry table */
4059        int snamlen, len;
4060        struct ldtentry *slh, *dlh = NULL;
4061        struct idtentry *sih, *dih = NULL;
4062        struct dtslot *h, *s, *d;
4063        struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
4064        struct lv *slv, *dlv;
4065        int xssi, ns, nd;
4066        int sfsi;
4067
4068        sstbl = (s8 *) & sp->slot[sp->header.stblindex];
4069        dstbl = (s8 *) & dp->slot[dp->header.stblindex];
4070
4071        dsi = dp->header.freelist;      /* first (whole page) free slot */
4072        sfsi = sp->header.freelist;
4073
4074        /* linelock destination entry slot */
4075        dlv = & ddtlck->lv[ddtlck->index];
4076        dlv->offset = dsi;
4077
4078        /* linelock source entry slot */
4079        slv = & sdtlck->lv[sdtlck->index];
4080        slv->offset = sstbl[si];
4081        xssi = slv->offset - 1;
4082
4083        /*
4084         * move entries
4085         */
4086        ns = nd = 0;
4087        for (di = 0; si < sp->header.nextindex; si++, di++) {
4088                ssi = sstbl[si];
4089                dstbl[di] = dsi;
4090
4091                /* is next slot contiguous ? */
4092                if (ssi != xssi + 1) {
4093                        /* close current linelock */
4094                        slv->length = ns;
4095                        sdtlck->index++;
4096
4097                        /* open new linelock */
4098                        if (sdtlck->index < sdtlck->maxcnt)
4099                                slv++;
4100                        else {
4101                                sdtlck = (struct dt_lock *) txLinelock(sdtlck);
4102                                slv = & sdtlck->lv[0];
4103                        }
4104
4105                        slv->offset = ssi;
4106                        ns = 0;
4107                }
4108
4109                /*
4110                 * move head/only segment of an entry
4111                 */
4112                /* get dst slot */
4113                h = d = &dp->slot[dsi];
4114
4115                /* get src slot and move */
4116                s = &sp->slot[ssi];
4117                if (sp->header.flag & BT_LEAF) {
4118                        /* get source entry */
4119                        slh = (struct ldtentry *) s;
4120                        dlh = (struct ldtentry *) h;
4121                        snamlen = slh->namlen;
4122
4123                        if (do_index) {
4124                                len = min(snamlen, DTLHDRDATALEN);
4125                                dlh->index = slh->index; /* little-endian */
4126                        } else
4127                                len = min(snamlen, DTLHDRDATALEN_LEGACY);
4128
4129                        memcpy(dlh, slh, 6 + len * 2);
4130
4131                        next = slh->next;
4132
4133                        /* update dst head/only segment next field */
4134                        dsi++;
4135                        dlh->next = dsi;
4136                } else {
4137                        sih = (struct idtentry *) s;
4138                        snamlen = sih->namlen;
4139
4140                        len = min(snamlen, DTIHDRDATALEN);
4141                        dih = (struct idtentry *) h;
4142                        memcpy(dih, sih, 10 + len * 2);
4143                        next = sih->next;
4144
4145                        dsi++;
4146                        dih->next = dsi;
4147                }
4148
4149                /* free src head/only segment */
4150                s->next = sfsi;
4151                s->cnt = 1;
4152                sfsi = ssi;
4153
4154                ns++;
4155                nd++;
4156                xssi = ssi;
4157
4158                /*
4159                 * move additional segment(s) of the entry
4160                 */
4161                snamlen -= len;
4162                while ((ssi = next) >= 0) {
4163                        /* is next slot contiguous ? */
4164                        if (ssi != xssi + 1) {
4165                                /* close current linelock */
4166                                slv->length = ns;
4167                                sdtlck->index++;
4168
4169                                /* open new linelock */
4170                                if (sdtlck->index < sdtlck->maxcnt)
4171                                        slv++;
4172                                else {
4173                                        sdtlck =
4174                                            (struct dt_lock *)
4175                                            txLinelock(sdtlck);
4176                                        slv = & sdtlck->lv[0];
4177                                }
4178
4179                                slv->offset = ssi;
4180                                ns = 0;
4181                        }
4182
4183                        /* get next source segment */
4184                        s = &sp->slot[ssi];
4185
4186                        /* get next destination free slot */
4187                        d++;
4188
4189                        len = min(snamlen, DTSLOTDATALEN);
4190                        UniStrncpy_le(d->name, s->name, len);
4191
4192                        ns++;
4193                        nd++;
4194                        xssi = ssi;
4195
4196                        dsi++;
4197                        d->next = dsi;
4198
4199                        /* free source segment */
4200                        next = s->next;
4201                        s->next = sfsi;
4202                        s->cnt = 1;
4203                        sfsi = ssi;
4204
4205                        snamlen -= len;
4206                }               /* end while */
4207
4208                /* terminate dst last/only segment */
4209                if (h == d) {
4210                        /* single segment entry */
4211                        if (dp->header.flag & BT_LEAF)
4212                                dlh->next = -1;
4213                        else
4214                                dih->next = -1;
4215                } else
4216                        /* multi-segment entry */
4217                        d->next = -1;
4218        }                       /* end for */
4219
4220        /* close current linelock */
4221        slv->length = ns;
4222        sdtlck->index++;
4223        *sdtlock = sdtlck;
4224
4225        dlv->length = nd;
4226        ddtlck->index++;
4227        *ddtlock = ddtlck;
4228
4229        /* update source header */
4230        sp->header.freelist = sfsi;
4231        sp->header.freecnt += nd;
4232
4233        /* update destination header */
4234        dp->header.nextindex = di;
4235
4236        dp->header.freelist = dsi;
4237        dp->header.freecnt -= nd;
4238}
4239
4240
4241/*
4242 *      dtDeleteEntry()
4243 *
4244 * function: free a (leaf/internal) entry
4245 *
4246 * log freelist header, stbl, and each segment slot of entry
4247 * (even though last/only segment next field is modified,
4248 * physical image logging requires all segment slots of
4249 * the entry logged to avoid applying previous updates
4250 * to the same slots)
4251 */
4252static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
4253{
4254        int fsi;                /* free entry slot index */
4255        s8 *stbl;
4256        struct dtslot *t;
4257        int si, freecnt;
4258        struct dt_lock *dtlck = *dtlock;
4259        struct lv *lv;
4260        int xsi, n;
4261
4262        /* get free entry slot index */
4263        stbl = DT_GETSTBL(p);
4264        fsi = stbl[fi];
4265
4266        /* open new linelock */
4267        if (dtlck->index >= dtlck->maxcnt)
4268                dtlck = (struct dt_lock *) txLinelock(dtlck);
4269        lv = & dtlck->lv[dtlck->index];
4270
4271        lv->offset = fsi;
4272
4273        /* get the head/only segment */
4274        t = &p->slot[fsi];
4275        if (p->header.flag & BT_LEAF)
4276                si = ((struct ldtentry *) t)->next;
4277        else
4278                si = ((struct idtentry *) t)->next;
4279        t->next = si;
4280        t->cnt = 1;
4281
4282        n = freecnt = 1;
4283        xsi = fsi;
4284
4285        /* find the last/only segment */
4286        while (si >= 0) {
4287                /* is next slot contiguous ? */
4288                if (si != xsi + 1) {
4289                        /* close current linelock */
4290                        lv->length = n;
4291                        dtlck->index++;
4292
4293                        /* open new linelock */
4294                        if (dtlck->index < dtlck->maxcnt)
4295                                lv++;
4296                        else {
4297                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4298                                lv = & dtlck->lv[0];
4299                        }
4300
4301                        lv->offset = si;
4302                        n = 0;
4303                }
4304
4305                n++;
4306                xsi = si;
4307                freecnt++;
4308
4309                t = &p->slot[si];
4310                t->cnt = 1;
4311                si = t->next;
4312        }
4313
4314        /* close current linelock */
4315        lv->length = n;
4316        dtlck->index++;
4317
4318        *dtlock = dtlck;
4319
4320        /* update freelist */
4321        t->next = p->header.freelist;
4322        p->header.freelist = fsi;
4323        p->header.freecnt += freecnt;
4324
4325        /* if delete from middle,
4326         * shift left the succedding entries in the stbl
4327         */
4328        si = p->header.nextindex;
4329        if (fi < si - 1)
4330                memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
4331
4332        p->header.nextindex--;
4333}
4334
4335
4336/*
4337 *      dtTruncateEntry()
4338 *
4339 * function: truncate a (leaf/internal) entry
4340 *
4341 * log freelist header, stbl, and each segment slot of entry
4342 * (even though last/only segment next field is modified,
4343 * physical image logging requires all segment slots of
4344 * the entry logged to avoid applying previous updates
4345 * to the same slots)
4346 */
4347static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
4348{
4349        int tsi;                /* truncate entry slot index */
4350        s8 *stbl;
4351        struct dtslot *t;
4352        int si, freecnt;
4353        struct dt_lock *dtlck = *dtlock;
4354        struct lv *lv;
4355        int fsi, xsi, n;
4356
4357        /* get free entry slot index */
4358        stbl = DT_GETSTBL(p);
4359        tsi = stbl[ti];
4360
4361        /* open new linelock */
4362        if (dtlck->index >= dtlck->maxcnt)
4363                dtlck = (struct dt_lock *) txLinelock(dtlck);
4364        lv = & dtlck->lv[dtlck->index];
4365
4366        lv->offset = tsi;
4367
4368        /* get the head/only segment */
4369        t = &p->slot[tsi];
4370        ASSERT(p->header.flag & BT_INTERNAL);
4371        ((struct idtentry *) t)->namlen = 0;
4372        si = ((struct idtentry *) t)->next;
4373        ((struct idtentry *) t)->next = -1;
4374
4375        n = 1;
4376        freecnt = 0;
4377        fsi = si;
4378        xsi = tsi;
4379
4380        /* find the last/only segment */
4381        while (si >= 0) {
4382                /* is next slot contiguous ? */
4383                if (si != xsi + 1) {
4384                        /* close current linelock */
4385                        lv->length = n;
4386                        dtlck->index++;
4387
4388                        /* open new linelock */
4389                        if (dtlck->index < dtlck->maxcnt)
4390                                lv++;
4391                        else {
4392                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4393                                lv = & dtlck->lv[0];
4394                        }
4395
4396                        lv->offset = si;
4397                        n = 0;
4398                }
4399
4400                n++;
4401                xsi = si;
4402                freecnt++;
4403
4404                t = &p->slot[si];
4405                t->cnt = 1;
4406                si = t->next;
4407        }
4408
4409        /* close current linelock */
4410        lv->length = n;
4411        dtlck->index++;
4412
4413        *dtlock = dtlck;
4414
4415        /* update freelist */
4416        if (freecnt == 0)
4417                return;
4418        t->next = p->header.freelist;
4419        p->header.freelist = fsi;
4420        p->header.freecnt += freecnt;
4421}
4422
4423
4424/*
4425 *      dtLinelockFreelist()
4426 */
4427static void dtLinelockFreelist(dtpage_t * p,    /* directory page */
4428                               int m,   /* max slot index */
4429                               struct dt_lock ** dtlock)
4430{
4431        int fsi;                /* free entry slot index */
4432        struct dtslot *t;
4433        int si;
4434        struct dt_lock *dtlck = *dtlock;
4435        struct lv *lv;
4436        int xsi, n;
4437
4438        /* get free entry slot index */
4439        fsi = p->header.freelist;
4440
4441        /* open new linelock */
4442        if (dtlck->index >= dtlck->maxcnt)
4443                dtlck = (struct dt_lock *) txLinelock(dtlck);
4444        lv = & dtlck->lv[dtlck->index];
4445
4446        lv->offset = fsi;
4447
4448        n = 1;
4449        xsi = fsi;
4450
4451        t = &p->slot[fsi];
4452        si = t->next;
4453
4454        /* find the last/only segment */
4455        while (si < m && si >= 0) {
4456                /* is next slot contiguous ? */
4457                if (si != xsi + 1) {
4458                        /* close current linelock */
4459                        lv->length = n;
4460                        dtlck->index++;
4461
4462                        /* open new linelock */
4463                        if (dtlck->index < dtlck->maxcnt)
4464                                lv++;
4465                        else {
4466                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4467                                lv = & dtlck->lv[0];
4468                        }
4469
4470                        lv->offset = si;
4471                        n = 0;
4472                }
4473
4474                n++;
4475                xsi = si;
4476
4477                t = &p->slot[si];
4478                si = t->next;
4479        }
4480
4481        /* close current linelock */
4482        lv->length = n;
4483        dtlck->index++;
4484
4485        *dtlock = dtlck;
4486}
4487
4488
4489/*
4490 * NAME: dtModify
4491 *
4492 * FUNCTION: Modify the inode number part of a directory entry
4493 *
4494 * PARAMETERS:
4495 *      tid     - Transaction id
4496 *      ip      - Inode of parent directory
4497 *      key     - Name of entry to be modified
4498 *      orig_ino        - Original inode number expected in entry
4499 *      new_ino - New inode number to put into entry
4500 *      flag    - JFS_RENAME
4501 *
4502 * RETURNS:
4503 *      -ESTALE - If entry found does not match orig_ino passed in
4504 *      -ENOENT - If no entry can be found to match key
4505 *      0       - If successfully modified entry
4506 */
4507int dtModify(tid_t tid, struct inode *ip,
4508         struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
4509{
4510        int rc;
4511        s64 bn;
4512        struct metapage *mp;
4513        dtpage_t *p;
4514        int index;
4515        struct btstack btstack;
4516        struct tlock *tlck;
4517        struct dt_lock *dtlck;
4518        struct lv *lv;
4519        s8 *stbl;
4520        int entry_si;           /* entry slot index */
4521        struct ldtentry *entry;
4522
4523        /*
4524         *      search for the entry to modify:
4525         *
4526         * dtSearch() returns (leaf page pinned, index at which to modify).
4527         */
4528        if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
4529                return rc;
4530
4531        /* retrieve search result */
4532        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4533
4534        BT_MARK_DIRTY(mp, ip);
4535        /*
4536         * acquire a transaction lock on the leaf page of named entry
4537         */
4538        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
4539        dtlck = (struct dt_lock *) & tlck->lock;
4540
4541        /* get slot index of the entry */
4542        stbl = DT_GETSTBL(p);
4543        entry_si = stbl[index];
4544
4545        /* linelock entry */
4546        ASSERT(dtlck->index == 0);
4547        lv = & dtlck->lv[0];
4548        lv->offset = entry_si;
4549        lv->length = 1;
4550        dtlck->index++;
4551
4552        /* get the head/only segment */
4553        entry = (struct ldtentry *) & p->slot[entry_si];
4554
4555        /* substitute the inode number of the entry */
4556        entry->inumber = cpu_to_le32(new_ino);
4557
4558        /* unpin the leaf page */
4559        DT_PUTPAGE(mp);
4560
4561        return 0;
4562}
4563