linux/fs/jfs/jfs_dtree.c
<<
>>
Prefs
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2004
   3 *
   4 *   This program is free software;  you can redistribute it and/or modify
   5 *   it under the terms of the GNU General Public License as published by
   6 *   the Free Software Foundation; either version 2 of the License, or
   7 *   (at your option) any later version.
   8 *
   9 *   This program is distributed in the hope that it will be useful,
  10 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  11 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  12 *   the GNU General Public License for more details.
  13 *
  14 *   You should have received a copy of the GNU General Public License
  15 *   along with this program;  if not, write to the Free Software
  16 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17 */
  18
  19/*
  20 *      jfs_dtree.c: directory B+-tree manager
  21 *
  22 * B+-tree with variable length key directory:
  23 *
  24 * each directory page is structured as an array of 32-byte
  25 * directory entry slots initialized as a freelist
  26 * to avoid search/compaction of free space at insertion.
  27 * when an entry is inserted, a number of slots are allocated
  28 * from the freelist as required to store variable length data
  29 * of the entry; when the entry is deleted, slots of the entry
  30 * are returned to freelist.
  31 *
  32 * leaf entry stores full name as key and file serial number
  33 * (aka inode number) as data.
  34 * internal/router entry stores sufffix compressed name
  35 * as key and simple extent descriptor as data.
  36 *
  37 * each directory page maintains a sorted entry index table
  38 * which stores the start slot index of sorted entries
  39 * to allow binary search on the table.
  40 *
  41 * directory starts as a root/leaf page in on-disk inode
  42 * inline data area.
  43 * when it becomes full, it starts a leaf of a external extent
  44 * of length of 1 block. each time the first leaf becomes full,
  45 * it is extended rather than split (its size is doubled),
  46 * until its length becoms 4 KBytes, from then the extent is split
  47 * with new 4 Kbyte extent when it becomes full
  48 * to reduce external fragmentation of small directories.
  49 *
  50 * blah, blah, blah, for linear scan of directory in pieces by
  51 * readdir().
  52 *
  53 *
  54 *      case-insensitive directory file system
  55 *
  56 * names are stored in case-sensitive way in leaf entry.
  57 * but stored, searched and compared in case-insensitive (uppercase) order
  58 * (i.e., both search key and entry key are folded for search/compare):
  59 * (note that case-sensitive order is BROKEN in storage, e.g.,
  60 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
  61 *
  62 *  entries which folds to the same key makes up a equivalent class
  63 *  whose members are stored as contiguous cluster (may cross page boundary)
  64 *  but whose order is arbitrary and acts as duplicate, e.g.,
  65 *  abc, Abc, aBc, abC)
  66 *
  67 * once match is found at leaf, requires scan forward/backward
  68 * either for, in case-insensitive search, duplicate
  69 * or for, in case-sensitive search, for exact match
  70 *
  71 * router entry must be created/stored in case-insensitive way
  72 * in internal entry:
  73 * (right most key of left page and left most key of right page
  74 * are folded, and its suffix compression is propagated as router
  75 * key in parent)
  76 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
  77 * should be made the router key for the split)
  78 *
  79 * case-insensitive search:
  80 *
  81 *      fold search key;
  82 *
  83 *      case-insensitive search of B-tree:
  84 *      for internal entry, router key is already folded;
  85 *      for leaf entry, fold the entry key before comparison.
  86 *
  87 *      if (leaf entry case-insensitive match found)
  88 *              if (next entry satisfies case-insensitive match)
  89 *                      return EDUPLICATE;
  90 *              if (prev entry satisfies case-insensitive match)
  91 *                      return EDUPLICATE;
  92 *              return match;
  93 *      else
  94 *              return no match;
  95 *
  96 *      serialization:
  97 * target directory inode lock is being held on entry/exit
  98 * of all main directory service routines.
  99 *
 100 *      log based recovery:
 101 */
 102
 103#include <linux/fs.h>
 104#include <linux/quotaops.h>
 105#include <linux/slab.h>
 106#include "jfs_incore.h"
 107#include "jfs_superblock.h"
 108#include "jfs_filsys.h"
 109#include "jfs_metapage.h"
 110#include "jfs_dmap.h"
 111#include "jfs_unicode.h"
 112#include "jfs_debug.h"
 113
 114/* dtree split parameter */
 115struct dtsplit {
 116        struct metapage *mp;
 117        s16 index;
 118        s16 nslot;
 119        struct component_name *key;
 120        ddata_t *data;
 121        struct pxdlist *pxdlist;
 122};
 123
 124#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
 125
 126/* get page buffer for specified block address */
 127#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
 128{\
 129        BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\
 130        if (!(RC))\
 131        {\
 132                if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\
 133                    ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\
 134                {\
 135                        BT_PUTPAGE(MP);\
 136                        jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\
 137                        MP = NULL;\
 138                        RC = -EIO;\
 139                }\
 140        }\
 141}
 142
 143/* for consistency */
 144#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
 145
 146#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
 147        BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
 148
 149/*
 150 * forward references
 151 */
 152static int dtSplitUp(tid_t tid, struct inode *ip,
 153                     struct dtsplit * split, struct btstack * btstack);
 154
 155static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 156                       struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
 157
 158static int dtExtendPage(tid_t tid, struct inode *ip,
 159                        struct dtsplit * split, struct btstack * btstack);
 160
 161static int dtSplitRoot(tid_t tid, struct inode *ip,
 162                       struct dtsplit * split, struct metapage ** rmpp);
 163
 164static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
 165                      dtpage_t * fp, struct btstack * btstack);
 166
 167static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
 168
 169static int dtReadFirst(struct inode *ip, struct btstack * btstack);
 170
 171static int dtReadNext(struct inode *ip,
 172                      loff_t * offset, struct btstack * btstack);
 173
 174static int dtCompare(struct component_name * key, dtpage_t * p, int si);
 175
 176static int ciCompare(struct component_name * key, dtpage_t * p, int si,
 177                     int flag);
 178
 179static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
 180                     int flag);
 181
 182static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
 183                              int ri, struct component_name * key, int flag);
 184
 185static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
 186                          ddata_t * data, struct dt_lock **);
 187
 188static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
 189                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
 190                        int do_index);
 191
 192static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
 193
 194static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
 195
 196static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
 197
 198#define ciToUpper(c)    UniStrupr((c)->name)
 199
 200/*
 201 *      read_index_page()
 202 *
 203 *      Reads a page of a directory's index table.
 204 *      Having metadata mapped into the directory inode's address space
 205 *      presents a multitude of problems.  We avoid this by mapping to
 206 *      the absolute address space outside of the *_metapage routines
 207 */
 208static struct metapage *read_index_page(struct inode *inode, s64 blkno)
 209{
 210        int rc;
 211        s64 xaddr;
 212        int xflag;
 213        s32 xlen;
 214
 215        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
 216        if (rc || (xaddr == 0))
 217                return NULL;
 218
 219        return read_metapage(inode, xaddr, PSIZE, 1);
 220}
 221
 222/*
 223 *      get_index_page()
 224 *
 225 *      Same as get_index_page(), but get's a new page without reading
 226 */
 227static struct metapage *get_index_page(struct inode *inode, s64 blkno)
 228{
 229        int rc;
 230        s64 xaddr;
 231        int xflag;
 232        s32 xlen;
 233
 234        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
 235        if (rc || (xaddr == 0))
 236                return NULL;
 237
 238        return get_metapage(inode, xaddr, PSIZE, 1);
 239}
 240
 241/*
 242 *      find_index()
 243 *
 244 *      Returns dtree page containing directory table entry for specified
 245 *      index and pointer to its entry.
 246 *
 247 *      mp must be released by caller.
 248 */
 249static struct dir_table_slot *find_index(struct inode *ip, u32 index,
 250                                         struct metapage ** mp, s64 *lblock)
 251{
 252        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 253        s64 blkno;
 254        s64 offset;
 255        int page_offset;
 256        struct dir_table_slot *slot;
 257        static int maxWarnings = 10;
 258
 259        if (index < 2) {
 260                if (maxWarnings) {
 261                        jfs_warn("find_entry called with index = %d", index);
 262                        maxWarnings--;
 263                }
 264                return NULL;
 265        }
 266
 267        if (index >= jfs_ip->next_index) {
 268                jfs_warn("find_entry called with index >= next_index");
 269                return NULL;
 270        }
 271
 272        if (jfs_dirtable_inline(ip)) {
 273                /*
 274                 * Inline directory table
 275                 */
 276                *mp = NULL;
 277                slot = &jfs_ip->i_dirtable[index - 2];
 278        } else {
 279                offset = (index - 2) * sizeof(struct dir_table_slot);
 280                page_offset = offset & (PSIZE - 1);
 281                blkno = ((offset + 1) >> L2PSIZE) <<
 282                    JFS_SBI(ip->i_sb)->l2nbperpage;
 283
 284                if (*mp && (*lblock != blkno)) {
 285                        release_metapage(*mp);
 286                        *mp = NULL;
 287                }
 288                if (!(*mp)) {
 289                        *lblock = blkno;
 290                        *mp = read_index_page(ip, blkno);
 291                }
 292                if (!(*mp)) {
 293                        jfs_err("free_index: error reading directory table");
 294                        return NULL;
 295                }
 296
 297                slot =
 298                    (struct dir_table_slot *) ((char *) (*mp)->data +
 299                                               page_offset);
 300        }
 301        return slot;
 302}
 303
 304static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
 305                              u32 index)
 306{
 307        struct tlock *tlck;
 308        struct linelock *llck;
 309        struct lv *lv;
 310
 311        tlck = txLock(tid, ip, mp, tlckDATA);
 312        llck = (struct linelock *) tlck->lock;
 313
 314        if (llck->index >= llck->maxcnt)
 315                llck = txLinelock(llck);
 316        lv = &llck->lv[llck->index];
 317
 318        /*
 319         *      Linelock slot size is twice the size of directory table
 320         *      slot size.  512 entries per page.
 321         */
 322        lv->offset = ((index - 2) & 511) >> 1;
 323        lv->length = 1;
 324        llck->index++;
 325}
 326
 327/*
 328 *      add_index()
 329 *
 330 *      Adds an entry to the directory index table.  This is used to provide
 331 *      each directory entry with a persistent index in which to resume
 332 *      directory traversals
 333 */
 334static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
 335{
 336        struct super_block *sb = ip->i_sb;
 337        struct jfs_sb_info *sbi = JFS_SBI(sb);
 338        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 339        u64 blkno;
 340        struct dir_table_slot *dirtab_slot;
 341        u32 index;
 342        struct linelock *llck;
 343        struct lv *lv;
 344        struct metapage *mp;
 345        s64 offset;
 346        uint page_offset;
 347        struct tlock *tlck;
 348        s64 xaddr;
 349
 350        ASSERT(DO_INDEX(ip));
 351
 352        if (jfs_ip->next_index < 2) {
 353                jfs_warn("add_index: next_index = %d.  Resetting!",
 354                           jfs_ip->next_index);
 355                jfs_ip->next_index = 2;
 356        }
 357
 358        index = jfs_ip->next_index++;
 359
 360        if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
 361                /*
 362                 * i_size reflects size of index table, or 8 bytes per entry.
 363                 */
 364                ip->i_size = (loff_t) (index - 1) << 3;
 365
 366                /*
 367                 * dir table fits inline within inode
 368                 */
 369                dirtab_slot = &jfs_ip->i_dirtable[index-2];
 370                dirtab_slot->flag = DIR_INDEX_VALID;
 371                dirtab_slot->slot = slot;
 372                DTSaddress(dirtab_slot, bn);
 373
 374                set_cflag(COMMIT_Dirtable, ip);
 375
 376                return index;
 377        }
 378        if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
 379                struct dir_table_slot temp_table[12];
 380
 381                /*
 382                 * It's time to move the inline table to an external
 383                 * page and begin to build the xtree
 384                 */
 385                if (dquot_alloc_block(ip, sbi->nbperpage))
 386                        goto clean_up;
 387                if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
 388                        dquot_free_block(ip, sbi->nbperpage);
 389                        goto clean_up;
 390                }
 391
 392                /*
 393                 * Save the table, we're going to overwrite it with the
 394                 * xtree root
 395                 */
 396                memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
 397
 398                /*
 399                 * Initialize empty x-tree
 400                 */
 401                xtInitRoot(tid, ip);
 402
 403                /*
 404                 * Add the first block to the xtree
 405                 */
 406                if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
 407                        /* This really shouldn't fail */
 408                        jfs_warn("add_index: xtInsert failed!");
 409                        memcpy(&jfs_ip->i_dirtable, temp_table,
 410                               sizeof (temp_table));
 411                        dbFree(ip, xaddr, sbi->nbperpage);
 412                        dquot_free_block(ip, sbi->nbperpage);
 413                        goto clean_up;
 414                }
 415                ip->i_size = PSIZE;
 416
 417                mp = get_index_page(ip, 0);
 418                if (!mp) {
 419                        jfs_err("add_index: get_metapage failed!");
 420                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
 421                        memcpy(&jfs_ip->i_dirtable, temp_table,
 422                               sizeof (temp_table));
 423                        goto clean_up;
 424                }
 425                tlck = txLock(tid, ip, mp, tlckDATA);
 426                llck = (struct linelock *) & tlck->lock;
 427                ASSERT(llck->index == 0);
 428                lv = &llck->lv[0];
 429
 430                lv->offset = 0;
 431                lv->length = 6; /* tlckDATA slot size is 16 bytes */
 432                llck->index++;
 433
 434                memcpy(mp->data, temp_table, sizeof(temp_table));
 435
 436                mark_metapage_dirty(mp);
 437                release_metapage(mp);
 438
 439                /*
 440                 * Logging is now directed by xtree tlocks
 441                 */
 442                clear_cflag(COMMIT_Dirtable, ip);
 443        }
 444
 445        offset = (index - 2) * sizeof(struct dir_table_slot);
 446        page_offset = offset & (PSIZE - 1);
 447        blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
 448        if (page_offset == 0) {
 449                /*
 450                 * This will be the beginning of a new page
 451                 */
 452                xaddr = 0;
 453                if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
 454                        jfs_warn("add_index: xtInsert failed!");
 455                        goto clean_up;
 456                }
 457                ip->i_size += PSIZE;
 458
 459                if ((mp = get_index_page(ip, blkno)))
 460                        memset(mp->data, 0, PSIZE);     /* Just looks better */
 461                else
 462                        xtTruncate(tid, ip, offset, COMMIT_PWMAP);
 463        } else
 464                mp = read_index_page(ip, blkno);
 465
 466        if (!mp) {
 467                jfs_err("add_index: get/read_metapage failed!");
 468                goto clean_up;
 469        }
 470
 471        lock_index(tid, ip, mp, index);
 472
 473        dirtab_slot =
 474            (struct dir_table_slot *) ((char *) mp->data + page_offset);
 475        dirtab_slot->flag = DIR_INDEX_VALID;
 476        dirtab_slot->slot = slot;
 477        DTSaddress(dirtab_slot, bn);
 478
 479        mark_metapage_dirty(mp);
 480        release_metapage(mp);
 481
 482        return index;
 483
 484      clean_up:
 485
 486        jfs_ip->next_index--;
 487
 488        return 0;
 489}
 490
 491/*
 492 *      free_index()
 493 *
 494 *      Marks an entry to the directory index table as free.
 495 */
 496static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
 497{
 498        struct dir_table_slot *dirtab_slot;
 499        s64 lblock;
 500        struct metapage *mp = NULL;
 501
 502        dirtab_slot = find_index(ip, index, &mp, &lblock);
 503
 504        if (!dirtab_slot)
 505                return;
 506
 507        dirtab_slot->flag = DIR_INDEX_FREE;
 508        dirtab_slot->slot = dirtab_slot->addr1 = 0;
 509        dirtab_slot->addr2 = cpu_to_le32(next);
 510
 511        if (mp) {
 512                lock_index(tid, ip, mp, index);
 513                mark_metapage_dirty(mp);
 514                release_metapage(mp);
 515        } else
 516                set_cflag(COMMIT_Dirtable, ip);
 517}
 518
 519/*
 520 *      modify_index()
 521 *
 522 *      Changes an entry in the directory index table
 523 */
 524static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
 525                         int slot, struct metapage ** mp, s64 *lblock)
 526{
 527        struct dir_table_slot *dirtab_slot;
 528
 529        dirtab_slot = find_index(ip, index, mp, lblock);
 530
 531        if (!dirtab_slot)
 532                return;
 533
 534        DTSaddress(dirtab_slot, bn);
 535        dirtab_slot->slot = slot;
 536
 537        if (*mp) {
 538                lock_index(tid, ip, *mp, index);
 539                mark_metapage_dirty(*mp);
 540        } else
 541                set_cflag(COMMIT_Dirtable, ip);
 542}
 543
 544/*
 545 *      read_index()
 546 *
 547 *      reads a directory table slot
 548 */
 549static int read_index(struct inode *ip, u32 index,
 550                     struct dir_table_slot * dirtab_slot)
 551{
 552        s64 lblock;
 553        struct metapage *mp = NULL;
 554        struct dir_table_slot *slot;
 555
 556        slot = find_index(ip, index, &mp, &lblock);
 557        if (!slot) {
 558                return -EIO;
 559        }
 560
 561        memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));
 562
 563        if (mp)
 564                release_metapage(mp);
 565
 566        return 0;
 567}
 568
 569/*
 570 *      dtSearch()
 571 *
 572 * function:
 573 *      Search for the entry with specified key
 574 *
 575 * parameter:
 576 *
 577 * return: 0 - search result on stack, leaf page pinned;
 578 *         errno - I/O error
 579 */
 580int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
 581             struct btstack * btstack, int flag)
 582{
 583        int rc = 0;
 584        int cmp = 1;            /* init for empty page */
 585        s64 bn;
 586        struct metapage *mp;
 587        dtpage_t *p;
 588        s8 *stbl;
 589        int base, index, lim;
 590        struct btframe *btsp;
 591        pxd_t *pxd;
 592        int psize = 288;        /* initial in-line directory */
 593        ino_t inumber;
 594        struct component_name ciKey;
 595        struct super_block *sb = ip->i_sb;
 596
 597        ciKey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS);
 598        if (!ciKey.name) {
 599                rc = -ENOMEM;
 600                goto dtSearch_Exit2;
 601        }
 602
 603
 604        /* uppercase search key for c-i directory */
 605        UniStrcpy(ciKey.name, key->name);
 606        ciKey.namlen = key->namlen;
 607
 608        /* only uppercase if case-insensitive support is on */
 609        if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
 610                ciToUpper(&ciKey);
 611        }
 612        BT_CLR(btstack);        /* reset stack */
 613
 614        /* init level count for max pages to split */
 615        btstack->nsplit = 1;
 616
 617        /*
 618         *      search down tree from root:
 619         *
 620         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 621         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
 622         *
 623         * if entry with search key K is not found
 624         * internal page search find the entry with largest key Ki
 625         * less than K which point to the child page to search;
 626         * leaf page search find the entry with smallest key Kj
 627         * greater than K so that the returned index is the position of
 628         * the entry to be shifted right for insertion of new entry.
 629         * for empty tree, search key is greater than any key of the tree.
 630         *
 631         * by convention, root bn = 0.
 632         */
 633        for (bn = 0;;) {
 634                /* get/pin the page to search */
 635                DT_GETPAGE(ip, bn, mp, psize, p, rc);
 636                if (rc)
 637                        goto dtSearch_Exit1;
 638
 639                /* get sorted entry table of the page */
 640                stbl = DT_GETSTBL(p);
 641
 642                /*
 643                 * binary search with search key K on the current page.
 644                 */
 645                for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
 646                        index = base + (lim >> 1);
 647
 648                        if (p->header.flag & BT_LEAF) {
 649                                /* uppercase leaf name to compare */
 650                                cmp =
 651                                    ciCompare(&ciKey, p, stbl[index],
 652                                              JFS_SBI(sb)->mntflag);
 653                        } else {
 654                                /* router key is in uppercase */
 655
 656                                cmp = dtCompare(&ciKey, p, stbl[index]);
 657
 658
 659                        }
 660                        if (cmp == 0) {
 661                                /*
 662                                 *      search hit
 663                                 */
 664                                /* search hit - leaf page:
 665                                 * return the entry found
 666                                 */
 667                                if (p->header.flag & BT_LEAF) {
 668                                        inumber = le32_to_cpu(
 669                        ((struct ldtentry *) & p->slot[stbl[index]])->inumber);
 670
 671                                        /*
 672                                         * search for JFS_LOOKUP
 673                                         */
 674                                        if (flag == JFS_LOOKUP) {
 675                                                *data = inumber;
 676                                                rc = 0;
 677                                                goto out;
 678                                        }
 679
 680                                        /*
 681                                         * search for JFS_CREATE
 682                                         */
 683                                        if (flag == JFS_CREATE) {
 684                                                *data = inumber;
 685                                                rc = -EEXIST;
 686                                                goto out;
 687                                        }
 688
 689                                        /*
 690                                         * search for JFS_REMOVE or JFS_RENAME
 691                                         */
 692                                        if ((flag == JFS_REMOVE ||
 693                                             flag == JFS_RENAME) &&
 694                                            *data != inumber) {
 695                                                rc = -ESTALE;
 696                                                goto out;
 697                                        }
 698
 699                                        /*
 700                                         * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
 701                                         */
 702                                        /* save search result */
 703                                        *data = inumber;
 704                                        btsp = btstack->top;
 705                                        btsp->bn = bn;
 706                                        btsp->index = index;
 707                                        btsp->mp = mp;
 708
 709                                        rc = 0;
 710                                        goto dtSearch_Exit1;
 711                                }
 712
 713                                /* search hit - internal page:
 714                                 * descend/search its child page
 715                                 */
 716                                goto getChild;
 717                        }
 718
 719                        if (cmp > 0) {
 720                                base = index + 1;
 721                                --lim;
 722                        }
 723                }
 724
 725                /*
 726                 *      search miss
 727                 *
 728                 * base is the smallest index with key (Kj) greater than
 729                 * search key (K) and may be zero or (maxindex + 1) index.
 730                 */
 731                /*
 732                 * search miss - leaf page
 733                 *
 734                 * return location of entry (base) where new entry with
 735                 * search key K is to be inserted.
 736                 */
 737                if (p->header.flag & BT_LEAF) {
 738                        /*
 739                         * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
 740                         */
 741                        if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
 742                            flag == JFS_RENAME) {
 743                                rc = -ENOENT;
 744                                goto out;
 745                        }
 746
 747                        /*
 748                         * search for JFS_CREATE|JFS_FINDDIR:
 749                         *
 750                         * save search result
 751                         */
 752                        *data = 0;
 753                        btsp = btstack->top;
 754                        btsp->bn = bn;
 755                        btsp->index = base;
 756                        btsp->mp = mp;
 757
 758                        rc = 0;
 759                        goto dtSearch_Exit1;
 760                }
 761
 762                /*
 763                 * search miss - internal page
 764                 *
 765                 * if base is non-zero, decrement base by one to get the parent
 766                 * entry of the child page to search.
 767                 */
 768                index = base ? base - 1 : base;
 769
 770                /*
 771                 * go down to child page
 772                 */
 773              getChild:
 774                /* update max. number of pages to split */
 775                if (BT_STACK_FULL(btstack)) {
 776                        /* Something's corrupted, mark filesystem dirty so
 777                         * chkdsk will fix it.
 778                         */
 779                        jfs_error(sb, "stack overrun in dtSearch!");
 780                        BT_STACK_DUMP(btstack);
 781                        rc = -EIO;
 782                        goto out;
 783                }
 784                btstack->nsplit++;
 785
 786                /* push (bn, index) of the parent page/entry */
 787                BT_PUSH(btstack, bn, index);
 788
 789                /* get the child page block number */
 790                pxd = (pxd_t *) & p->slot[stbl[index]];
 791                bn = addressPXD(pxd);
 792                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
 793
 794                /* unpin the parent page */
 795                DT_PUTPAGE(mp);
 796        }
 797
 798      out:
 799        DT_PUTPAGE(mp);
 800
 801      dtSearch_Exit1:
 802
 803        kfree(ciKey.name);
 804
 805      dtSearch_Exit2:
 806
 807        return rc;
 808}
 809
 810
 811/*
 812 *      dtInsert()
 813 *
 814 * function: insert an entry to directory tree
 815 *
 816 * parameter:
 817 *
 818 * return: 0 - success;
 819 *         errno - failure;
 820 */
 821int dtInsert(tid_t tid, struct inode *ip,
 822         struct component_name * name, ino_t * fsn, struct btstack * btstack)
 823{
 824        int rc = 0;
 825        struct metapage *mp;    /* meta-page buffer */
 826        dtpage_t *p;            /* base B+-tree index page */
 827        s64 bn;
 828        int index;
 829        struct dtsplit split;   /* split information */
 830        ddata_t data;
 831        struct dt_lock *dtlck;
 832        int n;
 833        struct tlock *tlck;
 834        struct lv *lv;
 835
 836        /*
 837         *      retrieve search result
 838         *
 839         * dtSearch() returns (leaf page pinned, index at which to insert).
 840         * n.b. dtSearch() may return index of (maxindex + 1) of
 841         * the full page.
 842         */
 843        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
 844
 845        /*
 846         *      insert entry for new key
 847         */
 848        if (DO_INDEX(ip)) {
 849                if (JFS_IP(ip)->next_index == DIREND) {
 850                        DT_PUTPAGE(mp);
 851                        return -EMLINK;
 852                }
 853                n = NDTLEAF(name->namlen);
 854                data.leaf.tid = tid;
 855                data.leaf.ip = ip;
 856        } else {
 857                n = NDTLEAF_LEGACY(name->namlen);
 858                data.leaf.ip = NULL;    /* signifies legacy directory format */
 859        }
 860        data.leaf.ino = *fsn;
 861
 862        /*
 863         *      leaf page does not have enough room for new entry:
 864         *
 865         *      extend/split the leaf page;
 866         *
 867         * dtSplitUp() will insert the entry and unpin the leaf page.
 868         */
 869        if (n > p->header.freecnt) {
 870                split.mp = mp;
 871                split.index = index;
 872                split.nslot = n;
 873                split.key = name;
 874                split.data = &data;
 875                rc = dtSplitUp(tid, ip, &split, btstack);
 876                return rc;
 877        }
 878
 879        /*
 880         *      leaf page does have enough room for new entry:
 881         *
 882         *      insert the new data entry into the leaf page;
 883         */
 884        BT_MARK_DIRTY(mp, ip);
 885        /*
 886         * acquire a transaction lock on the leaf page
 887         */
 888        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
 889        dtlck = (struct dt_lock *) & tlck->lock;
 890        ASSERT(dtlck->index == 0);
 891        lv = & dtlck->lv[0];
 892
 893        /* linelock header */
 894        lv->offset = 0;
 895        lv->length = 1;
 896        dtlck->index++;
 897
 898        dtInsertEntry(p, index, name, &data, &dtlck);
 899
 900        /* linelock stbl of non-root leaf page */
 901        if (!(p->header.flag & BT_ROOT)) {
 902                if (dtlck->index >= dtlck->maxcnt)
 903                        dtlck = (struct dt_lock *) txLinelock(dtlck);
 904                lv = & dtlck->lv[dtlck->index];
 905                n = index >> L2DTSLOTSIZE;
 906                lv->offset = p->header.stblindex + n;
 907                lv->length =
 908                    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
 909                dtlck->index++;
 910        }
 911
 912        /* unpin the leaf page */
 913        DT_PUTPAGE(mp);
 914
 915        return 0;
 916}
 917
 918
 919/*
 920 *      dtSplitUp()
 921 *
 922 * function: propagate insertion bottom up;
 923 *
 924 * parameter:
 925 *
 926 * return: 0 - success;
 927 *         errno - failure;
 928 *      leaf page unpinned;
 929 */
 930static int dtSplitUp(tid_t tid,
 931          struct inode *ip, struct dtsplit * split, struct btstack * btstack)
 932{
 933        struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
 934        int rc = 0;
 935        struct metapage *smp;
 936        dtpage_t *sp;           /* split page */
 937        struct metapage *rmp;
 938        dtpage_t *rp;           /* new right page split from sp */
 939        pxd_t rpxd;             /* new right page extent descriptor */
 940        struct metapage *lmp;
 941        dtpage_t *lp;           /* left child page */
 942        int skip;               /* index of entry of insertion */
 943        struct btframe *parent; /* parent page entry on traverse stack */
 944        s64 xaddr, nxaddr;
 945        int xlen, xsize;
 946        struct pxdlist pxdlist;
 947        pxd_t *pxd;
 948        struct component_name key = { 0, NULL };
 949        ddata_t *data = split->data;
 950        int n;
 951        struct dt_lock *dtlck;
 952        struct tlock *tlck;
 953        struct lv *lv;
 954        int quota_allocation = 0;
 955
 956        /* get split page */
 957        smp = split->mp;
 958        sp = DT_PAGE(ip, smp);
 959
 960        key.name = kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), GFP_NOFS);
 961        if (!key.name) {
 962                DT_PUTPAGE(smp);
 963                rc = -ENOMEM;
 964                goto dtSplitUp_Exit;
 965        }
 966
 967        /*
 968         *      split leaf page
 969         *
 970         * The split routines insert the new entry, and
 971         * acquire txLock as appropriate.
 972         */
 973        /*
 974         *      split root leaf page:
 975         */
 976        if (sp->header.flag & BT_ROOT) {
 977                /*
 978                 * allocate a single extent child page
 979                 */
 980                xlen = 1;
 981                n = sbi->bsize >> L2DTSLOTSIZE;
 982                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
 983                n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
 984                if (n <= split->nslot)
 985                        xlen++;
 986                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
 987                        DT_PUTPAGE(smp);
 988                        goto freeKeyName;
 989                }
 990
 991                pxdlist.maxnpxd = 1;
 992                pxdlist.npxd = 0;
 993                pxd = &pxdlist.pxd[0];
 994                PXDaddress(pxd, xaddr);
 995                PXDlength(pxd, xlen);
 996                split->pxdlist = &pxdlist;
 997                rc = dtSplitRoot(tid, ip, split, &rmp);
 998
 999                if (rc)
1000                        dbFree(ip, xaddr, xlen);
1001                else
1002                        DT_PUTPAGE(rmp);
1003
1004                DT_PUTPAGE(smp);
1005
1006                if (!DO_INDEX(ip))
1007                        ip->i_size = xlen << sbi->l2bsize;
1008
1009                goto freeKeyName;
1010        }
1011
1012        /*
1013         *      extend first leaf page
1014         *
1015         * extend the 1st extent if less than buffer page size
1016         * (dtExtendPage() reurns leaf page unpinned)
1017         */
1018        pxd = &sp->header.self;
1019        xlen = lengthPXD(pxd);
1020        xsize = xlen << sbi->l2bsize;
1021        if (xsize < PSIZE) {
1022                xaddr = addressPXD(pxd);
1023                n = xsize >> L2DTSLOTSIZE;
1024                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
1025                if ((n + sp->header.freecnt) <= split->nslot)
1026                        n = xlen + (xlen << 1);
1027                else
1028                        n = xlen;
1029
1030                /* Allocate blocks to quota. */
1031                rc = dquot_alloc_block(ip, n);
1032                if (rc)
1033                        goto extendOut;
1034                quota_allocation += n;
1035
1036                if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
1037                                    (s64) n, &nxaddr)))
1038                        goto extendOut;
1039
1040                pxdlist.maxnpxd = 1;
1041                pxdlist.npxd = 0;
1042                pxd = &pxdlist.pxd[0];
1043                PXDaddress(pxd, nxaddr)
1044                    PXDlength(pxd, xlen + n);
1045                split->pxdlist = &pxdlist;
1046                if ((rc = dtExtendPage(tid, ip, split, btstack))) {
1047                        nxaddr = addressPXD(pxd);
1048                        if (xaddr != nxaddr) {
1049                                /* free relocated extent */
1050                                xlen = lengthPXD(pxd);
1051                                dbFree(ip, nxaddr, (s64) xlen);
1052                        } else {
1053                                /* free extended delta */
1054                                xlen = lengthPXD(pxd) - n;
1055                                xaddr = addressPXD(pxd) + xlen;
1056                                dbFree(ip, xaddr, (s64) n);
1057                        }
1058                } else if (!DO_INDEX(ip))
1059                        ip->i_size = lengthPXD(pxd) << sbi->l2bsize;
1060
1061
1062              extendOut:
1063                DT_PUTPAGE(smp);
1064                goto freeKeyName;
1065        }
1066
1067        /*
1068         *      split leaf page <sp> into <sp> and a new right page <rp>.
1069         *
1070         * return <rp> pinned and its extent descriptor <rpxd>
1071         */
1072        /*
1073         * allocate new directory page extent and
1074         * new index page(s) to cover page split(s)
1075         *
1076         * allocation hint: ?
1077         */
1078        n = btstack->nsplit;
1079        pxdlist.maxnpxd = pxdlist.npxd = 0;
1080        xlen = sbi->nbperpage;
1081        for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
1082                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
1083                        PXDaddress(pxd, xaddr);
1084                        PXDlength(pxd, xlen);
1085                        pxdlist.maxnpxd++;
1086                        continue;
1087                }
1088
1089                DT_PUTPAGE(smp);
1090
1091                /* undo allocation */
1092                goto splitOut;
1093        }
1094
1095        split->pxdlist = &pxdlist;
1096        if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
1097                DT_PUTPAGE(smp);
1098
1099                /* undo allocation */
1100                goto splitOut;
1101        }
1102
1103        if (!DO_INDEX(ip))
1104                ip->i_size += PSIZE;
1105
1106        /*
1107         * propagate up the router entry for the leaf page just split
1108         *
1109         * insert a router entry for the new page into the parent page,
1110         * propagate the insert/split up the tree by walking back the stack
1111         * of (bn of parent page, index of child page entry in parent page)
1112         * that were traversed during the search for the page that split.
1113         *
1114         * the propagation of insert/split up the tree stops if the root
1115         * splits or the page inserted into doesn't have to split to hold
1116         * the new entry.
1117         *
1118         * the parent entry for the split page remains the same, and
1119         * a new entry is inserted at its right with the first key and
1120         * block number of the new right page.
1121         *
1122         * There are a maximum of 4 pages pinned at any time:
1123         * two children, left parent and right parent (when the parent splits).
1124         * keep the child pages pinned while working on the parent.
1125         * make sure that all pins are released at exit.
1126         */
1127        while ((parent = BT_POP(btstack)) != NULL) {
1128                /* parent page specified by stack frame <parent> */
1129
1130                /* keep current child pages (<lp>, <rp>) pinned */
1131                lmp = smp;
1132                lp = sp;
1133
1134                /*
1135                 * insert router entry in parent for new right child page <rp>
1136                 */
1137                /* get the parent page <sp> */
1138                DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1139                if (rc) {
1140                        DT_PUTPAGE(lmp);
1141                        DT_PUTPAGE(rmp);
1142                        goto splitOut;
1143                }
1144
1145                /*
1146                 * The new key entry goes ONE AFTER the index of parent entry,
1147                 * because the split was to the right.
1148                 */
1149                skip = parent->index + 1;
1150
1151                /*
1152                 * compute the key for the router entry
1153                 *
1154                 * key suffix compression:
1155                 * for internal pages that have leaf pages as children,
1156                 * retain only what's needed to distinguish between
1157                 * the new entry and the entry on the page to its left.
1158                 * If the keys compare equal, retain the entire key.
1159                 *
1160                 * note that compression is performed only at computing
1161                 * router key at the lowest internal level.
1162                 * further compression of the key between pairs of higher
1163                 * level internal pages loses too much information and
1164                 * the search may fail.
1165                 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
1166                 * results in two adjacent parent entries (a)(xx).
1167                 * if split occurs between these two entries, and
1168                 * if compression is applied, the router key of parent entry
1169                 * of right page (x) will divert search for x into right
1170                 * subtree and miss x in the left subtree.)
1171                 *
1172                 * the entire key must be retained for the next-to-leftmost
1173                 * internal key at any level of the tree, or search may fail
1174                 * (e.g., ?)
1175                 */
1176                switch (rp->header.flag & BT_TYPE) {
1177                case BT_LEAF:
1178                        /*
1179                         * compute the length of prefix for suffix compression
1180                         * between last entry of left page and first entry
1181                         * of right page
1182                         */
1183                        if ((sp->header.flag & BT_ROOT && skip > 1) ||
1184                            sp->header.prev != 0 || skip > 1) {
1185                                /* compute uppercase router prefix key */
1186                                rc = ciGetLeafPrefixKey(lp,
1187                                                        lp->header.nextindex-1,
1188                                                        rp, 0, &key,
1189                                                        sbi->mntflag);
1190                                if (rc) {
1191                                        DT_PUTPAGE(lmp);
1192                                        DT_PUTPAGE(rmp);
1193                                        DT_PUTPAGE(smp);
1194                                        goto splitOut;
1195                                }
1196                        } else {
1197                                /* next to leftmost entry of
1198                                   lowest internal level */
1199
1200                                /* compute uppercase router key */
1201                                dtGetKey(rp, 0, &key, sbi->mntflag);
1202                                key.name[key.namlen] = 0;
1203
1204                                if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
1205                                        ciToUpper(&key);
1206                        }
1207
1208                        n = NDTINTERNAL(key.namlen);
1209                        break;
1210
1211                case BT_INTERNAL:
1212                        dtGetKey(rp, 0, &key, sbi->mntflag);
1213                        n = NDTINTERNAL(key.namlen);
1214                        break;
1215
1216                default:
1217                        jfs_err("dtSplitUp(): UFO!");
1218                        break;
1219                }
1220
1221                /* unpin left child page */
1222                DT_PUTPAGE(lmp);
1223
1224                /*
1225                 * compute the data for the router entry
1226                 */
1227                data->xd = rpxd;        /* child page xd */
1228
1229                /*
1230                 * parent page is full - split the parent page
1231                 */
1232                if (n > sp->header.freecnt) {
1233                        /* init for parent page split */
1234                        split->mp = smp;
1235                        split->index = skip;    /* index at insert */
1236                        split->nslot = n;
1237                        split->key = &key;
1238                        /* split->data = data; */
1239
1240                        /* unpin right child page */
1241                        DT_PUTPAGE(rmp);
1242
1243                        /* The split routines insert the new entry,
1244                         * acquire txLock as appropriate.
1245                         * return <rp> pinned and its block number <rbn>.
1246                         */
1247                        rc = (sp->header.flag & BT_ROOT) ?
1248                            dtSplitRoot(tid, ip, split, &rmp) :
1249                            dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
1250                        if (rc) {
1251                                DT_PUTPAGE(smp);
1252                                goto splitOut;
1253                        }
1254
1255                        /* smp and rmp are pinned */
1256                }
1257                /*
1258                 * parent page is not full - insert router entry in parent page
1259                 */
1260                else {
1261                        BT_MARK_DIRTY(smp, ip);
1262                        /*
1263                         * acquire a transaction lock on the parent page
1264                         */
1265                        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1266                        dtlck = (struct dt_lock *) & tlck->lock;
1267                        ASSERT(dtlck->index == 0);
1268                        lv = & dtlck->lv[0];
1269
1270                        /* linelock header */
1271                        lv->offset = 0;
1272                        lv->length = 1;
1273                        dtlck->index++;
1274
1275                        /* linelock stbl of non-root parent page */
1276                        if (!(sp->header.flag & BT_ROOT)) {
1277                                lv++;
1278                                n = skip >> L2DTSLOTSIZE;
1279                                lv->offset = sp->header.stblindex + n;
1280                                lv->length =
1281                                    ((sp->header.nextindex -
1282                                      1) >> L2DTSLOTSIZE) - n + 1;
1283                                dtlck->index++;
1284                        }
1285
1286                        dtInsertEntry(sp, skip, &key, data, &dtlck);
1287
1288                        /* exit propagate up */
1289                        break;
1290                }
1291        }
1292
1293        /* unpin current split and its right page */
1294        DT_PUTPAGE(smp);
1295        DT_PUTPAGE(rmp);
1296
1297        /*
1298         * free remaining extents allocated for split
1299         */
1300      splitOut:
1301        n = pxdlist.npxd;
1302        pxd = &pxdlist.pxd[n];
1303        for (; n < pxdlist.maxnpxd; n++, pxd++)
1304                dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));
1305
1306      freeKeyName:
1307        kfree(key.name);
1308
1309        /* Rollback quota allocation */
1310        if (rc && quota_allocation)
1311                dquot_free_block(ip, quota_allocation);
1312
1313      dtSplitUp_Exit:
1314
1315        return rc;
1316}
1317
1318
1319/*
1320 *      dtSplitPage()
1321 *
1322 * function: Split a non-root page of a btree.
1323 *
1324 * parameter:
1325 *
1326 * return: 0 - success;
1327 *         errno - failure;
1328 *      return split and new page pinned;
1329 */
1330static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1331            struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
1332{
1333        int rc = 0;
1334        struct metapage *smp;
1335        dtpage_t *sp;
1336        struct metapage *rmp;
1337        dtpage_t *rp;           /* new right page allocated */
1338        s64 rbn;                /* new right page block number */
1339        struct metapage *mp;
1340        dtpage_t *p;
1341        s64 nextbn;
1342        struct pxdlist *pxdlist;
1343        pxd_t *pxd;
1344        int skip, nextindex, half, left, nxt, off, si;
1345        struct ldtentry *ldtentry;
1346        struct idtentry *idtentry;
1347        u8 *stbl;
1348        struct dtslot *f;
1349        int fsi, stblsize;
1350        int n;
1351        struct dt_lock *sdtlck, *rdtlck;
1352        struct tlock *tlck;
1353        struct dt_lock *dtlck;
1354        struct lv *slv, *rlv, *lv;
1355
1356        /* get split page */
1357        smp = split->mp;
1358        sp = DT_PAGE(ip, smp);
1359
1360        /*
1361         * allocate the new right page for the split
1362         */
1363        pxdlist = split->pxdlist;
1364        pxd = &pxdlist->pxd[pxdlist->npxd];
1365        pxdlist->npxd++;
1366        rbn = addressPXD(pxd);
1367        rmp = get_metapage(ip, rbn, PSIZE, 1);
1368        if (rmp == NULL)
1369                return -EIO;
1370
1371        /* Allocate blocks to quota. */
1372        rc = dquot_alloc_block(ip, lengthPXD(pxd));
1373        if (rc) {
1374                release_metapage(rmp);
1375                return rc;
1376        }
1377
1378        jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1379
1380        BT_MARK_DIRTY(rmp, ip);
1381        /*
1382         * acquire a transaction lock on the new right page
1383         */
1384        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1385        rdtlck = (struct dt_lock *) & tlck->lock;
1386
1387        rp = (dtpage_t *) rmp->data;
1388        *rpp = rp;
1389        rp->header.self = *pxd;
1390
1391        BT_MARK_DIRTY(smp, ip);
1392        /*
1393         * acquire a transaction lock on the split page
1394         *
1395         * action:
1396         */
1397        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1398        sdtlck = (struct dt_lock *) & tlck->lock;
1399
1400        /* linelock header of split page */
1401        ASSERT(sdtlck->index == 0);
1402        slv = & sdtlck->lv[0];
1403        slv->offset = 0;
1404        slv->length = 1;
1405        sdtlck->index++;
1406
1407        /*
1408         * initialize/update sibling pointers between sp and rp
1409         */
1410        nextbn = le64_to_cpu(sp->header.next);
1411        rp->header.next = cpu_to_le64(nextbn);
1412        rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1413        sp->header.next = cpu_to_le64(rbn);
1414
1415        /*
1416         * initialize new right page
1417         */
1418        rp->header.flag = sp->header.flag;
1419
1420        /* compute sorted entry table at start of extent data area */
1421        rp->header.nextindex = 0;
1422        rp->header.stblindex = 1;
1423
1424        n = PSIZE >> L2DTSLOTSIZE;
1425        rp->header.maxslot = n;
1426        stblsize = (n + 31) >> L2DTSLOTSIZE;    /* in unit of slot */
1427
1428        /* init freelist */
1429        fsi = rp->header.stblindex + stblsize;
1430        rp->header.freelist = fsi;
1431        rp->header.freecnt = rp->header.maxslot - fsi;
1432
1433        /*
1434         *      sequential append at tail: append without split
1435         *
1436         * If splitting the last page on a level because of appending
1437         * a entry to it (skip is maxentry), it's likely that the access is
1438         * sequential. Adding an empty page on the side of the level is less
1439         * work and can push the fill factor much higher than normal.
1440         * If we're wrong it's no big deal, we'll just do the split the right
1441         * way next time.
1442         * (It may look like it's equally easy to do a similar hack for
1443         * reverse sorted data, that is, split the tree left,
1444         * but it's not. Be my guest.)
1445         */
1446        if (nextbn == 0 && split->index == sp->header.nextindex) {
1447                /* linelock header + stbl (first slot) of new page */
1448                rlv = & rdtlck->lv[rdtlck->index];
1449                rlv->offset = 0;
1450                rlv->length = 2;
1451                rdtlck->index++;
1452
1453                /*
1454                 * initialize freelist of new right page
1455                 */
1456                f = &rp->slot[fsi];
1457                for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1458                        f->next = fsi;
1459                f->next = -1;
1460
1461                /* insert entry at the first entry of the new right page */
1462                dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
1463
1464                goto out;
1465        }
1466
1467        /*
1468         *      non-sequential insert (at possibly middle page)
1469         */
1470
1471        /*
1472         * update prev pointer of previous right sibling page;
1473         */
1474        if (nextbn != 0) {
1475                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1476                if (rc) {
1477                        discard_metapage(rmp);
1478                        return rc;
1479                }
1480
1481                BT_MARK_DIRTY(mp, ip);
1482                /*
1483                 * acquire a transaction lock on the next page
1484                 */
1485                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
1486                jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
1487                        tlck, ip, mp);
1488                dtlck = (struct dt_lock *) & tlck->lock;
1489
1490                /* linelock header of previous right sibling page */
1491                lv = & dtlck->lv[dtlck->index];
1492                lv->offset = 0;
1493                lv->length = 1;
1494                dtlck->index++;
1495
1496                p->header.prev = cpu_to_le64(rbn);
1497
1498                DT_PUTPAGE(mp);
1499        }
1500
1501        /*
1502         * split the data between the split and right pages.
1503         */
1504        skip = split->index;
1505        half = (PSIZE >> L2DTSLOTSIZE) >> 1;    /* swag */
1506        left = 0;
1507
1508        /*
1509         *      compute fill factor for split pages
1510         *
1511         * <nxt> traces the next entry to move to rp
1512         * <off> traces the next entry to stay in sp
1513         */
1514        stbl = (u8 *) & sp->slot[sp->header.stblindex];
1515        nextindex = sp->header.nextindex;
1516        for (nxt = off = 0; nxt < nextindex; ++off) {
1517                if (off == skip)
1518                        /* check for fill factor with new entry size */
1519                        n = split->nslot;
1520                else {
1521                        si = stbl[nxt];
1522                        switch (sp->header.flag & BT_TYPE) {
1523                        case BT_LEAF:
1524                                ldtentry = (struct ldtentry *) & sp->slot[si];
1525                                if (DO_INDEX(ip))
1526                                        n = NDTLEAF(ldtentry->namlen);
1527                                else
1528                                        n = NDTLEAF_LEGACY(ldtentry->
1529                                                           namlen);
1530                                break;
1531
1532                        case BT_INTERNAL:
1533                                idtentry = (struct idtentry *) & sp->slot[si];
1534                                n = NDTINTERNAL(idtentry->namlen);
1535                                break;
1536
1537                        default:
1538                                break;
1539                        }
1540
1541                        ++nxt;  /* advance to next entry to move in sp */
1542                }
1543
1544                left += n;
1545                if (left >= half)
1546                        break;
1547        }
1548
1549        /* <nxt> poins to the 1st entry to move */
1550
1551        /*
1552         *      move entries to right page
1553         *
1554         * dtMoveEntry() initializes rp and reserves entry for insertion
1555         *
1556         * split page moved out entries are linelocked;
1557         * new/right page moved in entries are linelocked;
1558         */
1559        /* linelock header + stbl of new right page */
1560        rlv = & rdtlck->lv[rdtlck->index];
1561        rlv->offset = 0;
1562        rlv->length = 5;
1563        rdtlck->index++;
1564
1565        dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
1566
1567        sp->header.nextindex = nxt;
1568
1569        /*
1570         * finalize freelist of new right page
1571         */
1572        fsi = rp->header.freelist;
1573        f = &rp->slot[fsi];
1574        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1575                f->next = fsi;
1576        f->next = -1;
1577
1578        /*
1579         * Update directory index table for entries now in right page
1580         */
1581        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1582                s64 lblock;
1583
1584                mp = NULL;
1585                stbl = DT_GETSTBL(rp);
1586                for (n = 0; n < rp->header.nextindex; n++) {
1587                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1588                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1589                                     rbn, n, &mp, &lblock);
1590                }
1591                if (mp)
1592                        release_metapage(mp);
1593        }
1594
1595        /*
1596         * the skipped index was on the left page,
1597         */
1598        if (skip <= off) {
1599                /* insert the new entry in the split page */
1600                dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
1601
1602                /* linelock stbl of split page */
1603                if (sdtlck->index >= sdtlck->maxcnt)
1604                        sdtlck = (struct dt_lock *) txLinelock(sdtlck);
1605                slv = & sdtlck->lv[sdtlck->index];
1606                n = skip >> L2DTSLOTSIZE;
1607                slv->offset = sp->header.stblindex + n;
1608                slv->length =
1609                    ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
1610                sdtlck->index++;
1611        }
1612        /*
1613         * the skipped index was on the right page,
1614         */
1615        else {
1616                /* adjust the skip index to reflect the new position */
1617                skip -= nxt;
1618
1619                /* insert the new entry in the right page */
1620                dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
1621        }
1622
1623      out:
1624        *rmpp = rmp;
1625        *rpxdp = *pxd;
1626
1627        return rc;
1628}
1629
1630
1631/*
1632 *      dtExtendPage()
1633 *
1634 * function: extend 1st/only directory leaf page
1635 *
1636 * parameter:
1637 *
1638 * return: 0 - success;
1639 *         errno - failure;
1640 *      return extended page pinned;
1641 */
1642static int dtExtendPage(tid_t tid,
1643             struct inode *ip, struct dtsplit * split, struct btstack * btstack)
1644{
1645        struct super_block *sb = ip->i_sb;
1646        int rc;
1647        struct metapage *smp, *pmp, *mp;
1648        dtpage_t *sp, *pp;
1649        struct pxdlist *pxdlist;
1650        pxd_t *pxd, *tpxd;
1651        int xlen, xsize;
1652        int newstblindex, newstblsize;
1653        int oldstblindex, oldstblsize;
1654        int fsi, last;
1655        struct dtslot *f;
1656        struct btframe *parent;
1657        int n;
1658        struct dt_lock *dtlck;
1659        s64 xaddr, txaddr;
1660        struct tlock *tlck;
1661        struct pxd_lock *pxdlock;
1662        struct lv *lv;
1663        uint type;
1664        struct ldtentry *ldtentry;
1665        u8 *stbl;
1666
1667        /* get page to extend */
1668        smp = split->mp;
1669        sp = DT_PAGE(ip, smp);
1670
1671        /* get parent/root page */
1672        parent = BT_POP(btstack);
1673        DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
1674        if (rc)
1675                return (rc);
1676
1677        /*
1678         *      extend the extent
1679         */
1680        pxdlist = split->pxdlist;
1681        pxd = &pxdlist->pxd[pxdlist->npxd];
1682        pxdlist->npxd++;
1683
1684        xaddr = addressPXD(pxd);
1685        tpxd = &sp->header.self;
1686        txaddr = addressPXD(tpxd);
1687        /* in-place extension */
1688        if (xaddr == txaddr) {
1689                type = tlckEXTEND;
1690        }
1691        /* relocation */
1692        else {
1693                type = tlckNEW;
1694
1695                /* save moved extent descriptor for later free */
1696                tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
1697                pxdlock = (struct pxd_lock *) & tlck->lock;
1698                pxdlock->flag = mlckFREEPXD;
1699                pxdlock->pxd = sp->header.self;
1700                pxdlock->index = 1;
1701
1702                /*
1703                 * Update directory index table to reflect new page address
1704                 */
1705                if (DO_INDEX(ip)) {
1706                        s64 lblock;
1707
1708                        mp = NULL;
1709                        stbl = DT_GETSTBL(sp);
1710                        for (n = 0; n < sp->header.nextindex; n++) {
1711                                ldtentry =
1712                                    (struct ldtentry *) & sp->slot[stbl[n]];
1713                                modify_index(tid, ip,
1714                                             le32_to_cpu(ldtentry->index),
1715                                             xaddr, n, &mp, &lblock);
1716                        }
1717                        if (mp)
1718                                release_metapage(mp);
1719                }
1720        }
1721
1722        /*
1723         *      extend the page
1724         */
1725        sp->header.self = *pxd;
1726
1727        jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
1728
1729        BT_MARK_DIRTY(smp, ip);
1730        /*
1731         * acquire a transaction lock on the extended/leaf page
1732         */
1733        tlck = txLock(tid, ip, smp, tlckDTREE | type);
1734        dtlck = (struct dt_lock *) & tlck->lock;
1735        lv = & dtlck->lv[0];
1736
1737        /* update buffer extent descriptor of extended page */
1738        xlen = lengthPXD(pxd);
1739        xsize = xlen << JFS_SBI(sb)->l2bsize;
1740
1741        /*
1742         * copy old stbl to new stbl at start of extended area
1743         */
1744        oldstblindex = sp->header.stblindex;
1745        oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
1746        newstblindex = sp->header.maxslot;
1747        n = xsize >> L2DTSLOTSIZE;
1748        newstblsize = (n + 31) >> L2DTSLOTSIZE;
1749        memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
1750               sp->header.nextindex);
1751
1752        /*
1753         * in-line extension: linelock old area of extended page
1754         */
1755        if (type == tlckEXTEND) {
1756                /* linelock header */
1757                lv->offset = 0;
1758                lv->length = 1;
1759                dtlck->index++;
1760                lv++;
1761
1762                /* linelock new stbl of extended page */
1763                lv->offset = newstblindex;
1764                lv->length = newstblsize;
1765        }
1766        /*
1767         * relocation: linelock whole relocated area
1768         */
1769        else {
1770                lv->offset = 0;
1771                lv->length = sp->header.maxslot + newstblsize;
1772        }
1773
1774        dtlck->index++;
1775
1776        sp->header.maxslot = n;
1777        sp->header.stblindex = newstblindex;
1778        /* sp->header.nextindex remains the same */
1779
1780        /*
1781         * add old stbl region at head of freelist
1782         */
1783        fsi = oldstblindex;
1784        f = &sp->slot[fsi];
1785        last = sp->header.freelist;
1786        for (n = 0; n < oldstblsize; n++, fsi++, f++) {
1787                f->next = last;
1788                last = fsi;
1789        }
1790        sp->header.freelist = last;
1791        sp->header.freecnt += oldstblsize;
1792
1793        /*
1794         * append free region of newly extended area at tail of freelist
1795         */
1796        /* init free region of newly extended area */
1797        fsi = n = newstblindex + newstblsize;
1798        f = &sp->slot[fsi];
1799        for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
1800                f->next = fsi;
1801        f->next = -1;
1802
1803        /* append new free region at tail of old freelist */
1804        fsi = sp->header.freelist;
1805        if (fsi == -1)
1806                sp->header.freelist = n;
1807        else {
1808                do {
1809                        f = &sp->slot[fsi];
1810                        fsi = f->next;
1811                } while (fsi != -1);
1812
1813                f->next = n;
1814        }
1815
1816        sp->header.freecnt += sp->header.maxslot - n;
1817
1818        /*
1819         * insert the new entry
1820         */
1821        dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
1822
1823        BT_MARK_DIRTY(pmp, ip);
1824        /*
1825         * linelock any freeslots residing in old extent
1826         */
1827        if (type == tlckEXTEND) {
1828                n = sp->header.maxslot >> 2;
1829                if (sp->header.freelist < n)
1830                        dtLinelockFreelist(sp, n, &dtlck);
1831        }
1832
1833        /*
1834         *      update parent entry on the parent/root page
1835         */
1836        /*
1837         * acquire a transaction lock on the parent/root page
1838         */
1839        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
1840        dtlck = (struct dt_lock *) & tlck->lock;
1841        lv = & dtlck->lv[dtlck->index];
1842
1843        /* linelock parent entry - 1st slot */
1844        lv->offset = 1;
1845        lv->length = 1;
1846        dtlck->index++;
1847
1848        /* update the parent pxd for page extension */
1849        tpxd = (pxd_t *) & pp->slot[1];
1850        *tpxd = *pxd;
1851
1852        DT_PUTPAGE(pmp);
1853        return 0;
1854}
1855
1856
1857/*
1858 *      dtSplitRoot()
1859 *
1860 * function:
1861 *      split the full root page into
1862 *      original/root/split page and new right page
1863 *      i.e., root remains fixed in tree anchor (inode) and
1864 *      the root is copied to a single new right child page
1865 *      since root page << non-root page, and
1866 *      the split root page contains a single entry for the
1867 *      new right child page.
1868 *
1869 * parameter:
1870 *
1871 * return: 0 - success;
1872 *         errno - failure;
1873 *      return new page pinned;
1874 */
1875static int dtSplitRoot(tid_t tid,
1876            struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
1877{
1878        struct super_block *sb = ip->i_sb;
1879        struct metapage *smp;
1880        dtroot_t *sp;
1881        struct metapage *rmp;
1882        dtpage_t *rp;
1883        s64 rbn;
1884        int xlen;
1885        int xsize;
1886        struct dtslot *f;
1887        s8 *stbl;
1888        int fsi, stblsize, n;
1889        struct idtentry *s;
1890        pxd_t *ppxd;
1891        struct pxdlist *pxdlist;
1892        pxd_t *pxd;
1893        struct dt_lock *dtlck;
1894        struct tlock *tlck;
1895        struct lv *lv;
1896        int rc;
1897
1898        /* get split root page */
1899        smp = split->mp;
1900        sp = &JFS_IP(ip)->i_dtroot;
1901
1902        /*
1903         *      allocate/initialize a single (right) child page
1904         *
1905         * N.B. at first split, a one (or two) block to fit new entry
1906         * is allocated; at subsequent split, a full page is allocated;
1907         */
1908        pxdlist = split->pxdlist;
1909        pxd = &pxdlist->pxd[pxdlist->npxd];
1910        pxdlist->npxd++;
1911        rbn = addressPXD(pxd);
1912        xlen = lengthPXD(pxd);
1913        xsize = xlen << JFS_SBI(sb)->l2bsize;
1914        rmp = get_metapage(ip, rbn, xsize, 1);
1915        if (!rmp)
1916                return -EIO;
1917
1918        rp = rmp->data;
1919
1920        /* Allocate blocks to quota. */
1921        rc = dquot_alloc_block(ip, lengthPXD(pxd));
1922        if (rc) {
1923                release_metapage(rmp);
1924                return rc;
1925        }
1926
1927        BT_MARK_DIRTY(rmp, ip);
1928        /*
1929         * acquire a transaction lock on the new right page
1930         */
1931        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1932        dtlck = (struct dt_lock *) & tlck->lock;
1933
1934        rp->header.flag =
1935            (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1936        rp->header.self = *pxd;
1937
1938        /* initialize sibling pointers */
1939        rp->header.next = 0;
1940        rp->header.prev = 0;
1941
1942        /*
1943         *      move in-line root page into new right page extent
1944         */
1945        /* linelock header + copied entries + new stbl (1st slot) in new page */
1946        ASSERT(dtlck->index == 0);
1947        lv = & dtlck->lv[0];
1948        lv->offset = 0;
1949        lv->length = 10;        /* 1 + 8 + 1 */
1950        dtlck->index++;
1951
1952        n = xsize >> L2DTSLOTSIZE;
1953        rp->header.maxslot = n;
1954        stblsize = (n + 31) >> L2DTSLOTSIZE;
1955
1956        /* copy old stbl to new stbl at start of extended area */
1957        rp->header.stblindex = DTROOTMAXSLOT;
1958        stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
1959        memcpy(stbl, sp->header.stbl, sp->header.nextindex);
1960        rp->header.nextindex = sp->header.nextindex;
1961
1962        /* copy old data area to start of new data area */
1963        memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
1964
1965        /*
1966         * append free region of newly extended area at tail of freelist
1967         */
1968        /* init free region of newly extended area */
1969        fsi = n = DTROOTMAXSLOT + stblsize;
1970        f = &rp->slot[fsi];
1971        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1972                f->next = fsi;
1973        f->next = -1;
1974
1975        /* append new free region at tail of old freelist */
1976        fsi = sp->header.freelist;
1977        if (fsi == -1)
1978                rp->header.freelist = n;
1979        else {
1980                rp->header.freelist = fsi;
1981
1982                do {
1983                        f = &rp->slot[fsi];
1984                        fsi = f->next;
1985                } while (fsi != -1);
1986
1987                f->next = n;
1988        }
1989
1990        rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
1991
1992        /*
1993         * Update directory index table for entries now in right page
1994         */
1995        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1996                s64 lblock;
1997                struct metapage *mp = NULL;
1998                struct ldtentry *ldtentry;
1999
2000                stbl = DT_GETSTBL(rp);
2001                for (n = 0; n < rp->header.nextindex; n++) {
2002                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
2003                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
2004                                     rbn, n, &mp, &lblock);
2005                }
2006                if (mp)
2007                        release_metapage(mp);
2008        }
2009        /*
2010         * insert the new entry into the new right/child page
2011         * (skip index in the new right page will not change)
2012         */
2013        dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
2014
2015        /*
2016         *      reset parent/root page
2017         *
2018         * set the 1st entry offset to 0, which force the left-most key
2019         * at any level of the tree to be less than any search key.
2020         *
2021         * The btree comparison code guarantees that the left-most key on any
2022         * level of the tree is never used, so it doesn't need to be filled in.
2023         */
2024        BT_MARK_DIRTY(smp, ip);
2025        /*
2026         * acquire a transaction lock on the root page (in-memory inode)
2027         */
2028        tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
2029        dtlck = (struct dt_lock *) & tlck->lock;
2030
2031        /* linelock root */
2032        ASSERT(dtlck->index == 0);
2033        lv = & dtlck->lv[0];
2034        lv->offset = 0;
2035        lv->length = DTROOTMAXSLOT;
2036        dtlck->index++;
2037
2038        /* update page header of root */
2039        if (sp->header.flag & BT_LEAF) {
2040                sp->header.flag &= ~BT_LEAF;
2041                sp->header.flag |= BT_INTERNAL;
2042        }
2043
2044        /* init the first entry */
2045        s = (struct idtentry *) & sp->slot[DTENTRYSTART];
2046        ppxd = (pxd_t *) s;
2047        *ppxd = *pxd;
2048        s->next = -1;
2049        s->namlen = 0;
2050
2051        stbl = sp->header.stbl;
2052        stbl[0] = DTENTRYSTART;
2053        sp->header.nextindex = 1;
2054
2055        /* init freelist */
2056        fsi = DTENTRYSTART + 1;
2057        f = &sp->slot[fsi];
2058
2059        /* init free region of remaining area */
2060        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2061                f->next = fsi;
2062        f->next = -1;
2063
2064        sp->header.freelist = DTENTRYSTART + 1;
2065        sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
2066
2067        *rmpp = rmp;
2068
2069        return 0;
2070}
2071
2072
2073/*
2074 *      dtDelete()
2075 *
2076 * function: delete the entry(s) referenced by a key.
2077 *
2078 * parameter:
2079 *
2080 * return:
2081 */
2082int dtDelete(tid_t tid,
2083         struct inode *ip, struct component_name * key, ino_t * ino, int flag)
2084{
2085        int rc = 0;
2086        s64 bn;
2087        struct metapage *mp, *imp;
2088        dtpage_t *p;
2089        int index;
2090        struct btstack btstack;
2091        struct dt_lock *dtlck;
2092        struct tlock *tlck;
2093        struct lv *lv;
2094        int i;
2095        struct ldtentry *ldtentry;
2096        u8 *stbl;
2097        u32 table_index, next_index;
2098        struct metapage *nmp;
2099        dtpage_t *np;
2100
2101        /*
2102         *      search for the entry to delete:
2103         *
2104         * dtSearch() returns (leaf page pinned, index at which to delete).
2105         */
2106        if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
2107                return rc;
2108
2109        /* retrieve search result */
2110        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2111
2112        /*
2113         * We need to find put the index of the next entry into the
2114         * directory index table in order to resume a readdir from this
2115         * entry.
2116         */
2117        if (DO_INDEX(ip)) {
2118                stbl = DT_GETSTBL(p);
2119                ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
2120                table_index = le32_to_cpu(ldtentry->index);
2121                if (index == (p->header.nextindex - 1)) {
2122                        /*
2123                         * Last entry in this leaf page
2124                         */
2125                        if ((p->header.flag & BT_ROOT)
2126                            || (p->header.next == 0))
2127                                next_index = -1;
2128                        else {
2129                                /* Read next leaf page */
2130                                DT_GETPAGE(ip, le64_to_cpu(p->header.next),
2131                                           nmp, PSIZE, np, rc);
2132                                if (rc)
2133                                        next_index = -1;
2134                                else {
2135                                        stbl = DT_GETSTBL(np);
2136                                        ldtentry =
2137                                            (struct ldtentry *) & np->
2138                                            slot[stbl[0]];
2139                                        next_index =
2140                                            le32_to_cpu(ldtentry->index);
2141                                        DT_PUTPAGE(nmp);
2142                                }
2143                        }
2144                } else {
2145                        ldtentry =
2146                            (struct ldtentry *) & p->slot[stbl[index + 1]];
2147                        next_index = le32_to_cpu(ldtentry->index);
2148                }
2149                free_index(tid, ip, table_index, next_index);
2150        }
2151        /*
2152         * the leaf page becomes empty, delete the page
2153         */
2154        if (p->header.nextindex == 1) {
2155                /* delete empty page */
2156                rc = dtDeleteUp(tid, ip, mp, p, &btstack);
2157        }
2158        /*
2159         * the leaf page has other entries remaining:
2160         *
2161         * delete the entry from the leaf page.
2162         */
2163        else {
2164                BT_MARK_DIRTY(mp, ip);
2165                /*
2166                 * acquire a transaction lock on the leaf page
2167                 */
2168                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2169                dtlck = (struct dt_lock *) & tlck->lock;
2170
2171                /*
2172                 * Do not assume that dtlck->index will be zero.  During a
2173                 * rename within a directory, this transaction may have
2174                 * modified this page already when adding the new entry.
2175                 */
2176
2177                /* linelock header */
2178                if (dtlck->index >= dtlck->maxcnt)
2179                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2180                lv = & dtlck->lv[dtlck->index];
2181                lv->offset = 0;
2182                lv->length = 1;
2183                dtlck->index++;
2184
2185                /* linelock stbl of non-root leaf page */
2186                if (!(p->header.flag & BT_ROOT)) {
2187                        if (dtlck->index >= dtlck->maxcnt)
2188                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2189                        lv = & dtlck->lv[dtlck->index];
2190                        i = index >> L2DTSLOTSIZE;
2191                        lv->offset = p->header.stblindex + i;
2192                        lv->length =
2193                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2194                            i + 1;
2195                        dtlck->index++;
2196                }
2197
2198                /* free the leaf entry */
2199                dtDeleteEntry(p, index, &dtlck);
2200
2201                /*
2202                 * Update directory index table for entries moved in stbl
2203                 */
2204                if (DO_INDEX(ip) && index < p->header.nextindex) {
2205                        s64 lblock;
2206
2207                        imp = NULL;
2208                        stbl = DT_GETSTBL(p);
2209                        for (i = index; i < p->header.nextindex; i++) {
2210                                ldtentry =
2211                                    (struct ldtentry *) & p->slot[stbl[i]];
2212                                modify_index(tid, ip,
2213                                             le32_to_cpu(ldtentry->index),
2214                                             bn, i, &imp, &lblock);
2215                        }
2216                        if (imp)
2217                                release_metapage(imp);
2218                }
2219
2220                DT_PUTPAGE(mp);
2221        }
2222
2223        return rc;
2224}
2225
2226
2227/*
2228 *      dtDeleteUp()
2229 *
2230 * function:
2231 *      free empty pages as propagating deletion up the tree
2232 *
2233 * parameter:
2234 *
2235 * return:
2236 */
2237static int dtDeleteUp(tid_t tid, struct inode *ip,
2238           struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
2239{
2240        int rc = 0;
2241        struct metapage *mp;
2242        dtpage_t *p;
2243        int index, nextindex;
2244        int xlen;
2245        struct btframe *parent;
2246        struct dt_lock *dtlck;
2247        struct tlock *tlck;
2248        struct lv *lv;
2249        struct pxd_lock *pxdlock;
2250        int i;
2251
2252        /*
2253         *      keep the root leaf page which has become empty
2254         */
2255        if (BT_IS_ROOT(fmp)) {
2256                /*
2257                 * reset the root
2258                 *
2259                 * dtInitRoot() acquires txlock on the root
2260                 */
2261                dtInitRoot(tid, ip, PARENT(ip));
2262
2263                DT_PUTPAGE(fmp);
2264
2265                return 0;
2266        }
2267
2268        /*
2269         *      free the non-root leaf page
2270         */
2271        /*
2272         * acquire a transaction lock on the page
2273         *
2274         * write FREEXTENT|NOREDOPAGE log record
2275         * N.B. linelock is overlaid as freed extent descriptor, and
2276         * the buffer page is freed;
2277         */
2278        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2279        pxdlock = (struct pxd_lock *) & tlck->lock;
2280        pxdlock->flag = mlckFREEPXD;
2281        pxdlock->pxd = fp->header.self;
2282        pxdlock->index = 1;
2283
2284        /* update sibling pointers */
2285        if ((rc = dtRelink(tid, ip, fp))) {
2286                BT_PUTPAGE(fmp);
2287                return rc;
2288        }
2289
2290        xlen = lengthPXD(&fp->header.self);
2291
2292        /* Free quota allocation. */
2293        dquot_free_block(ip, xlen);
2294
2295        /* free/invalidate its buffer page */
2296        discard_metapage(fmp);
2297
2298        /*
2299         *      propagate page deletion up the directory tree
2300         *
2301         * If the delete from the parent page makes it empty,
2302         * continue all the way up the tree.
2303         * stop if the root page is reached (which is never deleted) or
2304         * if the entry deletion does not empty the page.
2305         */
2306        while ((parent = BT_POP(btstack)) != NULL) {
2307                /* pin the parent page <sp> */
2308                DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2309                if (rc)
2310                        return rc;
2311
2312                /*
2313                 * free the extent of the child page deleted
2314                 */
2315                index = parent->index;
2316
2317                /*
2318                 * delete the entry for the child page from parent
2319                 */
2320                nextindex = p->header.nextindex;
2321
2322                /*
2323                 * the parent has the single entry being deleted:
2324                 *
2325                 * free the parent page which has become empty.
2326                 */
2327                if (nextindex == 1) {
2328                        /*
2329                         * keep the root internal page which has become empty
2330                         */
2331                        if (p->header.flag & BT_ROOT) {
2332                                /*
2333                                 * reset the root
2334                                 *
2335                                 * dtInitRoot() acquires txlock on the root
2336                                 */
2337                                dtInitRoot(tid, ip, PARENT(ip));
2338
2339                                DT_PUTPAGE(mp);
2340
2341                                return 0;
2342                        }
2343                        /*
2344                         * free the parent page
2345                         */
2346                        else {
2347                                /*
2348                                 * acquire a transaction lock on the page
2349                                 *
2350                                 * write FREEXTENT|NOREDOPAGE log record
2351                                 */
2352                                tlck =
2353                                    txMaplock(tid, ip,
2354                                              tlckDTREE | tlckFREE);
2355                                pxdlock = (struct pxd_lock *) & tlck->lock;
2356                                pxdlock->flag = mlckFREEPXD;
2357                                pxdlock->pxd = p->header.self;
2358                                pxdlock->index = 1;
2359
2360                                /* update sibling pointers */
2361                                if ((rc = dtRelink(tid, ip, p))) {
2362                                        DT_PUTPAGE(mp);
2363                                        return rc;
2364                                }
2365
2366                                xlen = lengthPXD(&p->header.self);
2367
2368                                /* Free quota allocation */
2369                                dquot_free_block(ip, xlen);
2370
2371                                /* free/invalidate its buffer page */
2372                                discard_metapage(mp);
2373
2374                                /* propagate up */
2375                                continue;
2376                        }
2377                }
2378
2379                /*
2380                 * the parent has other entries remaining:
2381                 *
2382                 * delete the router entry from the parent page.
2383                 */
2384                BT_MARK_DIRTY(mp, ip);
2385                /*
2386                 * acquire a transaction lock on the page
2387                 *
2388                 * action: router entry deletion
2389                 */
2390                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2391                dtlck = (struct dt_lock *) & tlck->lock;
2392
2393                /* linelock header */
2394                if (dtlck->index >= dtlck->maxcnt)
2395                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2396                lv = & dtlck->lv[dtlck->index];
2397                lv->offset = 0;
2398                lv->length = 1;
2399                dtlck->index++;
2400
2401                /* linelock stbl of non-root leaf page */
2402                if (!(p->header.flag & BT_ROOT)) {
2403                        if (dtlck->index < dtlck->maxcnt)
2404                                lv++;
2405                        else {
2406                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2407                                lv = & dtlck->lv[0];
2408                        }
2409                        i = index >> L2DTSLOTSIZE;
2410                        lv->offset = p->header.stblindex + i;
2411                        lv->length =
2412                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2413                            i + 1;
2414                        dtlck->index++;
2415                }
2416
2417                /* free the router entry */
2418                dtDeleteEntry(p, index, &dtlck);
2419
2420                /* reset key of new leftmost entry of level (for consistency) */
2421                if (index == 0 &&
2422                    ((p->header.flag & BT_ROOT) || p->header.prev == 0))
2423                        dtTruncateEntry(p, 0, &dtlck);
2424
2425                /* unpin the parent page */
2426                DT_PUTPAGE(mp);
2427
2428                /* exit propagation up */
2429                break;
2430        }
2431
2432        if (!DO_INDEX(ip))
2433                ip->i_size -= PSIZE;
2434
2435        return 0;
2436}
2437
2438#ifdef _NOTYET
2439/*
2440 * NAME:        dtRelocate()
2441 *
2442 * FUNCTION:    relocate dtpage (internal or leaf) of directory;
2443 *              This function is mainly used by defragfs utility.
2444 */
2445int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2446               s64 nxaddr)
2447{
2448        int rc = 0;
2449        struct metapage *mp, *pmp, *lmp, *rmp;
2450        dtpage_t *p, *pp, *rp = 0, *lp= 0;
2451        s64 bn;
2452        int index;
2453        struct btstack btstack;
2454        pxd_t *pxd;
2455        s64 oxaddr, nextbn, prevbn;
2456        int xlen, xsize;
2457        struct tlock *tlck;
2458        struct dt_lock *dtlck;
2459        struct pxd_lock *pxdlock;
2460        s8 *stbl;
2461        struct lv *lv;
2462
2463        oxaddr = addressPXD(opxd);
2464        xlen = lengthPXD(opxd);
2465
2466        jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d",
2467                   (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr,
2468                   xlen);
2469
2470        /*
2471         *      1. get the internal parent dtpage covering
2472         *      router entry for the tartget page to be relocated;
2473         */
2474        rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
2475        if (rc)
2476                return rc;
2477
2478        /* retrieve search result */
2479        DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2480        jfs_info("dtRelocate: parent router entry validated.");
2481
2482        /*
2483         *      2. relocate the target dtpage
2484         */
2485        /* read in the target page from src extent */
2486        DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2487        if (rc) {
2488                /* release the pinned parent page */
2489                DT_PUTPAGE(pmp);
2490                return rc;
2491        }
2492
2493        /*
2494         * read in sibling pages if any to update sibling pointers;
2495         */
2496        rmp = NULL;
2497        if (p->header.next) {
2498                nextbn = le64_to_cpu(p->header.next);
2499                DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2500                if (rc) {
2501                        DT_PUTPAGE(mp);
2502                        DT_PUTPAGE(pmp);
2503                        return (rc);
2504                }
2505        }
2506
2507        lmp = NULL;
2508        if (p->header.prev) {
2509                prevbn = le64_to_cpu(p->header.prev);
2510                DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2511                if (rc) {
2512                        DT_PUTPAGE(mp);
2513                        DT_PUTPAGE(pmp);
2514                        if (rmp)
2515                                DT_PUTPAGE(rmp);
2516                        return (rc);
2517                }
2518        }
2519
2520        /* at this point, all xtpages to be updated are in memory */
2521
2522        /*
2523         * update sibling pointers of sibling dtpages if any;
2524         */
2525        if (lmp) {
2526                tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
2527                dtlck = (struct dt_lock *) & tlck->lock;
2528                /* linelock header */
2529                ASSERT(dtlck->index == 0);
2530                lv = & dtlck->lv[0];
2531                lv->offset = 0;
2532                lv->length = 1;
2533                dtlck->index++;
2534
2535                lp->header.next = cpu_to_le64(nxaddr);
2536                DT_PUTPAGE(lmp);
2537        }
2538
2539        if (rmp) {
2540                tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
2541                dtlck = (struct dt_lock *) & tlck->lock;
2542                /* linelock header */
2543                ASSERT(dtlck->index == 0);
2544                lv = & dtlck->lv[0];
2545                lv->offset = 0;
2546                lv->length = 1;
2547                dtlck->index++;
2548
2549                rp->header.prev = cpu_to_le64(nxaddr);
2550                DT_PUTPAGE(rmp);
2551        }
2552
2553        /*
2554         * update the target dtpage to be relocated
2555         *
2556         * write LOG_REDOPAGE of LOG_NEW type for dst page
2557         * for the whole target page (logredo() will apply
2558         * after image and update bmap for allocation of the
2559         * dst extent), and update bmap for allocation of
2560         * the dst extent;
2561         */
2562        tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
2563        dtlck = (struct dt_lock *) & tlck->lock;
2564        /* linelock header */
2565        ASSERT(dtlck->index == 0);
2566        lv = & dtlck->lv[0];
2567
2568        /* update the self address in the dtpage header */
2569        pxd = &p->header.self;
2570        PXDaddress(pxd, nxaddr);
2571
2572        /* the dst page is the same as the src page, i.e.,
2573         * linelock for afterimage of the whole page;
2574         */
2575        lv->offset = 0;
2576        lv->length = p->header.maxslot;
2577        dtlck->index++;
2578
2579        /* update the buffer extent descriptor of the dtpage */
2580        xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2581
2582        /* unpin the relocated page */
2583        DT_PUTPAGE(mp);
2584        jfs_info("dtRelocate: target dtpage relocated.");
2585
2586        /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec
2587         * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
2588         * will also force a bmap update ).
2589         */
2590
2591        /*
2592         *      3. acquire maplock for the source extent to be freed;
2593         */
2594        /* for dtpage relocation, write a LOG_NOREDOPAGE record
2595         * for the source dtpage (logredo() will init NoRedoPage
2596         * filter and will also update bmap for free of the source
2597         * dtpage), and upadte bmap for free of the source dtpage;
2598         */
2599        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2600        pxdlock = (struct pxd_lock *) & tlck->lock;
2601        pxdlock->flag = mlckFREEPXD;
2602        PXDaddress(&pxdlock->pxd, oxaddr);
2603        PXDlength(&pxdlock->pxd, xlen);
2604        pxdlock->index = 1;
2605
2606        /*
2607         *      4. update the parent router entry for relocation;
2608         *
2609         * acquire tlck for the parent entry covering the target dtpage;
2610         * write LOG_REDOPAGE to apply after image only;
2611         */
2612        jfs_info("dtRelocate: update parent router entry.");
2613        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
2614        dtlck = (struct dt_lock *) & tlck->lock;
2615        lv = & dtlck->lv[dtlck->index];
2616
2617        /* update the PXD with the new address */
2618        stbl = DT_GETSTBL(pp);
2619        pxd = (pxd_t *) & pp->slot[stbl[index]];
2620        PXDaddress(pxd, nxaddr);
2621        lv->offset = stbl[index];
2622        lv->length = 1;
2623        dtlck->index++;
2624
2625        /* unpin the parent dtpage */
2626        DT_PUTPAGE(pmp);
2627
2628        return rc;
2629}
2630
2631/*
2632 * NAME:        dtSearchNode()
2633 *
2634 * FUNCTION:    Search for an dtpage containing a specified address
2635 *              This function is mainly used by defragfs utility.
2636 *
2637 * NOTE:        Search result on stack, the found page is pinned at exit.
2638 *              The result page must be an internal dtpage.
2639 *              lmxaddr give the address of the left most page of the
2640 *              dtree level, in which the required dtpage resides.
2641 */
2642static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2643                        struct btstack * btstack)
2644{
2645        int rc = 0;
2646        s64 bn;
2647        struct metapage *mp;
2648        dtpage_t *p;
2649        int psize = 288;        /* initial in-line directory */
2650        s8 *stbl;
2651        int i;
2652        pxd_t *pxd;
2653        struct btframe *btsp;
2654
2655        BT_CLR(btstack);        /* reset stack */
2656
2657        /*
2658         *      descend tree to the level with specified leftmost page
2659         *
2660         *  by convention, root bn = 0.
2661         */
2662        for (bn = 0;;) {
2663                /* get/pin the page to search */
2664                DT_GETPAGE(ip, bn, mp, psize, p, rc);
2665                if (rc)
2666                        return rc;
2667
2668                /* does the xaddr of leftmost page of the levevl
2669                 * matches levevl search key ?
2670                 */
2671                if (p->header.flag & BT_ROOT) {
2672                        if (lmxaddr == 0)
2673                                break;
2674                } else if (addressPXD(&p->header.self) == lmxaddr)
2675                        break;
2676
2677                /*
2678                 * descend down to leftmost child page
2679                 */
2680                if (p->header.flag & BT_LEAF) {
2681                        DT_PUTPAGE(mp);
2682                        return -ESTALE;
2683                }
2684
2685                /* get the leftmost entry */
2686                stbl = DT_GETSTBL(p);
2687                pxd = (pxd_t *) & p->slot[stbl[0]];
2688
2689                /* get the child page block address */
2690                bn = addressPXD(pxd);
2691                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
2692                /* unpin the parent page */
2693                DT_PUTPAGE(mp);
2694        }
2695
2696        /*
2697         *      search each page at the current levevl
2698         */
2699      loop:
2700        stbl = DT_GETSTBL(p);
2701        for (i = 0; i < p->header.nextindex; i++) {
2702                pxd = (pxd_t *) & p->slot[stbl[i]];
2703
2704                /* found the specified router entry */
2705                if (addressPXD(pxd) == addressPXD(kpxd) &&
2706                    lengthPXD(pxd) == lengthPXD(kpxd)) {
2707                        btsp = btstack->top;
2708                        btsp->bn = bn;
2709                        btsp->index = i;
2710                        btsp->mp = mp;
2711
2712                        return 0;
2713                }
2714        }
2715
2716        /* get the right sibling page if any */
2717        if (p->header.next)
2718                bn = le64_to_cpu(p->header.next);
2719        else {
2720                DT_PUTPAGE(mp);
2721                return -ESTALE;
2722        }
2723
2724        /* unpin current page */
2725        DT_PUTPAGE(mp);
2726
2727        /* get the right sibling page */
2728        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2729        if (rc)
2730                return rc;
2731
2732        goto loop;
2733}
2734#endif /* _NOTYET */
2735
2736/*
2737 *      dtRelink()
2738 *
2739 * function:
2740 *      link around a freed page.
2741 *
2742 * parameter:
2743 *      fp:     page to be freed
2744 *
2745 * return:
2746 */
2747static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
2748{
2749        int rc;
2750        struct metapage *mp;
2751        s64 nextbn, prevbn;
2752        struct tlock *tlck;
2753        struct dt_lock *dtlck;
2754        struct lv *lv;
2755
2756        nextbn = le64_to_cpu(p->header.next);
2757        prevbn = le64_to_cpu(p->header.prev);
2758
2759        /* update prev pointer of the next page */
2760        if (nextbn != 0) {
2761                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
2762                if (rc)
2763                        return rc;
2764
2765                BT_MARK_DIRTY(mp, ip);
2766                /*
2767                 * acquire a transaction lock on the next page
2768                 *
2769                 * action: update prev pointer;
2770                 */
2771                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2772                jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2773                        tlck, ip, mp);
2774                dtlck = (struct dt_lock *) & tlck->lock;
2775
2776                /* linelock header */
2777                if (dtlck->index >= dtlck->maxcnt)
2778                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2779                lv = & dtlck->lv[dtlck->index];
2780                lv->offset = 0;
2781                lv->length = 1;
2782                dtlck->index++;
2783
2784                p->header.prev = cpu_to_le64(prevbn);
2785                DT_PUTPAGE(mp);
2786        }
2787
2788        /* update next pointer of the previous page */
2789        if (prevbn != 0) {
2790                DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
2791                if (rc)
2792                        return rc;
2793
2794                BT_MARK_DIRTY(mp, ip);
2795                /*
2796                 * acquire a transaction lock on the prev page
2797                 *
2798                 * action: update next pointer;
2799                 */
2800                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2801                jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2802                        tlck, ip, mp);
2803                dtlck = (struct dt_lock *) & tlck->lock;
2804
2805                /* linelock header */
2806                if (dtlck->index >= dtlck->maxcnt)
2807                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2808                lv = & dtlck->lv[dtlck->index];
2809                lv->offset = 0;
2810                lv->length = 1;
2811                dtlck->index++;
2812
2813                p->header.next = cpu_to_le64(nextbn);
2814                DT_PUTPAGE(mp);
2815        }
2816
2817        return 0;
2818}
2819
2820
2821/*
2822 *      dtInitRoot()
2823 *
2824 * initialize directory root (inline in inode)
2825 */
2826void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
2827{
2828        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2829        dtroot_t *p;
2830        int fsi;
2831        struct dtslot *f;
2832        struct tlock *tlck;
2833        struct dt_lock *dtlck;
2834        struct lv *lv;
2835        u16 xflag_save;
2836
2837        /*
2838         * If this was previously an non-empty directory, we need to remove
2839         * the old directory table.
2840         */
2841        if (DO_INDEX(ip)) {
2842                if (!jfs_dirtable_inline(ip)) {
2843                        struct tblock *tblk = tid_to_tblock(tid);
2844                        /*
2845                         * We're playing games with the tid's xflag.  If
2846                         * we're removing a regular file, the file's xtree
2847                         * is committed with COMMIT_PMAP, but we always
2848                         * commit the directories xtree with COMMIT_PWMAP.
2849                         */
2850                        xflag_save = tblk->xflag;
2851                        tblk->xflag = 0;
2852                        /*
2853                         * xtTruncate isn't guaranteed to fully truncate
2854                         * the xtree.  The caller needs to check i_size
2855                         * after committing the transaction to see if
2856                         * additional truncation is needed.  The
2857                         * COMMIT_Stale flag tells caller that we
2858                         * initiated the truncation.
2859                         */
2860                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
2861                        set_cflag(COMMIT_Stale, ip);
2862
2863                        tblk->xflag = xflag_save;
2864                } else
2865                        ip->i_size = 1;
2866
2867                jfs_ip->next_index = 2;
2868        } else
2869                ip->i_size = IDATASIZE;
2870
2871        /*
2872         * acquire a transaction lock on the root
2873         *
2874         * action: directory initialization;
2875         */
2876        tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
2877                      tlckDTREE | tlckENTRY | tlckBTROOT);
2878        dtlck = (struct dt_lock *) & tlck->lock;
2879
2880        /* linelock root */
2881        ASSERT(dtlck->index == 0);
2882        lv = & dtlck->lv[0];
2883        lv->offset = 0;
2884        lv->length = DTROOTMAXSLOT;
2885        dtlck->index++;
2886
2887        p = &jfs_ip->i_dtroot;
2888
2889        p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2890
2891        p->header.nextindex = 0;
2892
2893        /* init freelist */
2894        fsi = 1;
2895        f = &p->slot[fsi];
2896
2897        /* init data area of root */
2898        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2899                f->next = fsi;
2900        f->next = -1;
2901
2902        p->header.freelist = 1;
2903        p->header.freecnt = 8;
2904
2905        /* init '..' entry */
2906        p->header.idotdot = cpu_to_le32(idotdot);
2907
2908        return;
2909}
2910
2911/*
2912 *      add_missing_indices()
2913 *
2914 * function: Fix dtree page in which one or more entries has an invalid index.
2915 *           fsck.jfs should really fix this, but it currently does not.
2916 *           Called from jfs_readdir when bad index is detected.
2917 */
2918static void add_missing_indices(struct inode *inode, s64 bn)
2919{
2920        struct ldtentry *d;
2921        struct dt_lock *dtlck;
2922        int i;
2923        uint index;
2924        struct lv *lv;
2925        struct metapage *mp;
2926        dtpage_t *p;
2927        int rc;
2928        s8 *stbl;
2929        tid_t tid;
2930        struct tlock *tlck;
2931
2932        tid = txBegin(inode->i_sb, 0);
2933
2934        DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);
2935
2936        if (rc) {
2937                printk(KERN_ERR "DT_GETPAGE failed!\n");
2938                goto end;
2939        }
2940        BT_MARK_DIRTY(mp, inode);
2941
2942        ASSERT(p->header.flag & BT_LEAF);
2943
2944        tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
2945        if (BT_IS_ROOT(mp))
2946                tlck->type |= tlckBTROOT;
2947
2948        dtlck = (struct dt_lock *) &tlck->lock;
2949
2950        stbl = DT_GETSTBL(p);
2951        for (i = 0; i < p->header.nextindex; i++) {
2952                d = (struct ldtentry *) &p->slot[stbl[i]];
2953                index = le32_to_cpu(d->index);
2954                if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
2955                        d->index = cpu_to_le32(add_index(tid, inode, bn, i));
2956                        if (dtlck->index >= dtlck->maxcnt)
2957                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2958                        lv = &dtlck->lv[dtlck->index];
2959                        lv->offset = stbl[i];
2960                        lv->length = 1;
2961                        dtlck->index++;
2962                }
2963        }
2964
2965        DT_PUTPAGE(mp);
2966        (void) txCommit(tid, 1, &inode, 0);
2967end:
2968        txEnd(tid);
2969}
2970
2971/*
2972 * Buffer to hold directory entry info while traversing a dtree page
2973 * before being fed to the filldir function
2974 */
2975struct jfs_dirent {
2976        loff_t position;
2977        int ino;
2978        u16 name_len;
2979        char name[0];
2980};
2981
2982/*
2983 * function to determine next variable-sized jfs_dirent in buffer
2984 */
2985static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
2986{
2987        return (struct jfs_dirent *)
2988                ((char *)dirent +
2989                 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
2990                   sizeof (loff_t) - 1) &
2991                  ~(sizeof (loff_t) - 1)));
2992}
2993
2994/*
2995 *      jfs_readdir()
2996 *
2997 * function: read directory entries sequentially
2998 *      from the specified entry offset
2999 *
3000 * parameter:
3001 *
3002 * return: offset = (pn, index) of start entry
3003 *      of next jfs_readdir()/dtRead()
3004 */
3005int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
3006{
3007        struct inode *ip = filp->f_path.dentry->d_inode;
3008        struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
3009        int rc = 0;
3010        loff_t dtpos;   /* legacy OS/2 style position */
3011        struct dtoffset {
3012                s16 pn;
3013                s16 index;
3014                s32 unused;
3015        } *dtoffset = (struct dtoffset *) &dtpos;
3016        s64 bn;
3017        struct metapage *mp;
3018        dtpage_t *p;
3019        int index;
3020        s8 *stbl;
3021        struct btstack btstack;
3022        int i, next;
3023        struct ldtentry *d;
3024        struct dtslot *t;
3025        int d_namleft, len, outlen;
3026        unsigned long dirent_buf;
3027        char *name_ptr;
3028        u32 dir_index;
3029        int do_index = 0;
3030        uint loop_count = 0;
3031        struct jfs_dirent *jfs_dirent;
3032        int jfs_dirents;
3033        int overflow, fix_page, page_fixed = 0;
3034        static int unique_pos = 2;      /* If we can't fix broken index */
3035
3036        if (filp->f_pos == DIREND)
3037                return 0;
3038
3039        if (DO_INDEX(ip)) {
3040                /*
3041                 * persistent index is stored in directory entries.
3042                 * Special cases:        0 = .
3043                 *                       1 = ..
3044                 *                      -1 = End of directory
3045                 */
3046                do_index = 1;
3047
3048                dir_index = (u32) filp->f_pos;
3049
3050                if (dir_index > 1) {
3051                        struct dir_table_slot dirtab_slot;
3052
3053                        if (dtEmpty(ip) ||
3054                            (dir_index >= JFS_IP(ip)->next_index)) {
3055                                /* Stale position.  Directory has shrunk */
3056                                filp->f_pos = DIREND;
3057                                return 0;
3058                        }
3059                      repeat:
3060                        rc = read_index(ip, dir_index, &dirtab_slot);
3061                        if (rc) {
3062                                filp->f_pos = DIREND;
3063                                return rc;
3064                        }
3065                        if (dirtab_slot.flag == DIR_INDEX_FREE) {
3066                                if (loop_count++ > JFS_IP(ip)->next_index) {
3067                                        jfs_err("jfs_readdir detected "
3068                                                   "infinite loop!");
3069                                        filp->f_pos = DIREND;
3070                                        return 0;
3071                                }
3072                                dir_index = le32_to_cpu(dirtab_slot.addr2);
3073                                if (dir_index == -1) {
3074                                        filp->f_pos = DIREND;
3075                                        return 0;
3076                                }
3077                                goto repeat;
3078                        }
3079                        bn = addressDTS(&dirtab_slot);
3080                        index = dirtab_slot.slot;
3081                        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3082                        if (rc) {
3083                                filp->f_pos = DIREND;
3084                                return 0;
3085                        }
3086                        if (p->header.flag & BT_INTERNAL) {
3087                                jfs_err("jfs_readdir: bad index table");
3088                                DT_PUTPAGE(mp);
3089                                filp->f_pos = -1;
3090                                return 0;
3091                        }
3092                } else {
3093                        if (dir_index == 0) {
3094                                /*
3095                                 * self "."
3096                                 */
3097                                filp->f_pos = 0;
3098                                if (filldir(dirent, ".", 1, 0, ip->i_ino,
3099                                            DT_DIR))
3100                                        return 0;
3101                        }
3102                        /*
3103                         * parent ".."
3104                         */
3105                        filp->f_pos = 1;
3106                        if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR))
3107                                return 0;
3108
3109                        /*
3110                         * Find first entry of left-most leaf
3111                         */
3112                        if (dtEmpty(ip)) {
3113                                filp->f_pos = DIREND;
3114                                return 0;
3115                        }
3116
3117                        if ((rc = dtReadFirst(ip, &btstack)))
3118                                return rc;
3119
3120                        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3121                }
3122        } else {
3123                /*
3124                 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
3125                 *
3126                 * pn = index = 0:      First entry "."
3127                 * pn = 0; index = 1:   Second entry ".."
3128                 * pn > 0:              Real entries, pn=1 -> leftmost page
3129                 * pn = index = -1:     No more entries
3130                 */
3131                dtpos = filp->f_pos;
3132                if (dtpos == 0) {
3133                        /* build "." entry */
3134
3135                        if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino,
3136                                    DT_DIR))
3137                                return 0;
3138                        dtoffset->index = 1;
3139                        filp->f_pos = dtpos;
3140                }
3141
3142                if (dtoffset->pn == 0) {
3143                        if (dtoffset->index == 1) {
3144                                /* build ".." entry */
3145
3146                                if (filldir(dirent, "..", 2, filp->f_pos,
3147                                            PARENT(ip), DT_DIR))
3148                                        return 0;
3149                        } else {
3150                                jfs_err("jfs_readdir called with "
3151                                        "invalid offset!");
3152                        }
3153                        dtoffset->pn = 1;
3154                        dtoffset->index = 0;
3155                        filp->f_pos = dtpos;
3156                }
3157
3158                if (dtEmpty(ip)) {
3159                        filp->f_pos = DIREND;
3160                        return 0;
3161                }
3162
3163                if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) {
3164                        jfs_err("jfs_readdir: unexpected rc = %d "
3165                                "from dtReadNext", rc);
3166                        filp->f_pos = DIREND;
3167                        return 0;
3168                }
3169                /* get start leaf page and index */
3170                DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3171
3172                /* offset beyond directory eof ? */
3173                if (bn < 0) {
3174                        filp->f_pos = DIREND;
3175                        return 0;
3176                }
3177        }
3178
3179        dirent_buf = __get_free_page(GFP_KERNEL);
3180        if (dirent_buf == 0) {
3181                DT_PUTPAGE(mp);
3182                jfs_warn("jfs_readdir: __get_free_page failed!");
3183                filp->f_pos = DIREND;
3184                return -ENOMEM;
3185        }
3186
3187        while (1) {
3188                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3189                jfs_dirents = 0;
3190                overflow = fix_page = 0;
3191
3192                stbl = DT_GETSTBL(p);
3193
3194                for (i = index; i < p->header.nextindex; i++) {
3195                        d = (struct ldtentry *) & p->slot[stbl[i]];
3196
3197                        if (((long) jfs_dirent + d->namlen + 1) >
3198                            (dirent_buf + PAGE_SIZE)) {
3199                                /* DBCS codepages could overrun dirent_buf */
3200                                index = i;
3201                                overflow = 1;
3202                                break;
3203                        }
3204
3205                        d_namleft = d->namlen;
3206                        name_ptr = jfs_dirent->name;
3207                        jfs_dirent->ino = le32_to_cpu(d->inumber);
3208
3209                        if (do_index) {
3210                                len = min(d_namleft, DTLHDRDATALEN);
3211                                jfs_dirent->position = le32_to_cpu(d->index);
3212                                /*
3213                                 * d->index should always be valid, but it
3214                                 * isn't.  fsck.jfs doesn't create the
3215                                 * directory index for the lost+found
3216                                 * directory.  Rather than let it go,
3217                                 * we can try to fix it.
3218                                 */
3219                                if ((jfs_dirent->position < 2) ||
3220                                    (jfs_dirent->position >=
3221                                     JFS_IP(ip)->next_index)) {
3222                                        if (!page_fixed && !isReadOnly(ip)) {
3223                                                fix_page = 1;
3224                                                /*
3225                                                 * setting overflow and setting
3226                                                 * index to i will cause the
3227                                                 * same page to be processed
3228                                                 * again starting here
3229                                                 */
3230                                                overflow = 1;
3231                                                index = i;
3232                                                break;
3233                                        }
3234                                        jfs_dirent->position = unique_pos++;
3235                                }
3236                        } else {
3237                                jfs_dirent->position = dtpos;
3238                                len = min(d_namleft, DTLHDRDATALEN_LEGACY);
3239                        }
3240
3241                        /* copy the name of head/only segment */
3242                        outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
3243                                                   codepage);
3244                        jfs_dirent->name_len = outlen;
3245
3246                        /* copy name in the additional segment(s) */
3247                        next = d->next;
3248                        while (next >= 0) {
3249                                t = (struct dtslot *) & p->slot[next];
3250                                name_ptr += outlen;
3251                                d_namleft -= len;
3252                                /* Sanity Check */
3253                                if (d_namleft == 0) {
3254                                        jfs_error(ip->i_sb,
3255                                                  "JFS:Dtree error: ino = "
3256                                                  "%ld, bn=%Ld, index = %d",
3257                                                  (long)ip->i_ino,
3258                                                  (long long)bn,
3259                                                  i);
3260                                        goto skip_one;
3261                                }
3262                                len = min(d_namleft, DTSLOTDATALEN);
3263                                outlen = jfs_strfromUCS_le(name_ptr, t->name,
3264                                                           len, codepage);
3265                                jfs_dirent->name_len += outlen;
3266
3267                                next = t->next;
3268                        }
3269
3270                        jfs_dirents++;
3271                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3272skip_one:
3273                        if (!do_index)
3274                                dtoffset->index++;
3275                }
3276
3277                if (!overflow) {
3278                        /* Point to next leaf page */
3279                        if (p->header.flag & BT_ROOT)
3280                                bn = 0;
3281                        else {
3282                                bn = le64_to_cpu(p->header.next);
3283                                index = 0;
3284                                /* update offset (pn:index) for new page */
3285                                if (!do_index) {
3286                                        dtoffset->pn++;
3287                                        dtoffset->index = 0;
3288                                }
3289                        }
3290                        page_fixed = 0;
3291                }
3292
3293                /* unpin previous leaf page */
3294                DT_PUTPAGE(mp);
3295
3296                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3297                while (jfs_dirents--) {
3298                        filp->f_pos = jfs_dirent->position;
3299                        if (filldir(dirent, jfs_dirent->name,
3300                                    jfs_dirent->name_len, filp->f_pos,
3301                                    jfs_dirent->ino, DT_UNKNOWN))
3302                                goto out;
3303                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3304                }
3305
3306                if (fix_page) {
3307                        add_missing_indices(ip, bn);
3308                        page_fixed = 1;
3309                }
3310
3311                if (!overflow && (bn == 0)) {
3312                        filp->f_pos = DIREND;
3313                        break;
3314                }
3315
3316                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3317                if (rc) {
3318                        free_page(dirent_buf);
3319                        return rc;
3320                }
3321        }
3322
3323      out:
3324        free_page(dirent_buf);
3325
3326        return rc;
3327}
3328
3329
3330/*
3331 *      dtReadFirst()
3332 *
3333 * function: get the leftmost page of the directory
3334 */
3335static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3336{
3337        int rc = 0;
3338        s64 bn;
3339        int psize = 288;        /* initial in-line directory */
3340        struct metapage *mp;
3341        dtpage_t *p;
3342        s8 *stbl;
3343        struct btframe *btsp;
3344        pxd_t *xd;
3345
3346        BT_CLR(btstack);        /* reset stack */
3347
3348        /*
3349         *      descend leftmost path of the tree
3350         *
3351         * by convention, root bn = 0.
3352         */
3353        for (bn = 0;;) {
3354                DT_GETPAGE(ip, bn, mp, psize, p, rc);
3355                if (rc)
3356                        return rc;
3357
3358                /*
3359                 * leftmost leaf page
3360                 */
3361                if (p->header.flag & BT_LEAF) {
3362                        /* return leftmost entry */
3363                        btsp = btstack->top;
3364                        btsp->bn = bn;
3365                        btsp->index = 0;
3366                        btsp->mp = mp;
3367
3368                        return 0;
3369                }
3370
3371                /*
3372                 * descend down to leftmost child page
3373                 */
3374                if (BT_STACK_FULL(btstack)) {
3375                        DT_PUTPAGE(mp);
3376                        jfs_error(ip->i_sb, "dtReadFirst: btstack overrun");
3377                        BT_STACK_DUMP(btstack);
3378                        return -EIO;
3379                }
3380                /* push (bn, index) of the parent page/entry */
3381                BT_PUSH(btstack, bn, 0);
3382
3383                /* get the leftmost entry */
3384                stbl = DT_GETSTBL(p);
3385                xd = (pxd_t *) & p->slot[stbl[0]];
3386
3387                /* get the child page block address */
3388                bn = addressPXD(xd);
3389                psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;
3390
3391                /* unpin the parent page */
3392                DT_PUTPAGE(mp);
3393        }
3394}
3395
3396
3397/*
3398 *      dtReadNext()
3399 *
3400 * function: get the page of the specified offset (pn:index)
3401 *
3402 * return: if (offset > eof), bn = -1;
3403 *
3404 * note: if index > nextindex of the target leaf page,
3405 * start with 1st entry of next leaf page;
3406 */
3407static int dtReadNext(struct inode *ip, loff_t * offset,
3408                      struct btstack * btstack)
3409{
3410        int rc = 0;
3411        struct dtoffset {
3412                s16 pn;
3413                s16 index;
3414                s32 unused;
3415        } *dtoffset = (struct dtoffset *) offset;
3416        s64 bn;
3417        struct metapage *mp;
3418        dtpage_t *p;
3419        int index;
3420        int pn;
3421        s8 *stbl;
3422        struct btframe *btsp, *parent;
3423        pxd_t *xd;
3424
3425        /*
3426         * get leftmost leaf page pinned
3427         */
3428        if ((rc = dtReadFirst(ip, btstack)))
3429                return rc;
3430
3431        /* get leaf page */
3432        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
3433
3434        /* get the start offset (pn:index) */
3435        pn = dtoffset->pn - 1;  /* Now pn = 0 represents leftmost leaf */
3436        index = dtoffset->index;
3437
3438        /* start at leftmost page ? */
3439        if (pn == 0) {
3440                /* offset beyond eof ? */
3441                if (index < p->header.nextindex)
3442                        goto out;
3443
3444                if (p->header.flag & BT_ROOT) {
3445                        bn = -1;
3446                        goto out;
3447                }
3448
3449                /* start with 1st entry of next leaf page */
3450                dtoffset->pn++;
3451                dtoffset->index = index = 0;
3452                goto a;
3453        }
3454
3455        /* start at non-leftmost page: scan parent pages for large pn */
3456        if (p->header.flag & BT_ROOT) {
3457                bn = -1;
3458                goto out;
3459        }
3460
3461        /* start after next leaf page ? */
3462        if (pn > 1)
3463                goto b;
3464
3465        /* get leaf page pn = 1 */
3466      a:
3467        bn = le64_to_cpu(p->header.next);
3468
3469        /* unpin leaf page */
3470        DT_PUTPAGE(mp);
3471
3472        /* offset beyond eof ? */
3473        if (bn == 0) {
3474                bn = -1;
3475                goto out;
3476        }
3477
3478        goto c;
3479
3480        /*
3481         * scan last internal page level to get target leaf page
3482         */
3483      b:
3484        /* unpin leftmost leaf page */
3485        DT_PUTPAGE(mp);
3486
3487        /* get left most parent page */
3488        btsp = btstack->top;
3489        parent = btsp - 1;
3490        bn = parent->bn;
3491        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3492        if (rc)
3493                return rc;
3494
3495        /* scan parent pages at last internal page level */
3496        while (pn >= p->header.nextindex) {
3497                pn -= p->header.nextindex;
3498
3499                /* get next parent page address */
3500                bn = le64_to_cpu(p->header.next);
3501
3502                /* unpin current parent page */
3503                DT_PUTPAGE(mp);
3504
3505                /* offset beyond eof ? */
3506                if (bn == 0) {
3507                        bn = -1;
3508                        goto out;
3509                }
3510
3511                /* get next parent page */
3512                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3513                if (rc)
3514                        return rc;
3515
3516                /* update parent page stack frame */
3517                parent->bn = bn;
3518        }
3519
3520        /* get leaf page address */
3521        stbl = DT_GETSTBL(p);
3522        xd = (pxd_t *) & p->slot[stbl[pn]];
3523        bn = addressPXD(xd);
3524
3525        /* unpin parent page */
3526        DT_PUTPAGE(mp);
3527
3528        /*
3529         * get target leaf page
3530         */
3531      c:
3532        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3533        if (rc)
3534                return rc;
3535
3536        /*
3537         * leaf page has been completed:
3538         * start with 1st entry of next leaf page
3539         */
3540        if (index >= p->header.nextindex) {
3541                bn = le64_to_cpu(p->header.next);
3542
3543                /* unpin leaf page */
3544                DT_PUTPAGE(mp);
3545
3546                /* offset beyond eof ? */
3547                if (bn == 0) {
3548                        bn = -1;
3549                        goto out;
3550                }
3551
3552                /* get next leaf page */
3553                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3554                if (rc)
3555                        return rc;
3556
3557                /* start with 1st entry of next leaf page */
3558                dtoffset->pn++;
3559                dtoffset->index = 0;
3560        }
3561
3562      out:
3563        /* return target leaf page pinned */
3564        btsp = btstack->top;
3565        btsp->bn = bn;
3566        btsp->index = dtoffset->index;
3567        btsp->mp = mp;
3568
3569        return 0;
3570}
3571
3572
3573/*
3574 *      dtCompare()
3575 *
3576 * function: compare search key with an internal entry
3577 *
3578 * return:
3579 *      < 0 if k is < record
3580 *      = 0 if k is = record
3581 *      > 0 if k is > record
3582 */
3583static int dtCompare(struct component_name * key,       /* search key */
3584                     dtpage_t * p,      /* directory page */
3585                     int si)
3586{                               /* entry slot index */
3587        wchar_t *kname;
3588        __le16 *name;
3589        int klen, namlen, len, rc;
3590        struct idtentry *ih;
3591        struct dtslot *t;
3592
3593        /*
3594         * force the left-most key on internal pages, at any level of
3595         * the tree, to be less than any search key.
3596         * this obviates having to update the leftmost key on an internal
3597         * page when the user inserts a new key in the tree smaller than
3598         * anything that has been stored.
3599         *
3600         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3601         * at any internal page at any level of the tree,
3602         * it descends to child of the entry anyway -
3603         * ? make the entry as min size dummy entry)
3604         *
3605         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3606         * return (1);
3607         */
3608
3609        kname = key->name;
3610        klen = key->namlen;
3611
3612        ih = (struct idtentry *) & p->slot[si];
3613        si = ih->next;
3614        name = ih->name;
3615        namlen = ih->namlen;
3616        len = min(namlen, DTIHDRDATALEN);
3617
3618        /* compare with head/only segment */
3619        len = min(klen, len);
3620        if ((rc = UniStrncmp_le(kname, name, len)))
3621                return rc;
3622
3623        klen -= len;
3624        namlen -= len;
3625
3626        /* compare with additional segment(s) */
3627        kname += len;
3628        while (klen > 0 && namlen > 0) {
3629                /* compare with next name segment */
3630                t = (struct dtslot *) & p->slot[si];
3631                len = min(namlen, DTSLOTDATALEN);
3632                len = min(klen, len);
3633                name = t->name;
3634                if ((rc = UniStrncmp_le(kname, name, len)))
3635                        return rc;
3636
3637                klen -= len;
3638                namlen -= len;
3639                kname += len;
3640                si = t->next;
3641        }
3642
3643        return (klen - namlen);
3644}
3645
3646
3647
3648
3649/*
3650 *      ciCompare()
3651 *
3652 * function: compare search key with an (leaf/internal) entry
3653 *
3654 * return:
3655 *      < 0 if k is < record
3656 *      = 0 if k is = record
3657 *      > 0 if k is > record
3658 */
3659static int ciCompare(struct component_name * key,       /* search key */
3660                     dtpage_t * p,      /* directory page */
3661                     int si,    /* entry slot index */
3662                     int flag)
3663{
3664        wchar_t *kname, x;
3665        __le16 *name;
3666        int klen, namlen, len, rc;
3667        struct ldtentry *lh;
3668        struct idtentry *ih;
3669        struct dtslot *t;
3670        int i;
3671
3672        /*
3673         * force the left-most key on internal pages, at any level of
3674         * the tree, to be less than any search key.
3675         * this obviates having to update the leftmost key on an internal
3676         * page when the user inserts a new key in the tree smaller than
3677         * anything that has been stored.
3678         *
3679         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3680         * at any internal page at any level of the tree,
3681         * it descends to child of the entry anyway -
3682         * ? make the entry as min size dummy entry)
3683         *
3684         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3685         * return (1);
3686         */
3687
3688        kname = key->name;
3689        klen = key->namlen;
3690
3691        /*
3692         * leaf page entry
3693         */
3694        if (p->header.flag & BT_LEAF) {
3695                lh = (struct ldtentry *) & p->slot[si];
3696                si = lh->next;
3697                name = lh->name;
3698                namlen = lh->namlen;
3699                if (flag & JFS_DIR_INDEX)
3700                        len = min(namlen, DTLHDRDATALEN);
3701                else
3702                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3703        }
3704        /*
3705         * internal page entry
3706         */
3707        else {
3708                ih = (struct idtentry *) & p->slot[si];
3709                si = ih->next;
3710                name = ih->name;
3711                namlen = ih->namlen;
3712                len = min(namlen, DTIHDRDATALEN);
3713        }
3714
3715        /* compare with head/only segment */
3716        len = min(klen, len);
3717        for (i = 0; i < len; i++, kname++, name++) {
3718                /* only uppercase if case-insensitive support is on */
3719                if ((flag & JFS_OS2) == JFS_OS2)
3720                        x = UniToupper(le16_to_cpu(*name));
3721                else
3722                        x = le16_to_cpu(*name);
3723                if ((rc = *kname - x))
3724                        return rc;
3725        }
3726
3727        klen -= len;
3728        namlen -= len;
3729
3730        /* compare with additional segment(s) */
3731        while (klen > 0 && namlen > 0) {
3732                /* compare with next name segment */
3733                t = (struct dtslot *) & p->slot[si];
3734                len = min(namlen, DTSLOTDATALEN);
3735                len = min(klen, len);
3736                name = t->name;
3737                for (i = 0; i < len; i++, kname++, name++) {
3738                        /* only uppercase if case-insensitive support is on */
3739                        if ((flag & JFS_OS2) == JFS_OS2)
3740                                x = UniToupper(le16_to_cpu(*name));
3741                        else
3742                                x = le16_to_cpu(*name);
3743
3744                        if ((rc = *kname - x))
3745                                return rc;
3746                }
3747
3748                klen -= len;
3749                namlen -= len;
3750                si = t->next;
3751        }
3752
3753        return (klen - namlen);
3754}
3755
3756
3757/*
3758 *      ciGetLeafPrefixKey()
3759 *
3760 * function: compute prefix of suffix compression
3761 *           from two adjacent leaf entries
3762 *           across page boundary
3763 *
3764 * return: non-zero on error
3765 *
3766 */
3767static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3768                               int ri, struct component_name * key, int flag)
3769{
3770        int klen, namlen;
3771        wchar_t *pl, *pr, *kname;
3772        struct component_name lkey;
3773        struct component_name rkey;
3774
3775        lkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3776                                        GFP_KERNEL);
3777        if (lkey.name == NULL)
3778                return -ENOMEM;
3779
3780        rkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3781                                        GFP_KERNEL);
3782        if (rkey.name == NULL) {
3783                kfree(lkey.name);
3784                return -ENOMEM;
3785        }
3786
3787        /* get left and right key */
3788        dtGetKey(lp, li, &lkey, flag);
3789        lkey.name[lkey.namlen] = 0;
3790
3791        if ((flag & JFS_OS2) == JFS_OS2)
3792                ciToUpper(&lkey);
3793
3794        dtGetKey(rp, ri, &rkey, flag);
3795        rkey.name[rkey.namlen] = 0;
3796
3797
3798        if ((flag & JFS_OS2) == JFS_OS2)
3799                ciToUpper(&rkey);
3800
3801        /* compute prefix */
3802        klen = 0;
3803        kname = key->name;
3804        namlen = min(lkey.namlen, rkey.namlen);
3805        for (pl = lkey.name, pr = rkey.name;
3806             namlen; pl++, pr++, namlen--, klen++, kname++) {
3807                *kname = *pr;
3808                if (*pl != *pr) {
3809                        key->namlen = klen + 1;
3810                        goto free_names;
3811                }
3812        }
3813
3814        /* l->namlen <= r->namlen since l <= r */
3815        if (lkey.namlen < rkey.namlen) {
3816                *kname = *pr;
3817                key->namlen = klen + 1;
3818        } else                  /* l->namelen == r->namelen */
3819                key->namlen = klen;
3820
3821free_names:
3822        kfree(lkey.name);
3823        kfree(rkey.name);
3824        return 0;
3825}
3826
3827
3828
3829/*
3830 *      dtGetKey()
3831 *
3832 * function: get key of the entry
3833 */
3834static void dtGetKey(dtpage_t * p, int i,       /* entry index */
3835                     struct component_name * key, int flag)
3836{
3837        int si;
3838        s8 *stbl;
3839        struct ldtentry *lh;
3840        struct idtentry *ih;
3841        struct dtslot *t;
3842        int namlen, len;
3843        wchar_t *kname;
3844        __le16 *name;
3845
3846        /* get entry */
3847        stbl = DT_GETSTBL(p);
3848        si = stbl[i];
3849        if (p->header.flag & BT_LEAF) {
3850                lh = (struct ldtentry *) & p->slot[si];
3851                si = lh->next;
3852                namlen = lh->namlen;
3853                name = lh->name;
3854                if (flag & JFS_DIR_INDEX)
3855                        len = min(namlen, DTLHDRDATALEN);
3856                else
3857                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3858        } else {
3859                ih = (struct idtentry *) & p->slot[si];
3860                si = ih->next;
3861                namlen = ih->namlen;
3862                name = ih->name;
3863                len = min(namlen, DTIHDRDATALEN);
3864        }
3865
3866        key->namlen = namlen;
3867        kname = key->name;
3868
3869        /*
3870         * move head/only segment
3871         */
3872        UniStrncpy_from_le(kname, name, len);
3873
3874        /*
3875         * move additional segment(s)
3876         */
3877        while (si >= 0) {
3878                /* get next segment */
3879                t = &p->slot[si];
3880                kname += len;
3881                namlen -= len;
3882                len = min(namlen, DTSLOTDATALEN);
3883                UniStrncpy_from_le(kname, t->name, len);
3884
3885                si = t->next;
3886        }
3887}
3888
3889
3890/*
3891 *      dtInsertEntry()
3892 *
3893 * function: allocate free slot(s) and
3894 *           write a leaf/internal entry
3895 *
3896 * return: entry slot index
3897 */
3898static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
3899                          ddata_t * data, struct dt_lock ** dtlock)
3900{
3901        struct dtslot *h, *t;
3902        struct ldtentry *lh = NULL;
3903        struct idtentry *ih = NULL;
3904        int hsi, fsi, klen, len, nextindex;
3905        wchar_t *kname;
3906        __le16 *name;
3907        s8 *stbl;
3908        pxd_t *xd;
3909        struct dt_lock *dtlck = *dtlock;
3910        struct lv *lv;
3911        int xsi, n;
3912        s64 bn = 0;
3913        struct metapage *mp = NULL;
3914
3915        klen = key->namlen;
3916        kname = key->name;
3917
3918        /* allocate a free slot */
3919        hsi = fsi = p->header.freelist;
3920        h = &p->slot[fsi];
3921        p->header.freelist = h->next;
3922        --p->header.freecnt;
3923
3924        /* open new linelock */
3925        if (dtlck->index >= dtlck->maxcnt)
3926                dtlck = (struct dt_lock *) txLinelock(dtlck);
3927
3928        lv = & dtlck->lv[dtlck->index];
3929        lv->offset = hsi;
3930
3931        /* write head/only segment */
3932        if (p->header.flag & BT_LEAF) {
3933                lh = (struct ldtentry *) h;
3934                lh->next = h->next;
3935                lh->inumber = cpu_to_le32(data->leaf.ino);
3936                lh->namlen = klen;
3937                name = lh->name;
3938                if (data->leaf.ip) {
3939                        len = min(klen, DTLHDRDATALEN);
3940                        if (!(p->header.flag & BT_ROOT))
3941                                bn = addressPXD(&p->header.self);
3942                        lh->index = cpu_to_le32(add_index(data->leaf.tid,
3943                                                          data->leaf.ip,
3944                                                          bn, index));
3945                } else
3946                        len = min(klen, DTLHDRDATALEN_LEGACY);
3947        } else {
3948                ih = (struct idtentry *) h;
3949                ih->next = h->next;
3950                xd = (pxd_t *) ih;
3951                *xd = data->xd;
3952                ih->namlen = klen;
3953                name = ih->name;
3954                len = min(klen, DTIHDRDATALEN);
3955        }
3956
3957        UniStrncpy_to_le(name, kname, len);
3958
3959        n = 1;
3960        xsi = hsi;
3961
3962        /* write additional segment(s) */
3963        t = h;
3964        klen -= len;
3965        while (klen) {
3966                /* get free slot */
3967                fsi = p->header.freelist;
3968                t = &p->slot[fsi];
3969                p->header.freelist = t->next;
3970                --p->header.freecnt;
3971
3972                /* is next slot contiguous ? */
3973                if (fsi != xsi + 1) {
3974                        /* close current linelock */
3975                        lv->length = n;
3976                        dtlck->index++;
3977
3978                        /* open new linelock */
3979                        if (dtlck->index < dtlck->maxcnt)
3980                                lv++;
3981                        else {
3982                                dtlck = (struct dt_lock *) txLinelock(dtlck);
3983                                lv = & dtlck->lv[0];
3984                        }
3985
3986                        lv->offset = fsi;
3987                        n = 0;
3988                }
3989
3990                kname += len;
3991                len = min(klen, DTSLOTDATALEN);
3992                UniStrncpy_to_le(t->name, kname, len);
3993
3994                n++;
3995                xsi = fsi;
3996                klen -= len;
3997        }
3998
3999        /* close current linelock */
4000        lv->length = n;
4001        dtlck->index++;
4002
4003        *dtlock = dtlck;
4004
4005        /* terminate last/only segment */
4006        if (h == t) {
4007                /* single segment entry */
4008                if (p->header.flag & BT_LEAF)
4009                        lh->next = -1;
4010                else
4011                        ih->next = -1;
4012        } else
4013                /* multi-segment entry */
4014                t->next = -1;
4015
4016        /* if insert into middle, shift right succeeding entries in stbl */
4017        stbl = DT_GETSTBL(p);
4018        nextindex = p->header.nextindex;
4019        if (index < nextindex) {
4020                memmove(stbl + index + 1, stbl + index, nextindex - index);
4021
4022                if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
4023                        s64 lblock;
4024
4025                        /*
4026                         * Need to update slot number for entries that moved
4027                         * in the stbl
4028                         */
4029                        mp = NULL;
4030                        for (n = index + 1; n <= nextindex; n++) {
4031                                lh = (struct ldtentry *) & (p->slot[stbl[n]]);
4032                                modify_index(data->leaf.tid, data->leaf.ip,
4033                                             le32_to_cpu(lh->index), bn, n,
4034                                             &mp, &lblock);
4035                        }
4036                        if (mp)
4037                                release_metapage(mp);
4038                }
4039        }
4040
4041        stbl[index] = hsi;
4042
4043        /* advance next available entry index of stbl */
4044        ++p->header.nextindex;
4045}
4046
4047
4048/*
4049 *      dtMoveEntry()
4050 *
4051 * function: move entries from split/left page to new/right page
4052 *
4053 *      nextindex of dst page and freelist/freecnt of both pages
4054 *      are updated.
4055 */
4056static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
4057                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
4058                        int do_index)
4059{
4060        int ssi, next;          /* src slot index */
4061        int di;                 /* dst entry index */
4062        int dsi;                /* dst slot index */
4063        s8 *sstbl, *dstbl;      /* sorted entry table */
4064        int snamlen, len;
4065        struct ldtentry *slh, *dlh = NULL;
4066        struct idtentry *sih, *dih = NULL;
4067        struct dtslot *h, *s, *d;
4068        struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
4069        struct lv *slv, *dlv;
4070        int xssi, ns, nd;
4071        int sfsi;
4072
4073        sstbl = (s8 *) & sp->slot[sp->header.stblindex];
4074        dstbl = (s8 *) & dp->slot[dp->header.stblindex];
4075
4076        dsi = dp->header.freelist;      /* first (whole page) free slot */
4077        sfsi = sp->header.freelist;
4078
4079        /* linelock destination entry slot */
4080        dlv = & ddtlck->lv[ddtlck->index];
4081        dlv->offset = dsi;
4082
4083        /* linelock source entry slot */
4084        slv = & sdtlck->lv[sdtlck->index];
4085        slv->offset = sstbl[si];
4086        xssi = slv->offset - 1;
4087
4088        /*
4089         * move entries
4090         */
4091        ns = nd = 0;
4092        for (di = 0; si < sp->header.nextindex; si++, di++) {
4093                ssi = sstbl[si];
4094                dstbl[di] = dsi;
4095
4096                /* is next slot contiguous ? */
4097                if (ssi != xssi + 1) {
4098                        /* close current linelock */
4099                        slv->length = ns;
4100                        sdtlck->index++;
4101
4102                        /* open new linelock */
4103                        if (sdtlck->index < sdtlck->maxcnt)
4104                                slv++;
4105                        else {
4106                                sdtlck = (struct dt_lock *) txLinelock(sdtlck);
4107                                slv = & sdtlck->lv[0];
4108                        }
4109
4110                        slv->offset = ssi;
4111                        ns = 0;
4112                }
4113
4114                /*
4115                 * move head/only segment of an entry
4116                 */
4117                /* get dst slot */
4118                h = d = &dp->slot[dsi];
4119
4120                /* get src slot and move */
4121                s = &sp->slot[ssi];
4122                if (sp->header.flag & BT_LEAF) {
4123                        /* get source entry */
4124                        slh = (struct ldtentry *) s;
4125                        dlh = (struct ldtentry *) h;
4126                        snamlen = slh->namlen;
4127
4128                        if (do_index) {
4129                                len = min(snamlen, DTLHDRDATALEN);
4130                                dlh->index = slh->index; /* little-endian */
4131                        } else
4132                                len = min(snamlen, DTLHDRDATALEN_LEGACY);
4133
4134                        memcpy(dlh, slh, 6 + len * 2);
4135
4136                        next = slh->next;
4137
4138                        /* update dst head/only segment next field */
4139                        dsi++;
4140                        dlh->next = dsi;
4141                } else {
4142                        sih = (struct idtentry *) s;
4143                        snamlen = sih->namlen;
4144
4145                        len = min(snamlen, DTIHDRDATALEN);
4146                        dih = (struct idtentry *) h;
4147                        memcpy(dih, sih, 10 + len * 2);
4148                        next = sih->next;
4149
4150                        dsi++;
4151                        dih->next = dsi;
4152                }
4153
4154                /* free src head/only segment */
4155                s->next = sfsi;
4156                s->cnt = 1;
4157                sfsi = ssi;
4158
4159                ns++;
4160                nd++;
4161                xssi = ssi;
4162
4163                /*
4164                 * move additional segment(s) of the entry
4165                 */
4166                snamlen -= len;
4167                while ((ssi = next) >= 0) {
4168                        /* is next slot contiguous ? */
4169                        if (ssi != xssi + 1) {
4170                                /* close current linelock */
4171                                slv->length = ns;
4172                                sdtlck->index++;
4173
4174                                /* open new linelock */
4175                                if (sdtlck->index < sdtlck->maxcnt)
4176                                        slv++;
4177                                else {
4178                                        sdtlck =
4179                                            (struct dt_lock *)
4180                                            txLinelock(sdtlck);
4181                                        slv = & sdtlck->lv[0];
4182                                }
4183
4184                                slv->offset = ssi;
4185                                ns = 0;
4186                        }
4187
4188                        /* get next source segment */
4189                        s = &sp->slot[ssi];
4190
4191                        /* get next destination free slot */
4192                        d++;
4193
4194                        len = min(snamlen, DTSLOTDATALEN);
4195                        UniStrncpy_le(d->name, s->name, len);
4196
4197                        ns++;
4198                        nd++;
4199                        xssi = ssi;
4200
4201                        dsi++;
4202                        d->next = dsi;
4203
4204                        /* free source segment */
4205                        next = s->next;
4206                        s->next = sfsi;
4207                        s->cnt = 1;
4208                        sfsi = ssi;
4209
4210                        snamlen -= len;
4211                }               /* end while */
4212
4213                /* terminate dst last/only segment */
4214                if (h == d) {
4215                        /* single segment entry */
4216                        if (dp->header.flag & BT_LEAF)
4217                                dlh->next = -1;
4218                        else
4219                                dih->next = -1;
4220                } else
4221                        /* multi-segment entry */
4222                        d->next = -1;
4223        }                       /* end for */
4224
4225        /* close current linelock */
4226        slv->length = ns;
4227        sdtlck->index++;
4228        *sdtlock = sdtlck;
4229
4230        dlv->length = nd;
4231        ddtlck->index++;
4232        *ddtlock = ddtlck;
4233
4234        /* update source header */
4235        sp->header.freelist = sfsi;
4236        sp->header.freecnt += nd;
4237
4238        /* update destination header */
4239        dp->header.nextindex = di;
4240
4241        dp->header.freelist = dsi;
4242        dp->header.freecnt -= nd;
4243}
4244
4245
4246/*
4247 *      dtDeleteEntry()
4248 *
4249 * function: free a (leaf/internal) entry
4250 *
4251 * log freelist header, stbl, and each segment slot of entry
4252 * (even though last/only segment next field is modified,
4253 * physical image logging requires all segment slots of
4254 * the entry logged to avoid applying previous updates
4255 * to the same slots)
4256 */
4257static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
4258{
4259        int fsi;                /* free entry slot index */
4260        s8 *stbl;
4261        struct dtslot *t;
4262        int si, freecnt;
4263        struct dt_lock *dtlck = *dtlock;
4264        struct lv *lv;
4265        int xsi, n;
4266
4267        /* get free entry slot index */
4268        stbl = DT_GETSTBL(p);
4269        fsi = stbl[fi];
4270
4271        /* open new linelock */
4272        if (dtlck->index >= dtlck->maxcnt)
4273                dtlck = (struct dt_lock *) txLinelock(dtlck);
4274        lv = & dtlck->lv[dtlck->index];
4275
4276        lv->offset = fsi;
4277
4278        /* get the head/only segment */
4279        t = &p->slot[fsi];
4280        if (p->header.flag & BT_LEAF)
4281                si = ((struct ldtentry *) t)->next;
4282        else
4283                si = ((struct idtentry *) t)->next;
4284        t->next = si;
4285        t->cnt = 1;
4286
4287        n = freecnt = 1;
4288        xsi = fsi;
4289
4290        /* find the last/only segment */
4291        while (si >= 0) {
4292                /* is next slot contiguous ? */
4293                if (si != xsi + 1) {
4294                        /* close current linelock */
4295                        lv->length = n;
4296                        dtlck->index++;
4297
4298                        /* open new linelock */
4299                        if (dtlck->index < dtlck->maxcnt)
4300                                lv++;
4301                        else {
4302                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4303                                lv = & dtlck->lv[0];
4304                        }
4305
4306                        lv->offset = si;
4307                        n = 0;
4308                }
4309
4310                n++;
4311                xsi = si;
4312                freecnt++;
4313
4314                t = &p->slot[si];
4315                t->cnt = 1;
4316                si = t->next;
4317        }
4318
4319        /* close current linelock */
4320        lv->length = n;
4321        dtlck->index++;
4322
4323        *dtlock = dtlck;
4324
4325        /* update freelist */
4326        t->next = p->header.freelist;
4327        p->header.freelist = fsi;
4328        p->header.freecnt += freecnt;
4329
4330        /* if delete from middle,
4331         * shift left the succedding entries in the stbl
4332         */
4333        si = p->header.nextindex;
4334        if (fi < si - 1)
4335                memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
4336
4337        p->header.nextindex--;
4338}
4339
4340
4341/*
4342 *      dtTruncateEntry()
4343 *
4344 * function: truncate a (leaf/internal) entry
4345 *
4346 * log freelist header, stbl, and each segment slot of entry
4347 * (even though last/only segment next field is modified,
4348 * physical image logging requires all segment slots of
4349 * the entry logged to avoid applying previous updates
4350 * to the same slots)
4351 */
4352static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
4353{
4354        int tsi;                /* truncate entry slot index */
4355        s8 *stbl;
4356        struct dtslot *t;
4357        int si, freecnt;
4358        struct dt_lock *dtlck = *dtlock;
4359        struct lv *lv;
4360        int fsi, xsi, n;
4361
4362        /* get free entry slot index */
4363        stbl = DT_GETSTBL(p);
4364        tsi = stbl[ti];
4365
4366        /* open new linelock */
4367        if (dtlck->index >= dtlck->maxcnt)
4368                dtlck = (struct dt_lock *) txLinelock(dtlck);
4369        lv = & dtlck->lv[dtlck->index];
4370
4371        lv->offset = tsi;
4372
4373        /* get the head/only segment */
4374        t = &p->slot[tsi];
4375        ASSERT(p->header.flag & BT_INTERNAL);
4376        ((struct idtentry *) t)->namlen = 0;
4377        si = ((struct idtentry *) t)->next;
4378        ((struct idtentry *) t)->next = -1;
4379
4380        n = 1;
4381        freecnt = 0;
4382        fsi = si;
4383        xsi = tsi;
4384
4385        /* find the last/only segment */
4386        while (si >= 0) {
4387                /* is next slot contiguous ? */
4388                if (si != xsi + 1) {
4389                        /* close current linelock */
4390                        lv->length = n;
4391                        dtlck->index++;
4392
4393                        /* open new linelock */
4394                        if (dtlck->index < dtlck->maxcnt)
4395                                lv++;
4396                        else {
4397                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4398                                lv = & dtlck->lv[0];
4399                        }
4400
4401                        lv->offset = si;
4402                        n = 0;
4403                }
4404
4405                n++;
4406                xsi = si;
4407                freecnt++;
4408
4409                t = &p->slot[si];
4410                t->cnt = 1;
4411                si = t->next;
4412        }
4413
4414        /* close current linelock */
4415        lv->length = n;
4416        dtlck->index++;
4417
4418        *dtlock = dtlck;
4419
4420        /* update freelist */
4421        if (freecnt == 0)
4422                return;
4423        t->next = p->header.freelist;
4424        p->header.freelist = fsi;
4425        p->header.freecnt += freecnt;
4426}
4427
4428
4429/*
4430 *      dtLinelockFreelist()
4431 */
4432static void dtLinelockFreelist(dtpage_t * p,    /* directory page */
4433                               int m,   /* max slot index */
4434                               struct dt_lock ** dtlock)
4435{
4436        int fsi;                /* free entry slot index */
4437        struct dtslot *t;
4438        int si;
4439        struct dt_lock *dtlck = *dtlock;
4440        struct lv *lv;
4441        int xsi, n;
4442
4443        /* get free entry slot index */
4444        fsi = p->header.freelist;
4445
4446        /* open new linelock */
4447        if (dtlck->index >= dtlck->maxcnt)
4448                dtlck = (struct dt_lock *) txLinelock(dtlck);
4449        lv = & dtlck->lv[dtlck->index];
4450
4451        lv->offset = fsi;
4452
4453        n = 1;
4454        xsi = fsi;
4455
4456        t = &p->slot[fsi];
4457        si = t->next;
4458
4459        /* find the last/only segment */
4460        while (si < m && si >= 0) {
4461                /* is next slot contiguous ? */
4462                if (si != xsi + 1) {
4463                        /* close current linelock */
4464                        lv->length = n;
4465                        dtlck->index++;
4466
4467                        /* open new linelock */
4468                        if (dtlck->index < dtlck->maxcnt)
4469                                lv++;
4470                        else {
4471                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4472                                lv = & dtlck->lv[0];
4473                        }
4474
4475                        lv->offset = si;
4476                        n = 0;
4477                }
4478
4479                n++;
4480                xsi = si;
4481
4482                t = &p->slot[si];
4483                si = t->next;
4484        }
4485
4486        /* close current linelock */
4487        lv->length = n;
4488        dtlck->index++;
4489
4490        *dtlock = dtlck;
4491}
4492
4493
4494/*
4495 * NAME: dtModify
4496 *
4497 * FUNCTION: Modify the inode number part of a directory entry
4498 *
4499 * PARAMETERS:
4500 *      tid     - Transaction id
4501 *      ip      - Inode of parent directory
4502 *      key     - Name of entry to be modified
4503 *      orig_ino        - Original inode number expected in entry
4504 *      new_ino - New inode number to put into entry
4505 *      flag    - JFS_RENAME
4506 *
4507 * RETURNS:
4508 *      -ESTALE - If entry found does not match orig_ino passed in
4509 *      -ENOENT - If no entry can be found to match key
4510 *      0       - If successfully modified entry
4511 */
4512int dtModify(tid_t tid, struct inode *ip,
4513         struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
4514{
4515        int rc;
4516        s64 bn;
4517        struct metapage *mp;
4518        dtpage_t *p;
4519        int index;
4520        struct btstack btstack;
4521        struct tlock *tlck;
4522        struct dt_lock *dtlck;
4523        struct lv *lv;
4524        s8 *stbl;
4525        int entry_si;           /* entry slot index */
4526        struct ldtentry *entry;
4527
4528        /*
4529         *      search for the entry to modify:
4530         *
4531         * dtSearch() returns (leaf page pinned, index at which to modify).
4532         */
4533        if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
4534                return rc;
4535
4536        /* retrieve search result */
4537        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4538
4539        BT_MARK_DIRTY(mp, ip);
4540        /*
4541         * acquire a transaction lock on the leaf page of named entry
4542         */
4543        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
4544        dtlck = (struct dt_lock *) & tlck->lock;
4545
4546        /* get slot index of the entry */
4547        stbl = DT_GETSTBL(p);
4548        entry_si = stbl[index];
4549
4550        /* linelock entry */
4551        ASSERT(dtlck->index == 0);
4552        lv = & dtlck->lv[0];
4553        lv->offset = entry_si;
4554        lv->length = 1;
4555        dtlck->index++;
4556
4557        /* get the head/only segment */
4558        entry = (struct ldtentry *) & p->slot[entry_si];
4559
4560        /* substitute the inode number of the entry */
4561        entry->inumber = cpu_to_le32(new_ino);
4562
4563        /* unpin the leaf page */
4564        DT_PUTPAGE(mp);
4565
4566        return 0;
4567}
4568