linux/fs/jfs/jfs_dtree.c
<<
>>
Prefs
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2004
   3 *
   4 *   This program is free software;  you can redistribute it and/or modify
   5 *   it under the terms of the GNU General Public License as published by
   6 *   the Free Software Foundation; either version 2 of the License, or
   7 *   (at your option) any later version.
   8 *
   9 *   This program is distributed in the hope that it will be useful,
  10 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  11 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  12 *   the GNU General Public License for more details.
  13 *
  14 *   You should have received a copy of the GNU General Public License
  15 *   along with this program;  if not, write to the Free Software
  16 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17 */
  18
  19/*
  20 *      jfs_dtree.c: directory B+-tree manager
  21 *
  22 * B+-tree with variable length key directory:
  23 *
  24 * each directory page is structured as an array of 32-byte
  25 * directory entry slots initialized as a freelist
  26 * to avoid search/compaction of free space at insertion.
  27 * when an entry is inserted, a number of slots are allocated
  28 * from the freelist as required to store variable length data
  29 * of the entry; when the entry is deleted, slots of the entry
  30 * are returned to freelist.
  31 *
  32 * leaf entry stores full name as key and file serial number
  33 * (aka inode number) as data.
  34 * internal/router entry stores sufffix compressed name
  35 * as key and simple extent descriptor as data.
  36 *
  37 * each directory page maintains a sorted entry index table
  38 * which stores the start slot index of sorted entries
  39 * to allow binary search on the table.
  40 *
  41 * directory starts as a root/leaf page in on-disk inode
  42 * inline data area.
  43 * when it becomes full, it starts a leaf of a external extent
  44 * of length of 1 block. each time the first leaf becomes full,
  45 * it is extended rather than split (its size is doubled),
  46 * until its length becoms 4 KBytes, from then the extent is split
  47 * with new 4 Kbyte extent when it becomes full
  48 * to reduce external fragmentation of small directories.
  49 *
  50 * blah, blah, blah, for linear scan of directory in pieces by
  51 * readdir().
  52 *
  53 *
  54 *      case-insensitive directory file system
  55 *
  56 * names are stored in case-sensitive way in leaf entry.
  57 * but stored, searched and compared in case-insensitive (uppercase) order
  58 * (i.e., both search key and entry key are folded for search/compare):
  59 * (note that case-sensitive order is BROKEN in storage, e.g.,
  60 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
  61 *
  62 *  entries which folds to the same key makes up a equivalent class
  63 *  whose members are stored as contiguous cluster (may cross page boundary)
  64 *  but whose order is arbitrary and acts as duplicate, e.g.,
  65 *  abc, Abc, aBc, abC)
  66 *
  67 * once match is found at leaf, requires scan forward/backward
  68 * either for, in case-insensitive search, duplicate
  69 * or for, in case-sensitive search, for exact match
  70 *
  71 * router entry must be created/stored in case-insensitive way
  72 * in internal entry:
  73 * (right most key of left page and left most key of right page
  74 * are folded, and its suffix compression is propagated as router
  75 * key in parent)
  76 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
  77 * should be made the router key for the split)
  78 *
  79 * case-insensitive search:
  80 *
  81 *      fold search key;
  82 *
  83 *      case-insensitive search of B-tree:
  84 *      for internal entry, router key is already folded;
  85 *      for leaf entry, fold the entry key before comparison.
  86 *
  87 *      if (leaf entry case-insensitive match found)
  88 *              if (next entry satisfies case-insensitive match)
  89 *                      return EDUPLICATE;
  90 *              if (prev entry satisfies case-insensitive match)
  91 *                      return EDUPLICATE;
  92 *              return match;
  93 *      else
  94 *              return no match;
  95 *
  96 *      serialization:
  97 * target directory inode lock is being held on entry/exit
  98 * of all main directory service routines.
  99 *
 100 *      log based recovery:
 101 */
 102
 103#include <linux/fs.h>
 104#include <linux/quotaops.h>
 105#include "jfs_incore.h"
 106#include "jfs_superblock.h"
 107#include "jfs_filsys.h"
 108#include "jfs_metapage.h"
 109#include "jfs_dmap.h"
 110#include "jfs_unicode.h"
 111#include "jfs_debug.h"
 112
 113/* dtree split parameter */
 114struct dtsplit {
 115        struct metapage *mp;
 116        s16 index;
 117        s16 nslot;
 118        struct component_name *key;
 119        ddata_t *data;
 120        struct pxdlist *pxdlist;
 121};
 122
 123#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
 124
 125/* get page buffer for specified block address */
 126#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
 127{\
 128        BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\
 129        if (!(RC))\
 130        {\
 131                if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\
 132                    ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\
 133                {\
 134                        BT_PUTPAGE(MP);\
 135                        jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\
 136                        MP = NULL;\
 137                        RC = -EIO;\
 138                }\
 139        }\
 140}
 141
 142/* for consistency */
 143#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
 144
 145#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
 146        BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
 147
 148/*
 149 * forward references
 150 */
 151static int dtSplitUp(tid_t tid, struct inode *ip,
 152                     struct dtsplit * split, struct btstack * btstack);
 153
 154static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 155                       struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
 156
 157static int dtExtendPage(tid_t tid, struct inode *ip,
 158                        struct dtsplit * split, struct btstack * btstack);
 159
 160static int dtSplitRoot(tid_t tid, struct inode *ip,
 161                       struct dtsplit * split, struct metapage ** rmpp);
 162
 163static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
 164                      dtpage_t * fp, struct btstack * btstack);
 165
 166static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
 167
 168static int dtReadFirst(struct inode *ip, struct btstack * btstack);
 169
 170static int dtReadNext(struct inode *ip,
 171                      loff_t * offset, struct btstack * btstack);
 172
 173static int dtCompare(struct component_name * key, dtpage_t * p, int si);
 174
 175static int ciCompare(struct component_name * key, dtpage_t * p, int si,
 176                     int flag);
 177
 178static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
 179                     int flag);
 180
 181static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
 182                              int ri, struct component_name * key, int flag);
 183
 184static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
 185                          ddata_t * data, struct dt_lock **);
 186
 187static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
 188                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
 189                        int do_index);
 190
 191static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
 192
 193static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
 194
 195static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
 196
 197#define ciToUpper(c)    UniStrupr((c)->name)
 198
 199/*
 200 *      read_index_page()
 201 *
 202 *      Reads a page of a directory's index table.
 203 *      Having metadata mapped into the directory inode's address space
 204 *      presents a multitude of problems.  We avoid this by mapping to
 205 *      the absolute address space outside of the *_metapage routines
 206 */
 207static struct metapage *read_index_page(struct inode *inode, s64 blkno)
 208{
 209        int rc;
 210        s64 xaddr;
 211        int xflag;
 212        s32 xlen;
 213
 214        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
 215        if (rc || (xaddr == 0))
 216                return NULL;
 217
 218        return read_metapage(inode, xaddr, PSIZE, 1);
 219}
 220
 221/*
 222 *      get_index_page()
 223 *
 224 *      Same as get_index_page(), but get's a new page without reading
 225 */
 226static struct metapage *get_index_page(struct inode *inode, s64 blkno)
 227{
 228        int rc;
 229        s64 xaddr;
 230        int xflag;
 231        s32 xlen;
 232
 233        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
 234        if (rc || (xaddr == 0))
 235                return NULL;
 236
 237        return get_metapage(inode, xaddr, PSIZE, 1);
 238}
 239
 240/*
 241 *      find_index()
 242 *
 243 *      Returns dtree page containing directory table entry for specified
 244 *      index and pointer to its entry.
 245 *
 246 *      mp must be released by caller.
 247 */
 248static struct dir_table_slot *find_index(struct inode *ip, u32 index,
 249                                         struct metapage ** mp, s64 *lblock)
 250{
 251        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 252        s64 blkno;
 253        s64 offset;
 254        int page_offset;
 255        struct dir_table_slot *slot;
 256        static int maxWarnings = 10;
 257
 258        if (index < 2) {
 259                if (maxWarnings) {
 260                        jfs_warn("find_entry called with index = %d", index);
 261                        maxWarnings--;
 262                }
 263                return NULL;
 264        }
 265
 266        if (index >= jfs_ip->next_index) {
 267                jfs_warn("find_entry called with index >= next_index");
 268                return NULL;
 269        }
 270
 271        if (jfs_dirtable_inline(ip)) {
 272                /*
 273                 * Inline directory table
 274                 */
 275                *mp = NULL;
 276                slot = &jfs_ip->i_dirtable[index - 2];
 277        } else {
 278                offset = (index - 2) * sizeof(struct dir_table_slot);
 279                page_offset = offset & (PSIZE - 1);
 280                blkno = ((offset + 1) >> L2PSIZE) <<
 281                    JFS_SBI(ip->i_sb)->l2nbperpage;
 282
 283                if (*mp && (*lblock != blkno)) {
 284                        release_metapage(*mp);
 285                        *mp = NULL;
 286                }
 287                if (*mp == 0) {
 288                        *lblock = blkno;
 289                        *mp = read_index_page(ip, blkno);
 290                }
 291                if (*mp == 0) {
 292                        jfs_err("free_index: error reading directory table");
 293                        return NULL;
 294                }
 295
 296                slot =
 297                    (struct dir_table_slot *) ((char *) (*mp)->data +
 298                                               page_offset);
 299        }
 300        return slot;
 301}
 302
 303static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
 304                              u32 index)
 305{
 306        struct tlock *tlck;
 307        struct linelock *llck;
 308        struct lv *lv;
 309
 310        tlck = txLock(tid, ip, mp, tlckDATA);
 311        llck = (struct linelock *) tlck->lock;
 312
 313        if (llck->index >= llck->maxcnt)
 314                llck = txLinelock(llck);
 315        lv = &llck->lv[llck->index];
 316
 317        /*
 318         *      Linelock slot size is twice the size of directory table
 319         *      slot size.  512 entries per page.
 320         */
 321        lv->offset = ((index - 2) & 511) >> 1;
 322        lv->length = 1;
 323        llck->index++;
 324}
 325
 326/*
 327 *      add_index()
 328 *
 329 *      Adds an entry to the directory index table.  This is used to provide
 330 *      each directory entry with a persistent index in which to resume
 331 *      directory traversals
 332 */
 333static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
 334{
 335        struct super_block *sb = ip->i_sb;
 336        struct jfs_sb_info *sbi = JFS_SBI(sb);
 337        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 338        u64 blkno;
 339        struct dir_table_slot *dirtab_slot;
 340        u32 index;
 341        struct linelock *llck;
 342        struct lv *lv;
 343        struct metapage *mp;
 344        s64 offset;
 345        uint page_offset;
 346        struct tlock *tlck;
 347        s64 xaddr;
 348
 349        ASSERT(DO_INDEX(ip));
 350
 351        if (jfs_ip->next_index < 2) {
 352                jfs_warn("add_index: next_index = %d.  Resetting!",
 353                           jfs_ip->next_index);
 354                jfs_ip->next_index = 2;
 355        }
 356
 357        index = jfs_ip->next_index++;
 358
 359        if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
 360                /*
 361                 * i_size reflects size of index table, or 8 bytes per entry.
 362                 */
 363                ip->i_size = (loff_t) (index - 1) << 3;
 364
 365                /*
 366                 * dir table fits inline within inode
 367                 */
 368                dirtab_slot = &jfs_ip->i_dirtable[index-2];
 369                dirtab_slot->flag = DIR_INDEX_VALID;
 370                dirtab_slot->slot = slot;
 371                DTSaddress(dirtab_slot, bn);
 372
 373                set_cflag(COMMIT_Dirtable, ip);
 374
 375                return index;
 376        }
 377        if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
 378                struct dir_table_slot temp_table[12];
 379
 380                /*
 381                 * It's time to move the inline table to an external
 382                 * page and begin to build the xtree
 383                 */
 384                if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage))
 385                        goto clean_up;
 386                if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
 387                        DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
 388                        goto clean_up;
 389                }
 390
 391                /*
 392                 * Save the table, we're going to overwrite it with the
 393                 * xtree root
 394                 */
 395                memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
 396
 397                /*
 398                 * Initialize empty x-tree
 399                 */
 400                xtInitRoot(tid, ip);
 401
 402                /*
 403                 * Add the first block to the xtree
 404                 */
 405                if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
 406                        /* This really shouldn't fail */
 407                        jfs_warn("add_index: xtInsert failed!");
 408                        memcpy(&jfs_ip->i_dirtable, temp_table,
 409                               sizeof (temp_table));
 410                        dbFree(ip, xaddr, sbi->nbperpage);
 411                        DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
 412                        goto clean_up;
 413                }
 414                ip->i_size = PSIZE;
 415
 416                if ((mp = get_index_page(ip, 0)) == 0) {
 417                        jfs_err("add_index: get_metapage failed!");
 418                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
 419                        memcpy(&jfs_ip->i_dirtable, temp_table,
 420                               sizeof (temp_table));
 421                        goto clean_up;
 422                }
 423                tlck = txLock(tid, ip, mp, tlckDATA);
 424                llck = (struct linelock *) & tlck->lock;
 425                ASSERT(llck->index == 0);
 426                lv = &llck->lv[0];
 427
 428                lv->offset = 0;
 429                lv->length = 6; /* tlckDATA slot size is 16 bytes */
 430                llck->index++;
 431
 432                memcpy(mp->data, temp_table, sizeof(temp_table));
 433
 434                mark_metapage_dirty(mp);
 435                release_metapage(mp);
 436
 437                /*
 438                 * Logging is now directed by xtree tlocks
 439                 */
 440                clear_cflag(COMMIT_Dirtable, ip);
 441        }
 442
 443        offset = (index - 2) * sizeof(struct dir_table_slot);
 444        page_offset = offset & (PSIZE - 1);
 445        blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
 446        if (page_offset == 0) {
 447                /*
 448                 * This will be the beginning of a new page
 449                 */
 450                xaddr = 0;
 451                if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
 452                        jfs_warn("add_index: xtInsert failed!");
 453                        goto clean_up;
 454                }
 455                ip->i_size += PSIZE;
 456
 457                if ((mp = get_index_page(ip, blkno)))
 458                        memset(mp->data, 0, PSIZE);     /* Just looks better */
 459                else
 460                        xtTruncate(tid, ip, offset, COMMIT_PWMAP);
 461        } else
 462                mp = read_index_page(ip, blkno);
 463
 464        if (mp == 0) {
 465                jfs_err("add_index: get/read_metapage failed!");
 466                goto clean_up;
 467        }
 468
 469        lock_index(tid, ip, mp, index);
 470
 471        dirtab_slot =
 472            (struct dir_table_slot *) ((char *) mp->data + page_offset);
 473        dirtab_slot->flag = DIR_INDEX_VALID;
 474        dirtab_slot->slot = slot;
 475        DTSaddress(dirtab_slot, bn);
 476
 477        mark_metapage_dirty(mp);
 478        release_metapage(mp);
 479
 480        return index;
 481
 482      clean_up:
 483
 484        jfs_ip->next_index--;
 485
 486        return 0;
 487}
 488
 489/*
 490 *      free_index()
 491 *
 492 *      Marks an entry to the directory index table as free.
 493 */
 494static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
 495{
 496        struct dir_table_slot *dirtab_slot;
 497        s64 lblock;
 498        struct metapage *mp = NULL;
 499
 500        dirtab_slot = find_index(ip, index, &mp, &lblock);
 501
 502        if (dirtab_slot == 0)
 503                return;
 504
 505        dirtab_slot->flag = DIR_INDEX_FREE;
 506        dirtab_slot->slot = dirtab_slot->addr1 = 0;
 507        dirtab_slot->addr2 = cpu_to_le32(next);
 508
 509        if (mp) {
 510                lock_index(tid, ip, mp, index);
 511                mark_metapage_dirty(mp);
 512                release_metapage(mp);
 513        } else
 514                set_cflag(COMMIT_Dirtable, ip);
 515}
 516
 517/*
 518 *      modify_index()
 519 *
 520 *      Changes an entry in the directory index table
 521 */
 522static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
 523                         int slot, struct metapage ** mp, s64 *lblock)
 524{
 525        struct dir_table_slot *dirtab_slot;
 526
 527        dirtab_slot = find_index(ip, index, mp, lblock);
 528
 529        if (dirtab_slot == 0)
 530                return;
 531
 532        DTSaddress(dirtab_slot, bn);
 533        dirtab_slot->slot = slot;
 534
 535        if (*mp) {
 536                lock_index(tid, ip, *mp, index);
 537                mark_metapage_dirty(*mp);
 538        } else
 539                set_cflag(COMMIT_Dirtable, ip);
 540}
 541
 542/*
 543 *      read_index()
 544 *
 545 *      reads a directory table slot
 546 */
 547static int read_index(struct inode *ip, u32 index,
 548                     struct dir_table_slot * dirtab_slot)
 549{
 550        s64 lblock;
 551        struct metapage *mp = NULL;
 552        struct dir_table_slot *slot;
 553
 554        slot = find_index(ip, index, &mp, &lblock);
 555        if (slot == 0) {
 556                return -EIO;
 557        }
 558
 559        memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));
 560
 561        if (mp)
 562                release_metapage(mp);
 563
 564        return 0;
 565}
 566
 567/*
 568 *      dtSearch()
 569 *
 570 * function:
 571 *      Search for the entry with specified key
 572 *
 573 * parameter:
 574 *
 575 * return: 0 - search result on stack, leaf page pinned;
 576 *         errno - I/O error
 577 */
 578int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
 579             struct btstack * btstack, int flag)
 580{
 581        int rc = 0;
 582        int cmp = 1;            /* init for empty page */
 583        s64 bn;
 584        struct metapage *mp;
 585        dtpage_t *p;
 586        s8 *stbl;
 587        int base, index, lim;
 588        struct btframe *btsp;
 589        pxd_t *pxd;
 590        int psize = 288;        /* initial in-line directory */
 591        ino_t inumber;
 592        struct component_name ciKey;
 593        struct super_block *sb = ip->i_sb;
 594
 595        ciKey.name =
 596            (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
 597                                GFP_NOFS);
 598        if (ciKey.name == 0) {
 599                rc = -ENOMEM;
 600                goto dtSearch_Exit2;
 601        }
 602
 603
 604        /* uppercase search key for c-i directory */
 605        UniStrcpy(ciKey.name, key->name);
 606        ciKey.namlen = key->namlen;
 607
 608        /* only uppercase if case-insensitive support is on */
 609        if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
 610                ciToUpper(&ciKey);
 611        }
 612        BT_CLR(btstack);        /* reset stack */
 613
 614        /* init level count for max pages to split */
 615        btstack->nsplit = 1;
 616
 617        /*
 618         *      search down tree from root:
 619         *
 620         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 621         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
 622         *
 623         * if entry with search key K is not found
 624         * internal page search find the entry with largest key Ki
 625         * less than K which point to the child page to search;
 626         * leaf page search find the entry with smallest key Kj
 627         * greater than K so that the returned index is the position of
 628         * the entry to be shifted right for insertion of new entry.
 629         * for empty tree, search key is greater than any key of the tree.
 630         *
 631         * by convention, root bn = 0.
 632         */
 633        for (bn = 0;;) {
 634                /* get/pin the page to search */
 635                DT_GETPAGE(ip, bn, mp, psize, p, rc);
 636                if (rc)
 637                        goto dtSearch_Exit1;
 638
 639                /* get sorted entry table of the page */
 640                stbl = DT_GETSTBL(p);
 641
 642                /*
 643                 * binary search with search key K on the current page.
 644                 */
 645                for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
 646                        index = base + (lim >> 1);
 647
 648                        if (p->header.flag & BT_LEAF) {
 649                                /* uppercase leaf name to compare */
 650                                cmp =
 651                                    ciCompare(&ciKey, p, stbl[index],
 652                                              JFS_SBI(sb)->mntflag);
 653                        } else {
 654                                /* router key is in uppercase */
 655
 656                                cmp = dtCompare(&ciKey, p, stbl[index]);
 657
 658
 659                        }
 660                        if (cmp == 0) {
 661                                /*
 662                                 *      search hit
 663                                 */
 664                                /* search hit - leaf page:
 665                                 * return the entry found
 666                                 */
 667                                if (p->header.flag & BT_LEAF) {
 668                                        inumber = le32_to_cpu(
 669                        ((struct ldtentry *) & p->slot[stbl[index]])->inumber);
 670
 671                                        /*
 672                                         * search for JFS_LOOKUP
 673                                         */
 674                                        if (flag == JFS_LOOKUP) {
 675                                                *data = inumber;
 676                                                rc = 0;
 677                                                goto out;
 678                                        }
 679
 680                                        /*
 681                                         * search for JFS_CREATE
 682                                         */
 683                                        if (flag == JFS_CREATE) {
 684                                                *data = inumber;
 685                                                rc = -EEXIST;
 686                                                goto out;
 687                                        }
 688
 689                                        /*
 690                                         * search for JFS_REMOVE or JFS_RENAME
 691                                         */
 692                                        if ((flag == JFS_REMOVE ||
 693                                             flag == JFS_RENAME) &&
 694                                            *data != inumber) {
 695                                                rc = -ESTALE;
 696                                                goto out;
 697                                        }
 698
 699                                        /*
 700                                         * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
 701                                         */
 702                                        /* save search result */
 703                                        *data = inumber;
 704                                        btsp = btstack->top;
 705                                        btsp->bn = bn;
 706                                        btsp->index = index;
 707                                        btsp->mp = mp;
 708
 709                                        rc = 0;
 710                                        goto dtSearch_Exit1;
 711                                }
 712
 713                                /* search hit - internal page:
 714                                 * descend/search its child page
 715                                 */
 716                                goto getChild;
 717                        }
 718
 719                        if (cmp > 0) {
 720                                base = index + 1;
 721                                --lim;
 722                        }
 723                }
 724
 725                /*
 726                 *      search miss
 727                 *
 728                 * base is the smallest index with key (Kj) greater than
 729                 * search key (K) and may be zero or (maxindex + 1) index.
 730                 */
 731                /*
 732                 * search miss - leaf page
 733                 *
 734                 * return location of entry (base) where new entry with
 735                 * search key K is to be inserted.
 736                 */
 737                if (p->header.flag & BT_LEAF) {
 738                        /*
 739                         * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
 740                         */
 741                        if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
 742                            flag == JFS_RENAME) {
 743                                rc = -ENOENT;
 744                                goto out;
 745                        }
 746
 747                        /*
 748                         * search for JFS_CREATE|JFS_FINDDIR:
 749                         *
 750                         * save search result
 751                         */
 752                        *data = 0;
 753                        btsp = btstack->top;
 754                        btsp->bn = bn;
 755                        btsp->index = base;
 756                        btsp->mp = mp;
 757
 758                        rc = 0;
 759                        goto dtSearch_Exit1;
 760                }
 761
 762                /*
 763                 * search miss - internal page
 764                 *
 765                 * if base is non-zero, decrement base by one to get the parent
 766                 * entry of the child page to search.
 767                 */
 768                index = base ? base - 1 : base;
 769
 770                /*
 771                 * go down to child page
 772                 */
 773              getChild:
 774                /* update max. number of pages to split */
 775                if (BT_STACK_FULL(btstack)) {
 776                        /* Something's corrupted, mark filesystem dirty so
 777                         * chkdsk will fix it.
 778                         */
 779                        jfs_error(sb, "stack overrun in dtSearch!");
 780                        BT_STACK_DUMP(btstack);
 781                        rc = -EIO;
 782                        goto out;
 783                }
 784                btstack->nsplit++;
 785
 786                /* push (bn, index) of the parent page/entry */
 787                BT_PUSH(btstack, bn, index);
 788
 789                /* get the child page block number */
 790                pxd = (pxd_t *) & p->slot[stbl[index]];
 791                bn = addressPXD(pxd);
 792                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
 793
 794                /* unpin the parent page */
 795                DT_PUTPAGE(mp);
 796        }
 797
 798      out:
 799        DT_PUTPAGE(mp);
 800
 801      dtSearch_Exit1:
 802
 803        kfree(ciKey.name);
 804
 805      dtSearch_Exit2:
 806
 807        return rc;
 808}
 809
 810
 811/*
 812 *      dtInsert()
 813 *
 814 * function: insert an entry to directory tree
 815 *
 816 * parameter:
 817 *
 818 * return: 0 - success;
 819 *         errno - failure;
 820 */
 821int dtInsert(tid_t tid, struct inode *ip,
 822         struct component_name * name, ino_t * fsn, struct btstack * btstack)
 823{
 824        int rc = 0;
 825        struct metapage *mp;    /* meta-page buffer */
 826        dtpage_t *p;            /* base B+-tree index page */
 827        s64 bn;
 828        int index;
 829        struct dtsplit split;   /* split information */
 830        ddata_t data;
 831        struct dt_lock *dtlck;
 832        int n;
 833        struct tlock *tlck;
 834        struct lv *lv;
 835
 836        /*
 837         *      retrieve search result
 838         *
 839         * dtSearch() returns (leaf page pinned, index at which to insert).
 840         * n.b. dtSearch() may return index of (maxindex + 1) of
 841         * the full page.
 842         */
 843        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
 844
 845        /*
 846         *      insert entry for new key
 847         */
 848        if (DO_INDEX(ip)) {
 849                if (JFS_IP(ip)->next_index == DIREND) {
 850                        DT_PUTPAGE(mp);
 851                        return -EMLINK;
 852                }
 853                n = NDTLEAF(name->namlen);
 854                data.leaf.tid = tid;
 855                data.leaf.ip = ip;
 856        } else {
 857                n = NDTLEAF_LEGACY(name->namlen);
 858                data.leaf.ip = NULL;    /* signifies legacy directory format */
 859        }
 860        data.leaf.ino = *fsn;
 861
 862        /*
 863         *      leaf page does not have enough room for new entry:
 864         *
 865         *      extend/split the leaf page;
 866         *
 867         * dtSplitUp() will insert the entry and unpin the leaf page.
 868         */
 869        if (n > p->header.freecnt) {
 870                split.mp = mp;
 871                split.index = index;
 872                split.nslot = n;
 873                split.key = name;
 874                split.data = &data;
 875                rc = dtSplitUp(tid, ip, &split, btstack);
 876                return rc;
 877        }
 878
 879        /*
 880         *      leaf page does have enough room for new entry:
 881         *
 882         *      insert the new data entry into the leaf page;
 883         */
 884        BT_MARK_DIRTY(mp, ip);
 885        /*
 886         * acquire a transaction lock on the leaf page
 887         */
 888        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
 889        dtlck = (struct dt_lock *) & tlck->lock;
 890        ASSERT(dtlck->index == 0);
 891        lv = & dtlck->lv[0];
 892
 893        /* linelock header */
 894        lv->offset = 0;
 895        lv->length = 1;
 896        dtlck->index++;
 897
 898        dtInsertEntry(p, index, name, &data, &dtlck);
 899
 900        /* linelock stbl of non-root leaf page */
 901        if (!(p->header.flag & BT_ROOT)) {
 902                if (dtlck->index >= dtlck->maxcnt)
 903                        dtlck = (struct dt_lock *) txLinelock(dtlck);
 904                lv = & dtlck->lv[dtlck->index];
 905                n = index >> L2DTSLOTSIZE;
 906                lv->offset = p->header.stblindex + n;
 907                lv->length =
 908                    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
 909                dtlck->index++;
 910        }
 911
 912        /* unpin the leaf page */
 913        DT_PUTPAGE(mp);
 914
 915        return 0;
 916}
 917
 918
 919/*
 920 *      dtSplitUp()
 921 *
 922 * function: propagate insertion bottom up;
 923 *
 924 * parameter:
 925 *
 926 * return: 0 - success;
 927 *         errno - failure;
 928 *      leaf page unpinned;
 929 */
 930static int dtSplitUp(tid_t tid,
 931          struct inode *ip, struct dtsplit * split, struct btstack * btstack)
 932{
 933        struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
 934        int rc = 0;
 935        struct metapage *smp;
 936        dtpage_t *sp;           /* split page */
 937        struct metapage *rmp;
 938        dtpage_t *rp;           /* new right page split from sp */
 939        pxd_t rpxd;             /* new right page extent descriptor */
 940        struct metapage *lmp;
 941        dtpage_t *lp;           /* left child page */
 942        int skip;               /* index of entry of insertion */
 943        struct btframe *parent; /* parent page entry on traverse stack */
 944        s64 xaddr, nxaddr;
 945        int xlen, xsize;
 946        struct pxdlist pxdlist;
 947        pxd_t *pxd;
 948        struct component_name key = { 0, NULL };
 949        ddata_t *data = split->data;
 950        int n;
 951        struct dt_lock *dtlck;
 952        struct tlock *tlck;
 953        struct lv *lv;
 954        int quota_allocation = 0;
 955
 956        /* get split page */
 957        smp = split->mp;
 958        sp = DT_PAGE(ip, smp);
 959
 960        key.name =
 961            (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t),
 962                                GFP_NOFS);
 963        if (key.name == 0) {
 964                DT_PUTPAGE(smp);
 965                rc = -ENOMEM;
 966                goto dtSplitUp_Exit;
 967        }
 968
 969        /*
 970         *      split leaf page
 971         *
 972         * The split routines insert the new entry, and
 973         * acquire txLock as appropriate.
 974         */
 975        /*
 976         *      split root leaf page:
 977         */
 978        if (sp->header.flag & BT_ROOT) {
 979                /*
 980                 * allocate a single extent child page
 981                 */
 982                xlen = 1;
 983                n = sbi->bsize >> L2DTSLOTSIZE;
 984                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
 985                n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
 986                if (n <= split->nslot)
 987                        xlen++;
 988                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
 989                        DT_PUTPAGE(smp);
 990                        goto freeKeyName;
 991                }
 992
 993                pxdlist.maxnpxd = 1;
 994                pxdlist.npxd = 0;
 995                pxd = &pxdlist.pxd[0];
 996                PXDaddress(pxd, xaddr);
 997                PXDlength(pxd, xlen);
 998                split->pxdlist = &pxdlist;
 999                rc = dtSplitRoot(tid, ip, split, &rmp);
1000
1001                if (rc)
1002                        dbFree(ip, xaddr, xlen);
1003                else
1004                        DT_PUTPAGE(rmp);
1005
1006                DT_PUTPAGE(smp);
1007
1008                if (!DO_INDEX(ip))
1009                        ip->i_size = xlen << sbi->l2bsize;
1010
1011                goto freeKeyName;
1012        }
1013
1014        /*
1015         *      extend first leaf page
1016         *
1017         * extend the 1st extent if less than buffer page size
1018         * (dtExtendPage() reurns leaf page unpinned)
1019         */
1020        pxd = &sp->header.self;
1021        xlen = lengthPXD(pxd);
1022        xsize = xlen << sbi->l2bsize;
1023        if (xsize < PSIZE) {
1024                xaddr = addressPXD(pxd);
1025                n = xsize >> L2DTSLOTSIZE;
1026                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
1027                if ((n + sp->header.freecnt) <= split->nslot)
1028                        n = xlen + (xlen << 1);
1029                else
1030                        n = xlen;
1031
1032                /* Allocate blocks to quota. */
1033                if (DQUOT_ALLOC_BLOCK(ip, n)) {
1034                        rc = -EDQUOT;
1035                        goto extendOut;
1036                }
1037                quota_allocation += n;
1038
1039                if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
1040                                    (s64) n, &nxaddr)))
1041                        goto extendOut;
1042
1043                pxdlist.maxnpxd = 1;
1044                pxdlist.npxd = 0;
1045                pxd = &pxdlist.pxd[0];
1046                PXDaddress(pxd, nxaddr)
1047                    PXDlength(pxd, xlen + n);
1048                split->pxdlist = &pxdlist;
1049                if ((rc = dtExtendPage(tid, ip, split, btstack))) {
1050                        nxaddr = addressPXD(pxd);
1051                        if (xaddr != nxaddr) {
1052                                /* free relocated extent */
1053                                xlen = lengthPXD(pxd);
1054                                dbFree(ip, nxaddr, (s64) xlen);
1055                        } else {
1056                                /* free extended delta */
1057                                xlen = lengthPXD(pxd) - n;
1058                                xaddr = addressPXD(pxd) + xlen;
1059                                dbFree(ip, xaddr, (s64) n);
1060                        }
1061                } else if (!DO_INDEX(ip))
1062                        ip->i_size = lengthPXD(pxd) << sbi->l2bsize;
1063
1064
1065              extendOut:
1066                DT_PUTPAGE(smp);
1067                goto freeKeyName;
1068        }
1069
1070        /*
1071         *      split leaf page <sp> into <sp> and a new right page <rp>.
1072         *
1073         * return <rp> pinned and its extent descriptor <rpxd>
1074         */
1075        /*
1076         * allocate new directory page extent and
1077         * new index page(s) to cover page split(s)
1078         *
1079         * allocation hint: ?
1080         */
1081        n = btstack->nsplit;
1082        pxdlist.maxnpxd = pxdlist.npxd = 0;
1083        xlen = sbi->nbperpage;
1084        for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
1085                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
1086                        PXDaddress(pxd, xaddr);
1087                        PXDlength(pxd, xlen);
1088                        pxdlist.maxnpxd++;
1089                        continue;
1090                }
1091
1092                DT_PUTPAGE(smp);
1093
1094                /* undo allocation */
1095                goto splitOut;
1096        }
1097
1098        split->pxdlist = &pxdlist;
1099        if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
1100                DT_PUTPAGE(smp);
1101
1102                /* undo allocation */
1103                goto splitOut;
1104        }
1105
1106        if (!DO_INDEX(ip))
1107                ip->i_size += PSIZE;
1108
1109        /*
1110         * propagate up the router entry for the leaf page just split
1111         *
1112         * insert a router entry for the new page into the parent page,
1113         * propagate the insert/split up the tree by walking back the stack
1114         * of (bn of parent page, index of child page entry in parent page)
1115         * that were traversed during the search for the page that split.
1116         *
1117         * the propagation of insert/split up the tree stops if the root
1118         * splits or the page inserted into doesn't have to split to hold
1119         * the new entry.
1120         *
1121         * the parent entry for the split page remains the same, and
1122         * a new entry is inserted at its right with the first key and
1123         * block number of the new right page.
1124         *
1125         * There are a maximum of 4 pages pinned at any time:
1126         * two children, left parent and right parent (when the parent splits).
1127         * keep the child pages pinned while working on the parent.
1128         * make sure that all pins are released at exit.
1129         */
1130        while ((parent = BT_POP(btstack)) != NULL) {
1131                /* parent page specified by stack frame <parent> */
1132
1133                /* keep current child pages (<lp>, <rp>) pinned */
1134                lmp = smp;
1135                lp = sp;
1136
1137                /*
1138                 * insert router entry in parent for new right child page <rp>
1139                 */
1140                /* get the parent page <sp> */
1141                DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1142                if (rc) {
1143                        DT_PUTPAGE(lmp);
1144                        DT_PUTPAGE(rmp);
1145                        goto splitOut;
1146                }
1147
1148                /*
1149                 * The new key entry goes ONE AFTER the index of parent entry,
1150                 * because the split was to the right.
1151                 */
1152                skip = parent->index + 1;
1153
1154                /*
1155                 * compute the key for the router entry
1156                 *
1157                 * key suffix compression:
1158                 * for internal pages that have leaf pages as children,
1159                 * retain only what's needed to distinguish between
1160                 * the new entry and the entry on the page to its left.
1161                 * If the keys compare equal, retain the entire key.
1162                 *
1163                 * note that compression is performed only at computing
1164                 * router key at the lowest internal level.
1165                 * further compression of the key between pairs of higher
1166                 * level internal pages loses too much information and
1167                 * the search may fail.
1168                 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
1169                 * results in two adjacent parent entries (a)(xx).
1170                 * if split occurs between these two entries, and
1171                 * if compression is applied, the router key of parent entry
1172                 * of right page (x) will divert search for x into right
1173                 * subtree and miss x in the left subtree.)
1174                 *
1175                 * the entire key must be retained for the next-to-leftmost
1176                 * internal key at any level of the tree, or search may fail
1177                 * (e.g., ?)
1178                 */
1179                switch (rp->header.flag & BT_TYPE) {
1180                case BT_LEAF:
1181                        /*
1182                         * compute the length of prefix for suffix compression
1183                         * between last entry of left page and first entry
1184                         * of right page
1185                         */
1186                        if ((sp->header.flag & BT_ROOT && skip > 1) ||
1187                            sp->header.prev != 0 || skip > 1) {
1188                                /* compute uppercase router prefix key */
1189                                rc = ciGetLeafPrefixKey(lp,
1190                                                        lp->header.nextindex-1,
1191                                                        rp, 0, &key,
1192                                                        sbi->mntflag);
1193                                if (rc) {
1194                                        DT_PUTPAGE(lmp);
1195                                        DT_PUTPAGE(rmp);
1196                                        DT_PUTPAGE(smp);
1197                                        goto splitOut;
1198                                }
1199                        } else {
1200                                /* next to leftmost entry of
1201                                   lowest internal level */
1202
1203                                /* compute uppercase router key */
1204                                dtGetKey(rp, 0, &key, sbi->mntflag);
1205                                key.name[key.namlen] = 0;
1206
1207                                if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
1208                                        ciToUpper(&key);
1209                        }
1210
1211                        n = NDTINTERNAL(key.namlen);
1212                        break;
1213
1214                case BT_INTERNAL:
1215                        dtGetKey(rp, 0, &key, sbi->mntflag);
1216                        n = NDTINTERNAL(key.namlen);
1217                        break;
1218
1219                default:
1220                        jfs_err("dtSplitUp(): UFO!");
1221                        break;
1222                }
1223
1224                /* unpin left child page */
1225                DT_PUTPAGE(lmp);
1226
1227                /*
1228                 * compute the data for the router entry
1229                 */
1230                data->xd = rpxd;        /* child page xd */
1231
1232                /*
1233                 * parent page is full - split the parent page
1234                 */
1235                if (n > sp->header.freecnt) {
1236                        /* init for parent page split */
1237                        split->mp = smp;
1238                        split->index = skip;    /* index at insert */
1239                        split->nslot = n;
1240                        split->key = &key;
1241                        /* split->data = data; */
1242
1243                        /* unpin right child page */
1244                        DT_PUTPAGE(rmp);
1245
1246                        /* The split routines insert the new entry,
1247                         * acquire txLock as appropriate.
1248                         * return <rp> pinned and its block number <rbn>.
1249                         */
1250                        rc = (sp->header.flag & BT_ROOT) ?
1251                            dtSplitRoot(tid, ip, split, &rmp) :
1252                            dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
1253                        if (rc) {
1254                                DT_PUTPAGE(smp);
1255                                goto splitOut;
1256                        }
1257
1258                        /* smp and rmp are pinned */
1259                }
1260                /*
1261                 * parent page is not full - insert router entry in parent page
1262                 */
1263                else {
1264                        BT_MARK_DIRTY(smp, ip);
1265                        /*
1266                         * acquire a transaction lock on the parent page
1267                         */
1268                        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1269                        dtlck = (struct dt_lock *) & tlck->lock;
1270                        ASSERT(dtlck->index == 0);
1271                        lv = & dtlck->lv[0];
1272
1273                        /* linelock header */
1274                        lv->offset = 0;
1275                        lv->length = 1;
1276                        dtlck->index++;
1277
1278                        /* linelock stbl of non-root parent page */
1279                        if (!(sp->header.flag & BT_ROOT)) {
1280                                lv++;
1281                                n = skip >> L2DTSLOTSIZE;
1282                                lv->offset = sp->header.stblindex + n;
1283                                lv->length =
1284                                    ((sp->header.nextindex -
1285                                      1) >> L2DTSLOTSIZE) - n + 1;
1286                                dtlck->index++;
1287                        }
1288
1289                        dtInsertEntry(sp, skip, &key, data, &dtlck);
1290
1291                        /* exit propagate up */
1292                        break;
1293                }
1294        }
1295
1296        /* unpin current split and its right page */
1297        DT_PUTPAGE(smp);
1298        DT_PUTPAGE(rmp);
1299
1300        /*
1301         * free remaining extents allocated for split
1302         */
1303      splitOut:
1304        n = pxdlist.npxd;
1305        pxd = &pxdlist.pxd[n];
1306        for (; n < pxdlist.maxnpxd; n++, pxd++)
1307                dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));
1308
1309      freeKeyName:
1310        kfree(key.name);
1311
1312        /* Rollback quota allocation */
1313        if (rc && quota_allocation)
1314                DQUOT_FREE_BLOCK(ip, quota_allocation);
1315
1316      dtSplitUp_Exit:
1317
1318        return rc;
1319}
1320
1321
1322/*
1323 *      dtSplitPage()
1324 *
1325 * function: Split a non-root page of a btree.
1326 *
1327 * parameter:
1328 *
1329 * return: 0 - success;
1330 *         errno - failure;
1331 *      return split and new page pinned;
1332 */
1333static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1334            struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
1335{
1336        int rc = 0;
1337        struct metapage *smp;
1338        dtpage_t *sp;
1339        struct metapage *rmp;
1340        dtpage_t *rp;           /* new right page allocated */
1341        s64 rbn;                /* new right page block number */
1342        struct metapage *mp;
1343        dtpage_t *p;
1344        s64 nextbn;
1345        struct pxdlist *pxdlist;
1346        pxd_t *pxd;
1347        int skip, nextindex, half, left, nxt, off, si;
1348        struct ldtentry *ldtentry;
1349        struct idtentry *idtentry;
1350        u8 *stbl;
1351        struct dtslot *f;
1352        int fsi, stblsize;
1353        int n;
1354        struct dt_lock *sdtlck, *rdtlck;
1355        struct tlock *tlck;
1356        struct dt_lock *dtlck;
1357        struct lv *slv, *rlv, *lv;
1358
1359        /* get split page */
1360        smp = split->mp;
1361        sp = DT_PAGE(ip, smp);
1362
1363        /*
1364         * allocate the new right page for the split
1365         */
1366        pxdlist = split->pxdlist;
1367        pxd = &pxdlist->pxd[pxdlist->npxd];
1368        pxdlist->npxd++;
1369        rbn = addressPXD(pxd);
1370        rmp = get_metapage(ip, rbn, PSIZE, 1);
1371        if (rmp == NULL)
1372                return -EIO;
1373
1374        /* Allocate blocks to quota. */
1375        if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
1376                release_metapage(rmp);
1377                return -EDQUOT;
1378        }
1379
1380        jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1381
1382        BT_MARK_DIRTY(rmp, ip);
1383        /*
1384         * acquire a transaction lock on the new right page
1385         */
1386        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1387        rdtlck = (struct dt_lock *) & tlck->lock;
1388
1389        rp = (dtpage_t *) rmp->data;
1390        *rpp = rp;
1391        rp->header.self = *pxd;
1392
1393        BT_MARK_DIRTY(smp, ip);
1394        /*
1395         * acquire a transaction lock on the split page
1396         *
1397         * action:
1398         */
1399        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1400        sdtlck = (struct dt_lock *) & tlck->lock;
1401
1402        /* linelock header of split page */
1403        ASSERT(sdtlck->index == 0);
1404        slv = & sdtlck->lv[0];
1405        slv->offset = 0;
1406        slv->length = 1;
1407        sdtlck->index++;
1408
1409        /*
1410         * initialize/update sibling pointers between sp and rp
1411         */
1412        nextbn = le64_to_cpu(sp->header.next);
1413        rp->header.next = cpu_to_le64(nextbn);
1414        rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1415        sp->header.next = cpu_to_le64(rbn);
1416
1417        /*
1418         * initialize new right page
1419         */
1420        rp->header.flag = sp->header.flag;
1421
1422        /* compute sorted entry table at start of extent data area */
1423        rp->header.nextindex = 0;
1424        rp->header.stblindex = 1;
1425
1426        n = PSIZE >> L2DTSLOTSIZE;
1427        rp->header.maxslot = n;
1428        stblsize = (n + 31) >> L2DTSLOTSIZE;    /* in unit of slot */
1429
1430        /* init freelist */
1431        fsi = rp->header.stblindex + stblsize;
1432        rp->header.freelist = fsi;
1433        rp->header.freecnt = rp->header.maxslot - fsi;
1434
1435        /*
1436         *      sequential append at tail: append without split
1437         *
1438         * If splitting the last page on a level because of appending
1439         * a entry to it (skip is maxentry), it's likely that the access is
1440         * sequential. Adding an empty page on the side of the level is less
1441         * work and can push the fill factor much higher than normal.
1442         * If we're wrong it's no big deal, we'll just do the split the right
1443         * way next time.
1444         * (It may look like it's equally easy to do a similar hack for
1445         * reverse sorted data, that is, split the tree left,
1446         * but it's not. Be my guest.)
1447         */
1448        if (nextbn == 0 && split->index == sp->header.nextindex) {
1449                /* linelock header + stbl (first slot) of new page */
1450                rlv = & rdtlck->lv[rdtlck->index];
1451                rlv->offset = 0;
1452                rlv->length = 2;
1453                rdtlck->index++;
1454
1455                /*
1456                 * initialize freelist of new right page
1457                 */
1458                f = &rp->slot[fsi];
1459                for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1460                        f->next = fsi;
1461                f->next = -1;
1462
1463                /* insert entry at the first entry of the new right page */
1464                dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
1465
1466                goto out;
1467        }
1468
1469        /*
1470         *      non-sequential insert (at possibly middle page)
1471         */
1472
1473        /*
1474         * update prev pointer of previous right sibling page;
1475         */
1476        if (nextbn != 0) {
1477                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1478                if (rc) {
1479                        discard_metapage(rmp);
1480                        return rc;
1481                }
1482
1483                BT_MARK_DIRTY(mp, ip);
1484                /*
1485                 * acquire a transaction lock on the next page
1486                 */
1487                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
1488                jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
1489                        tlck, ip, mp);
1490                dtlck = (struct dt_lock *) & tlck->lock;
1491
1492                /* linelock header of previous right sibling page */
1493                lv = & dtlck->lv[dtlck->index];
1494                lv->offset = 0;
1495                lv->length = 1;
1496                dtlck->index++;
1497
1498                p->header.prev = cpu_to_le64(rbn);
1499
1500                DT_PUTPAGE(mp);
1501        }
1502
1503        /*
1504         * split the data between the split and right pages.
1505         */
1506        skip = split->index;
1507        half = (PSIZE >> L2DTSLOTSIZE) >> 1;    /* swag */
1508        left = 0;
1509
1510        /*
1511         *      compute fill factor for split pages
1512         *
1513         * <nxt> traces the next entry to move to rp
1514         * <off> traces the next entry to stay in sp
1515         */
1516        stbl = (u8 *) & sp->slot[sp->header.stblindex];
1517        nextindex = sp->header.nextindex;
1518        for (nxt = off = 0; nxt < nextindex; ++off) {
1519                if (off == skip)
1520                        /* check for fill factor with new entry size */
1521                        n = split->nslot;
1522                else {
1523                        si = stbl[nxt];
1524                        switch (sp->header.flag & BT_TYPE) {
1525                        case BT_LEAF:
1526                                ldtentry = (struct ldtentry *) & sp->slot[si];
1527                                if (DO_INDEX(ip))
1528                                        n = NDTLEAF(ldtentry->namlen);
1529                                else
1530                                        n = NDTLEAF_LEGACY(ldtentry->
1531                                                           namlen);
1532                                break;
1533
1534                        case BT_INTERNAL:
1535                                idtentry = (struct idtentry *) & sp->slot[si];
1536                                n = NDTINTERNAL(idtentry->namlen);
1537                                break;
1538
1539                        default:
1540                                break;
1541                        }
1542
1543                        ++nxt;  /* advance to next entry to move in sp */
1544                }
1545
1546                left += n;
1547                if (left >= half)
1548                        break;
1549        }
1550
1551        /* <nxt> poins to the 1st entry to move */
1552
1553        /*
1554         *      move entries to right page
1555         *
1556         * dtMoveEntry() initializes rp and reserves entry for insertion
1557         *
1558         * split page moved out entries are linelocked;
1559         * new/right page moved in entries are linelocked;
1560         */
1561        /* linelock header + stbl of new right page */
1562        rlv = & rdtlck->lv[rdtlck->index];
1563        rlv->offset = 0;
1564        rlv->length = 5;
1565        rdtlck->index++;
1566
1567        dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
1568
1569        sp->header.nextindex = nxt;
1570
1571        /*
1572         * finalize freelist of new right page
1573         */
1574        fsi = rp->header.freelist;
1575        f = &rp->slot[fsi];
1576        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1577                f->next = fsi;
1578        f->next = -1;
1579
1580        /*
1581         * Update directory index table for entries now in right page
1582         */
1583        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1584                s64 lblock;
1585
1586                mp = NULL;
1587                stbl = DT_GETSTBL(rp);
1588                for (n = 0; n < rp->header.nextindex; n++) {
1589                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1590                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1591                                     rbn, n, &mp, &lblock);
1592                }
1593                if (mp)
1594                        release_metapage(mp);
1595        }
1596
1597        /*
1598         * the skipped index was on the left page,
1599         */
1600        if (skip <= off) {
1601                /* insert the new entry in the split page */
1602                dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
1603
1604                /* linelock stbl of split page */
1605                if (sdtlck->index >= sdtlck->maxcnt)
1606                        sdtlck = (struct dt_lock *) txLinelock(sdtlck);
1607                slv = & sdtlck->lv[sdtlck->index];
1608                n = skip >> L2DTSLOTSIZE;
1609                slv->offset = sp->header.stblindex + n;
1610                slv->length =
1611                    ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
1612                sdtlck->index++;
1613        }
1614        /*
1615         * the skipped index was on the right page,
1616         */
1617        else {
1618                /* adjust the skip index to reflect the new position */
1619                skip -= nxt;
1620
1621                /* insert the new entry in the right page */
1622                dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
1623        }
1624
1625      out:
1626        *rmpp = rmp;
1627        *rpxdp = *pxd;
1628
1629        return rc;
1630}
1631
1632
1633/*
1634 *      dtExtendPage()
1635 *
1636 * function: extend 1st/only directory leaf page
1637 *
1638 * parameter:
1639 *
1640 * return: 0 - success;
1641 *         errno - failure;
1642 *      return extended page pinned;
1643 */
1644static int dtExtendPage(tid_t tid,
1645             struct inode *ip, struct dtsplit * split, struct btstack * btstack)
1646{
1647        struct super_block *sb = ip->i_sb;
1648        int rc;
1649        struct metapage *smp, *pmp, *mp;
1650        dtpage_t *sp, *pp;
1651        struct pxdlist *pxdlist;
1652        pxd_t *pxd, *tpxd;
1653        int xlen, xsize;
1654        int newstblindex, newstblsize;
1655        int oldstblindex, oldstblsize;
1656        int fsi, last;
1657        struct dtslot *f;
1658        struct btframe *parent;
1659        int n;
1660        struct dt_lock *dtlck;
1661        s64 xaddr, txaddr;
1662        struct tlock *tlck;
1663        struct pxd_lock *pxdlock;
1664        struct lv *lv;
1665        uint type;
1666        struct ldtentry *ldtentry;
1667        u8 *stbl;
1668
1669        /* get page to extend */
1670        smp = split->mp;
1671        sp = DT_PAGE(ip, smp);
1672
1673        /* get parent/root page */
1674        parent = BT_POP(btstack);
1675        DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
1676        if (rc)
1677                return (rc);
1678
1679        /*
1680         *      extend the extent
1681         */
1682        pxdlist = split->pxdlist;
1683        pxd = &pxdlist->pxd[pxdlist->npxd];
1684        pxdlist->npxd++;
1685
1686        xaddr = addressPXD(pxd);
1687        tpxd = &sp->header.self;
1688        txaddr = addressPXD(tpxd);
1689        /* in-place extension */
1690        if (xaddr == txaddr) {
1691                type = tlckEXTEND;
1692        }
1693        /* relocation */
1694        else {
1695                type = tlckNEW;
1696
1697                /* save moved extent descriptor for later free */
1698                tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
1699                pxdlock = (struct pxd_lock *) & tlck->lock;
1700                pxdlock->flag = mlckFREEPXD;
1701                pxdlock->pxd = sp->header.self;
1702                pxdlock->index = 1;
1703
1704                /*
1705                 * Update directory index table to reflect new page address
1706                 */
1707                if (DO_INDEX(ip)) {
1708                        s64 lblock;
1709
1710                        mp = NULL;
1711                        stbl = DT_GETSTBL(sp);
1712                        for (n = 0; n < sp->header.nextindex; n++) {
1713                                ldtentry =
1714                                    (struct ldtentry *) & sp->slot[stbl[n]];
1715                                modify_index(tid, ip,
1716                                             le32_to_cpu(ldtentry->index),
1717                                             xaddr, n, &mp, &lblock);
1718                        }
1719                        if (mp)
1720                                release_metapage(mp);
1721                }
1722        }
1723
1724        /*
1725         *      extend the page
1726         */
1727        sp->header.self = *pxd;
1728
1729        jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
1730
1731        BT_MARK_DIRTY(smp, ip);
1732        /*
1733         * acquire a transaction lock on the extended/leaf page
1734         */
1735        tlck = txLock(tid, ip, smp, tlckDTREE | type);
1736        dtlck = (struct dt_lock *) & tlck->lock;
1737        lv = & dtlck->lv[0];
1738
1739        /* update buffer extent descriptor of extended page */
1740        xlen = lengthPXD(pxd);
1741        xsize = xlen << JFS_SBI(sb)->l2bsize;
1742
1743        /*
1744         * copy old stbl to new stbl at start of extended area
1745         */
1746        oldstblindex = sp->header.stblindex;
1747        oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
1748        newstblindex = sp->header.maxslot;
1749        n = xsize >> L2DTSLOTSIZE;
1750        newstblsize = (n + 31) >> L2DTSLOTSIZE;
1751        memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
1752               sp->header.nextindex);
1753
1754        /*
1755         * in-line extension: linelock old area of extended page
1756         */
1757        if (type == tlckEXTEND) {
1758                /* linelock header */
1759                lv->offset = 0;
1760                lv->length = 1;
1761                dtlck->index++;
1762                lv++;
1763
1764                /* linelock new stbl of extended page */
1765                lv->offset = newstblindex;
1766                lv->length = newstblsize;
1767        }
1768        /*
1769         * relocation: linelock whole relocated area
1770         */
1771        else {
1772                lv->offset = 0;
1773                lv->length = sp->header.maxslot + newstblsize;
1774        }
1775
1776        dtlck->index++;
1777
1778        sp->header.maxslot = n;
1779        sp->header.stblindex = newstblindex;
1780        /* sp->header.nextindex remains the same */
1781
1782        /*
1783         * add old stbl region at head of freelist
1784         */
1785        fsi = oldstblindex;
1786        f = &sp->slot[fsi];
1787        last = sp->header.freelist;
1788        for (n = 0; n < oldstblsize; n++, fsi++, f++) {
1789                f->next = last;
1790                last = fsi;
1791        }
1792        sp->header.freelist = last;
1793        sp->header.freecnt += oldstblsize;
1794
1795        /*
1796         * append free region of newly extended area at tail of freelist
1797         */
1798        /* init free region of newly extended area */
1799        fsi = n = newstblindex + newstblsize;
1800        f = &sp->slot[fsi];
1801        for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
1802                f->next = fsi;
1803        f->next = -1;
1804
1805        /* append new free region at tail of old freelist */
1806        fsi = sp->header.freelist;
1807        if (fsi == -1)
1808                sp->header.freelist = n;
1809        else {
1810                do {
1811                        f = &sp->slot[fsi];
1812                        fsi = f->next;
1813                } while (fsi != -1);
1814
1815                f->next = n;
1816        }
1817
1818        sp->header.freecnt += sp->header.maxslot - n;
1819
1820        /*
1821         * insert the new entry
1822         */
1823        dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
1824
1825        BT_MARK_DIRTY(pmp, ip);
1826        /*
1827         * linelock any freeslots residing in old extent
1828         */
1829        if (type == tlckEXTEND) {
1830                n = sp->header.maxslot >> 2;
1831                if (sp->header.freelist < n)
1832                        dtLinelockFreelist(sp, n, &dtlck);
1833        }
1834
1835        /*
1836         *      update parent entry on the parent/root page
1837         */
1838        /*
1839         * acquire a transaction lock on the parent/root page
1840         */
1841        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
1842        dtlck = (struct dt_lock *) & tlck->lock;
1843        lv = & dtlck->lv[dtlck->index];
1844
1845        /* linelock parent entry - 1st slot */
1846        lv->offset = 1;
1847        lv->length = 1;
1848        dtlck->index++;
1849
1850        /* update the parent pxd for page extension */
1851        tpxd = (pxd_t *) & pp->slot[1];
1852        *tpxd = *pxd;
1853
1854        DT_PUTPAGE(pmp);
1855        return 0;
1856}
1857
1858
1859/*
1860 *      dtSplitRoot()
1861 *
1862 * function:
1863 *      split the full root page into
1864 *      original/root/split page and new right page
1865 *      i.e., root remains fixed in tree anchor (inode) and
1866 *      the root is copied to a single new right child page
1867 *      since root page << non-root page, and
1868 *      the split root page contains a single entry for the
1869 *      new right child page.
1870 *
1871 * parameter:
1872 *
1873 * return: 0 - success;
1874 *         errno - failure;
1875 *      return new page pinned;
1876 */
1877static int dtSplitRoot(tid_t tid,
1878            struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
1879{
1880        struct super_block *sb = ip->i_sb;
1881        struct metapage *smp;
1882        dtroot_t *sp;
1883        struct metapage *rmp;
1884        dtpage_t *rp;
1885        s64 rbn;
1886        int xlen;
1887        int xsize;
1888        struct dtslot *f;
1889        s8 *stbl;
1890        int fsi, stblsize, n;
1891        struct idtentry *s;
1892        pxd_t *ppxd;
1893        struct pxdlist *pxdlist;
1894        pxd_t *pxd;
1895        struct dt_lock *dtlck;
1896        struct tlock *tlck;
1897        struct lv *lv;
1898
1899        /* get split root page */
1900        smp = split->mp;
1901        sp = &JFS_IP(ip)->i_dtroot;
1902
1903        /*
1904         *      allocate/initialize a single (right) child page
1905         *
1906         * N.B. at first split, a one (or two) block to fit new entry
1907         * is allocated; at subsequent split, a full page is allocated;
1908         */
1909        pxdlist = split->pxdlist;
1910        pxd = &pxdlist->pxd[pxdlist->npxd];
1911        pxdlist->npxd++;
1912        rbn = addressPXD(pxd);
1913        xlen = lengthPXD(pxd);
1914        xsize = xlen << JFS_SBI(sb)->l2bsize;
1915        rmp = get_metapage(ip, rbn, xsize, 1);
1916        if (!rmp)
1917                return -EIO;
1918
1919        rp = rmp->data;
1920
1921        /* Allocate blocks to quota. */
1922        if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
1923                release_metapage(rmp);
1924                return -EDQUOT;
1925        }
1926
1927        BT_MARK_DIRTY(rmp, ip);
1928        /*
1929         * acquire a transaction lock on the new right page
1930         */
1931        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1932        dtlck = (struct dt_lock *) & tlck->lock;
1933
1934        rp->header.flag =
1935            (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1936        rp->header.self = *pxd;
1937
1938        /* initialize sibling pointers */
1939        rp->header.next = 0;
1940        rp->header.prev = 0;
1941
1942        /*
1943         *      move in-line root page into new right page extent
1944         */
1945        /* linelock header + copied entries + new stbl (1st slot) in new page */
1946        ASSERT(dtlck->index == 0);
1947        lv = & dtlck->lv[0];
1948        lv->offset = 0;
1949        lv->length = 10;        /* 1 + 8 + 1 */
1950        dtlck->index++;
1951
1952        n = xsize >> L2DTSLOTSIZE;
1953        rp->header.maxslot = n;
1954        stblsize = (n + 31) >> L2DTSLOTSIZE;
1955
1956        /* copy old stbl to new stbl at start of extended area */
1957        rp->header.stblindex = DTROOTMAXSLOT;
1958        stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
1959        memcpy(stbl, sp->header.stbl, sp->header.nextindex);
1960        rp->header.nextindex = sp->header.nextindex;
1961
1962        /* copy old data area to start of new data area */
1963        memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
1964
1965        /*
1966         * append free region of newly extended area at tail of freelist
1967         */
1968        /* init free region of newly extended area */
1969        fsi = n = DTROOTMAXSLOT + stblsize;
1970        f = &rp->slot[fsi];
1971        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1972                f->next = fsi;
1973        f->next = -1;
1974
1975        /* append new free region at tail of old freelist */
1976        fsi = sp->header.freelist;
1977        if (fsi == -1)
1978                rp->header.freelist = n;
1979        else {
1980                rp->header.freelist = fsi;
1981
1982                do {
1983                        f = &rp->slot[fsi];
1984                        fsi = f->next;
1985                } while (fsi != -1);
1986
1987                f->next = n;
1988        }
1989
1990        rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
1991
1992        /*
1993         * Update directory index table for entries now in right page
1994         */
1995        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1996                s64 lblock;
1997                struct metapage *mp = NULL;
1998                struct ldtentry *ldtentry;
1999
2000                stbl = DT_GETSTBL(rp);
2001                for (n = 0; n < rp->header.nextindex; n++) {
2002                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
2003                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
2004                                     rbn, n, &mp, &lblock);
2005                }
2006                if (mp)
2007                        release_metapage(mp);
2008        }
2009        /*
2010         * insert the new entry into the new right/child page
2011         * (skip index in the new right page will not change)
2012         */
2013        dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
2014
2015        /*
2016         *      reset parent/root page
2017         *
2018         * set the 1st entry offset to 0, which force the left-most key
2019         * at any level of the tree to be less than any search key.
2020         *
2021         * The btree comparison code guarantees that the left-most key on any
2022         * level of the tree is never used, so it doesn't need to be filled in.
2023         */
2024        BT_MARK_DIRTY(smp, ip);
2025        /*
2026         * acquire a transaction lock on the root page (in-memory inode)
2027         */
2028        tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
2029        dtlck = (struct dt_lock *) & tlck->lock;
2030
2031        /* linelock root */
2032        ASSERT(dtlck->index == 0);
2033        lv = & dtlck->lv[0];
2034        lv->offset = 0;
2035        lv->length = DTROOTMAXSLOT;
2036        dtlck->index++;
2037
2038        /* update page header of root */
2039        if (sp->header.flag & BT_LEAF) {
2040                sp->header.flag &= ~BT_LEAF;
2041                sp->header.flag |= BT_INTERNAL;
2042        }
2043
2044        /* init the first entry */
2045        s = (struct idtentry *) & sp->slot[DTENTRYSTART];
2046        ppxd = (pxd_t *) s;
2047        *ppxd = *pxd;
2048        s->next = -1;
2049        s->namlen = 0;
2050
2051        stbl = sp->header.stbl;
2052        stbl[0] = DTENTRYSTART;
2053        sp->header.nextindex = 1;
2054
2055        /* init freelist */
2056        fsi = DTENTRYSTART + 1;
2057        f = &sp->slot[fsi];
2058
2059        /* init free region of remaining area */
2060        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2061                f->next = fsi;
2062        f->next = -1;
2063
2064        sp->header.freelist = DTENTRYSTART + 1;
2065        sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
2066
2067        *rmpp = rmp;
2068
2069        return 0;
2070}
2071
2072
2073/*
2074 *      dtDelete()
2075 *
2076 * function: delete the entry(s) referenced by a key.
2077 *
2078 * parameter:
2079 *
2080 * return:
2081 */
2082int dtDelete(tid_t tid,
2083         struct inode *ip, struct component_name * key, ino_t * ino, int flag)
2084{
2085        int rc = 0;
2086        s64 bn;
2087        struct metapage *mp, *imp;
2088        dtpage_t *p;
2089        int index;
2090        struct btstack btstack;
2091        struct dt_lock *dtlck;
2092        struct tlock *tlck;
2093        struct lv *lv;
2094        int i;
2095        struct ldtentry *ldtentry;
2096        u8 *stbl;
2097        u32 table_index, next_index;
2098        struct metapage *nmp;
2099        dtpage_t *np;
2100
2101        /*
2102         *      search for the entry to delete:
2103         *
2104         * dtSearch() returns (leaf page pinned, index at which to delete).
2105         */
2106        if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
2107                return rc;
2108
2109        /* retrieve search result */
2110        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2111
2112        /*
2113         * We need to find put the index of the next entry into the
2114         * directory index table in order to resume a readdir from this
2115         * entry.
2116         */
2117        if (DO_INDEX(ip)) {
2118                stbl = DT_GETSTBL(p);
2119                ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
2120                table_index = le32_to_cpu(ldtentry->index);
2121                if (index == (p->header.nextindex - 1)) {
2122                        /*
2123                         * Last entry in this leaf page
2124                         */
2125                        if ((p->header.flag & BT_ROOT)
2126                            || (p->header.next == 0))
2127                                next_index = -1;
2128                        else {
2129                                /* Read next leaf page */
2130                                DT_GETPAGE(ip, le64_to_cpu(p->header.next),
2131                                           nmp, PSIZE, np, rc);
2132                                if (rc)
2133                                        next_index = -1;
2134                                else {
2135                                        stbl = DT_GETSTBL(np);
2136                                        ldtentry =
2137                                            (struct ldtentry *) & np->
2138                                            slot[stbl[0]];
2139                                        next_index =
2140                                            le32_to_cpu(ldtentry->index);
2141                                        DT_PUTPAGE(nmp);
2142                                }
2143                        }
2144                } else {
2145                        ldtentry =
2146                            (struct ldtentry *) & p->slot[stbl[index + 1]];
2147                        next_index = le32_to_cpu(ldtentry->index);
2148                }
2149                free_index(tid, ip, table_index, next_index);
2150        }
2151        /*
2152         * the leaf page becomes empty, delete the page
2153         */
2154        if (p->header.nextindex == 1) {
2155                /* delete empty page */
2156                rc = dtDeleteUp(tid, ip, mp, p, &btstack);
2157        }
2158        /*
2159         * the leaf page has other entries remaining:
2160         *
2161         * delete the entry from the leaf page.
2162         */
2163        else {
2164                BT_MARK_DIRTY(mp, ip);
2165                /*
2166                 * acquire a transaction lock on the leaf page
2167                 */
2168                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2169                dtlck = (struct dt_lock *) & tlck->lock;
2170
2171                /*
2172                 * Do not assume that dtlck->index will be zero.  During a
2173                 * rename within a directory, this transaction may have
2174                 * modified this page already when adding the new entry.
2175                 */
2176
2177                /* linelock header */
2178                if (dtlck->index >= dtlck->maxcnt)
2179                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2180                lv = & dtlck->lv[dtlck->index];
2181                lv->offset = 0;
2182                lv->length = 1;
2183                dtlck->index++;
2184
2185                /* linelock stbl of non-root leaf page */
2186                if (!(p->header.flag & BT_ROOT)) {
2187                        if (dtlck->index >= dtlck->maxcnt)
2188                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2189                        lv = & dtlck->lv[dtlck->index];
2190                        i = index >> L2DTSLOTSIZE;
2191                        lv->offset = p->header.stblindex + i;
2192                        lv->length =
2193                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2194                            i + 1;
2195                        dtlck->index++;
2196                }
2197
2198                /* free the leaf entry */
2199                dtDeleteEntry(p, index, &dtlck);
2200
2201                /*
2202                 * Update directory index table for entries moved in stbl
2203                 */
2204                if (DO_INDEX(ip) && index < p->header.nextindex) {
2205                        s64 lblock;
2206
2207                        imp = NULL;
2208                        stbl = DT_GETSTBL(p);
2209                        for (i = index; i < p->header.nextindex; i++) {
2210                                ldtentry =
2211                                    (struct ldtentry *) & p->slot[stbl[i]];
2212                                modify_index(tid, ip,
2213                                             le32_to_cpu(ldtentry->index),
2214                                             bn, i, &imp, &lblock);
2215                        }
2216                        if (imp)
2217                                release_metapage(imp);
2218                }
2219
2220                DT_PUTPAGE(mp);
2221        }
2222
2223        return rc;
2224}
2225
2226
2227/*
2228 *      dtDeleteUp()
2229 *
2230 * function:
2231 *      free empty pages as propagating deletion up the tree
2232 *
2233 * parameter:
2234 *
2235 * return:
2236 */
2237static int dtDeleteUp(tid_t tid, struct inode *ip,
2238           struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
2239{
2240        int rc = 0;
2241        struct metapage *mp;
2242        dtpage_t *p;
2243        int index, nextindex;
2244        int xlen;
2245        struct btframe *parent;
2246        struct dt_lock *dtlck;
2247        struct tlock *tlck;
2248        struct lv *lv;
2249        struct pxd_lock *pxdlock;
2250        int i;
2251
2252        /*
2253         *      keep the root leaf page which has become empty
2254         */
2255        if (BT_IS_ROOT(fmp)) {
2256                /*
2257                 * reset the root
2258                 *
2259                 * dtInitRoot() acquires txlock on the root
2260                 */
2261                dtInitRoot(tid, ip, PARENT(ip));
2262
2263                DT_PUTPAGE(fmp);
2264
2265                return 0;
2266        }
2267
2268        /*
2269         *      free the non-root leaf page
2270         */
2271        /*
2272         * acquire a transaction lock on the page
2273         *
2274         * write FREEXTENT|NOREDOPAGE log record
2275         * N.B. linelock is overlaid as freed extent descriptor, and
2276         * the buffer page is freed;
2277         */
2278        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2279        pxdlock = (struct pxd_lock *) & tlck->lock;
2280        pxdlock->flag = mlckFREEPXD;
2281        pxdlock->pxd = fp->header.self;
2282        pxdlock->index = 1;
2283
2284        /* update sibling pointers */
2285        if ((rc = dtRelink(tid, ip, fp))) {
2286                BT_PUTPAGE(fmp);
2287                return rc;
2288        }
2289
2290        xlen = lengthPXD(&fp->header.self);
2291
2292        /* Free quota allocation. */
2293        DQUOT_FREE_BLOCK(ip, xlen);
2294
2295        /* free/invalidate its buffer page */
2296        discard_metapage(fmp);
2297
2298        /*
2299         *      propagate page deletion up the directory tree
2300         *
2301         * If the delete from the parent page makes it empty,
2302         * continue all the way up the tree.
2303         * stop if the root page is reached (which is never deleted) or
2304         * if the entry deletion does not empty the page.
2305         */
2306        while ((parent = BT_POP(btstack)) != NULL) {
2307                /* pin the parent page <sp> */
2308                DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2309                if (rc)
2310                        return rc;
2311
2312                /*
2313                 * free the extent of the child page deleted
2314                 */
2315                index = parent->index;
2316
2317                /*
2318                 * delete the entry for the child page from parent
2319                 */
2320                nextindex = p->header.nextindex;
2321
2322                /*
2323                 * the parent has the single entry being deleted:
2324                 *
2325                 * free the parent page which has become empty.
2326                 */
2327                if (nextindex == 1) {
2328                        /*
2329                         * keep the root internal page which has become empty
2330                         */
2331                        if (p->header.flag & BT_ROOT) {
2332                                /*
2333                                 * reset the root
2334                                 *
2335                                 * dtInitRoot() acquires txlock on the root
2336                                 */
2337                                dtInitRoot(tid, ip, PARENT(ip));
2338
2339                                DT_PUTPAGE(mp);
2340
2341                                return 0;
2342                        }
2343                        /*
2344                         * free the parent page
2345                         */
2346                        else {
2347                                /*
2348                                 * acquire a transaction lock on the page
2349                                 *
2350                                 * write FREEXTENT|NOREDOPAGE log record
2351                                 */
2352                                tlck =
2353                                    txMaplock(tid, ip,
2354                                              tlckDTREE | tlckFREE);
2355                                pxdlock = (struct pxd_lock *) & tlck->lock;
2356                                pxdlock->flag = mlckFREEPXD;
2357                                pxdlock->pxd = p->header.self;
2358                                pxdlock->index = 1;
2359
2360                                /* update sibling pointers */
2361                                if ((rc = dtRelink(tid, ip, p))) {
2362                                        DT_PUTPAGE(mp);
2363                                        return rc;
2364                                }
2365
2366                                xlen = lengthPXD(&p->header.self);
2367
2368                                /* Free quota allocation */
2369                                DQUOT_FREE_BLOCK(ip, xlen);
2370
2371                                /* free/invalidate its buffer page */
2372                                discard_metapage(mp);
2373
2374                                /* propagate up */
2375                                continue;
2376                        }
2377                }
2378
2379                /*
2380                 * the parent has other entries remaining:
2381                 *
2382                 * delete the router entry from the parent page.
2383                 */
2384                BT_MARK_DIRTY(mp, ip);
2385                /*
2386                 * acquire a transaction lock on the page
2387                 *
2388                 * action: router entry deletion
2389                 */
2390                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2391                dtlck = (struct dt_lock *) & tlck->lock;
2392
2393                /* linelock header */
2394                if (dtlck->index >= dtlck->maxcnt)
2395                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2396                lv = & dtlck->lv[dtlck->index];
2397                lv->offset = 0;
2398                lv->length = 1;
2399                dtlck->index++;
2400
2401                /* linelock stbl of non-root leaf page */
2402                if (!(p->header.flag & BT_ROOT)) {
2403                        if (dtlck->index < dtlck->maxcnt)
2404                                lv++;
2405                        else {
2406                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2407                                lv = & dtlck->lv[0];
2408                        }
2409                        i = index >> L2DTSLOTSIZE;
2410                        lv->offset = p->header.stblindex + i;
2411                        lv->length =
2412                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2413                            i + 1;
2414                        dtlck->index++;
2415                }
2416
2417                /* free the router entry */
2418                dtDeleteEntry(p, index, &dtlck);
2419
2420                /* reset key of new leftmost entry of level (for consistency) */
2421                if (index == 0 &&
2422                    ((p->header.flag & BT_ROOT) || p->header.prev == 0))
2423                        dtTruncateEntry(p, 0, &dtlck);
2424
2425                /* unpin the parent page */
2426                DT_PUTPAGE(mp);
2427
2428                /* exit propagation up */
2429                break;
2430        }
2431
2432        if (!DO_INDEX(ip))
2433                ip->i_size -= PSIZE;
2434
2435        return 0;
2436}
2437
2438#ifdef _NOTYET
2439/*
2440 * NAME:        dtRelocate()
2441 *
2442 * FUNCTION:    relocate dtpage (internal or leaf) of directory;
2443 *              This function is mainly used by defragfs utility.
2444 */
2445int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2446               s64 nxaddr)
2447{
2448        int rc = 0;
2449        struct metapage *mp, *pmp, *lmp, *rmp;
2450        dtpage_t *p, *pp, *rp = 0, *lp= 0;
2451        s64 bn;
2452        int index;
2453        struct btstack btstack;
2454        pxd_t *pxd;
2455        s64 oxaddr, nextbn, prevbn;
2456        int xlen, xsize;
2457        struct tlock *tlck;
2458        struct dt_lock *dtlck;
2459        struct pxd_lock *pxdlock;
2460        s8 *stbl;
2461        struct lv *lv;
2462
2463        oxaddr = addressPXD(opxd);
2464        xlen = lengthPXD(opxd);
2465
2466        jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d",
2467                   (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr,
2468                   xlen);
2469
2470        /*
2471         *      1. get the internal parent dtpage covering
2472         *      router entry for the tartget page to be relocated;
2473         */
2474        rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
2475        if (rc)
2476                return rc;
2477
2478        /* retrieve search result */
2479        DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2480        jfs_info("dtRelocate: parent router entry validated.");
2481
2482        /*
2483         *      2. relocate the target dtpage
2484         */
2485        /* read in the target page from src extent */
2486        DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2487        if (rc) {
2488                /* release the pinned parent page */
2489                DT_PUTPAGE(pmp);
2490                return rc;
2491        }
2492
2493        /*
2494         * read in sibling pages if any to update sibling pointers;
2495         */
2496        rmp = NULL;
2497        if (p->header.next) {
2498                nextbn = le64_to_cpu(p->header.next);
2499                DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2500                if (rc) {
2501                        DT_PUTPAGE(mp);
2502                        DT_PUTPAGE(pmp);
2503                        return (rc);
2504                }
2505        }
2506
2507        lmp = NULL;
2508        if (p->header.prev) {
2509                prevbn = le64_to_cpu(p->header.prev);
2510                DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2511                if (rc) {
2512                        DT_PUTPAGE(mp);
2513                        DT_PUTPAGE(pmp);
2514                        if (rmp)
2515                                DT_PUTPAGE(rmp);
2516                        return (rc);
2517                }
2518        }
2519
2520        /* at this point, all xtpages to be updated are in memory */
2521
2522        /*
2523         * update sibling pointers of sibling dtpages if any;
2524         */
2525        if (lmp) {
2526                tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
2527                dtlck = (struct dt_lock *) & tlck->lock;
2528                /* linelock header */
2529                ASSERT(dtlck->index == 0);
2530                lv = & dtlck->lv[0];
2531                lv->offset = 0;
2532                lv->length = 1;
2533                dtlck->index++;
2534
2535                lp->header.next = cpu_to_le64(nxaddr);
2536                DT_PUTPAGE(lmp);
2537        }
2538
2539        if (rmp) {
2540                tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
2541                dtlck = (struct dt_lock *) & tlck->lock;
2542                /* linelock header */
2543                ASSERT(dtlck->index == 0);
2544                lv = & dtlck->lv[0];
2545                lv->offset = 0;
2546                lv->length = 1;
2547                dtlck->index++;
2548
2549                rp->header.prev = cpu_to_le64(nxaddr);
2550                DT_PUTPAGE(rmp);
2551        }
2552
2553        /*
2554         * update the target dtpage to be relocated
2555         *
2556         * write LOG_REDOPAGE of LOG_NEW type for dst page
2557         * for the whole target page (logredo() will apply
2558         * after image and update bmap for allocation of the
2559         * dst extent), and update bmap for allocation of
2560         * the dst extent;
2561         */
2562        tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
2563        dtlck = (struct dt_lock *) & tlck->lock;
2564        /* linelock header */
2565        ASSERT(dtlck->index == 0);
2566        lv = & dtlck->lv[0];
2567
2568        /* update the self address in the dtpage header */
2569        pxd = &p->header.self;
2570        PXDaddress(pxd, nxaddr);
2571
2572        /* the dst page is the same as the src page, i.e.,
2573         * linelock for afterimage of the whole page;
2574         */
2575        lv->offset = 0;
2576        lv->length = p->header.maxslot;
2577        dtlck->index++;
2578
2579        /* update the buffer extent descriptor of the dtpage */
2580        xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2581
2582        /* unpin the relocated page */
2583        DT_PUTPAGE(mp);
2584        jfs_info("dtRelocate: target dtpage relocated.");
2585
2586        /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec
2587         * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
2588         * will also force a bmap update ).
2589         */
2590
2591        /*
2592         *      3. acquire maplock for the source extent to be freed;
2593         */
2594        /* for dtpage relocation, write a LOG_NOREDOPAGE record
2595         * for the source dtpage (logredo() will init NoRedoPage
2596         * filter and will also update bmap for free of the source
2597         * dtpage), and upadte bmap for free of the source dtpage;
2598         */
2599        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2600        pxdlock = (struct pxd_lock *) & tlck->lock;
2601        pxdlock->flag = mlckFREEPXD;
2602        PXDaddress(&pxdlock->pxd, oxaddr);
2603        PXDlength(&pxdlock->pxd, xlen);
2604        pxdlock->index = 1;
2605
2606        /*
2607         *      4. update the parent router entry for relocation;
2608         *
2609         * acquire tlck for the parent entry covering the target dtpage;
2610         * write LOG_REDOPAGE to apply after image only;
2611         */
2612        jfs_info("dtRelocate: update parent router entry.");
2613        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
2614        dtlck = (struct dt_lock *) & tlck->lock;
2615        lv = & dtlck->lv[dtlck->index];
2616
2617        /* update the PXD with the new address */
2618        stbl = DT_GETSTBL(pp);
2619        pxd = (pxd_t *) & pp->slot[stbl[index]];
2620        PXDaddress(pxd, nxaddr);
2621        lv->offset = stbl[index];
2622        lv->length = 1;
2623        dtlck->index++;
2624
2625        /* unpin the parent dtpage */
2626        DT_PUTPAGE(pmp);
2627
2628        return rc;
2629}
2630
2631/*
2632 * NAME:        dtSearchNode()
2633 *
2634 * FUNCTION:    Search for an dtpage containing a specified address
2635 *              This function is mainly used by defragfs utility.
2636 *
2637 * NOTE:        Search result on stack, the found page is pinned at exit.
2638 *              The result page must be an internal dtpage.
2639 *              lmxaddr give the address of the left most page of the
2640 *              dtree level, in which the required dtpage resides.
2641 */
2642static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2643                        struct btstack * btstack)
2644{
2645        int rc = 0;
2646        s64 bn;
2647        struct metapage *mp;
2648        dtpage_t *p;
2649        int psize = 288;        /* initial in-line directory */
2650        s8 *stbl;
2651        int i;
2652        pxd_t *pxd;
2653        struct btframe *btsp;
2654
2655        BT_CLR(btstack);        /* reset stack */
2656
2657        /*
2658         *      descend tree to the level with specified leftmost page
2659         *
2660         *  by convention, root bn = 0.
2661         */
2662        for (bn = 0;;) {
2663                /* get/pin the page to search */
2664                DT_GETPAGE(ip, bn, mp, psize, p, rc);
2665                if (rc)
2666                        return rc;
2667
2668                /* does the xaddr of leftmost page of the levevl
2669                 * matches levevl search key ?
2670                 */
2671                if (p->header.flag & BT_ROOT) {
2672                        if (lmxaddr == 0)
2673                                break;
2674                } else if (addressPXD(&p->header.self) == lmxaddr)
2675                        break;
2676
2677                /*
2678                 * descend down to leftmost child page
2679                 */
2680                if (p->header.flag & BT_LEAF) {
2681                        DT_PUTPAGE(mp);
2682                        return -ESTALE;
2683                }
2684
2685                /* get the leftmost entry */
2686                stbl = DT_GETSTBL(p);
2687                pxd = (pxd_t *) & p->slot[stbl[0]];
2688
2689                /* get the child page block address */
2690                bn = addressPXD(pxd);
2691                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
2692                /* unpin the parent page */
2693                DT_PUTPAGE(mp);
2694        }
2695
2696        /*
2697         *      search each page at the current levevl
2698         */
2699      loop:
2700        stbl = DT_GETSTBL(p);
2701        for (i = 0; i < p->header.nextindex; i++) {
2702                pxd = (pxd_t *) & p->slot[stbl[i]];
2703
2704                /* found the specified router entry */
2705                if (addressPXD(pxd) == addressPXD(kpxd) &&
2706                    lengthPXD(pxd) == lengthPXD(kpxd)) {
2707                        btsp = btstack->top;
2708                        btsp->bn = bn;
2709                        btsp->index = i;
2710                        btsp->mp = mp;
2711
2712                        return 0;
2713                }
2714        }
2715
2716        /* get the right sibling page if any */
2717        if (p->header.next)
2718                bn = le64_to_cpu(p->header.next);
2719        else {
2720                DT_PUTPAGE(mp);
2721                return -ESTALE;
2722        }
2723
2724        /* unpin current page */
2725        DT_PUTPAGE(mp);
2726
2727        /* get the right sibling page */
2728        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2729        if (rc)
2730                return rc;
2731
2732        goto loop;
2733}
2734#endif /* _NOTYET */
2735
2736/*
2737 *      dtRelink()
2738 *
2739 * function:
2740 *      link around a freed page.
2741 *
2742 * parameter:
2743 *      fp:     page to be freed
2744 *
2745 * return:
2746 */
2747static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
2748{
2749        int rc;
2750        struct metapage *mp;
2751        s64 nextbn, prevbn;
2752        struct tlock *tlck;
2753        struct dt_lock *dtlck;
2754        struct lv *lv;
2755
2756        nextbn = le64_to_cpu(p->header.next);
2757        prevbn = le64_to_cpu(p->header.prev);
2758
2759        /* update prev pointer of the next page */
2760        if (nextbn != 0) {
2761                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
2762                if (rc)
2763                        return rc;
2764
2765                BT_MARK_DIRTY(mp, ip);
2766                /*
2767                 * acquire a transaction lock on the next page
2768                 *
2769                 * action: update prev pointer;
2770                 */
2771                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2772                jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2773                        tlck, ip, mp);
2774                dtlck = (struct dt_lock *) & tlck->lock;
2775
2776                /* linelock header */
2777                if (dtlck->index >= dtlck->maxcnt)
2778                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2779                lv = & dtlck->lv[dtlck->index];
2780                lv->offset = 0;
2781                lv->length = 1;
2782                dtlck->index++;
2783
2784                p->header.prev = cpu_to_le64(prevbn);
2785                DT_PUTPAGE(mp);
2786        }
2787
2788        /* update next pointer of the previous page */
2789        if (prevbn != 0) {
2790                DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
2791                if (rc)
2792                        return rc;
2793
2794                BT_MARK_DIRTY(mp, ip);
2795                /*
2796                 * acquire a transaction lock on the prev page
2797                 *
2798                 * action: update next pointer;
2799                 */
2800                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2801                jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2802                        tlck, ip, mp);
2803                dtlck = (struct dt_lock *) & tlck->lock;
2804
2805                /* linelock header */
2806                if (dtlck->index >= dtlck->maxcnt)
2807                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2808                lv = & dtlck->lv[dtlck->index];
2809                lv->offset = 0;
2810                lv->length = 1;
2811                dtlck->index++;
2812
2813                p->header.next = cpu_to_le64(nextbn);
2814                DT_PUTPAGE(mp);
2815        }
2816
2817        return 0;
2818}
2819
2820
2821/*
2822 *      dtInitRoot()
2823 *
2824 * initialize directory root (inline in inode)
2825 */
2826void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
2827{
2828        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2829        dtroot_t *p;
2830        int fsi;
2831        struct dtslot *f;
2832        struct tlock *tlck;
2833        struct dt_lock *dtlck;
2834        struct lv *lv;
2835        u16 xflag_save;
2836
2837        /*
2838         * If this was previously an non-empty directory, we need to remove
2839         * the old directory table.
2840         */
2841        if (DO_INDEX(ip)) {
2842                if (!jfs_dirtable_inline(ip)) {
2843                        struct tblock *tblk = tid_to_tblock(tid);
2844                        /*
2845                         * We're playing games with the tid's xflag.  If
2846                         * we're removing a regular file, the file's xtree
2847                         * is committed with COMMIT_PMAP, but we always
2848                         * commit the directories xtree with COMMIT_PWMAP.
2849                         */
2850                        xflag_save = tblk->xflag;
2851                        tblk->xflag = 0;
2852                        /*
2853                         * xtTruncate isn't guaranteed to fully truncate
2854                         * the xtree.  The caller needs to check i_size
2855                         * after committing the transaction to see if
2856                         * additional truncation is needed.  The
2857                         * COMMIT_Stale flag tells caller that we
2858                         * initiated the truncation.
2859                         */
2860                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
2861                        set_cflag(COMMIT_Stale, ip);
2862
2863                        tblk->xflag = xflag_save;
2864                } else
2865                        ip->i_size = 1;
2866
2867                jfs_ip->next_index = 2;
2868        } else
2869                ip->i_size = IDATASIZE;
2870
2871        /*
2872         * acquire a transaction lock on the root
2873         *
2874         * action: directory initialization;
2875         */
2876        tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
2877                      tlckDTREE | tlckENTRY | tlckBTROOT);
2878        dtlck = (struct dt_lock *) & tlck->lock;
2879
2880        /* linelock root */
2881        ASSERT(dtlck->index == 0);
2882        lv = & dtlck->lv[0];
2883        lv->offset = 0;
2884        lv->length = DTROOTMAXSLOT;
2885        dtlck->index++;
2886
2887        p = &jfs_ip->i_dtroot;
2888
2889        p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2890
2891        p->header.nextindex = 0;
2892
2893        /* init freelist */
2894        fsi = 1;
2895        f = &p->slot[fsi];
2896
2897        /* init data area of root */
2898        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2899                f->next = fsi;
2900        f->next = -1;
2901
2902        p->header.freelist = 1;
2903        p->header.freecnt = 8;
2904
2905        /* init '..' entry */
2906        p->header.idotdot = cpu_to_le32(idotdot);
2907
2908        return;
2909}
2910
2911/*
2912 *      add_missing_indices()
2913 *
2914 * function: Fix dtree page in which one or more entries has an invalid index.
2915 *           fsck.jfs should really fix this, but it currently does not.
2916 *           Called from jfs_readdir when bad index is detected.
2917 */
2918static void add_missing_indices(struct inode *inode, s64 bn)
2919{
2920        struct ldtentry *d;
2921        struct dt_lock *dtlck;
2922        int i;
2923        uint index;
2924        struct lv *lv;
2925        struct metapage *mp;
2926        dtpage_t *p;
2927        int rc;
2928        s8 *stbl;
2929        tid_t tid;
2930        struct tlock *tlck;
2931
2932        tid = txBegin(inode->i_sb, 0);
2933
2934        DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);
2935
2936        if (rc) {
2937                printk(KERN_ERR "DT_GETPAGE failed!\n");
2938                goto end;
2939        }
2940        BT_MARK_DIRTY(mp, inode);
2941
2942        ASSERT(p->header.flag & BT_LEAF);
2943
2944        tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
2945        if (BT_IS_ROOT(mp))
2946                tlck->type |= tlckBTROOT;
2947
2948        dtlck = (struct dt_lock *) &tlck->lock;
2949
2950        stbl = DT_GETSTBL(p);
2951        for (i = 0; i < p->header.nextindex; i++) {
2952                d = (struct ldtentry *) &p->slot[stbl[i]];
2953                index = le32_to_cpu(d->index);
2954                if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
2955                        d->index = cpu_to_le32(add_index(tid, inode, bn, i));
2956                        if (dtlck->index >= dtlck->maxcnt)
2957                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2958                        lv = &dtlck->lv[dtlck->index];
2959                        lv->offset = stbl[i];
2960                        lv->length = 1;
2961                        dtlck->index++;
2962                }
2963        }
2964
2965        DT_PUTPAGE(mp);
2966        (void) txCommit(tid, 1, &inode, 0);
2967end:
2968        txEnd(tid);
2969}
2970
2971/*
2972 * Buffer to hold directory entry info while traversing a dtree page
2973 * before being fed to the filldir function
2974 */
2975struct jfs_dirent {
2976        loff_t position;
2977        int ino;
2978        u16 name_len;
2979        char name[0];
2980};
2981
2982/*
2983 * function to determine next variable-sized jfs_dirent in buffer
2984 */
2985static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
2986{
2987        return (struct jfs_dirent *)
2988                ((char *)dirent +
2989                 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
2990                   sizeof (loff_t) - 1) &
2991                  ~(sizeof (loff_t) - 1)));
2992}
2993
2994/*
2995 *      jfs_readdir()
2996 *
2997 * function: read directory entries sequentially
2998 *      from the specified entry offset
2999 *
3000 * parameter:
3001 *
3002 * return: offset = (pn, index) of start entry
3003 *      of next jfs_readdir()/dtRead()
3004 */
3005int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
3006{
3007        struct inode *ip = filp->f_path.dentry->d_inode;
3008        struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
3009        int rc = 0;
3010        loff_t dtpos;   /* legacy OS/2 style position */
3011        struct dtoffset {
3012                s16 pn;
3013                s16 index;
3014                s32 unused;
3015        } *dtoffset = (struct dtoffset *) &dtpos;
3016        s64 bn;
3017        struct metapage *mp;
3018        dtpage_t *p;
3019        int index;
3020        s8 *stbl;
3021        struct btstack btstack;
3022        int i, next;
3023        struct ldtentry *d;
3024        struct dtslot *t;
3025        int d_namleft, len, outlen;
3026        unsigned long dirent_buf;
3027        char *name_ptr;
3028        u32 dir_index;
3029        int do_index = 0;
3030        uint loop_count = 0;
3031        struct jfs_dirent *jfs_dirent;
3032        int jfs_dirents;
3033        int overflow, fix_page, page_fixed = 0;
3034        static int unique_pos = 2;      /* If we can't fix broken index */
3035
3036        if (filp->f_pos == DIREND)
3037                return 0;
3038
3039        if (DO_INDEX(ip)) {
3040                /*
3041                 * persistent index is stored in directory entries.
3042                 * Special cases:        0 = .
3043                 *                       1 = ..
3044                 *                      -1 = End of directory
3045                 */
3046                do_index = 1;
3047
3048                dir_index = (u32) filp->f_pos;
3049
3050                if (dir_index > 1) {
3051                        struct dir_table_slot dirtab_slot;
3052
3053                        if (dtEmpty(ip) ||
3054                            (dir_index >= JFS_IP(ip)->next_index)) {
3055                                /* Stale position.  Directory has shrunk */
3056                                filp->f_pos = DIREND;
3057                                return 0;
3058                        }
3059                      repeat:
3060                        rc = read_index(ip, dir_index, &dirtab_slot);
3061                        if (rc) {
3062                                filp->f_pos = DIREND;
3063                                return rc;
3064                        }
3065                        if (dirtab_slot.flag == DIR_INDEX_FREE) {
3066                                if (loop_count++ > JFS_IP(ip)->next_index) {
3067                                        jfs_err("jfs_readdir detected "
3068                                                   "infinite loop!");
3069                                        filp->f_pos = DIREND;
3070                                        return 0;
3071                                }
3072                                dir_index = le32_to_cpu(dirtab_slot.addr2);
3073                                if (dir_index == -1) {
3074                                        filp->f_pos = DIREND;
3075                                        return 0;
3076                                }
3077                                goto repeat;
3078                        }
3079                        bn = addressDTS(&dirtab_slot);
3080                        index = dirtab_slot.slot;
3081                        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3082                        if (rc) {
3083                                filp->f_pos = DIREND;
3084                                return 0;
3085                        }
3086                        if (p->header.flag & BT_INTERNAL) {
3087                                jfs_err("jfs_readdir: bad index table");
3088                                DT_PUTPAGE(mp);
3089                                filp->f_pos = -1;
3090                                return 0;
3091                        }
3092                } else {
3093                        if (dir_index == 0) {
3094                                /*
3095                                 * self "."
3096                                 */
3097                                filp->f_pos = 0;
3098                                if (filldir(dirent, ".", 1, 0, ip->i_ino,
3099                                            DT_DIR))
3100                                        return 0;
3101                        }
3102                        /*
3103                         * parent ".."
3104                         */
3105                        filp->f_pos = 1;
3106                        if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR))
3107                                return 0;
3108
3109                        /*
3110                         * Find first entry of left-most leaf
3111                         */
3112                        if (dtEmpty(ip)) {
3113                                filp->f_pos = DIREND;
3114                                return 0;
3115                        }
3116
3117                        if ((rc = dtReadFirst(ip, &btstack)))
3118                                return rc;
3119
3120                        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3121                }
3122        } else {
3123                /*
3124                 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
3125                 *
3126                 * pn = index = 0:      First entry "."
3127                 * pn = 0; index = 1:   Second entry ".."
3128                 * pn > 0:              Real entries, pn=1 -> leftmost page
3129                 * pn = index = -1:     No more entries
3130                 */
3131                dtpos = filp->f_pos;
3132                if (dtpos == 0) {
3133                        /* build "." entry */
3134
3135                        if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino,
3136                                    DT_DIR))
3137                                return 0;
3138                        dtoffset->index = 1;
3139                        filp->f_pos = dtpos;
3140                }
3141
3142                if (dtoffset->pn == 0) {
3143                        if (dtoffset->index == 1) {
3144                                /* build ".." entry */
3145
3146                                if (filldir(dirent, "..", 2, filp->f_pos,
3147                                            PARENT(ip), DT_DIR))
3148                                        return 0;
3149                        } else {
3150                                jfs_err("jfs_readdir called with "
3151                                        "invalid offset!");
3152                        }
3153                        dtoffset->pn = 1;
3154                        dtoffset->index = 0;
3155                        filp->f_pos = dtpos;
3156                }
3157
3158                if (dtEmpty(ip)) {
3159                        filp->f_pos = DIREND;
3160                        return 0;
3161                }
3162
3163                if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) {
3164                        jfs_err("jfs_readdir: unexpected rc = %d "
3165                                "from dtReadNext", rc);
3166                        filp->f_pos = DIREND;
3167                        return 0;
3168                }
3169                /* get start leaf page and index */
3170                DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3171
3172                /* offset beyond directory eof ? */
3173                if (bn < 0) {
3174                        filp->f_pos = DIREND;
3175                        return 0;
3176                }
3177        }
3178
3179        dirent_buf = __get_free_page(GFP_KERNEL);
3180        if (dirent_buf == 0) {
3181                DT_PUTPAGE(mp);
3182                jfs_warn("jfs_readdir: __get_free_page failed!");
3183                filp->f_pos = DIREND;
3184                return -ENOMEM;
3185        }
3186
3187        while (1) {
3188                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3189                jfs_dirents = 0;
3190                overflow = fix_page = 0;
3191
3192                stbl = DT_GETSTBL(p);
3193
3194                for (i = index; i < p->header.nextindex; i++) {
3195                        d = (struct ldtentry *) & p->slot[stbl[i]];
3196
3197                        if (((long) jfs_dirent + d->namlen + 1) >
3198                            (dirent_buf + PAGE_SIZE)) {
3199                                /* DBCS codepages could overrun dirent_buf */
3200                                index = i;
3201                                overflow = 1;
3202                                break;
3203                        }
3204
3205                        d_namleft = d->namlen;
3206                        name_ptr = jfs_dirent->name;
3207                        jfs_dirent->ino = le32_to_cpu(d->inumber);
3208
3209                        if (do_index) {
3210                                len = min(d_namleft, DTLHDRDATALEN);
3211                                jfs_dirent->position = le32_to_cpu(d->index);
3212                                /*
3213                                 * d->index should always be valid, but it
3214                                 * isn't.  fsck.jfs doesn't create the
3215                                 * directory index for the lost+found
3216                                 * directory.  Rather than let it go,
3217                                 * we can try to fix it.
3218                                 */
3219                                if ((jfs_dirent->position < 2) ||
3220                                    (jfs_dirent->position >=
3221                                     JFS_IP(ip)->next_index)) {
3222                                        if (!page_fixed && !isReadOnly(ip)) {
3223                                                fix_page = 1;
3224                                                /*
3225                                                 * setting overflow and setting
3226                                                 * index to i will cause the
3227                                                 * same page to be processed
3228                                                 * again starting here
3229                                                 */
3230                                                overflow = 1;
3231                                                index = i;
3232                                                break;
3233                                        }
3234                                        jfs_dirent->position = unique_pos++;
3235                                }
3236                        } else {
3237                                jfs_dirent->position = dtpos;
3238                                len = min(d_namleft, DTLHDRDATALEN_LEGACY);
3239                        }
3240
3241                        /* copy the name of head/only segment */
3242                        outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
3243                                                   codepage);
3244                        jfs_dirent->name_len = outlen;
3245
3246                        /* copy name in the additional segment(s) */
3247                        next = d->next;
3248                        while (next >= 0) {
3249                                t = (struct dtslot *) & p->slot[next];
3250                                name_ptr += outlen;
3251                                d_namleft -= len;
3252                                /* Sanity Check */
3253                                if (d_namleft == 0) {
3254                                        jfs_error(ip->i_sb,
3255                                                  "JFS:Dtree error: ino = "
3256                                                  "%ld, bn=%Ld, index = %d",
3257                                                  (long)ip->i_ino,
3258                                                  (long long)bn,
3259                                                  i);
3260                                        goto skip_one;
3261                                }
3262                                len = min(d_namleft, DTSLOTDATALEN);
3263                                outlen = jfs_strfromUCS_le(name_ptr, t->name,
3264                                                           len, codepage);
3265                                jfs_dirent->name_len += outlen;
3266
3267                                next = t->next;
3268                        }
3269
3270                        jfs_dirents++;
3271                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3272skip_one:
3273                        if (!do_index)
3274                                dtoffset->index++;
3275                }
3276
3277                if (!overflow) {
3278                        /* Point to next leaf page */
3279                        if (p->header.flag & BT_ROOT)
3280                                bn = 0;
3281                        else {
3282                                bn = le64_to_cpu(p->header.next);
3283                                index = 0;
3284                                /* update offset (pn:index) for new page */
3285                                if (!do_index) {
3286                                        dtoffset->pn++;
3287                                        dtoffset->index = 0;
3288                                }
3289                        }
3290                        page_fixed = 0;
3291                }
3292
3293                /* unpin previous leaf page */
3294                DT_PUTPAGE(mp);
3295
3296                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3297                while (jfs_dirents--) {
3298                        filp->f_pos = jfs_dirent->position;
3299                        if (filldir(dirent, jfs_dirent->name,
3300                                    jfs_dirent->name_len, filp->f_pos,
3301                                    jfs_dirent->ino, DT_UNKNOWN))
3302                                goto out;
3303                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3304                }
3305
3306                if (fix_page) {
3307                        add_missing_indices(ip, bn);
3308                        page_fixed = 1;
3309                }
3310
3311                if (!overflow && (bn == 0)) {
3312                        filp->f_pos = DIREND;
3313                        break;
3314                }
3315
3316                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3317                if (rc) {
3318                        free_page(dirent_buf);
3319                        return rc;
3320                }
3321        }
3322
3323      out:
3324        free_page(dirent_buf);
3325
3326        return rc;
3327}
3328
3329
3330/*
3331 *      dtReadFirst()
3332 *
3333 * function: get the leftmost page of the directory
3334 */
3335static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3336{
3337        int rc = 0;
3338        s64 bn;
3339        int psize = 288;        /* initial in-line directory */
3340        struct metapage *mp;
3341        dtpage_t *p;
3342        s8 *stbl;
3343        struct btframe *btsp;
3344        pxd_t *xd;
3345
3346        BT_CLR(btstack);        /* reset stack */
3347
3348        /*
3349         *      descend leftmost path of the tree
3350         *
3351         * by convention, root bn = 0.
3352         */
3353        for (bn = 0;;) {
3354                DT_GETPAGE(ip, bn, mp, psize, p, rc);
3355                if (rc)
3356                        return rc;
3357
3358                /*
3359                 * leftmost leaf page
3360                 */
3361                if (p->header.flag & BT_LEAF) {
3362                        /* return leftmost entry */
3363                        btsp = btstack->top;
3364                        btsp->bn = bn;
3365                        btsp->index = 0;
3366                        btsp->mp = mp;
3367
3368                        return 0;
3369                }
3370
3371                /*
3372                 * descend down to leftmost child page
3373                 */
3374                if (BT_STACK_FULL(btstack)) {
3375                        DT_PUTPAGE(mp);
3376                        jfs_error(ip->i_sb, "dtReadFirst: btstack overrun");
3377                        BT_STACK_DUMP(btstack);
3378                        return -EIO;
3379                }
3380                /* push (bn, index) of the parent page/entry */
3381                BT_PUSH(btstack, bn, 0);
3382
3383                /* get the leftmost entry */
3384                stbl = DT_GETSTBL(p);
3385                xd = (pxd_t *) & p->slot[stbl[0]];
3386
3387                /* get the child page block address */
3388                bn = addressPXD(xd);
3389                psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;
3390
3391                /* unpin the parent page */
3392                DT_PUTPAGE(mp);
3393        }
3394}
3395
3396
3397/*
3398 *      dtReadNext()
3399 *
3400 * function: get the page of the specified offset (pn:index)
3401 *
3402 * return: if (offset > eof), bn = -1;
3403 *
3404 * note: if index > nextindex of the target leaf page,
3405 * start with 1st entry of next leaf page;
3406 */
3407static int dtReadNext(struct inode *ip, loff_t * offset,
3408                      struct btstack * btstack)
3409{
3410        int rc = 0;
3411        struct dtoffset {
3412                s16 pn;
3413                s16 index;
3414                s32 unused;
3415        } *dtoffset = (struct dtoffset *) offset;
3416        s64 bn;
3417        struct metapage *mp;
3418        dtpage_t *p;
3419        int index;
3420        int pn;
3421        s8 *stbl;
3422        struct btframe *btsp, *parent;
3423        pxd_t *xd;
3424
3425        /*
3426         * get leftmost leaf page pinned
3427         */
3428        if ((rc = dtReadFirst(ip, btstack)))
3429                return rc;
3430
3431        /* get leaf page */
3432        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
3433
3434        /* get the start offset (pn:index) */
3435        pn = dtoffset->pn - 1;  /* Now pn = 0 represents leftmost leaf */
3436        index = dtoffset->index;
3437
3438        /* start at leftmost page ? */
3439        if (pn == 0) {
3440                /* offset beyond eof ? */
3441                if (index < p->header.nextindex)
3442                        goto out;
3443
3444                if (p->header.flag & BT_ROOT) {
3445                        bn = -1;
3446                        goto out;
3447                }
3448
3449                /* start with 1st entry of next leaf page */
3450                dtoffset->pn++;
3451                dtoffset->index = index = 0;
3452                goto a;
3453        }
3454
3455        /* start at non-leftmost page: scan parent pages for large pn */
3456        if (p->header.flag & BT_ROOT) {
3457                bn = -1;
3458                goto out;
3459        }
3460
3461        /* start after next leaf page ? */
3462        if (pn > 1)
3463                goto b;
3464
3465        /* get leaf page pn = 1 */
3466      a:
3467        bn = le64_to_cpu(p->header.next);
3468
3469        /* unpin leaf page */
3470        DT_PUTPAGE(mp);
3471
3472        /* offset beyond eof ? */
3473        if (bn == 0) {
3474                bn = -1;
3475                goto out;
3476        }
3477
3478        goto c;
3479
3480        /*
3481         * scan last internal page level to get target leaf page
3482         */
3483      b:
3484        /* unpin leftmost leaf page */
3485        DT_PUTPAGE(mp);
3486
3487        /* get left most parent page */
3488        btsp = btstack->top;
3489        parent = btsp - 1;
3490        bn = parent->bn;
3491        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3492        if (rc)
3493                return rc;
3494
3495        /* scan parent pages at last internal page level */
3496        while (pn >= p->header.nextindex) {
3497                pn -= p->header.nextindex;
3498
3499                /* get next parent page address */
3500                bn = le64_to_cpu(p->header.next);
3501
3502                /* unpin current parent page */
3503                DT_PUTPAGE(mp);
3504
3505                /* offset beyond eof ? */
3506                if (bn == 0) {
3507                        bn = -1;
3508                        goto out;
3509                }
3510
3511                /* get next parent page */
3512                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3513                if (rc)
3514                        return rc;
3515
3516                /* update parent page stack frame */
3517                parent->bn = bn;
3518        }
3519
3520        /* get leaf page address */
3521        stbl = DT_GETSTBL(p);
3522        xd = (pxd_t *) & p->slot[stbl[pn]];
3523        bn = addressPXD(xd);
3524
3525        /* unpin parent page */
3526        DT_PUTPAGE(mp);
3527
3528        /*
3529         * get target leaf page
3530         */
3531      c:
3532        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3533        if (rc)
3534                return rc;
3535
3536        /*
3537         * leaf page has been completed:
3538         * start with 1st entry of next leaf page
3539         */
3540        if (index >= p->header.nextindex) {
3541                bn = le64_to_cpu(p->header.next);
3542
3543                /* unpin leaf page */
3544                DT_PUTPAGE(mp);
3545
3546                /* offset beyond eof ? */
3547                if (bn == 0) {
3548                        bn = -1;
3549                        goto out;
3550                }
3551
3552                /* get next leaf page */
3553                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3554                if (rc)
3555                        return rc;
3556
3557                /* start with 1st entry of next leaf page */
3558                dtoffset->pn++;
3559                dtoffset->index = 0;
3560        }
3561
3562      out:
3563        /* return target leaf page pinned */
3564        btsp = btstack->top;
3565        btsp->bn = bn;
3566        btsp->index = dtoffset->index;
3567        btsp->mp = mp;
3568
3569        return 0;
3570}
3571
3572
3573/*
3574 *      dtCompare()
3575 *
3576 * function: compare search key with an internal entry
3577 *
3578 * return:
3579 *      < 0 if k is < record
3580 *      = 0 if k is = record
3581 *      > 0 if k is > record
3582 */
3583static int dtCompare(struct component_name * key,       /* search key */
3584                     dtpage_t * p,      /* directory page */
3585                     int si)
3586{                               /* entry slot index */
3587        wchar_t *kname;
3588        __le16 *name;
3589        int klen, namlen, len, rc;
3590        struct idtentry *ih;
3591        struct dtslot *t;
3592
3593        /*
3594         * force the left-most key on internal pages, at any level of
3595         * the tree, to be less than any search key.
3596         * this obviates having to update the leftmost key on an internal
3597         * page when the user inserts a new key in the tree smaller than
3598         * anything that has been stored.
3599         *
3600         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3601         * at any internal page at any level of the tree,
3602         * it descends to child of the entry anyway -
3603         * ? make the entry as min size dummy entry)
3604         *
3605         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3606         * return (1);
3607         */
3608
3609        kname = key->name;
3610        klen = key->namlen;
3611
3612        ih = (struct idtentry *) & p->slot[si];
3613        si = ih->next;
3614        name = ih->name;
3615        namlen = ih->namlen;
3616        len = min(namlen, DTIHDRDATALEN);
3617
3618        /* compare with head/only segment */
3619        len = min(klen, len);
3620        if ((rc = UniStrncmp_le(kname, name, len)))
3621                return rc;
3622
3623        klen -= len;
3624        namlen -= len;
3625
3626        /* compare with additional segment(s) */
3627        kname += len;
3628        while (klen > 0 && namlen > 0) {
3629                /* compare with next name segment */
3630                t = (struct dtslot *) & p->slot[si];
3631                len = min(namlen, DTSLOTDATALEN);
3632                len = min(klen, len);
3633                name = t->name;
3634                if ((rc = UniStrncmp_le(kname, name, len)))
3635                        return rc;
3636
3637                klen -= len;
3638                namlen -= len;
3639                kname += len;
3640                si = t->next;
3641        }
3642
3643        return (klen - namlen);
3644}
3645
3646
3647
3648
3649/*
3650 *      ciCompare()
3651 *
3652 * function: compare search key with an (leaf/internal) entry
3653 *
3654 * return:
3655 *      < 0 if k is < record
3656 *      = 0 if k is = record
3657 *      > 0 if k is > record
3658 */
3659static int ciCompare(struct component_name * key,       /* search key */
3660                     dtpage_t * p,      /* directory page */
3661                     int si,    /* entry slot index */
3662                     int flag)
3663{
3664        wchar_t *kname, x;
3665        __le16 *name;
3666        int klen, namlen, len, rc;
3667        struct ldtentry *lh;
3668        struct idtentry *ih;
3669        struct dtslot *t;
3670        int i;
3671
3672        /*
3673         * force the left-most key on internal pages, at any level of
3674         * the tree, to be less than any search key.
3675         * this obviates having to update the leftmost key on an internal
3676         * page when the user inserts a new key in the tree smaller than
3677         * anything that has been stored.
3678         *
3679         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3680         * at any internal page at any level of the tree,
3681         * it descends to child of the entry anyway -
3682         * ? make the entry as min size dummy entry)
3683         *
3684         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3685         * return (1);
3686         */
3687
3688        kname = key->name;
3689        klen = key->namlen;
3690
3691        /*
3692         * leaf page entry
3693         */
3694        if (p->header.flag & BT_LEAF) {
3695                lh = (struct ldtentry *) & p->slot[si];
3696                si = lh->next;
3697                name = lh->name;
3698                namlen = lh->namlen;
3699                if (flag & JFS_DIR_INDEX)
3700                        len = min(namlen, DTLHDRDATALEN);
3701                else
3702                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3703        }
3704        /*
3705         * internal page entry
3706         */
3707        else {
3708                ih = (struct idtentry *) & p->slot[si];
3709                si = ih->next;
3710                name = ih->name;
3711                namlen = ih->namlen;
3712                len = min(namlen, DTIHDRDATALEN);
3713        }
3714
3715        /* compare with head/only segment */
3716        len = min(klen, len);
3717        for (i = 0; i < len; i++, kname++, name++) {
3718                /* only uppercase if case-insensitive support is on */
3719                if ((flag & JFS_OS2) == JFS_OS2)
3720                        x = UniToupper(le16_to_cpu(*name));
3721                else
3722                        x = le16_to_cpu(*name);
3723                if ((rc = *kname - x))
3724                        return rc;
3725        }
3726
3727        klen -= len;
3728        namlen -= len;
3729
3730        /* compare with additional segment(s) */
3731        while (klen > 0 && namlen > 0) {
3732                /* compare with next name segment */
3733                t = (struct dtslot *) & p->slot[si];
3734                len = min(namlen, DTSLOTDATALEN);
3735                len = min(klen, len);
3736                name = t->name;
3737                for (i = 0; i < len; i++, kname++, name++) {
3738                        /* only uppercase if case-insensitive support is on */
3739                        if ((flag & JFS_OS2) == JFS_OS2)
3740                                x = UniToupper(le16_to_cpu(*name));
3741                        else
3742                                x = le16_to_cpu(*name);
3743
3744                        if ((rc = *kname - x))
3745                                return rc;
3746                }
3747
3748                klen -= len;
3749                namlen -= len;
3750                si = t->next;
3751        }
3752
3753        return (klen - namlen);
3754}
3755
3756
3757/*
3758 *      ciGetLeafPrefixKey()
3759 *
3760 * function: compute prefix of suffix compression
3761 *           from two adjacent leaf entries
3762 *           across page boundary
3763 *
3764 * return: non-zero on error
3765 *
3766 */
3767static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3768                               int ri, struct component_name * key, int flag)
3769{
3770        int klen, namlen;
3771        wchar_t *pl, *pr, *kname;
3772        struct component_name lkey;
3773        struct component_name rkey;
3774
3775        lkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3776                                        GFP_KERNEL);
3777        if (lkey.name == NULL)
3778                return -ENOMEM;
3779
3780        rkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3781                                        GFP_KERNEL);
3782        if (rkey.name == NULL) {
3783                kfree(lkey.name);
3784                return -ENOMEM;
3785        }
3786
3787        /* get left and right key */
3788        dtGetKey(lp, li, &lkey, flag);
3789        lkey.name[lkey.namlen] = 0;
3790
3791        if ((flag & JFS_OS2) == JFS_OS2)
3792                ciToUpper(&lkey);
3793
3794        dtGetKey(rp, ri, &rkey, flag);
3795        rkey.name[rkey.namlen] = 0;
3796
3797
3798        if ((flag & JFS_OS2) == JFS_OS2)
3799                ciToUpper(&rkey);
3800
3801        /* compute prefix */
3802        klen = 0;
3803        kname = key->name;
3804        namlen = min(lkey.namlen, rkey.namlen);
3805        for (pl = lkey.name, pr = rkey.name;
3806             namlen; pl++, pr++, namlen--, klen++, kname++) {
3807                *kname = *pr;
3808                if (*pl != *pr) {
3809                        key->namlen = klen + 1;
3810                        goto free_names;
3811                }
3812        }
3813
3814        /* l->namlen <= r->namlen since l <= r */
3815        if (lkey.namlen < rkey.namlen) {
3816                *kname = *pr;
3817                key->namlen = klen + 1;
3818        } else                  /* l->namelen == r->namelen */
3819                key->namlen = klen;
3820
3821free_names:
3822        kfree(lkey.name);
3823        kfree(rkey.name);
3824        return 0;
3825}
3826
3827
3828
3829/*
3830 *      dtGetKey()
3831 *
3832 * function: get key of the entry
3833 */
3834static void dtGetKey(dtpage_t * p, int i,       /* entry index */
3835                     struct component_name * key, int flag)
3836{
3837        int si;
3838        s8 *stbl;
3839        struct ldtentry *lh;
3840        struct idtentry *ih;
3841        struct dtslot *t;
3842        int namlen, len;
3843        wchar_t *kname;
3844        __le16 *name;
3845
3846        /* get entry */
3847        stbl = DT_GETSTBL(p);
3848        si = stbl[i];
3849        if (p->header.flag & BT_LEAF) {
3850                lh = (struct ldtentry *) & p->slot[si];
3851                si = lh->next;
3852                namlen = lh->namlen;
3853                name = lh->name;
3854                if (flag & JFS_DIR_INDEX)
3855                        len = min(namlen, DTLHDRDATALEN);
3856                else
3857                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3858        } else {
3859                ih = (struct idtentry *) & p->slot[si];
3860                si = ih->next;
3861                namlen = ih->namlen;
3862                name = ih->name;
3863                len = min(namlen, DTIHDRDATALEN);
3864        }
3865
3866        key->namlen = namlen;
3867        kname = key->name;
3868
3869        /*
3870         * move head/only segment
3871         */
3872        UniStrncpy_from_le(kname, name, len);
3873
3874        /*
3875         * move additional segment(s)
3876         */
3877        while (si >= 0) {
3878                /* get next segment */
3879                t = &p->slot[si];
3880                kname += len;
3881                namlen -= len;
3882                len = min(namlen, DTSLOTDATALEN);
3883                UniStrncpy_from_le(kname, t->name, len);
3884
3885                si = t->next;
3886        }
3887}
3888
3889
3890/*
3891 *      dtInsertEntry()
3892 *
3893 * function: allocate free slot(s) and
3894 *           write a leaf/internal entry
3895 *
3896 * return: entry slot index
3897 */
3898static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
3899                          ddata_t * data, struct dt_lock ** dtlock)
3900{
3901        struct dtslot *h, *t;
3902        struct ldtentry *lh = NULL;
3903        struct idtentry *ih = NULL;
3904        int hsi, fsi, klen, len, nextindex;
3905        wchar_t *kname;
3906        __le16 *name;
3907        s8 *stbl;
3908        pxd_t *xd;
3909        struct dt_lock *dtlck = *dtlock;
3910        struct lv *lv;
3911        int xsi, n;
3912        s64 bn = 0;
3913        struct metapage *mp = NULL;
3914
3915        klen = key->namlen;
3916        kname = key->name;
3917
3918        /* allocate a free slot */
3919        hsi = fsi = p->header.freelist;
3920        h = &p->slot[fsi];
3921        p->header.freelist = h->next;
3922        --p->header.freecnt;
3923
3924        /* open new linelock */
3925        if (dtlck->index >= dtlck->maxcnt)
3926                dtlck = (struct dt_lock *) txLinelock(dtlck);
3927
3928        lv = & dtlck->lv[dtlck->index];
3929        lv->offset = hsi;
3930
3931        /* write head/only segment */
3932        if (p->header.flag & BT_LEAF) {
3933                lh = (struct ldtentry *) h;
3934                lh->next = h->next;
3935                lh->inumber = cpu_to_le32(data->leaf.ino);
3936                lh->namlen = klen;
3937                name = lh->name;
3938                if (data->leaf.ip) {
3939                        len = min(klen, DTLHDRDATALEN);
3940                        if (!(p->header.flag & BT_ROOT))
3941                                bn = addressPXD(&p->header.self);
3942                        lh->index = cpu_to_le32(add_index(data->leaf.tid,
3943                                                          data->leaf.ip,
3944                                                          bn, index));
3945                } else
3946                        len = min(klen, DTLHDRDATALEN_LEGACY);
3947        } else {
3948                ih = (struct idtentry *) h;
3949                ih->next = h->next;
3950                xd = (pxd_t *) ih;
3951                *xd = data->xd;
3952                ih->namlen = klen;
3953                name = ih->name;
3954                len = min(klen, DTIHDRDATALEN);
3955        }
3956
3957        UniStrncpy_to_le(name, kname, len);
3958
3959        n = 1;
3960        xsi = hsi;
3961
3962        /* write additional segment(s) */
3963        t = h;
3964        klen -= len;
3965        while (klen) {
3966                /* get free slot */
3967                fsi = p->header.freelist;
3968                t = &p->slot[fsi];
3969                p->header.freelist = t->next;
3970                --p->header.freecnt;
3971
3972                /* is next slot contiguous ? */
3973                if (fsi != xsi + 1) {
3974                        /* close current linelock */
3975                        lv->length = n;
3976                        dtlck->index++;
3977
3978                        /* open new linelock */
3979                        if (dtlck->index < dtlck->maxcnt)
3980                                lv++;
3981                        else {
3982                                dtlck = (struct dt_lock *) txLinelock(dtlck);
3983                                lv = & dtlck->lv[0];
3984                        }
3985
3986                        lv->offset = fsi;
3987                        n = 0;
3988                }
3989
3990                kname += len;
3991                len = min(klen, DTSLOTDATALEN);
3992                UniStrncpy_to_le(t->name, kname, len);
3993
3994                n++;
3995                xsi = fsi;
3996                klen -= len;
3997        }
3998
3999        /* close current linelock */
4000        lv->length = n;
4001        dtlck->index++;
4002
4003        *dtlock = dtlck;
4004
4005        /* terminate last/only segment */
4006        if (h == t) {
4007                /* single segment entry */
4008                if (p->header.flag & BT_LEAF)
4009                        lh->next = -1;
4010                else
4011                        ih->next = -1;
4012        } else
4013                /* multi-segment entry */
4014                t->next = -1;
4015
4016        /* if insert into middle, shift right succeeding entries in stbl */
4017        stbl = DT_GETSTBL(p);
4018        nextindex = p->header.nextindex;
4019        if (index < nextindex) {
4020                memmove(stbl + index + 1, stbl + index, nextindex - index);
4021
4022                if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
4023                        s64 lblock;
4024
4025                        /*
4026                         * Need to update slot number for entries that moved
4027                         * in the stbl
4028                         */
4029                        mp = NULL;
4030                        for (n = index + 1; n <= nextindex; n++) {
4031                                lh = (struct ldtentry *) & (p->slot[stbl[n]]);
4032                                modify_index(data->leaf.tid, data->leaf.ip,
4033                                             le32_to_cpu(lh->index), bn, n,
4034                                             &mp, &lblock);
4035                        }
4036                        if (mp)
4037                                release_metapage(mp);
4038                }
4039        }
4040
4041        stbl[index] = hsi;
4042
4043        /* advance next available entry index of stbl */
4044        ++p->header.nextindex;
4045}
4046
4047
4048/*
4049 *      dtMoveEntry()
4050 *
4051 * function: move entries from split/left page to new/right page
4052 *
4053 *      nextindex of dst page and freelist/freecnt of both pages
4054 *      are updated.
4055 */
4056static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
4057                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
4058                        int do_index)
4059{
4060        int ssi, next;          /* src slot index */
4061        int di;                 /* dst entry index */
4062        int dsi;                /* dst slot index */
4063        s8 *sstbl, *dstbl;      /* sorted entry table */
4064        int snamlen, len;
4065        struct ldtentry *slh, *dlh = NULL;
4066        struct idtentry *sih, *dih = NULL;
4067        struct dtslot *h, *s, *d;
4068        struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
4069        struct lv *slv, *dlv;
4070        int xssi, ns, nd;
4071        int sfsi;
4072
4073        sstbl = (s8 *) & sp->slot[sp->header.stblindex];
4074        dstbl = (s8 *) & dp->slot[dp->header.stblindex];
4075
4076        dsi = dp->header.freelist;      /* first (whole page) free slot */
4077        sfsi = sp->header.freelist;
4078
4079        /* linelock destination entry slot */
4080        dlv = & ddtlck->lv[ddtlck->index];
4081        dlv->offset = dsi;
4082
4083        /* linelock source entry slot */
4084        slv = & sdtlck->lv[sdtlck->index];
4085        slv->offset = sstbl[si];
4086        xssi = slv->offset - 1;
4087
4088        /*
4089         * move entries
4090         */
4091        ns = nd = 0;
4092        for (di = 0; si < sp->header.nextindex; si++, di++) {
4093                ssi = sstbl[si];
4094                dstbl[di] = dsi;
4095
4096                /* is next slot contiguous ? */
4097                if (ssi != xssi + 1) {
4098                        /* close current linelock */
4099                        slv->length = ns;
4100                        sdtlck->index++;
4101
4102                        /* open new linelock */
4103                        if (sdtlck->index < sdtlck->maxcnt)
4104                                slv++;
4105                        else {
4106                                sdtlck = (struct dt_lock *) txLinelock(sdtlck);
4107                                slv = & sdtlck->lv[0];
4108                        }
4109
4110                        slv->offset = ssi;
4111                        ns = 0;
4112                }
4113
4114                /*
4115                 * move head/only segment of an entry
4116                 */
4117                /* get dst slot */
4118                h = d = &dp->slot[dsi];
4119
4120                /* get src slot and move */
4121                s = &sp->slot[ssi];
4122                if (sp->header.flag & BT_LEAF) {
4123                        /* get source entry */
4124                        slh = (struct ldtentry *) s;
4125                        dlh = (struct ldtentry *) h;
4126                        snamlen = slh->namlen;
4127
4128                        if (do_index) {
4129                                len = min(snamlen, DTLHDRDATALEN);
4130                                dlh->index = slh->index; /* little-endian */
4131                        } else
4132                                len = min(snamlen, DTLHDRDATALEN_LEGACY);
4133
4134                        memcpy(dlh, slh, 6 + len * 2);
4135
4136                        next = slh->next;
4137
4138                        /* update dst head/only segment next field */
4139                        dsi++;
4140                        dlh->next = dsi;
4141                } else {
4142                        sih = (struct idtentry *) s;
4143                        snamlen = sih->namlen;
4144
4145                        len = min(snamlen, DTIHDRDATALEN);
4146                        dih = (struct idtentry *) h;
4147                        memcpy(dih, sih, 10 + len * 2);
4148                        next = sih->next;
4149
4150                        dsi++;
4151                        dih->next = dsi;
4152                }
4153
4154                /* free src head/only segment */
4155                s->next = sfsi;
4156                s->cnt = 1;
4157                sfsi = ssi;
4158
4159                ns++;
4160                nd++;
4161                xssi = ssi;
4162
4163                /*
4164                 * move additional segment(s) of the entry
4165                 */
4166                snamlen -= len;
4167                while ((ssi = next) >= 0) {
4168                        /* is next slot contiguous ? */
4169                        if (ssi != xssi + 1) {
4170                                /* close current linelock */
4171                                slv->length = ns;
4172                                sdtlck->index++;
4173
4174                                /* open new linelock */
4175                                if (sdtlck->index < sdtlck->maxcnt)
4176                                        slv++;
4177                                else {
4178                                        sdtlck =
4179                                            (struct dt_lock *)
4180                                            txLinelock(sdtlck);
4181                                        slv = & sdtlck->lv[0];
4182                                }
4183
4184                                slv->offset = ssi;
4185                                ns = 0;
4186                        }
4187
4188                        /* get next source segment */
4189                        s = &sp->slot[ssi];
4190
4191                        /* get next destination free slot */
4192                        d++;
4193
4194                        len = min(snamlen, DTSLOTDATALEN);
4195                        UniStrncpy_le(d->name, s->name, len);
4196
4197                        ns++;
4198                        nd++;
4199                        xssi = ssi;
4200
4201                        dsi++;
4202                        d->next = dsi;
4203
4204                        /* free source segment */
4205                        next = s->next;
4206                        s->next = sfsi;
4207                        s->cnt = 1;
4208                        sfsi = ssi;
4209
4210                        snamlen -= len;
4211                }               /* end while */
4212
4213                /* terminate dst last/only segment */
4214                if (h == d) {
4215                        /* single segment entry */
4216                        if (dp->header.flag & BT_LEAF)
4217                                dlh->next = -1;
4218                        else
4219                                dih->next = -1;
4220                } else
4221                        /* multi-segment entry */
4222                        d->next = -1;
4223        }                       /* end for */
4224
4225        /* close current linelock */
4226        slv->length = ns;
4227        sdtlck->index++;
4228        *sdtlock = sdtlck;
4229
4230        dlv->length = nd;
4231        ddtlck->index++;
4232        *ddtlock = ddtlck;
4233
4234        /* update source header */
4235        sp->header.freelist = sfsi;
4236        sp->header.freecnt += nd;
4237
4238        /* update destination header */
4239        dp->header.nextindex = di;
4240
4241        dp->header.freelist = dsi;
4242        dp->header.freecnt -= nd;
4243}
4244
4245
4246/*
4247 *      dtDeleteEntry()
4248 *
4249 * function: free a (leaf/internal) entry
4250 *
4251 * log freelist header, stbl, and each segment slot of entry
4252 * (even though last/only segment next field is modified,
4253 * physical image logging requires all segment slots of
4254 * the entry logged to avoid applying previous updates
4255 * to the same slots)
4256 */
4257static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
4258{
4259        int fsi;                /* free entry slot index */
4260        s8 *stbl;
4261        struct dtslot *t;
4262        int si, freecnt;
4263        struct dt_lock *dtlck = *dtlock;
4264        struct lv *lv;
4265        int xsi, n;
4266
4267        /* get free entry slot index */
4268        stbl = DT_GETSTBL(p);
4269        fsi = stbl[fi];
4270
4271        /* open new linelock */
4272        if (dtlck->index >= dtlck->maxcnt)
4273                dtlck = (struct dt_lock *) txLinelock(dtlck);
4274        lv = & dtlck->lv[dtlck->index];
4275
4276        lv->offset = fsi;
4277
4278        /* get the head/only segment */
4279        t = &p->slot[fsi];
4280        if (p->header.flag & BT_LEAF)
4281                si = ((struct ldtentry *) t)->next;
4282        else
4283                si = ((struct idtentry *) t)->next;
4284        t->next = si;
4285        t->cnt = 1;
4286
4287        n = freecnt = 1;
4288        xsi = fsi;
4289
4290        /* find the last/only segment */
4291        while (si >= 0) {
4292                /* is next slot contiguous ? */
4293                if (si != xsi + 1) {
4294                        /* close current linelock */
4295                        lv->length = n;
4296                        dtlck->index++;
4297
4298                        /* open new linelock */
4299                        if (dtlck->index < dtlck->maxcnt)
4300                                lv++;
4301                        else {
4302                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4303                                lv = & dtlck->lv[0];
4304                        }
4305
4306                        lv->offset = si;
4307                        n = 0;
4308                }
4309
4310                n++;
4311                xsi = si;
4312                freecnt++;
4313
4314                t = &p->slot[si];
4315                t->cnt = 1;
4316                si = t->next;
4317        }
4318
4319        /* close current linelock */
4320        lv->length = n;
4321        dtlck->index++;
4322
4323        *dtlock = dtlck;
4324
4325        /* update freelist */
4326        t->next = p->header.freelist;
4327        p->header.freelist = fsi;
4328        p->header.freecnt += freecnt;
4329
4330        /* if delete from middle,
4331         * shift left the succedding entries in the stbl
4332         */
4333        si = p->header.nextindex;
4334        if (fi < si - 1)
4335                memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
4336
4337        p->header.nextindex--;
4338}
4339
4340
4341/*
4342 *      dtTruncateEntry()
4343 *
4344 * function: truncate a (leaf/internal) entry
4345 *
4346 * log freelist header, stbl, and each segment slot of entry
4347 * (even though last/only segment next field is modified,
4348 * physical image logging requires all segment slots of
4349 * the entry logged to avoid applying previous updates
4350 * to the same slots)
4351 */
4352static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
4353{
4354        int tsi;                /* truncate entry slot index */
4355        s8 *stbl;
4356        struct dtslot *t;
4357        int si, freecnt;
4358        struct dt_lock *dtlck = *dtlock;
4359        struct lv *lv;
4360        int fsi, xsi, n;
4361
4362        /* get free entry slot index */
4363        stbl = DT_GETSTBL(p);
4364        tsi = stbl[ti];
4365
4366        /* open new linelock */
4367        if (dtlck->index >= dtlck->maxcnt)
4368                dtlck = (struct dt_lock *) txLinelock(dtlck);
4369        lv = & dtlck->lv[dtlck->index];
4370
4371        lv->offset = tsi;
4372
4373        /* get the head/only segment */
4374        t = &p->slot[tsi];
4375        ASSERT(p->header.flag & BT_INTERNAL);
4376        ((struct idtentry *) t)->namlen = 0;
4377        si = ((struct idtentry *) t)->next;
4378        ((struct idtentry *) t)->next = -1;
4379
4380        n = 1;
4381        freecnt = 0;
4382        fsi = si;
4383        xsi = tsi;
4384
4385        /* find the last/only segment */
4386        while (si >= 0) {
4387                /* is next slot contiguous ? */
4388                if (si != xsi + 1) {
4389                        /* close current linelock */
4390                        lv->length = n;
4391                        dtlck->index++;
4392
4393                        /* open new linelock */
4394                        if (dtlck->index < dtlck->maxcnt)
4395                                lv++;
4396                        else {
4397                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4398                                lv = & dtlck->lv[0];
4399                        }
4400
4401                        lv->offset = si;
4402                        n = 0;
4403                }
4404
4405                n++;
4406                xsi = si;
4407                freecnt++;
4408
4409                t = &p->slot[si];
4410                t->cnt = 1;
4411                si = t->next;
4412        }
4413
4414        /* close current linelock */
4415        lv->length = n;
4416        dtlck->index++;
4417
4418        *dtlock = dtlck;
4419
4420        /* update freelist */
4421        if (freecnt == 0)
4422                return;
4423        t->next = p->header.freelist;
4424        p->header.freelist = fsi;
4425        p->header.freecnt += freecnt;
4426}
4427
4428
4429/*
4430 *      dtLinelockFreelist()
4431 */
4432static void dtLinelockFreelist(dtpage_t * p,    /* directory page */
4433                               int m,   /* max slot index */
4434                               struct dt_lock ** dtlock)
4435{
4436        int fsi;                /* free entry slot index */
4437        struct dtslot *t;
4438        int si;
4439        struct dt_lock *dtlck = *dtlock;
4440        struct lv *lv;
4441        int xsi, n;
4442
4443        /* get free entry slot index */
4444        fsi = p->header.freelist;
4445
4446        /* open new linelock */
4447        if (dtlck->index >= dtlck->maxcnt)
4448                dtlck = (struct dt_lock *) txLinelock(dtlck);
4449        lv = & dtlck->lv[dtlck->index];
4450
4451        lv->offset = fsi;
4452
4453        n = 1;
4454        xsi = fsi;
4455
4456        t = &p->slot[fsi];
4457        si = t->next;
4458
4459        /* find the last/only segment */
4460        while (si < m && si >= 0) {
4461                /* is next slot contiguous ? */
4462                if (si != xsi + 1) {
4463                        /* close current linelock */
4464                        lv->length = n;
4465                        dtlck->index++;
4466
4467                        /* open new linelock */
4468                        if (dtlck->index < dtlck->maxcnt)
4469                                lv++;
4470                        else {
4471                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4472                                lv = & dtlck->lv[0];
4473                        }
4474
4475                        lv->offset = si;
4476                        n = 0;
4477                }
4478
4479                n++;
4480                xsi = si;
4481
4482                t = &p->slot[si];
4483                si = t->next;
4484        }
4485
4486        /* close current linelock */
4487        lv->length = n;
4488        dtlck->index++;
4489
4490        *dtlock = dtlck;
4491}
4492
4493
4494/*
4495 * NAME: dtModify
4496 *
4497 * FUNCTION: Modify the inode number part of a directory entry
4498 *
4499 * PARAMETERS:
4500 *      tid     - Transaction id
4501 *      ip      - Inode of parent directory
4502 *      key     - Name of entry to be modified
4503 *      orig_ino        - Original inode number expected in entry
4504 *      new_ino - New inode number to put into entry
4505 *      flag    - JFS_RENAME
4506 *
4507 * RETURNS:
4508 *      -ESTALE - If entry found does not match orig_ino passed in
4509 *      -ENOENT - If no entry can be found to match key
4510 *      0       - If successfully modified entry
4511 */
4512int dtModify(tid_t tid, struct inode *ip,
4513         struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
4514{
4515        int rc;
4516        s64 bn;
4517        struct metapage *mp;
4518        dtpage_t *p;
4519        int index;
4520        struct btstack btstack;
4521        struct tlock *tlck;
4522        struct dt_lock *dtlck;
4523        struct lv *lv;
4524        s8 *stbl;
4525        int entry_si;           /* entry slot index */
4526        struct ldtentry *entry;
4527
4528        /*
4529         *      search for the entry to modify:
4530         *
4531         * dtSearch() returns (leaf page pinned, index at which to modify).
4532         */
4533        if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
4534                return rc;
4535
4536        /* retrieve search result */
4537        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4538
4539        BT_MARK_DIRTY(mp, ip);
4540        /*
4541         * acquire a transaction lock on the leaf page of named entry
4542         */
4543        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
4544        dtlck = (struct dt_lock *) & tlck->lock;
4545
4546        /* get slot index of the entry */
4547        stbl = DT_GETSTBL(p);
4548        entry_si = stbl[index];
4549
4550        /* linelock entry */
4551        ASSERT(dtlck->index == 0);
4552        lv = & dtlck->lv[0];
4553        lv->offset = entry_si;
4554        lv->length = 1;
4555        dtlck->index++;
4556
4557        /* get the head/only segment */
4558        entry = (struct ldtentry *) & p->slot[entry_si];
4559
4560        /* substitute the inode number of the entry */
4561        entry->inumber = cpu_to_le32(new_ino);
4562
4563        /* unpin the leaf page */
4564        DT_PUTPAGE(mp);
4565
4566        return 0;
4567}
4568