linux/fs/jfs/jfs_dtree.c
<<
>>
Prefs
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2004
   3 *
   4 *   This program is free software;  you can redistribute it and/or modify
   5 *   it under the terms of the GNU General Public License as published by
   6 *   the Free Software Foundation; either version 2 of the License, or
   7 *   (at your option) any later version.
   8 *
   9 *   This program is distributed in the hope that it will be useful,
  10 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  11 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  12 *   the GNU General Public License for more details.
  13 *
  14 *   You should have received a copy of the GNU General Public License
  15 *   along with this program;  if not, write to the Free Software
  16 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17 */
  18
  19/*
  20 *      jfs_dtree.c: directory B+-tree manager
  21 *
  22 * B+-tree with variable length key directory:
  23 *
  24 * each directory page is structured as an array of 32-byte
  25 * directory entry slots initialized as a freelist
  26 * to avoid search/compaction of free space at insertion.
  27 * when an entry is inserted, a number of slots are allocated
  28 * from the freelist as required to store variable length data
  29 * of the entry; when the entry is deleted, slots of the entry
  30 * are returned to freelist.
  31 *
  32 * leaf entry stores full name as key and file serial number
  33 * (aka inode number) as data.
  34 * internal/router entry stores sufffix compressed name
  35 * as key and simple extent descriptor as data.
  36 *
  37 * each directory page maintains a sorted entry index table
  38 * which stores the start slot index of sorted entries
  39 * to allow binary search on the table.
  40 *
  41 * directory starts as a root/leaf page in on-disk inode
  42 * inline data area.
  43 * when it becomes full, it starts a leaf of a external extent
  44 * of length of 1 block. each time the first leaf becomes full,
  45 * it is extended rather than split (its size is doubled),
  46 * until its length becoms 4 KBytes, from then the extent is split
  47 * with new 4 Kbyte extent when it becomes full
  48 * to reduce external fragmentation of small directories.
  49 *
  50 * blah, blah, blah, for linear scan of directory in pieces by
  51 * readdir().
  52 *
  53 *
  54 *      case-insensitive directory file system
  55 *
  56 * names are stored in case-sensitive way in leaf entry.
  57 * but stored, searched and compared in case-insensitive (uppercase) order
  58 * (i.e., both search key and entry key are folded for search/compare):
  59 * (note that case-sensitive order is BROKEN in storage, e.g.,
  60 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
  61 *
  62 *  entries which folds to the same key makes up a equivalent class
  63 *  whose members are stored as contiguous cluster (may cross page boundary)
  64 *  but whose order is arbitrary and acts as duplicate, e.g.,
  65 *  abc, Abc, aBc, abC)
  66 *
  67 * once match is found at leaf, requires scan forward/backward
  68 * either for, in case-insensitive search, duplicate
  69 * or for, in case-sensitive search, for exact match
  70 *
  71 * router entry must be created/stored in case-insensitive way
  72 * in internal entry:
  73 * (right most key of left page and left most key of right page
  74 * are folded, and its suffix compression is propagated as router
  75 * key in parent)
  76 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
  77 * should be made the router key for the split)
  78 *
  79 * case-insensitive search:
  80 *
  81 *      fold search key;
  82 *
  83 *      case-insensitive search of B-tree:
  84 *      for internal entry, router key is already folded;
  85 *      for leaf entry, fold the entry key before comparison.
  86 *
  87 *      if (leaf entry case-insensitive match found)
  88 *              if (next entry satisfies case-insensitive match)
  89 *                      return EDUPLICATE;
  90 *              if (prev entry satisfies case-insensitive match)
  91 *                      return EDUPLICATE;
  92 *              return match;
  93 *      else
  94 *              return no match;
  95 *
  96 *      serialization:
  97 * target directory inode lock is being held on entry/exit
  98 * of all main directory service routines.
  99 *
 100 *      log based recovery:
 101 */
 102
 103#include <linux/fs.h>
 104#include <linux/quotaops.h>
 105#include <linux/slab.h>
 106#include "jfs_incore.h"
 107#include "jfs_superblock.h"
 108#include "jfs_filsys.h"
 109#include "jfs_metapage.h"
 110#include "jfs_dmap.h"
 111#include "jfs_unicode.h"
 112#include "jfs_debug.h"
 113
 114/* dtree split parameter */
 115struct dtsplit {
 116        struct metapage *mp;
 117        s16 index;
 118        s16 nslot;
 119        struct component_name *key;
 120        ddata_t *data;
 121        struct pxdlist *pxdlist;
 122};
 123
 124#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
 125
 126/* get page buffer for specified block address */
 127#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)                             \
 128do {                                                                    \
 129        BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot);        \
 130        if (!(RC)) {                                                    \
 131                if (((P)->header.nextindex >                            \
 132                     (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \
 133                    ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) {  \
 134                        BT_PUTPAGE(MP);                                 \
 135                        jfs_error((IP)->i_sb,                           \
 136                                  "DT_GETPAGE: dtree page corrupt\n");  \
 137                        MP = NULL;                                      \
 138                        RC = -EIO;                                      \
 139                }                                                       \
 140        }                                                               \
 141} while (0)
 142
 143/* for consistency */
 144#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
 145
 146#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
 147        BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
 148
 149/*
 150 * forward references
 151 */
 152static int dtSplitUp(tid_t tid, struct inode *ip,
 153                     struct dtsplit * split, struct btstack * btstack);
 154
 155static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 156                       struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
 157
 158static int dtExtendPage(tid_t tid, struct inode *ip,
 159                        struct dtsplit * split, struct btstack * btstack);
 160
 161static int dtSplitRoot(tid_t tid, struct inode *ip,
 162                       struct dtsplit * split, struct metapage ** rmpp);
 163
 164static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
 165                      dtpage_t * fp, struct btstack * btstack);
 166
 167static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
 168
 169static int dtReadFirst(struct inode *ip, struct btstack * btstack);
 170
 171static int dtReadNext(struct inode *ip,
 172                      loff_t * offset, struct btstack * btstack);
 173
 174static int dtCompare(struct component_name * key, dtpage_t * p, int si);
 175
 176static int ciCompare(struct component_name * key, dtpage_t * p, int si,
 177                     int flag);
 178
 179static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
 180                     int flag);
 181
 182static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
 183                              int ri, struct component_name * key, int flag);
 184
 185static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
 186                          ddata_t * data, struct dt_lock **);
 187
 188static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
 189                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
 190                        int do_index);
 191
 192static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
 193
 194static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
 195
 196static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
 197
 198#define ciToUpper(c)    UniStrupr((c)->name)
 199
 200/*
 201 *      read_index_page()
 202 *
 203 *      Reads a page of a directory's index table.
 204 *      Having metadata mapped into the directory inode's address space
 205 *      presents a multitude of problems.  We avoid this by mapping to
 206 *      the absolute address space outside of the *_metapage routines
 207 */
 208static struct metapage *read_index_page(struct inode *inode, s64 blkno)
 209{
 210        int rc;
 211        s64 xaddr;
 212        int xflag;
 213        s32 xlen;
 214
 215        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
 216        if (rc || (xaddr == 0))
 217                return NULL;
 218
 219        return read_metapage(inode, xaddr, PSIZE, 1);
 220}
 221
 222/*
 223 *      get_index_page()
 224 *
 225 *      Same as get_index_page(), but get's a new page without reading
 226 */
 227static struct metapage *get_index_page(struct inode *inode, s64 blkno)
 228{
 229        int rc;
 230        s64 xaddr;
 231        int xflag;
 232        s32 xlen;
 233
 234        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
 235        if (rc || (xaddr == 0))
 236                return NULL;
 237
 238        return get_metapage(inode, xaddr, PSIZE, 1);
 239}
 240
 241/*
 242 *      find_index()
 243 *
 244 *      Returns dtree page containing directory table entry for specified
 245 *      index and pointer to its entry.
 246 *
 247 *      mp must be released by caller.
 248 */
 249static struct dir_table_slot *find_index(struct inode *ip, u32 index,
 250                                         struct metapage ** mp, s64 *lblock)
 251{
 252        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 253        s64 blkno;
 254        s64 offset;
 255        int page_offset;
 256        struct dir_table_slot *slot;
 257        static int maxWarnings = 10;
 258
 259        if (index < 2) {
 260                if (maxWarnings) {
 261                        jfs_warn("find_entry called with index = %d", index);
 262                        maxWarnings--;
 263                }
 264                return NULL;
 265        }
 266
 267        if (index >= jfs_ip->next_index) {
 268                jfs_warn("find_entry called with index >= next_index");
 269                return NULL;
 270        }
 271
 272        if (jfs_dirtable_inline(ip)) {
 273                /*
 274                 * Inline directory table
 275                 */
 276                *mp = NULL;
 277                slot = &jfs_ip->i_dirtable[index - 2];
 278        } else {
 279                offset = (index - 2) * sizeof(struct dir_table_slot);
 280                page_offset = offset & (PSIZE - 1);
 281                blkno = ((offset + 1) >> L2PSIZE) <<
 282                    JFS_SBI(ip->i_sb)->l2nbperpage;
 283
 284                if (*mp && (*lblock != blkno)) {
 285                        release_metapage(*mp);
 286                        *mp = NULL;
 287                }
 288                if (!(*mp)) {
 289                        *lblock = blkno;
 290                        *mp = read_index_page(ip, blkno);
 291                }
 292                if (!(*mp)) {
 293                        jfs_err("free_index: error reading directory table");
 294                        return NULL;
 295                }
 296
 297                slot =
 298                    (struct dir_table_slot *) ((char *) (*mp)->data +
 299                                               page_offset);
 300        }
 301        return slot;
 302}
 303
 304static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
 305                              u32 index)
 306{
 307        struct tlock *tlck;
 308        struct linelock *llck;
 309        struct lv *lv;
 310
 311        tlck = txLock(tid, ip, mp, tlckDATA);
 312        llck = (struct linelock *) tlck->lock;
 313
 314        if (llck->index >= llck->maxcnt)
 315                llck = txLinelock(llck);
 316        lv = &llck->lv[llck->index];
 317
 318        /*
 319         *      Linelock slot size is twice the size of directory table
 320         *      slot size.  512 entries per page.
 321         */
 322        lv->offset = ((index - 2) & 511) >> 1;
 323        lv->length = 1;
 324        llck->index++;
 325}
 326
 327/*
 328 *      add_index()
 329 *
 330 *      Adds an entry to the directory index table.  This is used to provide
 331 *      each directory entry with a persistent index in which to resume
 332 *      directory traversals
 333 */
 334static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
 335{
 336        struct super_block *sb = ip->i_sb;
 337        struct jfs_sb_info *sbi = JFS_SBI(sb);
 338        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 339        u64 blkno;
 340        struct dir_table_slot *dirtab_slot;
 341        u32 index;
 342        struct linelock *llck;
 343        struct lv *lv;
 344        struct metapage *mp;
 345        s64 offset;
 346        uint page_offset;
 347        struct tlock *tlck;
 348        s64 xaddr;
 349
 350        ASSERT(DO_INDEX(ip));
 351
 352        if (jfs_ip->next_index < 2) {
 353                jfs_warn("add_index: next_index = %d.  Resetting!",
 354                           jfs_ip->next_index);
 355                jfs_ip->next_index = 2;
 356        }
 357
 358        index = jfs_ip->next_index++;
 359
 360        if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
 361                /*
 362                 * i_size reflects size of index table, or 8 bytes per entry.
 363                 */
 364                ip->i_size = (loff_t) (index - 1) << 3;
 365
 366                /*
 367                 * dir table fits inline within inode
 368                 */
 369                dirtab_slot = &jfs_ip->i_dirtable[index-2];
 370                dirtab_slot->flag = DIR_INDEX_VALID;
 371                dirtab_slot->slot = slot;
 372                DTSaddress(dirtab_slot, bn);
 373
 374                set_cflag(COMMIT_Dirtable, ip);
 375
 376                return index;
 377        }
 378        if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
 379                struct dir_table_slot temp_table[12];
 380
 381                /*
 382                 * It's time to move the inline table to an external
 383                 * page and begin to build the xtree
 384                 */
 385                if (dquot_alloc_block(ip, sbi->nbperpage))
 386                        goto clean_up;
 387                if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
 388                        dquot_free_block(ip, sbi->nbperpage);
 389                        goto clean_up;
 390                }
 391
 392                /*
 393                 * Save the table, we're going to overwrite it with the
 394                 * xtree root
 395                 */
 396                memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
 397
 398                /*
 399                 * Initialize empty x-tree
 400                 */
 401                xtInitRoot(tid, ip);
 402
 403                /*
 404                 * Add the first block to the xtree
 405                 */
 406                if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
 407                        /* This really shouldn't fail */
 408                        jfs_warn("add_index: xtInsert failed!");
 409                        memcpy(&jfs_ip->i_dirtable, temp_table,
 410                               sizeof (temp_table));
 411                        dbFree(ip, xaddr, sbi->nbperpage);
 412                        dquot_free_block(ip, sbi->nbperpage);
 413                        goto clean_up;
 414                }
 415                ip->i_size = PSIZE;
 416
 417                mp = get_index_page(ip, 0);
 418                if (!mp) {
 419                        jfs_err("add_index: get_metapage failed!");
 420                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
 421                        memcpy(&jfs_ip->i_dirtable, temp_table,
 422                               sizeof (temp_table));
 423                        goto clean_up;
 424                }
 425                tlck = txLock(tid, ip, mp, tlckDATA);
 426                llck = (struct linelock *) & tlck->lock;
 427                ASSERT(llck->index == 0);
 428                lv = &llck->lv[0];
 429
 430                lv->offset = 0;
 431                lv->length = 6; /* tlckDATA slot size is 16 bytes */
 432                llck->index++;
 433
 434                memcpy(mp->data, temp_table, sizeof(temp_table));
 435
 436                mark_metapage_dirty(mp);
 437                release_metapage(mp);
 438
 439                /*
 440                 * Logging is now directed by xtree tlocks
 441                 */
 442                clear_cflag(COMMIT_Dirtable, ip);
 443        }
 444
 445        offset = (index - 2) * sizeof(struct dir_table_slot);
 446        page_offset = offset & (PSIZE - 1);
 447        blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
 448        if (page_offset == 0) {
 449                /*
 450                 * This will be the beginning of a new page
 451                 */
 452                xaddr = 0;
 453                if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
 454                        jfs_warn("add_index: xtInsert failed!");
 455                        goto clean_up;
 456                }
 457                ip->i_size += PSIZE;
 458
 459                if ((mp = get_index_page(ip, blkno)))
 460                        memset(mp->data, 0, PSIZE);     /* Just looks better */
 461                else
 462                        xtTruncate(tid, ip, offset, COMMIT_PWMAP);
 463        } else
 464                mp = read_index_page(ip, blkno);
 465
 466        if (!mp) {
 467                jfs_err("add_index: get/read_metapage failed!");
 468                goto clean_up;
 469        }
 470
 471        lock_index(tid, ip, mp, index);
 472
 473        dirtab_slot =
 474            (struct dir_table_slot *) ((char *) mp->data + page_offset);
 475        dirtab_slot->flag = DIR_INDEX_VALID;
 476        dirtab_slot->slot = slot;
 477        DTSaddress(dirtab_slot, bn);
 478
 479        mark_metapage_dirty(mp);
 480        release_metapage(mp);
 481
 482        return index;
 483
 484      clean_up:
 485
 486        jfs_ip->next_index--;
 487
 488        return 0;
 489}
 490
 491/*
 492 *      free_index()
 493 *
 494 *      Marks an entry to the directory index table as free.
 495 */
 496static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
 497{
 498        struct dir_table_slot *dirtab_slot;
 499        s64 lblock;
 500        struct metapage *mp = NULL;
 501
 502        dirtab_slot = find_index(ip, index, &mp, &lblock);
 503
 504        if (!dirtab_slot)
 505                return;
 506
 507        dirtab_slot->flag = DIR_INDEX_FREE;
 508        dirtab_slot->slot = dirtab_slot->addr1 = 0;
 509        dirtab_slot->addr2 = cpu_to_le32(next);
 510
 511        if (mp) {
 512                lock_index(tid, ip, mp, index);
 513                mark_metapage_dirty(mp);
 514                release_metapage(mp);
 515        } else
 516                set_cflag(COMMIT_Dirtable, ip);
 517}
 518
 519/*
 520 *      modify_index()
 521 *
 522 *      Changes an entry in the directory index table
 523 */
 524static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
 525                         int slot, struct metapage ** mp, s64 *lblock)
 526{
 527        struct dir_table_slot *dirtab_slot;
 528
 529        dirtab_slot = find_index(ip, index, mp, lblock);
 530
 531        if (!dirtab_slot)
 532                return;
 533
 534        DTSaddress(dirtab_slot, bn);
 535        dirtab_slot->slot = slot;
 536
 537        if (*mp) {
 538                lock_index(tid, ip, *mp, index);
 539                mark_metapage_dirty(*mp);
 540        } else
 541                set_cflag(COMMIT_Dirtable, ip);
 542}
 543
 544/*
 545 *      read_index()
 546 *
 547 *      reads a directory table slot
 548 */
 549static int read_index(struct inode *ip, u32 index,
 550                     struct dir_table_slot * dirtab_slot)
 551{
 552        s64 lblock;
 553        struct metapage *mp = NULL;
 554        struct dir_table_slot *slot;
 555
 556        slot = find_index(ip, index, &mp, &lblock);
 557        if (!slot) {
 558                return -EIO;
 559        }
 560
 561        memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));
 562
 563        if (mp)
 564                release_metapage(mp);
 565
 566        return 0;
 567}
 568
 569/*
 570 *      dtSearch()
 571 *
 572 * function:
 573 *      Search for the entry with specified key
 574 *
 575 * parameter:
 576 *
 577 * return: 0 - search result on stack, leaf page pinned;
 578 *         errno - I/O error
 579 */
 580int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
 581             struct btstack * btstack, int flag)
 582{
 583        int rc = 0;
 584        int cmp = 1;            /* init for empty page */
 585        s64 bn;
 586        struct metapage *mp;
 587        dtpage_t *p;
 588        s8 *stbl;
 589        int base, index, lim;
 590        struct btframe *btsp;
 591        pxd_t *pxd;
 592        int psize = 288;        /* initial in-line directory */
 593        ino_t inumber;
 594        struct component_name ciKey;
 595        struct super_block *sb = ip->i_sb;
 596
 597        ciKey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS);
 598        if (!ciKey.name) {
 599                rc = -ENOMEM;
 600                goto dtSearch_Exit2;
 601        }
 602
 603
 604        /* uppercase search key for c-i directory */
 605        UniStrcpy(ciKey.name, key->name);
 606        ciKey.namlen = key->namlen;
 607
 608        /* only uppercase if case-insensitive support is on */
 609        if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
 610                ciToUpper(&ciKey);
 611        }
 612        BT_CLR(btstack);        /* reset stack */
 613
 614        /* init level count for max pages to split */
 615        btstack->nsplit = 1;
 616
 617        /*
 618         *      search down tree from root:
 619         *
 620         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 621         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
 622         *
 623         * if entry with search key K is not found
 624         * internal page search find the entry with largest key Ki
 625         * less than K which point to the child page to search;
 626         * leaf page search find the entry with smallest key Kj
 627         * greater than K so that the returned index is the position of
 628         * the entry to be shifted right for insertion of new entry.
 629         * for empty tree, search key is greater than any key of the tree.
 630         *
 631         * by convention, root bn = 0.
 632         */
 633        for (bn = 0;;) {
 634                /* get/pin the page to search */
 635                DT_GETPAGE(ip, bn, mp, psize, p, rc);
 636                if (rc)
 637                        goto dtSearch_Exit1;
 638
 639                /* get sorted entry table of the page */
 640                stbl = DT_GETSTBL(p);
 641
 642                /*
 643                 * binary search with search key K on the current page.
 644                 */
 645                for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
 646                        index = base + (lim >> 1);
 647
 648                        if (p->header.flag & BT_LEAF) {
 649                                /* uppercase leaf name to compare */
 650                                cmp =
 651                                    ciCompare(&ciKey, p, stbl[index],
 652                                              JFS_SBI(sb)->mntflag);
 653                        } else {
 654                                /* router key is in uppercase */
 655
 656                                cmp = dtCompare(&ciKey, p, stbl[index]);
 657
 658
 659                        }
 660                        if (cmp == 0) {
 661                                /*
 662                                 *      search hit
 663                                 */
 664                                /* search hit - leaf page:
 665                                 * return the entry found
 666                                 */
 667                                if (p->header.flag & BT_LEAF) {
 668                                        inumber = le32_to_cpu(
 669                        ((struct ldtentry *) & p->slot[stbl[index]])->inumber);
 670
 671                                        /*
 672                                         * search for JFS_LOOKUP
 673                                         */
 674                                        if (flag == JFS_LOOKUP) {
 675                                                *data = inumber;
 676                                                rc = 0;
 677                                                goto out;
 678                                        }
 679
 680                                        /*
 681                                         * search for JFS_CREATE
 682                                         */
 683                                        if (flag == JFS_CREATE) {
 684                                                *data = inumber;
 685                                                rc = -EEXIST;
 686                                                goto out;
 687                                        }
 688
 689                                        /*
 690                                         * search for JFS_REMOVE or JFS_RENAME
 691                                         */
 692                                        if ((flag == JFS_REMOVE ||
 693                                             flag == JFS_RENAME) &&
 694                                            *data != inumber) {
 695                                                rc = -ESTALE;
 696                                                goto out;
 697                                        }
 698
 699                                        /*
 700                                         * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
 701                                         */
 702                                        /* save search result */
 703                                        *data = inumber;
 704                                        btsp = btstack->top;
 705                                        btsp->bn = bn;
 706                                        btsp->index = index;
 707                                        btsp->mp = mp;
 708
 709                                        rc = 0;
 710                                        goto dtSearch_Exit1;
 711                                }
 712
 713                                /* search hit - internal page:
 714                                 * descend/search its child page
 715                                 */
 716                                goto getChild;
 717                        }
 718
 719                        if (cmp > 0) {
 720                                base = index + 1;
 721                                --lim;
 722                        }
 723                }
 724
 725                /*
 726                 *      search miss
 727                 *
 728                 * base is the smallest index with key (Kj) greater than
 729                 * search key (K) and may be zero or (maxindex + 1) index.
 730                 */
 731                /*
 732                 * search miss - leaf page
 733                 *
 734                 * return location of entry (base) where new entry with
 735                 * search key K is to be inserted.
 736                 */
 737                if (p->header.flag & BT_LEAF) {
 738                        /*
 739                         * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
 740                         */
 741                        if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
 742                            flag == JFS_RENAME) {
 743                                rc = -ENOENT;
 744                                goto out;
 745                        }
 746
 747                        /*
 748                         * search for JFS_CREATE|JFS_FINDDIR:
 749                         *
 750                         * save search result
 751                         */
 752                        *data = 0;
 753                        btsp = btstack->top;
 754                        btsp->bn = bn;
 755                        btsp->index = base;
 756                        btsp->mp = mp;
 757
 758                        rc = 0;
 759                        goto dtSearch_Exit1;
 760                }
 761
 762                /*
 763                 * search miss - internal page
 764                 *
 765                 * if base is non-zero, decrement base by one to get the parent
 766                 * entry of the child page to search.
 767                 */
 768                index = base ? base - 1 : base;
 769
 770                /*
 771                 * go down to child page
 772                 */
 773              getChild:
 774                /* update max. number of pages to split */
 775                if (BT_STACK_FULL(btstack)) {
 776                        /* Something's corrupted, mark filesystem dirty so
 777                         * chkdsk will fix it.
 778                         */
 779                        jfs_error(sb, "stack overrun!\n");
 780                        BT_STACK_DUMP(btstack);
 781                        rc = -EIO;
 782                        goto out;
 783                }
 784                btstack->nsplit++;
 785
 786                /* push (bn, index) of the parent page/entry */
 787                BT_PUSH(btstack, bn, index);
 788
 789                /* get the child page block number */
 790                pxd = (pxd_t *) & p->slot[stbl[index]];
 791                bn = addressPXD(pxd);
 792                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
 793
 794                /* unpin the parent page */
 795                DT_PUTPAGE(mp);
 796        }
 797
 798      out:
 799        DT_PUTPAGE(mp);
 800
 801      dtSearch_Exit1:
 802
 803        kfree(ciKey.name);
 804
 805      dtSearch_Exit2:
 806
 807        return rc;
 808}
 809
 810
 811/*
 812 *      dtInsert()
 813 *
 814 * function: insert an entry to directory tree
 815 *
 816 * parameter:
 817 *
 818 * return: 0 - success;
 819 *         errno - failure;
 820 */
 821int dtInsert(tid_t tid, struct inode *ip,
 822         struct component_name * name, ino_t * fsn, struct btstack * btstack)
 823{
 824        int rc = 0;
 825        struct metapage *mp;    /* meta-page buffer */
 826        dtpage_t *p;            /* base B+-tree index page */
 827        s64 bn;
 828        int index;
 829        struct dtsplit split;   /* split information */
 830        ddata_t data;
 831        struct dt_lock *dtlck;
 832        int n;
 833        struct tlock *tlck;
 834        struct lv *lv;
 835
 836        /*
 837         *      retrieve search result
 838         *
 839         * dtSearch() returns (leaf page pinned, index at which to insert).
 840         * n.b. dtSearch() may return index of (maxindex + 1) of
 841         * the full page.
 842         */
 843        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
 844
 845        /*
 846         *      insert entry for new key
 847         */
 848        if (DO_INDEX(ip)) {
 849                if (JFS_IP(ip)->next_index == DIREND) {
 850                        DT_PUTPAGE(mp);
 851                        return -EMLINK;
 852                }
 853                n = NDTLEAF(name->namlen);
 854                data.leaf.tid = tid;
 855                data.leaf.ip = ip;
 856        } else {
 857                n = NDTLEAF_LEGACY(name->namlen);
 858                data.leaf.ip = NULL;    /* signifies legacy directory format */
 859        }
 860        data.leaf.ino = *fsn;
 861
 862        /*
 863         *      leaf page does not have enough room for new entry:
 864         *
 865         *      extend/split the leaf page;
 866         *
 867         * dtSplitUp() will insert the entry and unpin the leaf page.
 868         */
 869        if (n > p->header.freecnt) {
 870                split.mp = mp;
 871                split.index = index;
 872                split.nslot = n;
 873                split.key = name;
 874                split.data = &data;
 875                rc = dtSplitUp(tid, ip, &split, btstack);
 876                return rc;
 877        }
 878
 879        /*
 880         *      leaf page does have enough room for new entry:
 881         *
 882         *      insert the new data entry into the leaf page;
 883         */
 884        BT_MARK_DIRTY(mp, ip);
 885        /*
 886         * acquire a transaction lock on the leaf page
 887         */
 888        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
 889        dtlck = (struct dt_lock *) & tlck->lock;
 890        ASSERT(dtlck->index == 0);
 891        lv = & dtlck->lv[0];
 892
 893        /* linelock header */
 894        lv->offset = 0;
 895        lv->length = 1;
 896        dtlck->index++;
 897
 898        dtInsertEntry(p, index, name, &data, &dtlck);
 899
 900        /* linelock stbl of non-root leaf page */
 901        if (!(p->header.flag & BT_ROOT)) {
 902                if (dtlck->index >= dtlck->maxcnt)
 903                        dtlck = (struct dt_lock *) txLinelock(dtlck);
 904                lv = & dtlck->lv[dtlck->index];
 905                n = index >> L2DTSLOTSIZE;
 906                lv->offset = p->header.stblindex + n;
 907                lv->length =
 908                    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
 909                dtlck->index++;
 910        }
 911
 912        /* unpin the leaf page */
 913        DT_PUTPAGE(mp);
 914
 915        return 0;
 916}
 917
 918
 919/*
 920 *      dtSplitUp()
 921 *
 922 * function: propagate insertion bottom up;
 923 *
 924 * parameter:
 925 *
 926 * return: 0 - success;
 927 *         errno - failure;
 928 *      leaf page unpinned;
 929 */
 930static int dtSplitUp(tid_t tid,
 931          struct inode *ip, struct dtsplit * split, struct btstack * btstack)
 932{
 933        struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
 934        int rc = 0;
 935        struct metapage *smp;
 936        dtpage_t *sp;           /* split page */
 937        struct metapage *rmp;
 938        dtpage_t *rp;           /* new right page split from sp */
 939        pxd_t rpxd;             /* new right page extent descriptor */
 940        struct metapage *lmp;
 941        dtpage_t *lp;           /* left child page */
 942        int skip;               /* index of entry of insertion */
 943        struct btframe *parent; /* parent page entry on traverse stack */
 944        s64 xaddr, nxaddr;
 945        int xlen, xsize;
 946        struct pxdlist pxdlist;
 947        pxd_t *pxd;
 948        struct component_name key = { 0, NULL };
 949        ddata_t *data = split->data;
 950        int n;
 951        struct dt_lock *dtlck;
 952        struct tlock *tlck;
 953        struct lv *lv;
 954        int quota_allocation = 0;
 955
 956        /* get split page */
 957        smp = split->mp;
 958        sp = DT_PAGE(ip, smp);
 959
 960        key.name = kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), GFP_NOFS);
 961        if (!key.name) {
 962                DT_PUTPAGE(smp);
 963                rc = -ENOMEM;
 964                goto dtSplitUp_Exit;
 965        }
 966
 967        /*
 968         *      split leaf page
 969         *
 970         * The split routines insert the new entry, and
 971         * acquire txLock as appropriate.
 972         */
 973        /*
 974         *      split root leaf page:
 975         */
 976        if (sp->header.flag & BT_ROOT) {
 977                /*
 978                 * allocate a single extent child page
 979                 */
 980                xlen = 1;
 981                n = sbi->bsize >> L2DTSLOTSIZE;
 982                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
 983                n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
 984                if (n <= split->nslot)
 985                        xlen++;
 986                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
 987                        DT_PUTPAGE(smp);
 988                        goto freeKeyName;
 989                }
 990
 991                pxdlist.maxnpxd = 1;
 992                pxdlist.npxd = 0;
 993                pxd = &pxdlist.pxd[0];
 994                PXDaddress(pxd, xaddr);
 995                PXDlength(pxd, xlen);
 996                split->pxdlist = &pxdlist;
 997                rc = dtSplitRoot(tid, ip, split, &rmp);
 998
 999                if (rc)
1000                        dbFree(ip, xaddr, xlen);
1001                else
1002                        DT_PUTPAGE(rmp);
1003
1004                DT_PUTPAGE(smp);
1005
1006                if (!DO_INDEX(ip))
1007                        ip->i_size = xlen << sbi->l2bsize;
1008
1009                goto freeKeyName;
1010        }
1011
1012        /*
1013         *      extend first leaf page
1014         *
1015         * extend the 1st extent if less than buffer page size
1016         * (dtExtendPage() reurns leaf page unpinned)
1017         */
1018        pxd = &sp->header.self;
1019        xlen = lengthPXD(pxd);
1020        xsize = xlen << sbi->l2bsize;
1021        if (xsize < PSIZE) {
1022                xaddr = addressPXD(pxd);
1023                n = xsize >> L2DTSLOTSIZE;
1024                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
1025                if ((n + sp->header.freecnt) <= split->nslot)
1026                        n = xlen + (xlen << 1);
1027                else
1028                        n = xlen;
1029
1030                /* Allocate blocks to quota. */
1031                rc = dquot_alloc_block(ip, n);
1032                if (rc)
1033                        goto extendOut;
1034                quota_allocation += n;
1035
1036                if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
1037                                    (s64) n, &nxaddr)))
1038                        goto extendOut;
1039
1040                pxdlist.maxnpxd = 1;
1041                pxdlist.npxd = 0;
1042                pxd = &pxdlist.pxd[0];
1043                PXDaddress(pxd, nxaddr)
1044                    PXDlength(pxd, xlen + n);
1045                split->pxdlist = &pxdlist;
1046                if ((rc = dtExtendPage(tid, ip, split, btstack))) {
1047                        nxaddr = addressPXD(pxd);
1048                        if (xaddr != nxaddr) {
1049                                /* free relocated extent */
1050                                xlen = lengthPXD(pxd);
1051                                dbFree(ip, nxaddr, (s64) xlen);
1052                        } else {
1053                                /* free extended delta */
1054                                xlen = lengthPXD(pxd) - n;
1055                                xaddr = addressPXD(pxd) + xlen;
1056                                dbFree(ip, xaddr, (s64) n);
1057                        }
1058                } else if (!DO_INDEX(ip))
1059                        ip->i_size = lengthPXD(pxd) << sbi->l2bsize;
1060
1061
1062              extendOut:
1063                DT_PUTPAGE(smp);
1064                goto freeKeyName;
1065        }
1066
1067        /*
1068         *      split leaf page <sp> into <sp> and a new right page <rp>.
1069         *
1070         * return <rp> pinned and its extent descriptor <rpxd>
1071         */
1072        /*
1073         * allocate new directory page extent and
1074         * new index page(s) to cover page split(s)
1075         *
1076         * allocation hint: ?
1077         */
1078        n = btstack->nsplit;
1079        pxdlist.maxnpxd = pxdlist.npxd = 0;
1080        xlen = sbi->nbperpage;
1081        for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
1082                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
1083                        PXDaddress(pxd, xaddr);
1084                        PXDlength(pxd, xlen);
1085                        pxdlist.maxnpxd++;
1086                        continue;
1087                }
1088
1089                DT_PUTPAGE(smp);
1090
1091                /* undo allocation */
1092                goto splitOut;
1093        }
1094
1095        split->pxdlist = &pxdlist;
1096        if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
1097                DT_PUTPAGE(smp);
1098
1099                /* undo allocation */
1100                goto splitOut;
1101        }
1102
1103        if (!DO_INDEX(ip))
1104                ip->i_size += PSIZE;
1105
1106        /*
1107         * propagate up the router entry for the leaf page just split
1108         *
1109         * insert a router entry for the new page into the parent page,
1110         * propagate the insert/split up the tree by walking back the stack
1111         * of (bn of parent page, index of child page entry in parent page)
1112         * that were traversed during the search for the page that split.
1113         *
1114         * the propagation of insert/split up the tree stops if the root
1115         * splits or the page inserted into doesn't have to split to hold
1116         * the new entry.
1117         *
1118         * the parent entry for the split page remains the same, and
1119         * a new entry is inserted at its right with the first key and
1120         * block number of the new right page.
1121         *
1122         * There are a maximum of 4 pages pinned at any time:
1123         * two children, left parent and right parent (when the parent splits).
1124         * keep the child pages pinned while working on the parent.
1125         * make sure that all pins are released at exit.
1126         */
1127        while ((parent = BT_POP(btstack)) != NULL) {
1128                /* parent page specified by stack frame <parent> */
1129
1130                /* keep current child pages (<lp>, <rp>) pinned */
1131                lmp = smp;
1132                lp = sp;
1133
1134                /*
1135                 * insert router entry in parent for new right child page <rp>
1136                 */
1137                /* get the parent page <sp> */
1138                DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1139                if (rc) {
1140                        DT_PUTPAGE(lmp);
1141                        DT_PUTPAGE(rmp);
1142                        goto splitOut;
1143                }
1144
1145                /*
1146                 * The new key entry goes ONE AFTER the index of parent entry,
1147                 * because the split was to the right.
1148                 */
1149                skip = parent->index + 1;
1150
1151                /*
1152                 * compute the key for the router entry
1153                 *
1154                 * key suffix compression:
1155                 * for internal pages that have leaf pages as children,
1156                 * retain only what's needed to distinguish between
1157                 * the new entry and the entry on the page to its left.
1158                 * If the keys compare equal, retain the entire key.
1159                 *
1160                 * note that compression is performed only at computing
1161                 * router key at the lowest internal level.
1162                 * further compression of the key between pairs of higher
1163                 * level internal pages loses too much information and
1164                 * the search may fail.
1165                 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
1166                 * results in two adjacent parent entries (a)(xx).
1167                 * if split occurs between these two entries, and
1168                 * if compression is applied, the router key of parent entry
1169                 * of right page (x) will divert search for x into right
1170                 * subtree and miss x in the left subtree.)
1171                 *
1172                 * the entire key must be retained for the next-to-leftmost
1173                 * internal key at any level of the tree, or search may fail
1174                 * (e.g., ?)
1175                 */
1176                switch (rp->header.flag & BT_TYPE) {
1177                case BT_LEAF:
1178                        /*
1179                         * compute the length of prefix for suffix compression
1180                         * between last entry of left page and first entry
1181                         * of right page
1182                         */
1183                        if ((sp->header.flag & BT_ROOT && skip > 1) ||
1184                            sp->header.prev != 0 || skip > 1) {
1185                                /* compute uppercase router prefix key */
1186                                rc = ciGetLeafPrefixKey(lp,
1187                                                        lp->header.nextindex-1,
1188                                                        rp, 0, &key,
1189                                                        sbi->mntflag);
1190                                if (rc) {
1191                                        DT_PUTPAGE(lmp);
1192                                        DT_PUTPAGE(rmp);
1193                                        DT_PUTPAGE(smp);
1194                                        goto splitOut;
1195                                }
1196                        } else {
1197                                /* next to leftmost entry of
1198                                   lowest internal level */
1199
1200                                /* compute uppercase router key */
1201                                dtGetKey(rp, 0, &key, sbi->mntflag);
1202                                key.name[key.namlen] = 0;
1203
1204                                if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
1205                                        ciToUpper(&key);
1206                        }
1207
1208                        n = NDTINTERNAL(key.namlen);
1209                        break;
1210
1211                case BT_INTERNAL:
1212                        dtGetKey(rp, 0, &key, sbi->mntflag);
1213                        n = NDTINTERNAL(key.namlen);
1214                        break;
1215
1216                default:
1217                        jfs_err("dtSplitUp(): UFO!");
1218                        break;
1219                }
1220
1221                /* unpin left child page */
1222                DT_PUTPAGE(lmp);
1223
1224                /*
1225                 * compute the data for the router entry
1226                 */
1227                data->xd = rpxd;        /* child page xd */
1228
1229                /*
1230                 * parent page is full - split the parent page
1231                 */
1232                if (n > sp->header.freecnt) {
1233                        /* init for parent page split */
1234                        split->mp = smp;
1235                        split->index = skip;    /* index at insert */
1236                        split->nslot = n;
1237                        split->key = &key;
1238                        /* split->data = data; */
1239
1240                        /* unpin right child page */
1241                        DT_PUTPAGE(rmp);
1242
1243                        /* The split routines insert the new entry,
1244                         * acquire txLock as appropriate.
1245                         * return <rp> pinned and its block number <rbn>.
1246                         */
1247                        rc = (sp->header.flag & BT_ROOT) ?
1248                            dtSplitRoot(tid, ip, split, &rmp) :
1249                            dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
1250                        if (rc) {
1251                                DT_PUTPAGE(smp);
1252                                goto splitOut;
1253                        }
1254
1255                        /* smp and rmp are pinned */
1256                }
1257                /*
1258                 * parent page is not full - insert router entry in parent page
1259                 */
1260                else {
1261                        BT_MARK_DIRTY(smp, ip);
1262                        /*
1263                         * acquire a transaction lock on the parent page
1264                         */
1265                        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1266                        dtlck = (struct dt_lock *) & tlck->lock;
1267                        ASSERT(dtlck->index == 0);
1268                        lv = & dtlck->lv[0];
1269
1270                        /* linelock header */
1271                        lv->offset = 0;
1272                        lv->length = 1;
1273                        dtlck->index++;
1274
1275                        /* linelock stbl of non-root parent page */
1276                        if (!(sp->header.flag & BT_ROOT)) {
1277                                lv++;
1278                                n = skip >> L2DTSLOTSIZE;
1279                                lv->offset = sp->header.stblindex + n;
1280                                lv->length =
1281                                    ((sp->header.nextindex -
1282                                      1) >> L2DTSLOTSIZE) - n + 1;
1283                                dtlck->index++;
1284                        }
1285
1286                        dtInsertEntry(sp, skip, &key, data, &dtlck);
1287
1288                        /* exit propagate up */
1289                        break;
1290                }
1291        }
1292
1293        /* unpin current split and its right page */
1294        DT_PUTPAGE(smp);
1295        DT_PUTPAGE(rmp);
1296
1297        /*
1298         * free remaining extents allocated for split
1299         */
1300      splitOut:
1301        n = pxdlist.npxd;
1302        pxd = &pxdlist.pxd[n];
1303        for (; n < pxdlist.maxnpxd; n++, pxd++)
1304                dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));
1305
1306      freeKeyName:
1307        kfree(key.name);
1308
1309        /* Rollback quota allocation */
1310        if (rc && quota_allocation)
1311                dquot_free_block(ip, quota_allocation);
1312
1313      dtSplitUp_Exit:
1314
1315        return rc;
1316}
1317
1318
1319/*
1320 *      dtSplitPage()
1321 *
1322 * function: Split a non-root page of a btree.
1323 *
1324 * parameter:
1325 *
1326 * return: 0 - success;
1327 *         errno - failure;
1328 *      return split and new page pinned;
1329 */
1330static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1331            struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
1332{
1333        int rc = 0;
1334        struct metapage *smp;
1335        dtpage_t *sp;
1336        struct metapage *rmp;
1337        dtpage_t *rp;           /* new right page allocated */
1338        s64 rbn;                /* new right page block number */
1339        struct metapage *mp;
1340        dtpage_t *p;
1341        s64 nextbn;
1342        struct pxdlist *pxdlist;
1343        pxd_t *pxd;
1344        int skip, nextindex, half, left, nxt, off, si;
1345        struct ldtentry *ldtentry;
1346        struct idtentry *idtentry;
1347        u8 *stbl;
1348        struct dtslot *f;
1349        int fsi, stblsize;
1350        int n;
1351        struct dt_lock *sdtlck, *rdtlck;
1352        struct tlock *tlck;
1353        struct dt_lock *dtlck;
1354        struct lv *slv, *rlv, *lv;
1355
1356        /* get split page */
1357        smp = split->mp;
1358        sp = DT_PAGE(ip, smp);
1359
1360        /*
1361         * allocate the new right page for the split
1362         */
1363        pxdlist = split->pxdlist;
1364        pxd = &pxdlist->pxd[pxdlist->npxd];
1365        pxdlist->npxd++;
1366        rbn = addressPXD(pxd);
1367        rmp = get_metapage(ip, rbn, PSIZE, 1);
1368        if (rmp == NULL)
1369                return -EIO;
1370
1371        /* Allocate blocks to quota. */
1372        rc = dquot_alloc_block(ip, lengthPXD(pxd));
1373        if (rc) {
1374                release_metapage(rmp);
1375                return rc;
1376        }
1377
1378        jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1379
1380        BT_MARK_DIRTY(rmp, ip);
1381        /*
1382         * acquire a transaction lock on the new right page
1383         */
1384        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1385        rdtlck = (struct dt_lock *) & tlck->lock;
1386
1387        rp = (dtpage_t *) rmp->data;
1388        *rpp = rp;
1389        rp->header.self = *pxd;
1390
1391        BT_MARK_DIRTY(smp, ip);
1392        /*
1393         * acquire a transaction lock on the split page
1394         *
1395         * action:
1396         */
1397        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1398        sdtlck = (struct dt_lock *) & tlck->lock;
1399
1400        /* linelock header of split page */
1401        ASSERT(sdtlck->index == 0);
1402        slv = & sdtlck->lv[0];
1403        slv->offset = 0;
1404        slv->length = 1;
1405        sdtlck->index++;
1406
1407        /*
1408         * initialize/update sibling pointers between sp and rp
1409         */
1410        nextbn = le64_to_cpu(sp->header.next);
1411        rp->header.next = cpu_to_le64(nextbn);
1412        rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1413        sp->header.next = cpu_to_le64(rbn);
1414
1415        /*
1416         * initialize new right page
1417         */
1418        rp->header.flag = sp->header.flag;
1419
1420        /* compute sorted entry table at start of extent data area */
1421        rp->header.nextindex = 0;
1422        rp->header.stblindex = 1;
1423
1424        n = PSIZE >> L2DTSLOTSIZE;
1425        rp->header.maxslot = n;
1426        stblsize = (n + 31) >> L2DTSLOTSIZE;    /* in unit of slot */
1427
1428        /* init freelist */
1429        fsi = rp->header.stblindex + stblsize;
1430        rp->header.freelist = fsi;
1431        rp->header.freecnt = rp->header.maxslot - fsi;
1432
1433        /*
1434         *      sequential append at tail: append without split
1435         *
1436         * If splitting the last page on a level because of appending
1437         * a entry to it (skip is maxentry), it's likely that the access is
1438         * sequential. Adding an empty page on the side of the level is less
1439         * work and can push the fill factor much higher than normal.
1440         * If we're wrong it's no big deal, we'll just do the split the right
1441         * way next time.
1442         * (It may look like it's equally easy to do a similar hack for
1443         * reverse sorted data, that is, split the tree left,
1444         * but it's not. Be my guest.)
1445         */
1446        if (nextbn == 0 && split->index == sp->header.nextindex) {
1447                /* linelock header + stbl (first slot) of new page */
1448                rlv = & rdtlck->lv[rdtlck->index];
1449                rlv->offset = 0;
1450                rlv->length = 2;
1451                rdtlck->index++;
1452
1453                /*
1454                 * initialize freelist of new right page
1455                 */
1456                f = &rp->slot[fsi];
1457                for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1458                        f->next = fsi;
1459                f->next = -1;
1460
1461                /* insert entry at the first entry of the new right page */
1462                dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
1463
1464                goto out;
1465        }
1466
1467        /*
1468         *      non-sequential insert (at possibly middle page)
1469         */
1470
1471        /*
1472         * update prev pointer of previous right sibling page;
1473         */
1474        if (nextbn != 0) {
1475                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1476                if (rc) {
1477                        discard_metapage(rmp);
1478                        return rc;
1479                }
1480
1481                BT_MARK_DIRTY(mp, ip);
1482                /*
1483                 * acquire a transaction lock on the next page
1484                 */
1485                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
1486                jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
1487                        tlck, ip, mp);
1488                dtlck = (struct dt_lock *) & tlck->lock;
1489
1490                /* linelock header of previous right sibling page */
1491                lv = & dtlck->lv[dtlck->index];
1492                lv->offset = 0;
1493                lv->length = 1;
1494                dtlck->index++;
1495
1496                p->header.prev = cpu_to_le64(rbn);
1497
1498                DT_PUTPAGE(mp);
1499        }
1500
1501        /*
1502         * split the data between the split and right pages.
1503         */
1504        skip = split->index;
1505        half = (PSIZE >> L2DTSLOTSIZE) >> 1;    /* swag */
1506        left = 0;
1507
1508        /*
1509         *      compute fill factor for split pages
1510         *
1511         * <nxt> traces the next entry to move to rp
1512         * <off> traces the next entry to stay in sp
1513         */
1514        stbl = (u8 *) & sp->slot[sp->header.stblindex];
1515        nextindex = sp->header.nextindex;
1516        for (nxt = off = 0; nxt < nextindex; ++off) {
1517                if (off == skip)
1518                        /* check for fill factor with new entry size */
1519                        n = split->nslot;
1520                else {
1521                        si = stbl[nxt];
1522                        switch (sp->header.flag & BT_TYPE) {
1523                        case BT_LEAF:
1524                                ldtentry = (struct ldtentry *) & sp->slot[si];
1525                                if (DO_INDEX(ip))
1526                                        n = NDTLEAF(ldtentry->namlen);
1527                                else
1528                                        n = NDTLEAF_LEGACY(ldtentry->
1529                                                           namlen);
1530                                break;
1531
1532                        case BT_INTERNAL:
1533                                idtentry = (struct idtentry *) & sp->slot[si];
1534                                n = NDTINTERNAL(idtentry->namlen);
1535                                break;
1536
1537                        default:
1538                                break;
1539                        }
1540
1541                        ++nxt;  /* advance to next entry to move in sp */
1542                }
1543
1544                left += n;
1545                if (left >= half)
1546                        break;
1547        }
1548
1549        /* <nxt> poins to the 1st entry to move */
1550
1551        /*
1552         *      move entries to right page
1553         *
1554         * dtMoveEntry() initializes rp and reserves entry for insertion
1555         *
1556         * split page moved out entries are linelocked;
1557         * new/right page moved in entries are linelocked;
1558         */
1559        /* linelock header + stbl of new right page */
1560        rlv = & rdtlck->lv[rdtlck->index];
1561        rlv->offset = 0;
1562        rlv->length = 5;
1563        rdtlck->index++;
1564
1565        dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
1566
1567        sp->header.nextindex = nxt;
1568
1569        /*
1570         * finalize freelist of new right page
1571         */
1572        fsi = rp->header.freelist;
1573        f = &rp->slot[fsi];
1574        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1575                f->next = fsi;
1576        f->next = -1;
1577
1578        /*
1579         * Update directory index table for entries now in right page
1580         */
1581        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1582                s64 lblock;
1583
1584                mp = NULL;
1585                stbl = DT_GETSTBL(rp);
1586                for (n = 0; n < rp->header.nextindex; n++) {
1587                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1588                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1589                                     rbn, n, &mp, &lblock);
1590                }
1591                if (mp)
1592                        release_metapage(mp);
1593        }
1594
1595        /*
1596         * the skipped index was on the left page,
1597         */
1598        if (skip <= off) {
1599                /* insert the new entry in the split page */
1600                dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
1601
1602                /* linelock stbl of split page */
1603                if (sdtlck->index >= sdtlck->maxcnt)
1604                        sdtlck = (struct dt_lock *) txLinelock(sdtlck);
1605                slv = & sdtlck->lv[sdtlck->index];
1606                n = skip >> L2DTSLOTSIZE;
1607                slv->offset = sp->header.stblindex + n;
1608                slv->length =
1609                    ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
1610                sdtlck->index++;
1611        }
1612        /*
1613         * the skipped index was on the right page,
1614         */
1615        else {
1616                /* adjust the skip index to reflect the new position */
1617                skip -= nxt;
1618
1619                /* insert the new entry in the right page */
1620                dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
1621        }
1622
1623      out:
1624        *rmpp = rmp;
1625        *rpxdp = *pxd;
1626
1627        return rc;
1628}
1629
1630
1631/*
1632 *      dtExtendPage()
1633 *
1634 * function: extend 1st/only directory leaf page
1635 *
1636 * parameter:
1637 *
1638 * return: 0 - success;
1639 *         errno - failure;
1640 *      return extended page pinned;
1641 */
1642static int dtExtendPage(tid_t tid,
1643             struct inode *ip, struct dtsplit * split, struct btstack * btstack)
1644{
1645        struct super_block *sb = ip->i_sb;
1646        int rc;
1647        struct metapage *smp, *pmp, *mp;
1648        dtpage_t *sp, *pp;
1649        struct pxdlist *pxdlist;
1650        pxd_t *pxd, *tpxd;
1651        int xlen, xsize;
1652        int newstblindex, newstblsize;
1653        int oldstblindex, oldstblsize;
1654        int fsi, last;
1655        struct dtslot *f;
1656        struct btframe *parent;
1657        int n;
1658        struct dt_lock *dtlck;
1659        s64 xaddr, txaddr;
1660        struct tlock *tlck;
1661        struct pxd_lock *pxdlock;
1662        struct lv *lv;
1663        uint type;
1664        struct ldtentry *ldtentry;
1665        u8 *stbl;
1666
1667        /* get page to extend */
1668        smp = split->mp;
1669        sp = DT_PAGE(ip, smp);
1670
1671        /* get parent/root page */
1672        parent = BT_POP(btstack);
1673        DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
1674        if (rc)
1675                return (rc);
1676
1677        /*
1678         *      extend the extent
1679         */
1680        pxdlist = split->pxdlist;
1681        pxd = &pxdlist->pxd[pxdlist->npxd];
1682        pxdlist->npxd++;
1683
1684        xaddr = addressPXD(pxd);
1685        tpxd = &sp->header.self;
1686        txaddr = addressPXD(tpxd);
1687        /* in-place extension */
1688        if (xaddr == txaddr) {
1689                type = tlckEXTEND;
1690        }
1691        /* relocation */
1692        else {
1693                type = tlckNEW;
1694
1695                /* save moved extent descriptor for later free */
1696                tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
1697                pxdlock = (struct pxd_lock *) & tlck->lock;
1698                pxdlock->flag = mlckFREEPXD;
1699                pxdlock->pxd = sp->header.self;
1700                pxdlock->index = 1;
1701
1702                /*
1703                 * Update directory index table to reflect new page address
1704                 */
1705                if (DO_INDEX(ip)) {
1706                        s64 lblock;
1707
1708                        mp = NULL;
1709                        stbl = DT_GETSTBL(sp);
1710                        for (n = 0; n < sp->header.nextindex; n++) {
1711                                ldtentry =
1712                                    (struct ldtentry *) & sp->slot[stbl[n]];
1713                                modify_index(tid, ip,
1714                                             le32_to_cpu(ldtentry->index),
1715                                             xaddr, n, &mp, &lblock);
1716                        }
1717                        if (mp)
1718                                release_metapage(mp);
1719                }
1720        }
1721
1722        /*
1723         *      extend the page
1724         */
1725        sp->header.self = *pxd;
1726
1727        jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
1728
1729        BT_MARK_DIRTY(smp, ip);
1730        /*
1731         * acquire a transaction lock on the extended/leaf page
1732         */
1733        tlck = txLock(tid, ip, smp, tlckDTREE | type);
1734        dtlck = (struct dt_lock *) & tlck->lock;
1735        lv = & dtlck->lv[0];
1736
1737        /* update buffer extent descriptor of extended page */
1738        xlen = lengthPXD(pxd);
1739        xsize = xlen << JFS_SBI(sb)->l2bsize;
1740
1741        /*
1742         * copy old stbl to new stbl at start of extended area
1743         */
1744        oldstblindex = sp->header.stblindex;
1745        oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
1746        newstblindex = sp->header.maxslot;
1747        n = xsize >> L2DTSLOTSIZE;
1748        newstblsize = (n + 31) >> L2DTSLOTSIZE;
1749        memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
1750               sp->header.nextindex);
1751
1752        /*
1753         * in-line extension: linelock old area of extended page
1754         */
1755        if (type == tlckEXTEND) {
1756                /* linelock header */
1757                lv->offset = 0;
1758                lv->length = 1;
1759                dtlck->index++;
1760                lv++;
1761
1762                /* linelock new stbl of extended page */
1763                lv->offset = newstblindex;
1764                lv->length = newstblsize;
1765        }
1766        /*
1767         * relocation: linelock whole relocated area
1768         */
1769        else {
1770                lv->offset = 0;
1771                lv->length = sp->header.maxslot + newstblsize;
1772        }
1773
1774        dtlck->index++;
1775
1776        sp->header.maxslot = n;
1777        sp->header.stblindex = newstblindex;
1778        /* sp->header.nextindex remains the same */
1779
1780        /*
1781         * add old stbl region at head of freelist
1782         */
1783        fsi = oldstblindex;
1784        f = &sp->slot[fsi];
1785        last = sp->header.freelist;
1786        for (n = 0; n < oldstblsize; n++, fsi++, f++) {
1787                f->next = last;
1788                last = fsi;
1789        }
1790        sp->header.freelist = last;
1791        sp->header.freecnt += oldstblsize;
1792
1793        /*
1794         * append free region of newly extended area at tail of freelist
1795         */
1796        /* init free region of newly extended area */
1797        fsi = n = newstblindex + newstblsize;
1798        f = &sp->slot[fsi];
1799        for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
1800                f->next = fsi;
1801        f->next = -1;
1802
1803        /* append new free region at tail of old freelist */
1804        fsi = sp->header.freelist;
1805        if (fsi == -1)
1806                sp->header.freelist = n;
1807        else {
1808                do {
1809                        f = &sp->slot[fsi];
1810                        fsi = f->next;
1811                } while (fsi != -1);
1812
1813                f->next = n;
1814        }
1815
1816        sp->header.freecnt += sp->header.maxslot - n;
1817
1818        /*
1819         * insert the new entry
1820         */
1821        dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
1822
1823        BT_MARK_DIRTY(pmp, ip);
1824        /*
1825         * linelock any freeslots residing in old extent
1826         */
1827        if (type == tlckEXTEND) {
1828                n = sp->header.maxslot >> 2;
1829                if (sp->header.freelist < n)
1830                        dtLinelockFreelist(sp, n, &dtlck);
1831        }
1832
1833        /*
1834         *      update parent entry on the parent/root page
1835         */
1836        /*
1837         * acquire a transaction lock on the parent/root page
1838         */
1839        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
1840        dtlck = (struct dt_lock *) & tlck->lock;
1841        lv = & dtlck->lv[dtlck->index];
1842
1843        /* linelock parent entry - 1st slot */
1844        lv->offset = 1;
1845        lv->length = 1;
1846        dtlck->index++;
1847
1848        /* update the parent pxd for page extension */
1849        tpxd = (pxd_t *) & pp->slot[1];
1850        *tpxd = *pxd;
1851
1852        DT_PUTPAGE(pmp);
1853        return 0;
1854}
1855
1856
1857/*
1858 *      dtSplitRoot()
1859 *
1860 * function:
1861 *      split the full root page into
1862 *      original/root/split page and new right page
1863 *      i.e., root remains fixed in tree anchor (inode) and
1864 *      the root is copied to a single new right child page
1865 *      since root page << non-root page, and
1866 *      the split root page contains a single entry for the
1867 *      new right child page.
1868 *
1869 * parameter:
1870 *
1871 * return: 0 - success;
1872 *         errno - failure;
1873 *      return new page pinned;
1874 */
1875static int dtSplitRoot(tid_t tid,
1876            struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
1877{
1878        struct super_block *sb = ip->i_sb;
1879        struct metapage *smp;
1880        dtroot_t *sp;
1881        struct metapage *rmp;
1882        dtpage_t *rp;
1883        s64 rbn;
1884        int xlen;
1885        int xsize;
1886        struct dtslot *f;
1887        s8 *stbl;
1888        int fsi, stblsize, n;
1889        struct idtentry *s;
1890        pxd_t *ppxd;
1891        struct pxdlist *pxdlist;
1892        pxd_t *pxd;
1893        struct dt_lock *dtlck;
1894        struct tlock *tlck;
1895        struct lv *lv;
1896        int rc;
1897
1898        /* get split root page */
1899        smp = split->mp;
1900        sp = &JFS_IP(ip)->i_dtroot;
1901
1902        /*
1903         *      allocate/initialize a single (right) child page
1904         *
1905         * N.B. at first split, a one (or two) block to fit new entry
1906         * is allocated; at subsequent split, a full page is allocated;
1907         */
1908        pxdlist = split->pxdlist;
1909        pxd = &pxdlist->pxd[pxdlist->npxd];
1910        pxdlist->npxd++;
1911        rbn = addressPXD(pxd);
1912        xlen = lengthPXD(pxd);
1913        xsize = xlen << JFS_SBI(sb)->l2bsize;
1914        rmp = get_metapage(ip, rbn, xsize, 1);
1915        if (!rmp)
1916                return -EIO;
1917
1918        rp = rmp->data;
1919
1920        /* Allocate blocks to quota. */
1921        rc = dquot_alloc_block(ip, lengthPXD(pxd));
1922        if (rc) {
1923                release_metapage(rmp);
1924                return rc;
1925        }
1926
1927        BT_MARK_DIRTY(rmp, ip);
1928        /*
1929         * acquire a transaction lock on the new right page
1930         */
1931        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1932        dtlck = (struct dt_lock *) & tlck->lock;
1933
1934        rp->header.flag =
1935            (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1936        rp->header.self = *pxd;
1937
1938        /* initialize sibling pointers */
1939        rp->header.next = 0;
1940        rp->header.prev = 0;
1941
1942        /*
1943         *      move in-line root page into new right page extent
1944         */
1945        /* linelock header + copied entries + new stbl (1st slot) in new page */
1946        ASSERT(dtlck->index == 0);
1947        lv = & dtlck->lv[0];
1948        lv->offset = 0;
1949        lv->length = 10;        /* 1 + 8 + 1 */
1950        dtlck->index++;
1951
1952        n = xsize >> L2DTSLOTSIZE;
1953        rp->header.maxslot = n;
1954        stblsize = (n + 31) >> L2DTSLOTSIZE;
1955
1956        /* copy old stbl to new stbl at start of extended area */
1957        rp->header.stblindex = DTROOTMAXSLOT;
1958        stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
1959        memcpy(stbl, sp->header.stbl, sp->header.nextindex);
1960        rp->header.nextindex = sp->header.nextindex;
1961
1962        /* copy old data area to start of new data area */
1963        memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
1964
1965        /*
1966         * append free region of newly extended area at tail of freelist
1967         */
1968        /* init free region of newly extended area */
1969        fsi = n = DTROOTMAXSLOT + stblsize;
1970        f = &rp->slot[fsi];
1971        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1972                f->next = fsi;
1973        f->next = -1;
1974
1975        /* append new free region at tail of old freelist */
1976        fsi = sp->header.freelist;
1977        if (fsi == -1)
1978                rp->header.freelist = n;
1979        else {
1980                rp->header.freelist = fsi;
1981
1982                do {
1983                        f = &rp->slot[fsi];
1984                        fsi = f->next;
1985                } while (fsi != -1);
1986
1987                f->next = n;
1988        }
1989
1990        rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
1991
1992        /*
1993         * Update directory index table for entries now in right page
1994         */
1995        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1996                s64 lblock;
1997                struct metapage *mp = NULL;
1998                struct ldtentry *ldtentry;
1999
2000                stbl = DT_GETSTBL(rp);
2001                for (n = 0; n < rp->header.nextindex; n++) {
2002                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
2003                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
2004                                     rbn, n, &mp, &lblock);
2005                }
2006                if (mp)
2007                        release_metapage(mp);
2008        }
2009        /*
2010         * insert the new entry into the new right/child page
2011         * (skip index in the new right page will not change)
2012         */
2013        dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
2014
2015        /*
2016         *      reset parent/root page
2017         *
2018         * set the 1st entry offset to 0, which force the left-most key
2019         * at any level of the tree to be less than any search key.
2020         *
2021         * The btree comparison code guarantees that the left-most key on any
2022         * level of the tree is never used, so it doesn't need to be filled in.
2023         */
2024        BT_MARK_DIRTY(smp, ip);
2025        /*
2026         * acquire a transaction lock on the root page (in-memory inode)
2027         */
2028        tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
2029        dtlck = (struct dt_lock *) & tlck->lock;
2030
2031        /* linelock root */
2032        ASSERT(dtlck->index == 0);
2033        lv = & dtlck->lv[0];
2034        lv->offset = 0;
2035        lv->length = DTROOTMAXSLOT;
2036        dtlck->index++;
2037
2038        /* update page header of root */
2039        if (sp->header.flag & BT_LEAF) {
2040                sp->header.flag &= ~BT_LEAF;
2041                sp->header.flag |= BT_INTERNAL;
2042        }
2043
2044        /* init the first entry */
2045        s = (struct idtentry *) & sp->slot[DTENTRYSTART];
2046        ppxd = (pxd_t *) s;
2047        *ppxd = *pxd;
2048        s->next = -1;
2049        s->namlen = 0;
2050
2051        stbl = sp->header.stbl;
2052        stbl[0] = DTENTRYSTART;
2053        sp->header.nextindex = 1;
2054
2055        /* init freelist */
2056        fsi = DTENTRYSTART + 1;
2057        f = &sp->slot[fsi];
2058
2059        /* init free region of remaining area */
2060        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2061                f->next = fsi;
2062        f->next = -1;
2063
2064        sp->header.freelist = DTENTRYSTART + 1;
2065        sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
2066
2067        *rmpp = rmp;
2068
2069        return 0;
2070}
2071
2072
2073/*
2074 *      dtDelete()
2075 *
2076 * function: delete the entry(s) referenced by a key.
2077 *
2078 * parameter:
2079 *
2080 * return:
2081 */
2082int dtDelete(tid_t tid,
2083         struct inode *ip, struct component_name * key, ino_t * ino, int flag)
2084{
2085        int rc = 0;
2086        s64 bn;
2087        struct metapage *mp, *imp;
2088        dtpage_t *p;
2089        int index;
2090        struct btstack btstack;
2091        struct dt_lock *dtlck;
2092        struct tlock *tlck;
2093        struct lv *lv;
2094        int i;
2095        struct ldtentry *ldtentry;
2096        u8 *stbl;
2097        u32 table_index, next_index;
2098        struct metapage *nmp;
2099        dtpage_t *np;
2100
2101        /*
2102         *      search for the entry to delete:
2103         *
2104         * dtSearch() returns (leaf page pinned, index at which to delete).
2105         */
2106        if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
2107                return rc;
2108
2109        /* retrieve search result */
2110        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2111
2112        /*
2113         * We need to find put the index of the next entry into the
2114         * directory index table in order to resume a readdir from this
2115         * entry.
2116         */
2117        if (DO_INDEX(ip)) {
2118                stbl = DT_GETSTBL(p);
2119                ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
2120                table_index = le32_to_cpu(ldtentry->index);
2121                if (index == (p->header.nextindex - 1)) {
2122                        /*
2123                         * Last entry in this leaf page
2124                         */
2125                        if ((p->header.flag & BT_ROOT)
2126                            || (p->header.next == 0))
2127                                next_index = -1;
2128                        else {
2129                                /* Read next leaf page */
2130                                DT_GETPAGE(ip, le64_to_cpu(p->header.next),
2131                                           nmp, PSIZE, np, rc);
2132                                if (rc)
2133                                        next_index = -1;
2134                                else {
2135                                        stbl = DT_GETSTBL(np);
2136                                        ldtentry =
2137                                            (struct ldtentry *) & np->
2138                                            slot[stbl[0]];
2139                                        next_index =
2140                                            le32_to_cpu(ldtentry->index);
2141                                        DT_PUTPAGE(nmp);
2142                                }
2143                        }
2144                } else {
2145                        ldtentry =
2146                            (struct ldtentry *) & p->slot[stbl[index + 1]];
2147                        next_index = le32_to_cpu(ldtentry->index);
2148                }
2149                free_index(tid, ip, table_index, next_index);
2150        }
2151        /*
2152         * the leaf page becomes empty, delete the page
2153         */
2154        if (p->header.nextindex == 1) {
2155                /* delete empty page */
2156                rc = dtDeleteUp(tid, ip, mp, p, &btstack);
2157        }
2158        /*
2159         * the leaf page has other entries remaining:
2160         *
2161         * delete the entry from the leaf page.
2162         */
2163        else {
2164                BT_MARK_DIRTY(mp, ip);
2165                /*
2166                 * acquire a transaction lock on the leaf page
2167                 */
2168                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2169                dtlck = (struct dt_lock *) & tlck->lock;
2170
2171                /*
2172                 * Do not assume that dtlck->index will be zero.  During a
2173                 * rename within a directory, this transaction may have
2174                 * modified this page already when adding the new entry.
2175                 */
2176
2177                /* linelock header */
2178                if (dtlck->index >= dtlck->maxcnt)
2179                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2180                lv = & dtlck->lv[dtlck->index];
2181                lv->offset = 0;
2182                lv->length = 1;
2183                dtlck->index++;
2184
2185                /* linelock stbl of non-root leaf page */
2186                if (!(p->header.flag & BT_ROOT)) {
2187                        if (dtlck->index >= dtlck->maxcnt)
2188                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2189                        lv = & dtlck->lv[dtlck->index];
2190                        i = index >> L2DTSLOTSIZE;
2191                        lv->offset = p->header.stblindex + i;
2192                        lv->length =
2193                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2194                            i + 1;
2195                        dtlck->index++;
2196                }
2197
2198                /* free the leaf entry */
2199                dtDeleteEntry(p, index, &dtlck);
2200
2201                /*
2202                 * Update directory index table for entries moved in stbl
2203                 */
2204                if (DO_INDEX(ip) && index < p->header.nextindex) {
2205                        s64 lblock;
2206
2207                        imp = NULL;
2208                        stbl = DT_GETSTBL(p);
2209                        for (i = index; i < p->header.nextindex; i++) {
2210                                ldtentry =
2211                                    (struct ldtentry *) & p->slot[stbl[i]];
2212                                modify_index(tid, ip,
2213                                             le32_to_cpu(ldtentry->index),
2214                                             bn, i, &imp, &lblock);
2215                        }
2216                        if (imp)
2217                                release_metapage(imp);
2218                }
2219
2220                DT_PUTPAGE(mp);
2221        }
2222
2223        return rc;
2224}
2225
2226
2227/*
2228 *      dtDeleteUp()
2229 *
2230 * function:
2231 *      free empty pages as propagating deletion up the tree
2232 *
2233 * parameter:
2234 *
2235 * return:
2236 */
2237static int dtDeleteUp(tid_t tid, struct inode *ip,
2238           struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
2239{
2240        int rc = 0;
2241        struct metapage *mp;
2242        dtpage_t *p;
2243        int index, nextindex;
2244        int xlen;
2245        struct btframe *parent;
2246        struct dt_lock *dtlck;
2247        struct tlock *tlck;
2248        struct lv *lv;
2249        struct pxd_lock *pxdlock;
2250        int i;
2251
2252        /*
2253         *      keep the root leaf page which has become empty
2254         */
2255        if (BT_IS_ROOT(fmp)) {
2256                /*
2257                 * reset the root
2258                 *
2259                 * dtInitRoot() acquires txlock on the root
2260                 */
2261                dtInitRoot(tid, ip, PARENT(ip));
2262
2263                DT_PUTPAGE(fmp);
2264
2265                return 0;
2266        }
2267
2268        /*
2269         *      free the non-root leaf page
2270         */
2271        /*
2272         * acquire a transaction lock on the page
2273         *
2274         * write FREEXTENT|NOREDOPAGE log record
2275         * N.B. linelock is overlaid as freed extent descriptor, and
2276         * the buffer page is freed;
2277         */
2278        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2279        pxdlock = (struct pxd_lock *) & tlck->lock;
2280        pxdlock->flag = mlckFREEPXD;
2281        pxdlock->pxd = fp->header.self;
2282        pxdlock->index = 1;
2283
2284        /* update sibling pointers */
2285        if ((rc = dtRelink(tid, ip, fp))) {
2286                BT_PUTPAGE(fmp);
2287                return rc;
2288        }
2289
2290        xlen = lengthPXD(&fp->header.self);
2291
2292        /* Free quota allocation. */
2293        dquot_free_block(ip, xlen);
2294
2295        /* free/invalidate its buffer page */
2296        discard_metapage(fmp);
2297
2298        /*
2299         *      propagate page deletion up the directory tree
2300         *
2301         * If the delete from the parent page makes it empty,
2302         * continue all the way up the tree.
2303         * stop if the root page is reached (which is never deleted) or
2304         * if the entry deletion does not empty the page.
2305         */
2306        while ((parent = BT_POP(btstack)) != NULL) {
2307                /* pin the parent page <sp> */
2308                DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2309                if (rc)
2310                        return rc;
2311
2312                /*
2313                 * free the extent of the child page deleted
2314                 */
2315                index = parent->index;
2316
2317                /*
2318                 * delete the entry for the child page from parent
2319                 */
2320                nextindex = p->header.nextindex;
2321
2322                /*
2323                 * the parent has the single entry being deleted:
2324                 *
2325                 * free the parent page which has become empty.
2326                 */
2327                if (nextindex == 1) {
2328                        /*
2329                         * keep the root internal page which has become empty
2330                         */
2331                        if (p->header.flag & BT_ROOT) {
2332                                /*
2333                                 * reset the root
2334                                 *
2335                                 * dtInitRoot() acquires txlock on the root
2336                                 */
2337                                dtInitRoot(tid, ip, PARENT(ip));
2338
2339                                DT_PUTPAGE(mp);
2340
2341                                return 0;
2342                        }
2343                        /*
2344                         * free the parent page
2345                         */
2346                        else {
2347                                /*
2348                                 * acquire a transaction lock on the page
2349                                 *
2350                                 * write FREEXTENT|NOREDOPAGE log record
2351                                 */
2352                                tlck =
2353                                    txMaplock(tid, ip,
2354                                              tlckDTREE | tlckFREE);
2355                                pxdlock = (struct pxd_lock *) & tlck->lock;
2356                                pxdlock->flag = mlckFREEPXD;
2357                                pxdlock->pxd = p->header.self;
2358                                pxdlock->index = 1;
2359
2360                                /* update sibling pointers */
2361                                if ((rc = dtRelink(tid, ip, p))) {
2362                                        DT_PUTPAGE(mp);
2363                                        return rc;
2364                                }
2365
2366                                xlen = lengthPXD(&p->header.self);
2367
2368                                /* Free quota allocation */
2369                                dquot_free_block(ip, xlen);
2370
2371                                /* free/invalidate its buffer page */
2372                                discard_metapage(mp);
2373
2374                                /* propagate up */
2375                                continue;
2376                        }
2377                }
2378
2379                /*
2380                 * the parent has other entries remaining:
2381                 *
2382                 * delete the router entry from the parent page.
2383                 */
2384                BT_MARK_DIRTY(mp, ip);
2385                /*
2386                 * acquire a transaction lock on the page
2387                 *
2388                 * action: router entry deletion
2389                 */
2390                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2391                dtlck = (struct dt_lock *) & tlck->lock;
2392
2393                /* linelock header */
2394                if (dtlck->index >= dtlck->maxcnt)
2395                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2396                lv = & dtlck->lv[dtlck->index];
2397                lv->offset = 0;
2398                lv->length = 1;
2399                dtlck->index++;
2400
2401                /* linelock stbl of non-root leaf page */
2402                if (!(p->header.flag & BT_ROOT)) {
2403                        if (dtlck->index < dtlck->maxcnt)
2404                                lv++;
2405                        else {
2406                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2407                                lv = & dtlck->lv[0];
2408                        }
2409                        i = index >> L2DTSLOTSIZE;
2410                        lv->offset = p->header.stblindex + i;
2411                        lv->length =
2412                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2413                            i + 1;
2414                        dtlck->index++;
2415                }
2416
2417                /* free the router entry */
2418                dtDeleteEntry(p, index, &dtlck);
2419
2420                /* reset key of new leftmost entry of level (for consistency) */
2421                if (index == 0 &&
2422                    ((p->header.flag & BT_ROOT) || p->header.prev == 0))
2423                        dtTruncateEntry(p, 0, &dtlck);
2424
2425                /* unpin the parent page */
2426                DT_PUTPAGE(mp);
2427
2428                /* exit propagation up */
2429                break;
2430        }
2431
2432        if (!DO_INDEX(ip))
2433                ip->i_size -= PSIZE;
2434
2435        return 0;
2436}
2437
2438#ifdef _NOTYET
2439/*
2440 * NAME:        dtRelocate()
2441 *
2442 * FUNCTION:    relocate dtpage (internal or leaf) of directory;
2443 *              This function is mainly used by defragfs utility.
2444 */
2445int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2446               s64 nxaddr)
2447{
2448        int rc = 0;
2449        struct metapage *mp, *pmp, *lmp, *rmp;
2450        dtpage_t *p, *pp, *rp = 0, *lp= 0;
2451        s64 bn;
2452        int index;
2453        struct btstack btstack;
2454        pxd_t *pxd;
2455        s64 oxaddr, nextbn, prevbn;
2456        int xlen, xsize;
2457        struct tlock *tlck;
2458        struct dt_lock *dtlck;
2459        struct pxd_lock *pxdlock;
2460        s8 *stbl;
2461        struct lv *lv;
2462
2463        oxaddr = addressPXD(opxd);
2464        xlen = lengthPXD(opxd);
2465
2466        jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d",
2467                   (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr,
2468                   xlen);
2469
2470        /*
2471         *      1. get the internal parent dtpage covering
2472         *      router entry for the tartget page to be relocated;
2473         */
2474        rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
2475        if (rc)
2476                return rc;
2477
2478        /* retrieve search result */
2479        DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2480        jfs_info("dtRelocate: parent router entry validated.");
2481
2482        /*
2483         *      2. relocate the target dtpage
2484         */
2485        /* read in the target page from src extent */
2486        DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2487        if (rc) {
2488                /* release the pinned parent page */
2489                DT_PUTPAGE(pmp);
2490                return rc;
2491        }
2492
2493        /*
2494         * read in sibling pages if any to update sibling pointers;
2495         */
2496        rmp = NULL;
2497        if (p->header.next) {
2498                nextbn = le64_to_cpu(p->header.next);
2499                DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2500                if (rc) {
2501                        DT_PUTPAGE(mp);
2502                        DT_PUTPAGE(pmp);
2503                        return (rc);
2504                }
2505        }
2506
2507        lmp = NULL;
2508        if (p->header.prev) {
2509                prevbn = le64_to_cpu(p->header.prev);
2510                DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2511                if (rc) {
2512                        DT_PUTPAGE(mp);
2513                        DT_PUTPAGE(pmp);
2514                        if (rmp)
2515                                DT_PUTPAGE(rmp);
2516                        return (rc);
2517                }
2518        }
2519
2520        /* at this point, all xtpages to be updated are in memory */
2521
2522        /*
2523         * update sibling pointers of sibling dtpages if any;
2524         */
2525        if (lmp) {
2526                tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
2527                dtlck = (struct dt_lock *) & tlck->lock;
2528                /* linelock header */
2529                ASSERT(dtlck->index == 0);
2530                lv = & dtlck->lv[0];
2531                lv->offset = 0;
2532                lv->length = 1;
2533                dtlck->index++;
2534
2535                lp->header.next = cpu_to_le64(nxaddr);
2536                DT_PUTPAGE(lmp);
2537        }
2538
2539        if (rmp) {
2540                tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
2541                dtlck = (struct dt_lock *) & tlck->lock;
2542                /* linelock header */
2543                ASSERT(dtlck->index == 0);
2544                lv = & dtlck->lv[0];
2545                lv->offset = 0;
2546                lv->length = 1;
2547                dtlck->index++;
2548
2549                rp->header.prev = cpu_to_le64(nxaddr);
2550                DT_PUTPAGE(rmp);
2551        }
2552
2553        /*
2554         * update the target dtpage to be relocated
2555         *
2556         * write LOG_REDOPAGE of LOG_NEW type for dst page
2557         * for the whole target page (logredo() will apply
2558         * after image and update bmap for allocation of the
2559         * dst extent), and update bmap for allocation of
2560         * the dst extent;
2561         */
2562        tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
2563        dtlck = (struct dt_lock *) & tlck->lock;
2564        /* linelock header */
2565        ASSERT(dtlck->index == 0);
2566        lv = & dtlck->lv[0];
2567
2568        /* update the self address in the dtpage header */
2569        pxd = &p->header.self;
2570        PXDaddress(pxd, nxaddr);
2571
2572        /* the dst page is the same as the src page, i.e.,
2573         * linelock for afterimage of the whole page;
2574         */
2575        lv->offset = 0;
2576        lv->length = p->header.maxslot;
2577        dtlck->index++;
2578
2579        /* update the buffer extent descriptor of the dtpage */
2580        xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2581
2582        /* unpin the relocated page */
2583        DT_PUTPAGE(mp);
2584        jfs_info("dtRelocate: target dtpage relocated.");
2585
2586        /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec
2587         * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
2588         * will also force a bmap update ).
2589         */
2590
2591        /*
2592         *      3. acquire maplock for the source extent to be freed;
2593         */
2594        /* for dtpage relocation, write a LOG_NOREDOPAGE record
2595         * for the source dtpage (logredo() will init NoRedoPage
2596         * filter and will also update bmap for free of the source
2597         * dtpage), and upadte bmap for free of the source dtpage;
2598         */
2599        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2600        pxdlock = (struct pxd_lock *) & tlck->lock;
2601        pxdlock->flag = mlckFREEPXD;
2602        PXDaddress(&pxdlock->pxd, oxaddr);
2603        PXDlength(&pxdlock->pxd, xlen);
2604        pxdlock->index = 1;
2605
2606        /*
2607         *      4. update the parent router entry for relocation;
2608         *
2609         * acquire tlck for the parent entry covering the target dtpage;
2610         * write LOG_REDOPAGE to apply after image only;
2611         */
2612        jfs_info("dtRelocate: update parent router entry.");
2613        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
2614        dtlck = (struct dt_lock *) & tlck->lock;
2615        lv = & dtlck->lv[dtlck->index];
2616
2617        /* update the PXD with the new address */
2618        stbl = DT_GETSTBL(pp);
2619        pxd = (pxd_t *) & pp->slot[stbl[index]];
2620        PXDaddress(pxd, nxaddr);
2621        lv->offset = stbl[index];
2622        lv->length = 1;
2623        dtlck->index++;
2624
2625        /* unpin the parent dtpage */
2626        DT_PUTPAGE(pmp);
2627
2628        return rc;
2629}
2630
2631/*
2632 * NAME:        dtSearchNode()
2633 *
2634 * FUNCTION:    Search for an dtpage containing a specified address
2635 *              This function is mainly used by defragfs utility.
2636 *
2637 * NOTE:        Search result on stack, the found page is pinned at exit.
2638 *              The result page must be an internal dtpage.
2639 *              lmxaddr give the address of the left most page of the
2640 *              dtree level, in which the required dtpage resides.
2641 */
2642static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2643                        struct btstack * btstack)
2644{
2645        int rc = 0;
2646        s64 bn;
2647        struct metapage *mp;
2648        dtpage_t *p;
2649        int psize = 288;        /* initial in-line directory */
2650        s8 *stbl;
2651        int i;
2652        pxd_t *pxd;
2653        struct btframe *btsp;
2654
2655        BT_CLR(btstack);        /* reset stack */
2656
2657        /*
2658         *      descend tree to the level with specified leftmost page
2659         *
2660         *  by convention, root bn = 0.
2661         */
2662        for (bn = 0;;) {
2663                /* get/pin the page to search */
2664                DT_GETPAGE(ip, bn, mp, psize, p, rc);
2665                if (rc)
2666                        return rc;
2667
2668                /* does the xaddr of leftmost page of the levevl
2669                 * matches levevl search key ?
2670                 */
2671                if (p->header.flag & BT_ROOT) {
2672                        if (lmxaddr == 0)
2673                                break;
2674                } else if (addressPXD(&p->header.self) == lmxaddr)
2675                        break;
2676
2677                /*
2678                 * descend down to leftmost child page
2679                 */
2680                if (p->header.flag & BT_LEAF) {
2681                        DT_PUTPAGE(mp);
2682                        return -ESTALE;
2683                }
2684
2685                /* get the leftmost entry */
2686                stbl = DT_GETSTBL(p);
2687                pxd = (pxd_t *) & p->slot[stbl[0]];
2688
2689                /* get the child page block address */
2690                bn = addressPXD(pxd);
2691                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
2692                /* unpin the parent page */
2693                DT_PUTPAGE(mp);
2694        }
2695
2696        /*
2697         *      search each page at the current levevl
2698         */
2699      loop:
2700        stbl = DT_GETSTBL(p);
2701        for (i = 0; i < p->header.nextindex; i++) {
2702                pxd = (pxd_t *) & p->slot[stbl[i]];
2703
2704                /* found the specified router entry */
2705                if (addressPXD(pxd) == addressPXD(kpxd) &&
2706                    lengthPXD(pxd) == lengthPXD(kpxd)) {
2707                        btsp = btstack->top;
2708                        btsp->bn = bn;
2709                        btsp->index = i;
2710                        btsp->mp = mp;
2711
2712                        return 0;
2713                }
2714        }
2715
2716        /* get the right sibling page if any */
2717        if (p->header.next)
2718                bn = le64_to_cpu(p->header.next);
2719        else {
2720                DT_PUTPAGE(mp);
2721                return -ESTALE;
2722        }
2723
2724        /* unpin current page */
2725        DT_PUTPAGE(mp);
2726
2727        /* get the right sibling page */
2728        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2729        if (rc)
2730                return rc;
2731
2732        goto loop;
2733}
2734#endif /* _NOTYET */
2735
2736/*
2737 *      dtRelink()
2738 *
2739 * function:
2740 *      link around a freed page.
2741 *
2742 * parameter:
2743 *      fp:     page to be freed
2744 *
2745 * return:
2746 */
2747static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
2748{
2749        int rc;
2750        struct metapage *mp;
2751        s64 nextbn, prevbn;
2752        struct tlock *tlck;
2753        struct dt_lock *dtlck;
2754        struct lv *lv;
2755
2756        nextbn = le64_to_cpu(p->header.next);
2757        prevbn = le64_to_cpu(p->header.prev);
2758
2759        /* update prev pointer of the next page */
2760        if (nextbn != 0) {
2761                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
2762                if (rc)
2763                        return rc;
2764
2765                BT_MARK_DIRTY(mp, ip);
2766                /*
2767                 * acquire a transaction lock on the next page
2768                 *
2769                 * action: update prev pointer;
2770                 */
2771                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2772                jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2773                        tlck, ip, mp);
2774                dtlck = (struct dt_lock *) & tlck->lock;
2775
2776                /* linelock header */
2777                if (dtlck->index >= dtlck->maxcnt)
2778                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2779                lv = & dtlck->lv[dtlck->index];
2780                lv->offset = 0;
2781                lv->length = 1;
2782                dtlck->index++;
2783
2784                p->header.prev = cpu_to_le64(prevbn);
2785                DT_PUTPAGE(mp);
2786        }
2787
2788        /* update next pointer of the previous page */
2789        if (prevbn != 0) {
2790                DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
2791                if (rc)
2792                        return rc;
2793
2794                BT_MARK_DIRTY(mp, ip);
2795                /*
2796                 * acquire a transaction lock on the prev page
2797                 *
2798                 * action: update next pointer;
2799                 */
2800                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2801                jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2802                        tlck, ip, mp);
2803                dtlck = (struct dt_lock *) & tlck->lock;
2804
2805                /* linelock header */
2806                if (dtlck->index >= dtlck->maxcnt)
2807                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2808                lv = & dtlck->lv[dtlck->index];
2809                lv->offset = 0;
2810                lv->length = 1;
2811                dtlck->index++;
2812
2813                p->header.next = cpu_to_le64(nextbn);
2814                DT_PUTPAGE(mp);
2815        }
2816
2817        return 0;
2818}
2819
2820
2821/*
2822 *      dtInitRoot()
2823 *
2824 * initialize directory root (inline in inode)
2825 */
2826void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
2827{
2828        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2829        dtroot_t *p;
2830        int fsi;
2831        struct dtslot *f;
2832        struct tlock *tlck;
2833        struct dt_lock *dtlck;
2834        struct lv *lv;
2835        u16 xflag_save;
2836
2837        /*
2838         * If this was previously an non-empty directory, we need to remove
2839         * the old directory table.
2840         */
2841        if (DO_INDEX(ip)) {
2842                if (!jfs_dirtable_inline(ip)) {
2843                        struct tblock *tblk = tid_to_tblock(tid);
2844                        /*
2845                         * We're playing games with the tid's xflag.  If
2846                         * we're removing a regular file, the file's xtree
2847                         * is committed with COMMIT_PMAP, but we always
2848                         * commit the directories xtree with COMMIT_PWMAP.
2849                         */
2850                        xflag_save = tblk->xflag;
2851                        tblk->xflag = 0;
2852                        /*
2853                         * xtTruncate isn't guaranteed to fully truncate
2854                         * the xtree.  The caller needs to check i_size
2855                         * after committing the transaction to see if
2856                         * additional truncation is needed.  The
2857                         * COMMIT_Stale flag tells caller that we
2858                         * initiated the truncation.
2859                         */
2860                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
2861                        set_cflag(COMMIT_Stale, ip);
2862
2863                        tblk->xflag = xflag_save;
2864                } else
2865                        ip->i_size = 1;
2866
2867                jfs_ip->next_index = 2;
2868        } else
2869                ip->i_size = IDATASIZE;
2870
2871        /*
2872         * acquire a transaction lock on the root
2873         *
2874         * action: directory initialization;
2875         */
2876        tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
2877                      tlckDTREE | tlckENTRY | tlckBTROOT);
2878        dtlck = (struct dt_lock *) & tlck->lock;
2879
2880        /* linelock root */
2881        ASSERT(dtlck->index == 0);
2882        lv = & dtlck->lv[0];
2883        lv->offset = 0;
2884        lv->length = DTROOTMAXSLOT;
2885        dtlck->index++;
2886
2887        p = &jfs_ip->i_dtroot;
2888
2889        p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2890
2891        p->header.nextindex = 0;
2892
2893        /* init freelist */
2894        fsi = 1;
2895        f = &p->slot[fsi];
2896
2897        /* init data area of root */
2898        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2899                f->next = fsi;
2900        f->next = -1;
2901
2902        p->header.freelist = 1;
2903        p->header.freecnt = 8;
2904
2905        /* init '..' entry */
2906        p->header.idotdot = cpu_to_le32(idotdot);
2907
2908        return;
2909}
2910
2911/*
2912 *      add_missing_indices()
2913 *
2914 * function: Fix dtree page in which one or more entries has an invalid index.
2915 *           fsck.jfs should really fix this, but it currently does not.
2916 *           Called from jfs_readdir when bad index is detected.
2917 */
2918static void add_missing_indices(struct inode *inode, s64 bn)
2919{
2920        struct ldtentry *d;
2921        struct dt_lock *dtlck;
2922        int i;
2923        uint index;
2924        struct lv *lv;
2925        struct metapage *mp;
2926        dtpage_t *p;
2927        int rc;
2928        s8 *stbl;
2929        tid_t tid;
2930        struct tlock *tlck;
2931
2932        tid = txBegin(inode->i_sb, 0);
2933
2934        DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);
2935
2936        if (rc) {
2937                printk(KERN_ERR "DT_GETPAGE failed!\n");
2938                goto end;
2939        }
2940        BT_MARK_DIRTY(mp, inode);
2941
2942        ASSERT(p->header.flag & BT_LEAF);
2943
2944        tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
2945        if (BT_IS_ROOT(mp))
2946                tlck->type |= tlckBTROOT;
2947
2948        dtlck = (struct dt_lock *) &tlck->lock;
2949
2950        stbl = DT_GETSTBL(p);
2951        for (i = 0; i < p->header.nextindex; i++) {
2952                d = (struct ldtentry *) &p->slot[stbl[i]];
2953                index = le32_to_cpu(d->index);
2954                if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
2955                        d->index = cpu_to_le32(add_index(tid, inode, bn, i));
2956                        if (dtlck->index >= dtlck->maxcnt)
2957                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2958                        lv = &dtlck->lv[dtlck->index];
2959                        lv->offset = stbl[i];
2960                        lv->length = 1;
2961                        dtlck->index++;
2962                }
2963        }
2964
2965        DT_PUTPAGE(mp);
2966        (void) txCommit(tid, 1, &inode, 0);
2967end:
2968        txEnd(tid);
2969}
2970
2971/*
2972 * Buffer to hold directory entry info while traversing a dtree page
2973 * before being fed to the filldir function
2974 */
2975struct jfs_dirent {
2976        loff_t position;
2977        int ino;
2978        u16 name_len;
2979        char name[0];
2980};
2981
2982/*
2983 * function to determine next variable-sized jfs_dirent in buffer
2984 */
2985static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
2986{
2987        return (struct jfs_dirent *)
2988                ((char *)dirent +
2989                 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
2990                   sizeof (loff_t) - 1) &
2991                  ~(sizeof (loff_t) - 1)));
2992}
2993
2994/*
2995 *      jfs_readdir()
2996 *
2997 * function: read directory entries sequentially
2998 *      from the specified entry offset
2999 *
3000 * parameter:
3001 *
3002 * return: offset = (pn, index) of start entry
3003 *      of next jfs_readdir()/dtRead()
3004 */
3005int jfs_readdir(struct file *file, struct dir_context *ctx)
3006{
3007        struct inode *ip = file_inode(file);
3008        struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
3009        int rc = 0;
3010        loff_t dtpos;   /* legacy OS/2 style position */
3011        struct dtoffset {
3012                s16 pn;
3013                s16 index;
3014                s32 unused;
3015        } *dtoffset = (struct dtoffset *) &dtpos;
3016        s64 bn;
3017        struct metapage *mp;
3018        dtpage_t *p;
3019        int index;
3020        s8 *stbl;
3021        struct btstack btstack;
3022        int i, next;
3023        struct ldtentry *d;
3024        struct dtslot *t;
3025        int d_namleft, len, outlen;
3026        unsigned long dirent_buf;
3027        char *name_ptr;
3028        u32 dir_index;
3029        int do_index = 0;
3030        uint loop_count = 0;
3031        struct jfs_dirent *jfs_dirent;
3032        int jfs_dirents;
3033        int overflow, fix_page, page_fixed = 0;
3034        static int unique_pos = 2;      /* If we can't fix broken index */
3035
3036        if (ctx->pos == DIREND)
3037                return 0;
3038
3039        if (DO_INDEX(ip)) {
3040                /*
3041                 * persistent index is stored in directory entries.
3042                 * Special cases:        0 = .
3043                 *                       1 = ..
3044                 *                      -1 = End of directory
3045                 */
3046                do_index = 1;
3047
3048                dir_index = (u32) ctx->pos;
3049
3050                /*
3051                 * NFSv4 reserves cookies 1 and 2 for . and .. so the value
3052                 * we return to the vfs is one greater than the one we use
3053                 * internally.
3054                 */
3055                if (dir_index)
3056                        dir_index--;
3057
3058                if (dir_index > 1) {
3059                        struct dir_table_slot dirtab_slot;
3060
3061                        if (dtEmpty(ip) ||
3062                            (dir_index >= JFS_IP(ip)->next_index)) {
3063                                /* Stale position.  Directory has shrunk */
3064                                ctx->pos = DIREND;
3065                                return 0;
3066                        }
3067                      repeat:
3068                        rc = read_index(ip, dir_index, &dirtab_slot);
3069                        if (rc) {
3070                                ctx->pos = DIREND;
3071                                return rc;
3072                        }
3073                        if (dirtab_slot.flag == DIR_INDEX_FREE) {
3074                                if (loop_count++ > JFS_IP(ip)->next_index) {
3075                                        jfs_err("jfs_readdir detected "
3076                                                   "infinite loop!");
3077                                        ctx->pos = DIREND;
3078                                        return 0;
3079                                }
3080                                dir_index = le32_to_cpu(dirtab_slot.addr2);
3081                                if (dir_index == -1) {
3082                                        ctx->pos = DIREND;
3083                                        return 0;
3084                                }
3085                                goto repeat;
3086                        }
3087                        bn = addressDTS(&dirtab_slot);
3088                        index = dirtab_slot.slot;
3089                        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3090                        if (rc) {
3091                                ctx->pos = DIREND;
3092                                return 0;
3093                        }
3094                        if (p->header.flag & BT_INTERNAL) {
3095                                jfs_err("jfs_readdir: bad index table");
3096                                DT_PUTPAGE(mp);
3097                                ctx->pos = DIREND;
3098                                return 0;
3099                        }
3100                } else {
3101                        if (dir_index == 0) {
3102                                /*
3103                                 * self "."
3104                                 */
3105                                ctx->pos = 1;
3106                                if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
3107                                        return 0;
3108                        }
3109                        /*
3110                         * parent ".."
3111                         */
3112                        ctx->pos = 2;
3113                        if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
3114                                return 0;
3115
3116                        /*
3117                         * Find first entry of left-most leaf
3118                         */
3119                        if (dtEmpty(ip)) {
3120                                ctx->pos = DIREND;
3121                                return 0;
3122                        }
3123
3124                        if ((rc = dtReadFirst(ip, &btstack)))
3125                                return rc;
3126
3127                        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3128                }
3129        } else {
3130                /*
3131                 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
3132                 *
3133                 * pn = 0; index = 1:   First entry "."
3134                 * pn = 0; index = 2:   Second entry ".."
3135                 * pn > 0:              Real entries, pn=1 -> leftmost page
3136                 * pn = index = -1:     No more entries
3137                 */
3138                dtpos = ctx->pos;
3139                if (dtpos < 2) {
3140                        /* build "." entry */
3141                        ctx->pos = 1;
3142                        if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
3143                                return 0;
3144                        dtoffset->index = 2;
3145                        ctx->pos = dtpos;
3146                }
3147
3148                if (dtoffset->pn == 0) {
3149                        if (dtoffset->index == 2) {
3150                                /* build ".." entry */
3151                                if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
3152                                        return 0;
3153                        } else {
3154                                jfs_err("jfs_readdir called with "
3155                                        "invalid offset!");
3156                        }
3157                        dtoffset->pn = 1;
3158                        dtoffset->index = 0;
3159                        ctx->pos = dtpos;
3160                }
3161
3162                if (dtEmpty(ip)) {
3163                        ctx->pos = DIREND;
3164                        return 0;
3165                }
3166
3167                if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) {
3168                        jfs_err("jfs_readdir: unexpected rc = %d "
3169                                "from dtReadNext", rc);
3170                        ctx->pos = DIREND;
3171                        return 0;
3172                }
3173                /* get start leaf page and index */
3174                DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3175
3176                /* offset beyond directory eof ? */
3177                if (bn < 0) {
3178                        ctx->pos = DIREND;
3179                        return 0;
3180                }
3181        }
3182
3183        dirent_buf = __get_free_page(GFP_KERNEL);
3184        if (dirent_buf == 0) {
3185                DT_PUTPAGE(mp);
3186                jfs_warn("jfs_readdir: __get_free_page failed!");
3187                ctx->pos = DIREND;
3188                return -ENOMEM;
3189        }
3190
3191        while (1) {
3192                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3193                jfs_dirents = 0;
3194                overflow = fix_page = 0;
3195
3196                stbl = DT_GETSTBL(p);
3197
3198                for (i = index; i < p->header.nextindex; i++) {
3199                        d = (struct ldtentry *) & p->slot[stbl[i]];
3200
3201                        if (((long) jfs_dirent + d->namlen + 1) >
3202                            (dirent_buf + PAGE_SIZE)) {
3203                                /* DBCS codepages could overrun dirent_buf */
3204                                index = i;
3205                                overflow = 1;
3206                                break;
3207                        }
3208
3209                        d_namleft = d->namlen;
3210                        name_ptr = jfs_dirent->name;
3211                        jfs_dirent->ino = le32_to_cpu(d->inumber);
3212
3213                        if (do_index) {
3214                                len = min(d_namleft, DTLHDRDATALEN);
3215                                jfs_dirent->position = le32_to_cpu(d->index);
3216                                /*
3217                                 * d->index should always be valid, but it
3218                                 * isn't.  fsck.jfs doesn't create the
3219                                 * directory index for the lost+found
3220                                 * directory.  Rather than let it go,
3221                                 * we can try to fix it.
3222                                 */
3223                                if ((jfs_dirent->position < 2) ||
3224                                    (jfs_dirent->position >=
3225                                     JFS_IP(ip)->next_index)) {
3226                                        if (!page_fixed && !isReadOnly(ip)) {
3227                                                fix_page = 1;
3228                                                /*
3229                                                 * setting overflow and setting
3230                                                 * index to i will cause the
3231                                                 * same page to be processed
3232                                                 * again starting here
3233                                                 */
3234                                                overflow = 1;
3235                                                index = i;
3236                                                break;
3237                                        }
3238                                        jfs_dirent->position = unique_pos++;
3239                                }
3240                                /*
3241                                 * We add 1 to the index because we may
3242                                 * use a value of 2 internally, and NFSv4
3243                                 * doesn't like that.
3244                                 */
3245                                jfs_dirent->position++;
3246                        } else {
3247                                jfs_dirent->position = dtpos;
3248                                len = min(d_namleft, DTLHDRDATALEN_LEGACY);
3249                        }
3250
3251                        /* copy the name of head/only segment */
3252                        outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
3253                                                   codepage);
3254                        jfs_dirent->name_len = outlen;
3255
3256                        /* copy name in the additional segment(s) */
3257                        next = d->next;
3258                        while (next >= 0) {
3259                                t = (struct dtslot *) & p->slot[next];
3260                                name_ptr += outlen;
3261                                d_namleft -= len;
3262                                /* Sanity Check */
3263                                if (d_namleft == 0) {
3264                                        jfs_error(ip->i_sb,
3265                                                  "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n",
3266                                                  (long)ip->i_ino,
3267                                                  (long long)bn,
3268                                                  i);
3269                                        goto skip_one;
3270                                }
3271                                len = min(d_namleft, DTSLOTDATALEN);
3272                                outlen = jfs_strfromUCS_le(name_ptr, t->name,
3273                                                           len, codepage);
3274                                jfs_dirent->name_len += outlen;
3275
3276                                next = t->next;
3277                        }
3278
3279                        jfs_dirents++;
3280                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3281skip_one:
3282                        if (!do_index)
3283                                dtoffset->index++;
3284                }
3285
3286                if (!overflow) {
3287                        /* Point to next leaf page */
3288                        if (p->header.flag & BT_ROOT)
3289                                bn = 0;
3290                        else {
3291                                bn = le64_to_cpu(p->header.next);
3292                                index = 0;
3293                                /* update offset (pn:index) for new page */
3294                                if (!do_index) {
3295                                        dtoffset->pn++;
3296                                        dtoffset->index = 0;
3297                                }
3298                        }
3299                        page_fixed = 0;
3300                }
3301
3302                /* unpin previous leaf page */
3303                DT_PUTPAGE(mp);
3304
3305                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3306                while (jfs_dirents--) {
3307                        ctx->pos = jfs_dirent->position;
3308                        if (!dir_emit(ctx, jfs_dirent->name,
3309                                    jfs_dirent->name_len,
3310                                    jfs_dirent->ino, DT_UNKNOWN))
3311                                goto out;
3312                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3313                }
3314
3315                if (fix_page) {
3316                        add_missing_indices(ip, bn);
3317                        page_fixed = 1;
3318                }
3319
3320                if (!overflow && (bn == 0)) {
3321                        ctx->pos = DIREND;
3322                        break;
3323                }
3324
3325                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3326                if (rc) {
3327                        free_page(dirent_buf);
3328                        return rc;
3329                }
3330        }
3331
3332      out:
3333        free_page(dirent_buf);
3334
3335        return rc;
3336}
3337
3338
3339/*
3340 *      dtReadFirst()
3341 *
3342 * function: get the leftmost page of the directory
3343 */
3344static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3345{
3346        int rc = 0;
3347        s64 bn;
3348        int psize = 288;        /* initial in-line directory */
3349        struct metapage *mp;
3350        dtpage_t *p;
3351        s8 *stbl;
3352        struct btframe *btsp;
3353        pxd_t *xd;
3354
3355        BT_CLR(btstack);        /* reset stack */
3356
3357        /*
3358         *      descend leftmost path of the tree
3359         *
3360         * by convention, root bn = 0.
3361         */
3362        for (bn = 0;;) {
3363                DT_GETPAGE(ip, bn, mp, psize, p, rc);
3364                if (rc)
3365                        return rc;
3366
3367                /*
3368                 * leftmost leaf page
3369                 */
3370                if (p->header.flag & BT_LEAF) {
3371                        /* return leftmost entry */
3372                        btsp = btstack->top;
3373                        btsp->bn = bn;
3374                        btsp->index = 0;
3375                        btsp->mp = mp;
3376
3377                        return 0;
3378                }
3379
3380                /*
3381                 * descend down to leftmost child page
3382                 */
3383                if (BT_STACK_FULL(btstack)) {
3384                        DT_PUTPAGE(mp);
3385                        jfs_error(ip->i_sb, "btstack overrun\n");
3386                        BT_STACK_DUMP(btstack);
3387                        return -EIO;
3388                }
3389                /* push (bn, index) of the parent page/entry */
3390                BT_PUSH(btstack, bn, 0);
3391
3392                /* get the leftmost entry */
3393                stbl = DT_GETSTBL(p);
3394                xd = (pxd_t *) & p->slot[stbl[0]];
3395
3396                /* get the child page block address */
3397                bn = addressPXD(xd);
3398                psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;
3399
3400                /* unpin the parent page */
3401                DT_PUTPAGE(mp);
3402        }
3403}
3404
3405
3406/*
3407 *      dtReadNext()
3408 *
3409 * function: get the page of the specified offset (pn:index)
3410 *
3411 * return: if (offset > eof), bn = -1;
3412 *
3413 * note: if index > nextindex of the target leaf page,
3414 * start with 1st entry of next leaf page;
3415 */
3416static int dtReadNext(struct inode *ip, loff_t * offset,
3417                      struct btstack * btstack)
3418{
3419        int rc = 0;
3420        struct dtoffset {
3421                s16 pn;
3422                s16 index;
3423                s32 unused;
3424        } *dtoffset = (struct dtoffset *) offset;
3425        s64 bn;
3426        struct metapage *mp;
3427        dtpage_t *p;
3428        int index;
3429        int pn;
3430        s8 *stbl;
3431        struct btframe *btsp, *parent;
3432        pxd_t *xd;
3433
3434        /*
3435         * get leftmost leaf page pinned
3436         */
3437        if ((rc = dtReadFirst(ip, btstack)))
3438                return rc;
3439
3440        /* get leaf page */
3441        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
3442
3443        /* get the start offset (pn:index) */
3444        pn = dtoffset->pn - 1;  /* Now pn = 0 represents leftmost leaf */
3445        index = dtoffset->index;
3446
3447        /* start at leftmost page ? */
3448        if (pn == 0) {
3449                /* offset beyond eof ? */
3450                if (index < p->header.nextindex)
3451                        goto out;
3452
3453                if (p->header.flag & BT_ROOT) {
3454                        bn = -1;
3455                        goto out;
3456                }
3457
3458                /* start with 1st entry of next leaf page */
3459                dtoffset->pn++;
3460                dtoffset->index = index = 0;
3461                goto a;
3462        }
3463
3464        /* start at non-leftmost page: scan parent pages for large pn */
3465        if (p->header.flag & BT_ROOT) {
3466                bn = -1;
3467                goto out;
3468        }
3469
3470        /* start after next leaf page ? */
3471        if (pn > 1)
3472                goto b;
3473
3474        /* get leaf page pn = 1 */
3475      a:
3476        bn = le64_to_cpu(p->header.next);
3477
3478        /* unpin leaf page */
3479        DT_PUTPAGE(mp);
3480
3481        /* offset beyond eof ? */
3482        if (bn == 0) {
3483                bn = -1;
3484                goto out;
3485        }
3486
3487        goto c;
3488
3489        /*
3490         * scan last internal page level to get target leaf page
3491         */
3492      b:
3493        /* unpin leftmost leaf page */
3494        DT_PUTPAGE(mp);
3495
3496        /* get left most parent page */
3497        btsp = btstack->top;
3498        parent = btsp - 1;
3499        bn = parent->bn;
3500        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3501        if (rc)
3502                return rc;
3503
3504        /* scan parent pages at last internal page level */
3505        while (pn >= p->header.nextindex) {
3506                pn -= p->header.nextindex;
3507
3508                /* get next parent page address */
3509                bn = le64_to_cpu(p->header.next);
3510
3511                /* unpin current parent page */
3512                DT_PUTPAGE(mp);
3513
3514                /* offset beyond eof ? */
3515                if (bn == 0) {
3516                        bn = -1;
3517                        goto out;
3518                }
3519
3520                /* get next parent page */
3521                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3522                if (rc)
3523                        return rc;
3524
3525                /* update parent page stack frame */
3526                parent->bn = bn;
3527        }
3528
3529        /* get leaf page address */
3530        stbl = DT_GETSTBL(p);
3531        xd = (pxd_t *) & p->slot[stbl[pn]];
3532        bn = addressPXD(xd);
3533
3534        /* unpin parent page */
3535        DT_PUTPAGE(mp);
3536
3537        /*
3538         * get target leaf page
3539         */
3540      c:
3541        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3542        if (rc)
3543                return rc;
3544
3545        /*
3546         * leaf page has been completed:
3547         * start with 1st entry of next leaf page
3548         */
3549        if (index >= p->header.nextindex) {
3550                bn = le64_to_cpu(p->header.next);
3551
3552                /* unpin leaf page */
3553                DT_PUTPAGE(mp);
3554
3555                /* offset beyond eof ? */
3556                if (bn == 0) {
3557                        bn = -1;
3558                        goto out;
3559                }
3560
3561                /* get next leaf page */
3562                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3563                if (rc)
3564                        return rc;
3565
3566                /* start with 1st entry of next leaf page */
3567                dtoffset->pn++;
3568                dtoffset->index = 0;
3569        }
3570
3571      out:
3572        /* return target leaf page pinned */
3573        btsp = btstack->top;
3574        btsp->bn = bn;
3575        btsp->index = dtoffset->index;
3576        btsp->mp = mp;
3577
3578        return 0;
3579}
3580
3581
3582/*
3583 *      dtCompare()
3584 *
3585 * function: compare search key with an internal entry
3586 *
3587 * return:
3588 *      < 0 if k is < record
3589 *      = 0 if k is = record
3590 *      > 0 if k is > record
3591 */
3592static int dtCompare(struct component_name * key,       /* search key */
3593                     dtpage_t * p,      /* directory page */
3594                     int si)
3595{                               /* entry slot index */
3596        wchar_t *kname;
3597        __le16 *name;
3598        int klen, namlen, len, rc;
3599        struct idtentry *ih;
3600        struct dtslot *t;
3601
3602        /*
3603         * force the left-most key on internal pages, at any level of
3604         * the tree, to be less than any search key.
3605         * this obviates having to update the leftmost key on an internal
3606         * page when the user inserts a new key in the tree smaller than
3607         * anything that has been stored.
3608         *
3609         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3610         * at any internal page at any level of the tree,
3611         * it descends to child of the entry anyway -
3612         * ? make the entry as min size dummy entry)
3613         *
3614         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3615         * return (1);
3616         */
3617
3618        kname = key->name;
3619        klen = key->namlen;
3620
3621        ih = (struct idtentry *) & p->slot[si];
3622        si = ih->next;
3623        name = ih->name;
3624        namlen = ih->namlen;
3625        len = min(namlen, DTIHDRDATALEN);
3626
3627        /* compare with head/only segment */
3628        len = min(klen, len);
3629        if ((rc = UniStrncmp_le(kname, name, len)))
3630                return rc;
3631
3632        klen -= len;
3633        namlen -= len;
3634
3635        /* compare with additional segment(s) */
3636        kname += len;
3637        while (klen > 0 && namlen > 0) {
3638                /* compare with next name segment */
3639                t = (struct dtslot *) & p->slot[si];
3640                len = min(namlen, DTSLOTDATALEN);
3641                len = min(klen, len);
3642                name = t->name;
3643                if ((rc = UniStrncmp_le(kname, name, len)))
3644                        return rc;
3645
3646                klen -= len;
3647                namlen -= len;
3648                kname += len;
3649                si = t->next;
3650        }
3651
3652        return (klen - namlen);
3653}
3654
3655
3656
3657
3658/*
3659 *      ciCompare()
3660 *
3661 * function: compare search key with an (leaf/internal) entry
3662 *
3663 * return:
3664 *      < 0 if k is < record
3665 *      = 0 if k is = record
3666 *      > 0 if k is > record
3667 */
3668static int ciCompare(struct component_name * key,       /* search key */
3669                     dtpage_t * p,      /* directory page */
3670                     int si,    /* entry slot index */
3671                     int flag)
3672{
3673        wchar_t *kname, x;
3674        __le16 *name;
3675        int klen, namlen, len, rc;
3676        struct ldtentry *lh;
3677        struct idtentry *ih;
3678        struct dtslot *t;
3679        int i;
3680
3681        /*
3682         * force the left-most key on internal pages, at any level of
3683         * the tree, to be less than any search key.
3684         * this obviates having to update the leftmost key on an internal
3685         * page when the user inserts a new key in the tree smaller than
3686         * anything that has been stored.
3687         *
3688         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3689         * at any internal page at any level of the tree,
3690         * it descends to child of the entry anyway -
3691         * ? make the entry as min size dummy entry)
3692         *
3693         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3694         * return (1);
3695         */
3696
3697        kname = key->name;
3698        klen = key->namlen;
3699
3700        /*
3701         * leaf page entry
3702         */
3703        if (p->header.flag & BT_LEAF) {
3704                lh = (struct ldtentry *) & p->slot[si];
3705                si = lh->next;
3706                name = lh->name;
3707                namlen = lh->namlen;
3708                if (flag & JFS_DIR_INDEX)
3709                        len = min(namlen, DTLHDRDATALEN);
3710                else
3711                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3712        }
3713        /*
3714         * internal page entry
3715         */
3716        else {
3717                ih = (struct idtentry *) & p->slot[si];
3718                si = ih->next;
3719                name = ih->name;
3720                namlen = ih->namlen;
3721                len = min(namlen, DTIHDRDATALEN);
3722        }
3723
3724        /* compare with head/only segment */
3725        len = min(klen, len);
3726        for (i = 0; i < len; i++, kname++, name++) {
3727                /* only uppercase if case-insensitive support is on */
3728                if ((flag & JFS_OS2) == JFS_OS2)
3729                        x = UniToupper(le16_to_cpu(*name));
3730                else
3731                        x = le16_to_cpu(*name);
3732                if ((rc = *kname - x))
3733                        return rc;
3734        }
3735
3736        klen -= len;
3737        namlen -= len;
3738
3739        /* compare with additional segment(s) */
3740        while (klen > 0 && namlen > 0) {
3741                /* compare with next name segment */
3742                t = (struct dtslot *) & p->slot[si];
3743                len = min(namlen, DTSLOTDATALEN);
3744                len = min(klen, len);
3745                name = t->name;
3746                for (i = 0; i < len; i++, kname++, name++) {
3747                        /* only uppercase if case-insensitive support is on */
3748                        if ((flag & JFS_OS2) == JFS_OS2)
3749                                x = UniToupper(le16_to_cpu(*name));
3750                        else
3751                                x = le16_to_cpu(*name);
3752
3753                        if ((rc = *kname - x))
3754                                return rc;
3755                }
3756
3757                klen -= len;
3758                namlen -= len;
3759                si = t->next;
3760        }
3761
3762        return (klen - namlen);
3763}
3764
3765
3766/*
3767 *      ciGetLeafPrefixKey()
3768 *
3769 * function: compute prefix of suffix compression
3770 *           from two adjacent leaf entries
3771 *           across page boundary
3772 *
3773 * return: non-zero on error
3774 *
3775 */
3776static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3777                               int ri, struct component_name * key, int flag)
3778{
3779        int klen, namlen;
3780        wchar_t *pl, *pr, *kname;
3781        struct component_name lkey;
3782        struct component_name rkey;
3783
3784        lkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3785                                        GFP_KERNEL);
3786        if (lkey.name == NULL)
3787                return -ENOMEM;
3788
3789        rkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3790                                        GFP_KERNEL);
3791        if (rkey.name == NULL) {
3792                kfree(lkey.name);
3793                return -ENOMEM;
3794        }
3795
3796        /* get left and right key */
3797        dtGetKey(lp, li, &lkey, flag);
3798        lkey.name[lkey.namlen] = 0;
3799
3800        if ((flag & JFS_OS2) == JFS_OS2)
3801                ciToUpper(&lkey);
3802
3803        dtGetKey(rp, ri, &rkey, flag);
3804        rkey.name[rkey.namlen] = 0;
3805
3806
3807        if ((flag & JFS_OS2) == JFS_OS2)
3808                ciToUpper(&rkey);
3809
3810        /* compute prefix */
3811        klen = 0;
3812        kname = key->name;
3813        namlen = min(lkey.namlen, rkey.namlen);
3814        for (pl = lkey.name, pr = rkey.name;
3815             namlen; pl++, pr++, namlen--, klen++, kname++) {
3816                *kname = *pr;
3817                if (*pl != *pr) {
3818                        key->namlen = klen + 1;
3819                        goto free_names;
3820                }
3821        }
3822
3823        /* l->namlen <= r->namlen since l <= r */
3824        if (lkey.namlen < rkey.namlen) {
3825                *kname = *pr;
3826                key->namlen = klen + 1;
3827        } else                  /* l->namelen == r->namelen */
3828                key->namlen = klen;
3829
3830free_names:
3831        kfree(lkey.name);
3832        kfree(rkey.name);
3833        return 0;
3834}
3835
3836
3837
3838/*
3839 *      dtGetKey()
3840 *
3841 * function: get key of the entry
3842 */
3843static void dtGetKey(dtpage_t * p, int i,       /* entry index */
3844                     struct component_name * key, int flag)
3845{
3846        int si;
3847        s8 *stbl;
3848        struct ldtentry *lh;
3849        struct idtentry *ih;
3850        struct dtslot *t;
3851        int namlen, len;
3852        wchar_t *kname;
3853        __le16 *name;
3854
3855        /* get entry */
3856        stbl = DT_GETSTBL(p);
3857        si = stbl[i];
3858        if (p->header.flag & BT_LEAF) {
3859                lh = (struct ldtentry *) & p->slot[si];
3860                si = lh->next;
3861                namlen = lh->namlen;
3862                name = lh->name;
3863                if (flag & JFS_DIR_INDEX)
3864                        len = min(namlen, DTLHDRDATALEN);
3865                else
3866                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3867        } else {
3868                ih = (struct idtentry *) & p->slot[si];
3869                si = ih->next;
3870                namlen = ih->namlen;
3871                name = ih->name;
3872                len = min(namlen, DTIHDRDATALEN);
3873        }
3874
3875        key->namlen = namlen;
3876        kname = key->name;
3877
3878        /*
3879         * move head/only segment
3880         */
3881        UniStrncpy_from_le(kname, name, len);
3882
3883        /*
3884         * move additional segment(s)
3885         */
3886        while (si >= 0) {
3887                /* get next segment */
3888                t = &p->slot[si];
3889                kname += len;
3890                namlen -= len;
3891                len = min(namlen, DTSLOTDATALEN);
3892                UniStrncpy_from_le(kname, t->name, len);
3893
3894                si = t->next;
3895        }
3896}
3897
3898
3899/*
3900 *      dtInsertEntry()
3901 *
3902 * function: allocate free slot(s) and
3903 *           write a leaf/internal entry
3904 *
3905 * return: entry slot index
3906 */
3907static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
3908                          ddata_t * data, struct dt_lock ** dtlock)
3909{
3910        struct dtslot *h, *t;
3911        struct ldtentry *lh = NULL;
3912        struct idtentry *ih = NULL;
3913        int hsi, fsi, klen, len, nextindex;
3914        wchar_t *kname;
3915        __le16 *name;
3916        s8 *stbl;
3917        pxd_t *xd;
3918        struct dt_lock *dtlck = *dtlock;
3919        struct lv *lv;
3920        int xsi, n;
3921        s64 bn = 0;
3922        struct metapage *mp = NULL;
3923
3924        klen = key->namlen;
3925        kname = key->name;
3926
3927        /* allocate a free slot */
3928        hsi = fsi = p->header.freelist;
3929        h = &p->slot[fsi];
3930        p->header.freelist = h->next;
3931        --p->header.freecnt;
3932
3933        /* open new linelock */
3934        if (dtlck->index >= dtlck->maxcnt)
3935                dtlck = (struct dt_lock *) txLinelock(dtlck);
3936
3937        lv = & dtlck->lv[dtlck->index];
3938        lv->offset = hsi;
3939
3940        /* write head/only segment */
3941        if (p->header.flag & BT_LEAF) {
3942                lh = (struct ldtentry *) h;
3943                lh->next = h->next;
3944                lh->inumber = cpu_to_le32(data->leaf.ino);
3945                lh->namlen = klen;
3946                name = lh->name;
3947                if (data->leaf.ip) {
3948                        len = min(klen, DTLHDRDATALEN);
3949                        if (!(p->header.flag & BT_ROOT))
3950                                bn = addressPXD(&p->header.self);
3951                        lh->index = cpu_to_le32(add_index(data->leaf.tid,
3952                                                          data->leaf.ip,
3953                                                          bn, index));
3954                } else
3955                        len = min(klen, DTLHDRDATALEN_LEGACY);
3956        } else {
3957                ih = (struct idtentry *) h;
3958                ih->next = h->next;
3959                xd = (pxd_t *) ih;
3960                *xd = data->xd;
3961                ih->namlen = klen;
3962                name = ih->name;
3963                len = min(klen, DTIHDRDATALEN);
3964        }
3965
3966        UniStrncpy_to_le(name, kname, len);
3967
3968        n = 1;
3969        xsi = hsi;
3970
3971        /* write additional segment(s) */
3972        t = h;
3973        klen -= len;
3974        while (klen) {
3975                /* get free slot */
3976                fsi = p->header.freelist;
3977                t = &p->slot[fsi];
3978                p->header.freelist = t->next;
3979                --p->header.freecnt;
3980
3981                /* is next slot contiguous ? */
3982                if (fsi != xsi + 1) {
3983                        /* close current linelock */
3984                        lv->length = n;
3985                        dtlck->index++;
3986
3987                        /* open new linelock */
3988                        if (dtlck->index < dtlck->maxcnt)
3989                                lv++;
3990                        else {
3991                                dtlck = (struct dt_lock *) txLinelock(dtlck);
3992                                lv = & dtlck->lv[0];
3993                        }
3994
3995                        lv->offset = fsi;
3996                        n = 0;
3997                }
3998
3999                kname += len;
4000                len = min(klen, DTSLOTDATALEN);
4001                UniStrncpy_to_le(t->name, kname, len);
4002
4003                n++;
4004                xsi = fsi;
4005                klen -= len;
4006        }
4007
4008        /* close current linelock */
4009        lv->length = n;
4010        dtlck->index++;
4011
4012        *dtlock = dtlck;
4013
4014        /* terminate last/only segment */
4015        if (h == t) {
4016                /* single segment entry */
4017                if (p->header.flag & BT_LEAF)
4018                        lh->next = -1;
4019                else
4020                        ih->next = -1;
4021        } else
4022                /* multi-segment entry */
4023                t->next = -1;
4024
4025        /* if insert into middle, shift right succeeding entries in stbl */
4026        stbl = DT_GETSTBL(p);
4027        nextindex = p->header.nextindex;
4028        if (index < nextindex) {
4029                memmove(stbl + index + 1, stbl + index, nextindex - index);
4030
4031                if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
4032                        s64 lblock;
4033
4034                        /*
4035                         * Need to update slot number for entries that moved
4036                         * in the stbl
4037                         */
4038                        mp = NULL;
4039                        for (n = index + 1; n <= nextindex; n++) {
4040                                lh = (struct ldtentry *) & (p->slot[stbl[n]]);
4041                                modify_index(data->leaf.tid, data->leaf.ip,
4042                                             le32_to_cpu(lh->index), bn, n,
4043                                             &mp, &lblock);
4044                        }
4045                        if (mp)
4046                                release_metapage(mp);
4047                }
4048        }
4049
4050        stbl[index] = hsi;
4051
4052        /* advance next available entry index of stbl */
4053        ++p->header.nextindex;
4054}
4055
4056
4057/*
4058 *      dtMoveEntry()
4059 *
4060 * function: move entries from split/left page to new/right page
4061 *
4062 *      nextindex of dst page and freelist/freecnt of both pages
4063 *      are updated.
4064 */
4065static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
4066                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
4067                        int do_index)
4068{
4069        int ssi, next;          /* src slot index */
4070        int di;                 /* dst entry index */
4071        int dsi;                /* dst slot index */
4072        s8 *sstbl, *dstbl;      /* sorted entry table */
4073        int snamlen, len;
4074        struct ldtentry *slh, *dlh = NULL;
4075        struct idtentry *sih, *dih = NULL;
4076        struct dtslot *h, *s, *d;
4077        struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
4078        struct lv *slv, *dlv;
4079        int xssi, ns, nd;
4080        int sfsi;
4081
4082        sstbl = (s8 *) & sp->slot[sp->header.stblindex];
4083        dstbl = (s8 *) & dp->slot[dp->header.stblindex];
4084
4085        dsi = dp->header.freelist;      /* first (whole page) free slot */
4086        sfsi = sp->header.freelist;
4087
4088        /* linelock destination entry slot */
4089        dlv = & ddtlck->lv[ddtlck->index];
4090        dlv->offset = dsi;
4091
4092        /* linelock source entry slot */
4093        slv = & sdtlck->lv[sdtlck->index];
4094        slv->offset = sstbl[si];
4095        xssi = slv->offset - 1;
4096
4097        /*
4098         * move entries
4099         */
4100        ns = nd = 0;
4101        for (di = 0; si < sp->header.nextindex; si++, di++) {
4102                ssi = sstbl[si];
4103                dstbl[di] = dsi;
4104
4105                /* is next slot contiguous ? */
4106                if (ssi != xssi + 1) {
4107                        /* close current linelock */
4108                        slv->length = ns;
4109                        sdtlck->index++;
4110
4111                        /* open new linelock */
4112                        if (sdtlck->index < sdtlck->maxcnt)
4113                                slv++;
4114                        else {
4115                                sdtlck = (struct dt_lock *) txLinelock(sdtlck);
4116                                slv = & sdtlck->lv[0];
4117                        }
4118
4119                        slv->offset = ssi;
4120                        ns = 0;
4121                }
4122
4123                /*
4124                 * move head/only segment of an entry
4125                 */
4126                /* get dst slot */
4127                h = d = &dp->slot[dsi];
4128
4129                /* get src slot and move */
4130                s = &sp->slot[ssi];
4131                if (sp->header.flag & BT_LEAF) {
4132                        /* get source entry */
4133                        slh = (struct ldtentry *) s;
4134                        dlh = (struct ldtentry *) h;
4135                        snamlen = slh->namlen;
4136
4137                        if (do_index) {
4138                                len = min(snamlen, DTLHDRDATALEN);
4139                                dlh->index = slh->index; /* little-endian */
4140                        } else
4141                                len = min(snamlen, DTLHDRDATALEN_LEGACY);
4142
4143                        memcpy(dlh, slh, 6 + len * 2);
4144
4145                        next = slh->next;
4146
4147                        /* update dst head/only segment next field */
4148                        dsi++;
4149                        dlh->next = dsi;
4150                } else {
4151                        sih = (struct idtentry *) s;
4152                        snamlen = sih->namlen;
4153
4154                        len = min(snamlen, DTIHDRDATALEN);
4155                        dih = (struct idtentry *) h;
4156                        memcpy(dih, sih, 10 + len * 2);
4157                        next = sih->next;
4158
4159                        dsi++;
4160                        dih->next = dsi;
4161                }
4162
4163                /* free src head/only segment */
4164                s->next = sfsi;
4165                s->cnt = 1;
4166                sfsi = ssi;
4167
4168                ns++;
4169                nd++;
4170                xssi = ssi;
4171
4172                /*
4173                 * move additional segment(s) of the entry
4174                 */
4175                snamlen -= len;
4176                while ((ssi = next) >= 0) {
4177                        /* is next slot contiguous ? */
4178                        if (ssi != xssi + 1) {
4179                                /* close current linelock */
4180                                slv->length = ns;
4181                                sdtlck->index++;
4182
4183                                /* open new linelock */
4184                                if (sdtlck->index < sdtlck->maxcnt)
4185                                        slv++;
4186                                else {
4187                                        sdtlck =
4188                                            (struct dt_lock *)
4189                                            txLinelock(sdtlck);
4190                                        slv = & sdtlck->lv[0];
4191                                }
4192
4193                                slv->offset = ssi;
4194                                ns = 0;
4195                        }
4196
4197                        /* get next source segment */
4198                        s = &sp->slot[ssi];
4199
4200                        /* get next destination free slot */
4201                        d++;
4202
4203                        len = min(snamlen, DTSLOTDATALEN);
4204                        UniStrncpy_le(d->name, s->name, len);
4205
4206                        ns++;
4207                        nd++;
4208                        xssi = ssi;
4209
4210                        dsi++;
4211                        d->next = dsi;
4212
4213                        /* free source segment */
4214                        next = s->next;
4215                        s->next = sfsi;
4216                        s->cnt = 1;
4217                        sfsi = ssi;
4218
4219                        snamlen -= len;
4220                }               /* end while */
4221
4222                /* terminate dst last/only segment */
4223                if (h == d) {
4224                        /* single segment entry */
4225                        if (dp->header.flag & BT_LEAF)
4226                                dlh->next = -1;
4227                        else
4228                                dih->next = -1;
4229                } else
4230                        /* multi-segment entry */
4231                        d->next = -1;
4232        }                       /* end for */
4233
4234        /* close current linelock */
4235        slv->length = ns;
4236        sdtlck->index++;
4237        *sdtlock = sdtlck;
4238
4239        dlv->length = nd;
4240        ddtlck->index++;
4241        *ddtlock = ddtlck;
4242
4243        /* update source header */
4244        sp->header.freelist = sfsi;
4245        sp->header.freecnt += nd;
4246
4247        /* update destination header */
4248        dp->header.nextindex = di;
4249
4250        dp->header.freelist = dsi;
4251        dp->header.freecnt -= nd;
4252}
4253
4254
4255/*
4256 *      dtDeleteEntry()
4257 *
4258 * function: free a (leaf/internal) entry
4259 *
4260 * log freelist header, stbl, and each segment slot of entry
4261 * (even though last/only segment next field is modified,
4262 * physical image logging requires all segment slots of
4263 * the entry logged to avoid applying previous updates
4264 * to the same slots)
4265 */
4266static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
4267{
4268        int fsi;                /* free entry slot index */
4269        s8 *stbl;
4270        struct dtslot *t;
4271        int si, freecnt;
4272        struct dt_lock *dtlck = *dtlock;
4273        struct lv *lv;
4274        int xsi, n;
4275
4276        /* get free entry slot index */
4277        stbl = DT_GETSTBL(p);
4278        fsi = stbl[fi];
4279
4280        /* open new linelock */
4281        if (dtlck->index >= dtlck->maxcnt)
4282                dtlck = (struct dt_lock *) txLinelock(dtlck);
4283        lv = & dtlck->lv[dtlck->index];
4284
4285        lv->offset = fsi;
4286
4287        /* get the head/only segment */
4288        t = &p->slot[fsi];
4289        if (p->header.flag & BT_LEAF)
4290                si = ((struct ldtentry *) t)->next;
4291        else
4292                si = ((struct idtentry *) t)->next;
4293        t->next = si;
4294        t->cnt = 1;
4295
4296        n = freecnt = 1;
4297        xsi = fsi;
4298
4299        /* find the last/only segment */
4300        while (si >= 0) {
4301                /* is next slot contiguous ? */
4302                if (si != xsi + 1) {
4303                        /* close current linelock */
4304                        lv->length = n;
4305                        dtlck->index++;
4306
4307                        /* open new linelock */
4308                        if (dtlck->index < dtlck->maxcnt)
4309                                lv++;
4310                        else {
4311                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4312                                lv = & dtlck->lv[0];
4313                        }
4314
4315                        lv->offset = si;
4316                        n = 0;
4317                }
4318
4319                n++;
4320                xsi = si;
4321                freecnt++;
4322
4323                t = &p->slot[si];
4324                t->cnt = 1;
4325                si = t->next;
4326        }
4327
4328        /* close current linelock */
4329        lv->length = n;
4330        dtlck->index++;
4331
4332        *dtlock = dtlck;
4333
4334        /* update freelist */
4335        t->next = p->header.freelist;
4336        p->header.freelist = fsi;
4337        p->header.freecnt += freecnt;
4338
4339        /* if delete from middle,
4340         * shift left the succedding entries in the stbl
4341         */
4342        si = p->header.nextindex;
4343        if (fi < si - 1)
4344                memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
4345
4346        p->header.nextindex--;
4347}
4348
4349
4350/*
4351 *      dtTruncateEntry()
4352 *
4353 * function: truncate a (leaf/internal) entry
4354 *
4355 * log freelist header, stbl, and each segment slot of entry
4356 * (even though last/only segment next field is modified,
4357 * physical image logging requires all segment slots of
4358 * the entry logged to avoid applying previous updates
4359 * to the same slots)
4360 */
4361static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
4362{
4363        int tsi;                /* truncate entry slot index */
4364        s8 *stbl;
4365        struct dtslot *t;
4366        int si, freecnt;
4367        struct dt_lock *dtlck = *dtlock;
4368        struct lv *lv;
4369        int fsi, xsi, n;
4370
4371        /* get free entry slot index */
4372        stbl = DT_GETSTBL(p);
4373        tsi = stbl[ti];
4374
4375        /* open new linelock */
4376        if (dtlck->index >= dtlck->maxcnt)
4377                dtlck = (struct dt_lock *) txLinelock(dtlck);
4378        lv = & dtlck->lv[dtlck->index];
4379
4380        lv->offset = tsi;
4381
4382        /* get the head/only segment */
4383        t = &p->slot[tsi];
4384        ASSERT(p->header.flag & BT_INTERNAL);
4385        ((struct idtentry *) t)->namlen = 0;
4386        si = ((struct idtentry *) t)->next;
4387        ((struct idtentry *) t)->next = -1;
4388
4389        n = 1;
4390        freecnt = 0;
4391        fsi = si;
4392        xsi = tsi;
4393
4394        /* find the last/only segment */
4395        while (si >= 0) {
4396                /* is next slot contiguous ? */
4397                if (si != xsi + 1) {
4398                        /* close current linelock */
4399                        lv->length = n;
4400                        dtlck->index++;
4401
4402                        /* open new linelock */
4403                        if (dtlck->index < dtlck->maxcnt)
4404                                lv++;
4405                        else {
4406                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4407                                lv = & dtlck->lv[0];
4408                        }
4409
4410                        lv->offset = si;
4411                        n = 0;
4412                }
4413
4414                n++;
4415                xsi = si;
4416                freecnt++;
4417
4418                t = &p->slot[si];
4419                t->cnt = 1;
4420                si = t->next;
4421        }
4422
4423        /* close current linelock */
4424        lv->length = n;
4425        dtlck->index++;
4426
4427        *dtlock = dtlck;
4428
4429        /* update freelist */
4430        if (freecnt == 0)
4431                return;
4432        t->next = p->header.freelist;
4433        p->header.freelist = fsi;
4434        p->header.freecnt += freecnt;
4435}
4436
4437
4438/*
4439 *      dtLinelockFreelist()
4440 */
4441static void dtLinelockFreelist(dtpage_t * p,    /* directory page */
4442                               int m,   /* max slot index */
4443                               struct dt_lock ** dtlock)
4444{
4445        int fsi;                /* free entry slot index */
4446        struct dtslot *t;
4447        int si;
4448        struct dt_lock *dtlck = *dtlock;
4449        struct lv *lv;
4450        int xsi, n;
4451
4452        /* get free entry slot index */
4453        fsi = p->header.freelist;
4454
4455        /* open new linelock */
4456        if (dtlck->index >= dtlck->maxcnt)
4457                dtlck = (struct dt_lock *) txLinelock(dtlck);
4458        lv = & dtlck->lv[dtlck->index];
4459
4460        lv->offset = fsi;
4461
4462        n = 1;
4463        xsi = fsi;
4464
4465        t = &p->slot[fsi];
4466        si = t->next;
4467
4468        /* find the last/only segment */
4469        while (si < m && si >= 0) {
4470                /* is next slot contiguous ? */
4471                if (si != xsi + 1) {
4472                        /* close current linelock */
4473                        lv->length = n;
4474                        dtlck->index++;
4475
4476                        /* open new linelock */
4477                        if (dtlck->index < dtlck->maxcnt)
4478                                lv++;
4479                        else {
4480                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4481                                lv = & dtlck->lv[0];
4482                        }
4483
4484                        lv->offset = si;
4485                        n = 0;
4486                }
4487
4488                n++;
4489                xsi = si;
4490
4491                t = &p->slot[si];
4492                si = t->next;
4493        }
4494
4495        /* close current linelock */
4496        lv->length = n;
4497        dtlck->index++;
4498
4499        *dtlock = dtlck;
4500}
4501
4502
4503/*
4504 * NAME: dtModify
4505 *
4506 * FUNCTION: Modify the inode number part of a directory entry
4507 *
4508 * PARAMETERS:
4509 *      tid     - Transaction id
4510 *      ip      - Inode of parent directory
4511 *      key     - Name of entry to be modified
4512 *      orig_ino        - Original inode number expected in entry
4513 *      new_ino - New inode number to put into entry
4514 *      flag    - JFS_RENAME
4515 *
4516 * RETURNS:
4517 *      -ESTALE - If entry found does not match orig_ino passed in
4518 *      -ENOENT - If no entry can be found to match key
4519 *      0       - If successfully modified entry
4520 */
4521int dtModify(tid_t tid, struct inode *ip,
4522         struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
4523{
4524        int rc;
4525        s64 bn;
4526        struct metapage *mp;
4527        dtpage_t *p;
4528        int index;
4529        struct btstack btstack;
4530        struct tlock *tlck;
4531        struct dt_lock *dtlck;
4532        struct lv *lv;
4533        s8 *stbl;
4534        int entry_si;           /* entry slot index */
4535        struct ldtentry *entry;
4536
4537        /*
4538         *      search for the entry to modify:
4539         *
4540         * dtSearch() returns (leaf page pinned, index at which to modify).
4541         */
4542        if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
4543                return rc;
4544
4545        /* retrieve search result */
4546        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4547
4548        BT_MARK_DIRTY(mp, ip);
4549        /*
4550         * acquire a transaction lock on the leaf page of named entry
4551         */
4552        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
4553        dtlck = (struct dt_lock *) & tlck->lock;
4554
4555        /* get slot index of the entry */
4556        stbl = DT_GETSTBL(p);
4557        entry_si = stbl[index];
4558
4559        /* linelock entry */
4560        ASSERT(dtlck->index == 0);
4561        lv = & dtlck->lv[0];
4562        lv->offset = entry_si;
4563        lv->length = 1;
4564        dtlck->index++;
4565
4566        /* get the head/only segment */
4567        entry = (struct ldtentry *) & p->slot[entry_si];
4568
4569        /* substitute the inode number of the entry */
4570        entry->inumber = cpu_to_le32(new_ino);
4571
4572        /* unpin the leaf page */
4573        DT_PUTPAGE(mp);
4574
4575        return 0;
4576}
4577