linux/fs/jfs/jfs_xtree.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2005
   4 */
   5/*
   6 *      jfs_xtree.c: extent allocation descriptor B+-tree manager
   7 */
   8
   9#include <linux/fs.h>
  10#include <linux/module.h>
  11#include <linux/quotaops.h>
  12#include <linux/seq_file.h>
  13#include "jfs_incore.h"
  14#include "jfs_filsys.h"
  15#include "jfs_metapage.h"
  16#include "jfs_dmap.h"
  17#include "jfs_dinode.h"
  18#include "jfs_superblock.h"
  19#include "jfs_debug.h"
  20
  21/*
  22 * xtree local flag
  23 */
  24#define XT_INSERT       0x00000001
  25
  26/*
  27 *      xtree key/entry comparison: extent offset
  28 *
  29 * return:
  30 *      -1: k < start of extent
  31 *       0: start_of_extent <= k <= end_of_extent
  32 *       1: k > end_of_extent
  33 */
  34#define XT_CMP(CMP, K, X, OFFSET64)\
  35{\
  36        OFFSET64 = offsetXAD(X);\
  37        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
  38                ((K) < OFFSET64) ? -1 : 0;\
  39}
  40
  41/* write a xad entry */
  42#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
  43{\
  44        (XAD)->flag = (FLAG);\
  45        XADoffset((XAD), (OFF));\
  46        XADlength((XAD), (LEN));\
  47        XADaddress((XAD), (ADDR));\
  48}
  49
  50#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
  51
  52/* get page buffer for specified block address */
  53/* ToDo: Replace this ugly macro with a function */
  54#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)                             \
  55do {                                                                    \
  56        BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot);        \
  57        if (!(RC)) {                                                    \
  58                if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \
  59                    (le16_to_cpu((P)->header.nextindex) >               \
  60                     le16_to_cpu((P)->header.maxentry)) ||              \
  61                    (le16_to_cpu((P)->header.maxentry) >                \
  62                     (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \
  63                        jfs_error((IP)->i_sb,                           \
  64                                  "XT_GETPAGE: xtree page corrupt\n");  \
  65                        BT_PUTPAGE(MP);                                 \
  66                        MP = NULL;                                      \
  67                        RC = -EIO;                                      \
  68                }                                                       \
  69        }                                                               \
  70} while (0)
  71
  72/* for consistency */
  73#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
  74
  75#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
  76        BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
  77/* xtree entry parameter descriptor */
  78struct xtsplit {
  79        struct metapage *mp;
  80        s16 index;
  81        u8 flag;
  82        s64 off;
  83        s64 addr;
  84        int len;
  85        struct pxdlist *pxdlist;
  86};
  87
  88
  89/*
  90 *      statistics
  91 */
  92#ifdef CONFIG_JFS_STATISTICS
  93static struct {
  94        uint search;
  95        uint fastSearch;
  96        uint split;
  97} xtStat;
  98#endif
  99
 100
 101/*
 102 * forward references
 103 */
 104static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
 105                    struct btstack * btstack, int flag);
 106
 107static int xtSplitUp(tid_t tid,
 108                     struct inode *ip,
 109                     struct xtsplit * split, struct btstack * btstack);
 110
 111static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
 112                       struct metapage ** rmpp, s64 * rbnp);
 113
 114static int xtSplitRoot(tid_t tid, struct inode *ip,
 115                       struct xtsplit * split, struct metapage ** rmpp);
 116
 117#ifdef _STILL_TO_PORT
 118static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
 119                      xtpage_t * fp, struct btstack * btstack);
 120
 121static int xtSearchNode(struct inode *ip,
 122                        xad_t * xad,
 123                        int *cmpp, struct btstack * btstack, int flag);
 124
 125static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
 126#endif                          /*  _STILL_TO_PORT */
 127
 128/*
 129 *      xtLookup()
 130 *
 131 * function: map a single page into a physical extent;
 132 */
 133int xtLookup(struct inode *ip, s64 lstart,
 134             s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
 135{
 136        int rc = 0;
 137        struct btstack btstack;
 138        int cmp;
 139        s64 bn;
 140        struct metapage *mp;
 141        xtpage_t *p;
 142        int index;
 143        xad_t *xad;
 144        s64 next, size, xoff, xend;
 145        int xlen;
 146        s64 xaddr;
 147
 148        *paddr = 0;
 149        *plen = llen;
 150
 151        if (!no_check) {
 152                /* is lookup offset beyond eof ? */
 153                size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
 154                    JFS_SBI(ip->i_sb)->l2bsize;
 155                if (lstart >= size)
 156                        return 0;
 157        }
 158
 159        /*
 160         * search for the xad entry covering the logical extent
 161         */
 162//search:
 163        if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
 164                jfs_err("xtLookup: xtSearch returned %d", rc);
 165                return rc;
 166        }
 167
 168        /*
 169         *      compute the physical extent covering logical extent
 170         *
 171         * N.B. search may have failed (e.g., hole in sparse file),
 172         * and returned the index of the next entry.
 173         */
 174        /* retrieve search result */
 175        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 176
 177        /* is xad found covering start of logical extent ?
 178         * lstart is a page start address,
 179         * i.e., lstart cannot start in a hole;
 180         */
 181        if (cmp) {
 182                if (next)
 183                        *plen = min(next - lstart, llen);
 184                goto out;
 185        }
 186
 187        /*
 188         * lxd covered by xad
 189         */
 190        xad = &p->xad[index];
 191        xoff = offsetXAD(xad);
 192        xlen = lengthXAD(xad);
 193        xend = xoff + xlen;
 194        xaddr = addressXAD(xad);
 195
 196        /* initialize new pxd */
 197        *pflag = xad->flag;
 198        *paddr = xaddr + (lstart - xoff);
 199        /* a page must be fully covered by an xad */
 200        *plen = min(xend - lstart, llen);
 201
 202      out:
 203        XT_PUTPAGE(mp);
 204
 205        return rc;
 206}
 207
 208/*
 209 *      xtSearch()
 210 *
 211 * function:    search for the xad entry covering specified offset.
 212 *
 213 * parameters:
 214 *      ip      - file object;
 215 *      xoff    - extent offset;
 216 *      nextp   - address of next extent (if any) for search miss
 217 *      cmpp    - comparison result:
 218 *      btstack - traverse stack;
 219 *      flag    - search process flag (XT_INSERT);
 220 *
 221 * returns:
 222 *      btstack contains (bn, index) of search path traversed to the entry.
 223 *      *cmpp is set to result of comparison with the entry returned.
 224 *      the page containing the entry is pinned at exit.
 225 */
 226static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
 227                    int *cmpp, struct btstack * btstack, int flag)
 228{
 229        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 230        int rc = 0;
 231        int cmp = 1;            /* init for empty page */
 232        s64 bn;                 /* block number */
 233        struct metapage *mp;    /* page buffer */
 234        xtpage_t *p;            /* page */
 235        xad_t *xad;
 236        int base, index, lim, btindex;
 237        struct btframe *btsp;
 238        int nsplit = 0;         /* number of pages to split */
 239        s64 t64;
 240        s64 next = 0;
 241
 242        INCREMENT(xtStat.search);
 243
 244        BT_CLR(btstack);
 245
 246        btstack->nsplit = 0;
 247
 248        /*
 249         *      search down tree from root:
 250         *
 251         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 252         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
 253         *
 254         * if entry with search key K is not found
 255         * internal page search find the entry with largest key Ki
 256         * less than K which point to the child page to search;
 257         * leaf page search find the entry with smallest key Kj
 258         * greater than K so that the returned index is the position of
 259         * the entry to be shifted right for insertion of new entry.
 260         * for empty tree, search key is greater than any key of the tree.
 261         *
 262         * by convention, root bn = 0.
 263         */
 264        for (bn = 0;;) {
 265                /* get/pin the page to search */
 266                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 267                if (rc)
 268                        return rc;
 269
 270                /* try sequential access heuristics with the previous
 271                 * access entry in target leaf page:
 272                 * once search narrowed down into the target leaf,
 273                 * key must either match an entry in the leaf or
 274                 * key entry does not exist in the tree;
 275                 */
 276//fastSearch:
 277                if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
 278                    (p->header.flag & BT_LEAF) &&
 279                    (index = jfs_ip->btindex) <
 280                    le16_to_cpu(p->header.nextindex)) {
 281                        xad = &p->xad[index];
 282                        t64 = offsetXAD(xad);
 283                        if (xoff < t64 + lengthXAD(xad)) {
 284                                if (xoff >= t64) {
 285                                        *cmpp = 0;
 286                                        goto out;
 287                                }
 288
 289                                /* stop sequential access heuristics */
 290                                goto binarySearch;
 291                        } else {        /* (t64 + lengthXAD(xad)) <= xoff */
 292
 293                                /* try next sequential entry */
 294                                index++;
 295                                if (index <
 296                                    le16_to_cpu(p->header.nextindex)) {
 297                                        xad++;
 298                                        t64 = offsetXAD(xad);
 299                                        if (xoff < t64 + lengthXAD(xad)) {
 300                                                if (xoff >= t64) {
 301                                                        *cmpp = 0;
 302                                                        goto out;
 303                                                }
 304
 305                                                /* miss: key falls between
 306                                                 * previous and this entry
 307                                                 */
 308                                                *cmpp = 1;
 309                                                next = t64;
 310                                                goto out;
 311                                        }
 312
 313                                        /* (xoff >= t64 + lengthXAD(xad));
 314                                         * matching entry may be further out:
 315                                         * stop heuristic search
 316                                         */
 317                                        /* stop sequential access heuristics */
 318                                        goto binarySearch;
 319                                }
 320
 321                                /* (index == p->header.nextindex);
 322                                 * miss: key entry does not exist in
 323                                 * the target leaf/tree
 324                                 */
 325                                *cmpp = 1;
 326                                goto out;
 327                        }
 328
 329                        /*
 330                         * if hit, return index of the entry found, and
 331                         * if miss, where new entry with search key is
 332                         * to be inserted;
 333                         */
 334                      out:
 335                        /* compute number of pages to split */
 336                        if (flag & XT_INSERT) {
 337                                if (p->header.nextindex ==      /* little-endian */
 338                                    p->header.maxentry)
 339                                        nsplit++;
 340                                else
 341                                        nsplit = 0;
 342                                btstack->nsplit = nsplit;
 343                        }
 344
 345                        /* save search result */
 346                        btsp = btstack->top;
 347                        btsp->bn = bn;
 348                        btsp->index = index;
 349                        btsp->mp = mp;
 350
 351                        /* update sequential access heuristics */
 352                        jfs_ip->btindex = index;
 353
 354                        if (nextp)
 355                                *nextp = next;
 356
 357                        INCREMENT(xtStat.fastSearch);
 358                        return 0;
 359                }
 360
 361                /* well, ... full search now */
 362              binarySearch:
 363                lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
 364
 365                /*
 366                 * binary search with search key K on the current page
 367                 */
 368                for (base = XTENTRYSTART; lim; lim >>= 1) {
 369                        index = base + (lim >> 1);
 370
 371                        XT_CMP(cmp, xoff, &p->xad[index], t64);
 372                        if (cmp == 0) {
 373                                /*
 374                                 *      search hit
 375                                 */
 376                                /* search hit - leaf page:
 377                                 * return the entry found
 378                                 */
 379                                if (p->header.flag & BT_LEAF) {
 380                                        *cmpp = cmp;
 381
 382                                        /* compute number of pages to split */
 383                                        if (flag & XT_INSERT) {
 384                                                if (p->header.nextindex ==
 385                                                    p->header.maxentry)
 386                                                        nsplit++;
 387                                                else
 388                                                        nsplit = 0;
 389                                                btstack->nsplit = nsplit;
 390                                        }
 391
 392                                        /* save search result */
 393                                        btsp = btstack->top;
 394                                        btsp->bn = bn;
 395                                        btsp->index = index;
 396                                        btsp->mp = mp;
 397
 398                                        /* init sequential access heuristics */
 399                                        btindex = jfs_ip->btindex;
 400                                        if (index == btindex ||
 401                                            index == btindex + 1)
 402                                                jfs_ip->btorder = BT_SEQUENTIAL;
 403                                        else
 404                                                jfs_ip->btorder = BT_RANDOM;
 405                                        jfs_ip->btindex = index;
 406
 407                                        return 0;
 408                                }
 409                                /* search hit - internal page:
 410                                 * descend/search its child page
 411                                 */
 412                                if (index < le16_to_cpu(p->header.nextindex)-1)
 413                                        next = offsetXAD(&p->xad[index + 1]);
 414                                goto next;
 415                        }
 416
 417                        if (cmp > 0) {
 418                                base = index + 1;
 419                                --lim;
 420                        }
 421                }
 422
 423                /*
 424                 *      search miss
 425                 *
 426                 * base is the smallest index with key (Kj) greater than
 427                 * search key (K) and may be zero or maxentry index.
 428                 */
 429                if (base < le16_to_cpu(p->header.nextindex))
 430                        next = offsetXAD(&p->xad[base]);
 431                /*
 432                 * search miss - leaf page:
 433                 *
 434                 * return location of entry (base) where new entry with
 435                 * search key K is to be inserted.
 436                 */
 437                if (p->header.flag & BT_LEAF) {
 438                        *cmpp = cmp;
 439
 440                        /* compute number of pages to split */
 441                        if (flag & XT_INSERT) {
 442                                if (p->header.nextindex ==
 443                                    p->header.maxentry)
 444                                        nsplit++;
 445                                else
 446                                        nsplit = 0;
 447                                btstack->nsplit = nsplit;
 448                        }
 449
 450                        /* save search result */
 451                        btsp = btstack->top;
 452                        btsp->bn = bn;
 453                        btsp->index = base;
 454                        btsp->mp = mp;
 455
 456                        /* init sequential access heuristics */
 457                        btindex = jfs_ip->btindex;
 458                        if (base == btindex || base == btindex + 1)
 459                                jfs_ip->btorder = BT_SEQUENTIAL;
 460                        else
 461                                jfs_ip->btorder = BT_RANDOM;
 462                        jfs_ip->btindex = base;
 463
 464                        if (nextp)
 465                                *nextp = next;
 466
 467                        return 0;
 468                }
 469
 470                /*
 471                 * search miss - non-leaf page:
 472                 *
 473                 * if base is non-zero, decrement base by one to get the parent
 474                 * entry of the child page to search.
 475                 */
 476                index = base ? base - 1 : base;
 477
 478                /*
 479                 * go down to child page
 480                 */
 481              next:
 482                /* update number of pages to split */
 483                if (p->header.nextindex == p->header.maxentry)
 484                        nsplit++;
 485                else
 486                        nsplit = 0;
 487
 488                /* push (bn, index) of the parent page/entry */
 489                if (BT_STACK_FULL(btstack)) {
 490                        jfs_error(ip->i_sb, "stack overrun!\n");
 491                        XT_PUTPAGE(mp);
 492                        return -EIO;
 493                }
 494                BT_PUSH(btstack, bn, index);
 495
 496                /* get the child page block number */
 497                bn = addressXAD(&p->xad[index]);
 498
 499                /* unpin the parent page */
 500                XT_PUTPAGE(mp);
 501        }
 502}
 503
 504/*
 505 *      xtInsert()
 506 *
 507 * function:
 508 *
 509 * parameter:
 510 *      tid     - transaction id;
 511 *      ip      - file object;
 512 *      xflag   - extent flag (XAD_NOTRECORDED):
 513 *      xoff    - extent offset;
 514 *      xlen    - extent length;
 515 *      xaddrp  - extent address pointer (in/out):
 516 *              if (*xaddrp)
 517 *                      caller allocated data extent at *xaddrp;
 518 *              else
 519 *                      allocate data extent and return its xaddr;
 520 *      flag    -
 521 *
 522 * return:
 523 */
 524int xtInsert(tid_t tid,         /* transaction id */
 525             struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
 526             int flag)
 527{
 528        int rc = 0;
 529        s64 xaddr, hint;
 530        struct metapage *mp;    /* meta-page buffer */
 531        xtpage_t *p;            /* base B+-tree index page */
 532        s64 bn;
 533        int index, nextindex;
 534        struct btstack btstack; /* traverse stack */
 535        struct xtsplit split;   /* split information */
 536        xad_t *xad;
 537        int cmp;
 538        s64 next;
 539        struct tlock *tlck;
 540        struct xtlock *xtlck;
 541
 542        jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
 543
 544        /*
 545         *      search for the entry location at which to insert:
 546         *
 547         * xtFastSearch() and xtSearch() both returns (leaf page
 548         * pinned, index at which to insert).
 549         * n.b. xtSearch() may return index of maxentry of
 550         * the full page.
 551         */
 552        if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
 553                return rc;
 554
 555        /* retrieve search result */
 556        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 557
 558        /* This test must follow XT_GETSEARCH since mp must be valid if
 559         * we branch to out: */
 560        if ((cmp == 0) || (next && (xlen > next - xoff))) {
 561                rc = -EEXIST;
 562                goto out;
 563        }
 564
 565        /*
 566         * allocate data extent requested
 567         *
 568         * allocation hint: last xad
 569         */
 570        if ((xaddr = *xaddrp) == 0) {
 571                if (index > XTENTRYSTART) {
 572                        xad = &p->xad[index - 1];
 573                        hint = addressXAD(xad) + lengthXAD(xad) - 1;
 574                } else
 575                        hint = 0;
 576                if ((rc = dquot_alloc_block(ip, xlen)))
 577                        goto out;
 578                if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
 579                        dquot_free_block(ip, xlen);
 580                        goto out;
 581                }
 582        }
 583
 584        /*
 585         *      insert entry for new extent
 586         */
 587        xflag |= XAD_NEW;
 588
 589        /*
 590         *      if the leaf page is full, split the page and
 591         *      propagate up the router entry for the new page from split
 592         *
 593         * The xtSplitUp() will insert the entry and unpin the leaf page.
 594         */
 595        nextindex = le16_to_cpu(p->header.nextindex);
 596        if (nextindex == le16_to_cpu(p->header.maxentry)) {
 597                split.mp = mp;
 598                split.index = index;
 599                split.flag = xflag;
 600                split.off = xoff;
 601                split.len = xlen;
 602                split.addr = xaddr;
 603                split.pxdlist = NULL;
 604                if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
 605                        /* undo data extent allocation */
 606                        if (*xaddrp == 0) {
 607                                dbFree(ip, xaddr, (s64) xlen);
 608                                dquot_free_block(ip, xlen);
 609                        }
 610                        return rc;
 611                }
 612
 613                *xaddrp = xaddr;
 614                return 0;
 615        }
 616
 617        /*
 618         *      insert the new entry into the leaf page
 619         */
 620        /*
 621         * acquire a transaction lock on the leaf page;
 622         *
 623         * action: xad insertion/extension;
 624         */
 625        BT_MARK_DIRTY(mp, ip);
 626
 627        /* if insert into middle, shift right remaining entries. */
 628        if (index < nextindex)
 629                memmove(&p->xad[index + 1], &p->xad[index],
 630                        (nextindex - index) * sizeof(xad_t));
 631
 632        /* insert the new entry: mark the entry NEW */
 633        xad = &p->xad[index];
 634        XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
 635
 636        /* advance next available entry index */
 637        le16_add_cpu(&p->header.nextindex, 1);
 638
 639        /* Don't log it if there are no links to the file */
 640        if (!test_cflag(COMMIT_Nolink, ip)) {
 641                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 642                xtlck = (struct xtlock *) & tlck->lock;
 643                xtlck->lwm.offset =
 644                    (xtlck->lwm.offset) ? min(index,
 645                                              (int)xtlck->lwm.offset) : index;
 646                xtlck->lwm.length =
 647                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
 648        }
 649
 650        *xaddrp = xaddr;
 651
 652      out:
 653        /* unpin the leaf page */
 654        XT_PUTPAGE(mp);
 655
 656        return rc;
 657}
 658
 659
 660/*
 661 *      xtSplitUp()
 662 *
 663 * function:
 664 *      split full pages as propagating insertion up the tree
 665 *
 666 * parameter:
 667 *      tid     - transaction id;
 668 *      ip      - file object;
 669 *      split   - entry parameter descriptor;
 670 *      btstack - traverse stack from xtSearch()
 671 *
 672 * return:
 673 */
 674static int
 675xtSplitUp(tid_t tid,
 676          struct inode *ip, struct xtsplit * split, struct btstack * btstack)
 677{
 678        int rc = 0;
 679        struct metapage *smp;
 680        xtpage_t *sp;           /* split page */
 681        struct metapage *rmp;
 682        s64 rbn;                /* new right page block number */
 683        struct metapage *rcmp;
 684        xtpage_t *rcp;          /* right child page */
 685        s64 rcbn;               /* right child page block number */
 686        int skip;               /* index of entry of insertion */
 687        int nextindex;          /* next available entry index of p */
 688        struct btframe *parent; /* parent page entry on traverse stack */
 689        xad_t *xad;
 690        s64 xaddr;
 691        int xlen;
 692        int nsplit;             /* number of pages split */
 693        struct pxdlist pxdlist;
 694        pxd_t *pxd;
 695        struct tlock *tlck;
 696        struct xtlock *xtlck;
 697
 698        smp = split->mp;
 699        sp = XT_PAGE(ip, smp);
 700
 701        /* is inode xtree root extension/inline EA area free ? */
 702        if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
 703            (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
 704            (JFS_IP(ip)->mode2 & INLINEEA)) {
 705                sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
 706                JFS_IP(ip)->mode2 &= ~INLINEEA;
 707
 708                BT_MARK_DIRTY(smp, ip);
 709                /*
 710                 * acquire a transaction lock on the leaf page;
 711                 *
 712                 * action: xad insertion/extension;
 713                 */
 714
 715                /* if insert into middle, shift right remaining entries. */
 716                skip = split->index;
 717                nextindex = le16_to_cpu(sp->header.nextindex);
 718                if (skip < nextindex)
 719                        memmove(&sp->xad[skip + 1], &sp->xad[skip],
 720                                (nextindex - skip) * sizeof(xad_t));
 721
 722                /* insert the new entry: mark the entry NEW */
 723                xad = &sp->xad[skip];
 724                XT_PUTENTRY(xad, split->flag, split->off, split->len,
 725                            split->addr);
 726
 727                /* advance next available entry index */
 728                le16_add_cpu(&sp->header.nextindex, 1);
 729
 730                /* Don't log it if there are no links to the file */
 731                if (!test_cflag(COMMIT_Nolink, ip)) {
 732                        tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
 733                        xtlck = (struct xtlock *) & tlck->lock;
 734                        xtlck->lwm.offset = (xtlck->lwm.offset) ?
 735                            min(skip, (int)xtlck->lwm.offset) : skip;
 736                        xtlck->lwm.length =
 737                            le16_to_cpu(sp->header.nextindex) -
 738                            xtlck->lwm.offset;
 739                }
 740
 741                return 0;
 742        }
 743
 744        /*
 745         * allocate new index blocks to cover index page split(s)
 746         *
 747         * allocation hint: ?
 748         */
 749        if (split->pxdlist == NULL) {
 750                nsplit = btstack->nsplit;
 751                split->pxdlist = &pxdlist;
 752                pxdlist.maxnpxd = pxdlist.npxd = 0;
 753                pxd = &pxdlist.pxd[0];
 754                xlen = JFS_SBI(ip->i_sb)->nbperpage;
 755                for (; nsplit > 0; nsplit--, pxd++) {
 756                        if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
 757                            == 0) {
 758                                PXDaddress(pxd, xaddr);
 759                                PXDlength(pxd, xlen);
 760
 761                                pxdlist.maxnpxd++;
 762
 763                                continue;
 764                        }
 765
 766                        /* undo allocation */
 767
 768                        XT_PUTPAGE(smp);
 769                        return rc;
 770                }
 771        }
 772
 773        /*
 774         * Split leaf page <sp> into <sp> and a new right page <rp>.
 775         *
 776         * The split routines insert the new entry into the leaf page,
 777         * and acquire txLock as appropriate.
 778         * return <rp> pinned and its block number <rpbn>.
 779         */
 780        rc = (sp->header.flag & BT_ROOT) ?
 781            xtSplitRoot(tid, ip, split, &rmp) :
 782            xtSplitPage(tid, ip, split, &rmp, &rbn);
 783
 784        XT_PUTPAGE(smp);
 785
 786        if (rc)
 787                return -EIO;
 788        /*
 789         * propagate up the router entry for the leaf page just split
 790         *
 791         * insert a router entry for the new page into the parent page,
 792         * propagate the insert/split up the tree by walking back the stack
 793         * of (bn of parent page, index of child page entry in parent page)
 794         * that were traversed during the search for the page that split.
 795         *
 796         * the propagation of insert/split up the tree stops if the root
 797         * splits or the page inserted into doesn't have to split to hold
 798         * the new entry.
 799         *
 800         * the parent entry for the split page remains the same, and
 801         * a new entry is inserted at its right with the first key and
 802         * block number of the new right page.
 803         *
 804         * There are a maximum of 3 pages pinned at any time:
 805         * right child, left parent and right parent (when the parent splits)
 806         * to keep the child page pinned while working on the parent.
 807         * make sure that all pins are released at exit.
 808         */
 809        while ((parent = BT_POP(btstack)) != NULL) {
 810                /* parent page specified by stack frame <parent> */
 811
 812                /* keep current child pages <rcp> pinned */
 813                rcmp = rmp;
 814                rcbn = rbn;
 815                rcp = XT_PAGE(ip, rcmp);
 816
 817                /*
 818                 * insert router entry in parent for new right child page <rp>
 819                 */
 820                /* get/pin the parent page <sp> */
 821                XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
 822                if (rc) {
 823                        XT_PUTPAGE(rcmp);
 824                        return rc;
 825                }
 826
 827                /*
 828                 * The new key entry goes ONE AFTER the index of parent entry,
 829                 * because the split was to the right.
 830                 */
 831                skip = parent->index + 1;
 832
 833                /*
 834                 * split or shift right remaining entries of the parent page
 835                 */
 836                nextindex = le16_to_cpu(sp->header.nextindex);
 837                /*
 838                 * parent page is full - split the parent page
 839                 */
 840                if (nextindex == le16_to_cpu(sp->header.maxentry)) {
 841                        /* init for parent page split */
 842                        split->mp = smp;
 843                        split->index = skip;    /* index at insert */
 844                        split->flag = XAD_NEW;
 845                        split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
 846                        split->len = JFS_SBI(ip->i_sb)->nbperpage;
 847                        split->addr = rcbn;
 848
 849                        /* unpin previous right child page */
 850                        XT_PUTPAGE(rcmp);
 851
 852                        /* The split routines insert the new entry,
 853                         * and acquire txLock as appropriate.
 854                         * return <rp> pinned and its block number <rpbn>.
 855                         */
 856                        rc = (sp->header.flag & BT_ROOT) ?
 857                            xtSplitRoot(tid, ip, split, &rmp) :
 858                            xtSplitPage(tid, ip, split, &rmp, &rbn);
 859                        if (rc) {
 860                                XT_PUTPAGE(smp);
 861                                return rc;
 862                        }
 863
 864                        XT_PUTPAGE(smp);
 865                        /* keep new child page <rp> pinned */
 866                }
 867                /*
 868                 * parent page is not full - insert in parent page
 869                 */
 870                else {
 871                        /*
 872                         * insert router entry in parent for the right child
 873                         * page from the first entry of the right child page:
 874                         */
 875                        /*
 876                         * acquire a transaction lock on the parent page;
 877                         *
 878                         * action: router xad insertion;
 879                         */
 880                        BT_MARK_DIRTY(smp, ip);
 881
 882                        /*
 883                         * if insert into middle, shift right remaining entries
 884                         */
 885                        if (skip < nextindex)
 886                                memmove(&sp->xad[skip + 1], &sp->xad[skip],
 887                                        (nextindex -
 888                                         skip) << L2XTSLOTSIZE);
 889
 890                        /* insert the router entry */
 891                        xad = &sp->xad[skip];
 892                        XT_PUTENTRY(xad, XAD_NEW,
 893                                    offsetXAD(&rcp->xad[XTENTRYSTART]),
 894                                    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
 895
 896                        /* advance next available entry index. */
 897                        le16_add_cpu(&sp->header.nextindex, 1);
 898
 899                        /* Don't log it if there are no links to the file */
 900                        if (!test_cflag(COMMIT_Nolink, ip)) {
 901                                tlck = txLock(tid, ip, smp,
 902                                              tlckXTREE | tlckGROW);
 903                                xtlck = (struct xtlock *) & tlck->lock;
 904                                xtlck->lwm.offset = (xtlck->lwm.offset) ?
 905                                    min(skip, (int)xtlck->lwm.offset) : skip;
 906                                xtlck->lwm.length =
 907                                    le16_to_cpu(sp->header.nextindex) -
 908                                    xtlck->lwm.offset;
 909                        }
 910
 911                        /* unpin parent page */
 912                        XT_PUTPAGE(smp);
 913
 914                        /* exit propagate up */
 915                        break;
 916                }
 917        }
 918
 919        /* unpin current right page */
 920        XT_PUTPAGE(rmp);
 921
 922        return 0;
 923}
 924
 925
 926/*
 927 *      xtSplitPage()
 928 *
 929 * function:
 930 *      split a full non-root page into
 931 *      original/split/left page and new right page
 932 *      i.e., the original/split page remains as left page.
 933 *
 934 * parameter:
 935 *      int             tid,
 936 *      struct inode    *ip,
 937 *      struct xtsplit  *split,
 938 *      struct metapage **rmpp,
 939 *      u64             *rbnp,
 940 *
 941 * return:
 942 *      Pointer to page in which to insert or NULL on error.
 943 */
 944static int
 945xtSplitPage(tid_t tid, struct inode *ip,
 946            struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
 947{
 948        int rc = 0;
 949        struct metapage *smp;
 950        xtpage_t *sp;
 951        struct metapage *rmp;
 952        xtpage_t *rp;           /* new right page allocated */
 953        s64 rbn;                /* new right page block number */
 954        struct metapage *mp;
 955        xtpage_t *p;
 956        s64 nextbn;
 957        int skip, maxentry, middle, righthalf, n;
 958        xad_t *xad;
 959        struct pxdlist *pxdlist;
 960        pxd_t *pxd;
 961        struct tlock *tlck;
 962        struct xtlock *sxtlck = NULL, *rxtlck = NULL;
 963        int quota_allocation = 0;
 964
 965        smp = split->mp;
 966        sp = XT_PAGE(ip, smp);
 967
 968        INCREMENT(xtStat.split);
 969
 970        pxdlist = split->pxdlist;
 971        pxd = &pxdlist->pxd[pxdlist->npxd];
 972        pxdlist->npxd++;
 973        rbn = addressPXD(pxd);
 974
 975        /* Allocate blocks to quota. */
 976        rc = dquot_alloc_block(ip, lengthPXD(pxd));
 977        if (rc)
 978                goto clean_up;
 979
 980        quota_allocation += lengthPXD(pxd);
 981
 982        /*
 983         * allocate the new right page for the split
 984         */
 985        rmp = get_metapage(ip, rbn, PSIZE, 1);
 986        if (rmp == NULL) {
 987                rc = -EIO;
 988                goto clean_up;
 989        }
 990
 991        jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
 992
 993        BT_MARK_DIRTY(rmp, ip);
 994        /*
 995         * action: new page;
 996         */
 997
 998        rp = (xtpage_t *) rmp->data;
 999        rp->header.self = *pxd;
1000        rp->header.flag = sp->header.flag & BT_TYPE;
1001        rp->header.maxentry = sp->header.maxentry;      /* little-endian */
1002        rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1003
1004        BT_MARK_DIRTY(smp, ip);
1005        /* Don't log it if there are no links to the file */
1006        if (!test_cflag(COMMIT_Nolink, ip)) {
1007                /*
1008                 * acquire a transaction lock on the new right page;
1009                 */
1010                tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1011                rxtlck = (struct xtlock *) & tlck->lock;
1012                rxtlck->lwm.offset = XTENTRYSTART;
1013                /*
1014                 * acquire a transaction lock on the split page
1015                 */
1016                tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1017                sxtlck = (struct xtlock *) & tlck->lock;
1018        }
1019
1020        /*
1021         * initialize/update sibling pointers of <sp> and <rp>
1022         */
1023        nextbn = le64_to_cpu(sp->header.next);
1024        rp->header.next = cpu_to_le64(nextbn);
1025        rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1026        sp->header.next = cpu_to_le64(rbn);
1027
1028        skip = split->index;
1029
1030        /*
1031         *      sequential append at tail (after last entry of last page)
1032         *
1033         * if splitting the last page on a level because of appending
1034         * a entry to it (skip is maxentry), it's likely that the access is
1035         * sequential. adding an empty page on the side of the level is less
1036         * work and can push the fill factor much higher than normal.
1037         * if we're wrong it's no big deal -  we will do the split the right
1038         * way next time.
1039         * (it may look like it's equally easy to do a similar hack for
1040         * reverse sorted data, that is, split the tree left, but it's not.
1041         * Be my guest.)
1042         */
1043        if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1044                /*
1045                 * acquire a transaction lock on the new/right page;
1046                 *
1047                 * action: xad insertion;
1048                 */
1049                /* insert entry at the first entry of the new right page */
1050                xad = &rp->xad[XTENTRYSTART];
1051                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1052                            split->addr);
1053
1054                rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1055
1056                if (!test_cflag(COMMIT_Nolink, ip)) {
1057                        /* rxtlck->lwm.offset = XTENTRYSTART; */
1058                        rxtlck->lwm.length = 1;
1059                }
1060
1061                *rmpp = rmp;
1062                *rbnp = rbn;
1063
1064                jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1065                return 0;
1066        }
1067
1068        /*
1069         *      non-sequential insert (at possibly middle page)
1070         */
1071
1072        /*
1073         * update previous pointer of old next/right page of <sp>
1074         */
1075        if (nextbn != 0) {
1076                XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1077                if (rc) {
1078                        XT_PUTPAGE(rmp);
1079                        goto clean_up;
1080                }
1081
1082                BT_MARK_DIRTY(mp, ip);
1083                /*
1084                 * acquire a transaction lock on the next page;
1085                 *
1086                 * action:sibling pointer update;
1087                 */
1088                if (!test_cflag(COMMIT_Nolink, ip))
1089                        tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1090
1091                p->header.prev = cpu_to_le64(rbn);
1092
1093                /* sibling page may have been updated previously, or
1094                 * it may be updated later;
1095                 */
1096
1097                XT_PUTPAGE(mp);
1098        }
1099
1100        /*
1101         * split the data between the split and new/right pages
1102         */
1103        maxentry = le16_to_cpu(sp->header.maxentry);
1104        middle = maxentry >> 1;
1105        righthalf = maxentry - middle;
1106
1107        /*
1108         * skip index in old split/left page - insert into left page:
1109         */
1110        if (skip <= middle) {
1111                /* move right half of split page to the new right page */
1112                memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1113                        righthalf << L2XTSLOTSIZE);
1114
1115                /* shift right tail of left half to make room for new entry */
1116                if (skip < middle)
1117                        memmove(&sp->xad[skip + 1], &sp->xad[skip],
1118                                (middle - skip) << L2XTSLOTSIZE);
1119
1120                /* insert new entry */
1121                xad = &sp->xad[skip];
1122                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1123                            split->addr);
1124
1125                /* update page header */
1126                sp->header.nextindex = cpu_to_le16(middle + 1);
1127                if (!test_cflag(COMMIT_Nolink, ip)) {
1128                        sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1129                            min(skip, (int)sxtlck->lwm.offset) : skip;
1130                }
1131
1132                rp->header.nextindex =
1133                    cpu_to_le16(XTENTRYSTART + righthalf);
1134        }
1135        /*
1136         * skip index in new right page - insert into right page:
1137         */
1138        else {
1139                /* move left head of right half to right page */
1140                n = skip - middle;
1141                memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1142                        n << L2XTSLOTSIZE);
1143
1144                /* insert new entry */
1145                n += XTENTRYSTART;
1146                xad = &rp->xad[n];
1147                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1148                            split->addr);
1149
1150                /* move right tail of right half to right page */
1151                if (skip < maxentry)
1152                        memmove(&rp->xad[n + 1], &sp->xad[skip],
1153                                (maxentry - skip) << L2XTSLOTSIZE);
1154
1155                /* update page header */
1156                sp->header.nextindex = cpu_to_le16(middle);
1157                if (!test_cflag(COMMIT_Nolink, ip)) {
1158                        sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1159                            min(middle, (int)sxtlck->lwm.offset) : middle;
1160                }
1161
1162                rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1163                                                   righthalf + 1);
1164        }
1165
1166        if (!test_cflag(COMMIT_Nolink, ip)) {
1167                sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1168                    sxtlck->lwm.offset;
1169
1170                /* rxtlck->lwm.offset = XTENTRYSTART; */
1171                rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1172                    XTENTRYSTART;
1173        }
1174
1175        *rmpp = rmp;
1176        *rbnp = rbn;
1177
1178        jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1179        return rc;
1180
1181      clean_up:
1182
1183        /* Rollback quota allocation. */
1184        if (quota_allocation)
1185                dquot_free_block(ip, quota_allocation);
1186
1187        return (rc);
1188}
1189
1190
1191/*
1192 *      xtSplitRoot()
1193 *
1194 * function:
1195 *      split the full root page into original/root/split page and new
1196 *      right page
1197 *      i.e., root remains fixed in tree anchor (inode) and the root is
1198 *      copied to a single new right child page since root page <<
1199 *      non-root page, and the split root page contains a single entry
1200 *      for the new right child page.
1201 *
1202 * parameter:
1203 *      int             tid,
1204 *      struct inode    *ip,
1205 *      struct xtsplit  *split,
1206 *      struct metapage **rmpp)
1207 *
1208 * return:
1209 *      Pointer to page in which to insert or NULL on error.
1210 */
1211static int
1212xtSplitRoot(tid_t tid,
1213            struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1214{
1215        xtpage_t *sp;
1216        struct metapage *rmp;
1217        xtpage_t *rp;
1218        s64 rbn;
1219        int skip, nextindex;
1220        xad_t *xad;
1221        pxd_t *pxd;
1222        struct pxdlist *pxdlist;
1223        struct tlock *tlck;
1224        struct xtlock *xtlck;
1225        int rc;
1226
1227        sp = &JFS_IP(ip)->i_xtroot;
1228
1229        INCREMENT(xtStat.split);
1230
1231        /*
1232         *      allocate a single (right) child page
1233         */
1234        pxdlist = split->pxdlist;
1235        pxd = &pxdlist->pxd[pxdlist->npxd];
1236        pxdlist->npxd++;
1237        rbn = addressPXD(pxd);
1238        rmp = get_metapage(ip, rbn, PSIZE, 1);
1239        if (rmp == NULL)
1240                return -EIO;
1241
1242        /* Allocate blocks to quota. */
1243        rc = dquot_alloc_block(ip, lengthPXD(pxd));
1244        if (rc) {
1245                release_metapage(rmp);
1246                return rc;
1247        }
1248
1249        jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1250
1251        /*
1252         * acquire a transaction lock on the new right page;
1253         *
1254         * action: new page;
1255         */
1256        BT_MARK_DIRTY(rmp, ip);
1257
1258        rp = (xtpage_t *) rmp->data;
1259        rp->header.flag =
1260            (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1261        rp->header.self = *pxd;
1262        rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1263        rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
1264
1265        /* initialize sibling pointers */
1266        rp->header.next = 0;
1267        rp->header.prev = 0;
1268
1269        /*
1270         * copy the in-line root page into new right page extent
1271         */
1272        nextindex = le16_to_cpu(sp->header.maxentry);
1273        memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1274                (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1275
1276        /*
1277         * insert the new entry into the new right/child page
1278         * (skip index in the new right page will not change)
1279         */
1280        skip = split->index;
1281        /* if insert into middle, shift right remaining entries */
1282        if (skip != nextindex)
1283                memmove(&rp->xad[skip + 1], &rp->xad[skip],
1284                        (nextindex - skip) * sizeof(xad_t));
1285
1286        xad = &rp->xad[skip];
1287        XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1288
1289        /* update page header */
1290        rp->header.nextindex = cpu_to_le16(nextindex + 1);
1291
1292        if (!test_cflag(COMMIT_Nolink, ip)) {
1293                tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1294                xtlck = (struct xtlock *) & tlck->lock;
1295                xtlck->lwm.offset = XTENTRYSTART;
1296                xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1297                    XTENTRYSTART;
1298        }
1299
1300        /*
1301         *      reset the root
1302         *
1303         * init root with the single entry for the new right page
1304         * set the 1st entry offset to 0, which force the left-most key
1305         * at any level of the tree to be less than any search key.
1306         */
1307        /*
1308         * acquire a transaction lock on the root page (in-memory inode);
1309         *
1310         * action: root split;
1311         */
1312        BT_MARK_DIRTY(split->mp, ip);
1313
1314        xad = &sp->xad[XTENTRYSTART];
1315        XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1316
1317        /* update page header of root */
1318        sp->header.flag &= ~BT_LEAF;
1319        sp->header.flag |= BT_INTERNAL;
1320
1321        sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1322
1323        if (!test_cflag(COMMIT_Nolink, ip)) {
1324                tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1325                xtlck = (struct xtlock *) & tlck->lock;
1326                xtlck->lwm.offset = XTENTRYSTART;
1327                xtlck->lwm.length = 1;
1328        }
1329
1330        *rmpp = rmp;
1331
1332        jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1333        return 0;
1334}
1335
1336
1337/*
1338 *      xtExtend()
1339 *
1340 * function: extend in-place;
1341 *
1342 * note: existing extent may or may not have been committed.
1343 * caller is responsible for pager buffer cache update, and
1344 * working block allocation map update;
1345 * update pmap: alloc whole extended extent;
1346 */
1347int xtExtend(tid_t tid,         /* transaction id */
1348             struct inode *ip, s64 xoff,        /* delta extent offset */
1349             s32 xlen,          /* delta extent length */
1350             int flag)
1351{
1352        int rc = 0;
1353        int cmp;
1354        struct metapage *mp;    /* meta-page buffer */
1355        xtpage_t *p;            /* base B+-tree index page */
1356        s64 bn;
1357        int index, nextindex, len;
1358        struct btstack btstack; /* traverse stack */
1359        struct xtsplit split;   /* split information */
1360        xad_t *xad;
1361        s64 xaddr;
1362        struct tlock *tlck;
1363        struct xtlock *xtlck = NULL;
1364
1365        jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1366
1367        /* there must exist extent to be extended */
1368        if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1369                return rc;
1370
1371        /* retrieve search result */
1372        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1373
1374        if (cmp != 0) {
1375                XT_PUTPAGE(mp);
1376                jfs_error(ip->i_sb, "xtSearch did not find extent\n");
1377                return -EIO;
1378        }
1379
1380        /* extension must be contiguous */
1381        xad = &p->xad[index];
1382        if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1383                XT_PUTPAGE(mp);
1384                jfs_error(ip->i_sb, "extension is not contiguous\n");
1385                return -EIO;
1386        }
1387
1388        /*
1389         * acquire a transaction lock on the leaf page;
1390         *
1391         * action: xad insertion/extension;
1392         */
1393        BT_MARK_DIRTY(mp, ip);
1394        if (!test_cflag(COMMIT_Nolink, ip)) {
1395                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1396                xtlck = (struct xtlock *) & tlck->lock;
1397        }
1398
1399        /* extend will overflow extent ? */
1400        xlen = lengthXAD(xad) + xlen;
1401        if ((len = xlen - MAXXLEN) <= 0)
1402                goto extendOld;
1403
1404        /*
1405         *      extent overflow: insert entry for new extent
1406         */
1407//insertNew:
1408        xoff = offsetXAD(xad) + MAXXLEN;
1409        xaddr = addressXAD(xad) + MAXXLEN;
1410        nextindex = le16_to_cpu(p->header.nextindex);
1411
1412        /*
1413         *      if the leaf page is full, insert the new entry and
1414         *      propagate up the router entry for the new page from split
1415         *
1416         * The xtSplitUp() will insert the entry and unpin the leaf page.
1417         */
1418        if (nextindex == le16_to_cpu(p->header.maxentry)) {
1419                /* xtSpliUp() unpins leaf pages */
1420                split.mp = mp;
1421                split.index = index + 1;
1422                split.flag = XAD_NEW;
1423                split.off = xoff;       /* split offset */
1424                split.len = len;
1425                split.addr = xaddr;
1426                split.pxdlist = NULL;
1427                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1428                        return rc;
1429
1430                /* get back old page */
1431                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1432                if (rc)
1433                        return rc;
1434                /*
1435                 * if leaf root has been split, original root has been
1436                 * copied to new child page, i.e., original entry now
1437                 * resides on the new child page;
1438                 */
1439                if (p->header.flag & BT_INTERNAL) {
1440                        ASSERT(p->header.nextindex ==
1441                               cpu_to_le16(XTENTRYSTART + 1));
1442                        xad = &p->xad[XTENTRYSTART];
1443                        bn = addressXAD(xad);
1444                        XT_PUTPAGE(mp);
1445
1446                        /* get new child page */
1447                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1448                        if (rc)
1449                                return rc;
1450
1451                        BT_MARK_DIRTY(mp, ip);
1452                        if (!test_cflag(COMMIT_Nolink, ip)) {
1453                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1454                                xtlck = (struct xtlock *) & tlck->lock;
1455                        }
1456                }
1457        }
1458        /*
1459         *      insert the new entry into the leaf page
1460         */
1461        else {
1462                /* insert the new entry: mark the entry NEW */
1463                xad = &p->xad[index + 1];
1464                XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1465
1466                /* advance next available entry index */
1467                le16_add_cpu(&p->header.nextindex, 1);
1468        }
1469
1470        /* get back old entry */
1471        xad = &p->xad[index];
1472        xlen = MAXXLEN;
1473
1474        /*
1475         * extend old extent
1476         */
1477      extendOld:
1478        XADlength(xad, xlen);
1479        if (!(xad->flag & XAD_NEW))
1480                xad->flag |= XAD_EXTENDED;
1481
1482        if (!test_cflag(COMMIT_Nolink, ip)) {
1483                xtlck->lwm.offset =
1484                    (xtlck->lwm.offset) ? min(index,
1485                                              (int)xtlck->lwm.offset) : index;
1486                xtlck->lwm.length =
1487                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1488        }
1489
1490        /* unpin the leaf page */
1491        XT_PUTPAGE(mp);
1492
1493        return rc;
1494}
1495
1496#ifdef _NOTYET
1497/*
1498 *      xtTailgate()
1499 *
1500 * function: split existing 'tail' extent
1501 *      (split offset >= start offset of tail extent), and
1502 *      relocate and extend the split tail half;
1503 *
1504 * note: existing extent may or may not have been committed.
1505 * caller is responsible for pager buffer cache update, and
1506 * working block allocation map update;
1507 * update pmap: free old split tail extent, alloc new extent;
1508 */
1509int xtTailgate(tid_t tid,               /* transaction id */
1510               struct inode *ip, s64 xoff,      /* split/new extent offset */
1511               s32 xlen,        /* new extent length */
1512               s64 xaddr,       /* new extent address */
1513               int flag)
1514{
1515        int rc = 0;
1516        int cmp;
1517        struct metapage *mp;    /* meta-page buffer */
1518        xtpage_t *p;            /* base B+-tree index page */
1519        s64 bn;
1520        int index, nextindex, llen, rlen;
1521        struct btstack btstack; /* traverse stack */
1522        struct xtsplit split;   /* split information */
1523        xad_t *xad;
1524        struct tlock *tlck;
1525        struct xtlock *xtlck = 0;
1526        struct tlock *mtlck;
1527        struct maplock *pxdlock;
1528
1529/*
1530printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1531        (ulong)xoff, xlen, (ulong)xaddr);
1532*/
1533
1534        /* there must exist extent to be tailgated */
1535        if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
1536                return rc;
1537
1538        /* retrieve search result */
1539        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1540
1541        if (cmp != 0) {
1542                XT_PUTPAGE(mp);
1543                jfs_error(ip->i_sb, "couldn't find extent\n");
1544                return -EIO;
1545        }
1546
1547        /* entry found must be last entry */
1548        nextindex = le16_to_cpu(p->header.nextindex);
1549        if (index != nextindex - 1) {
1550                XT_PUTPAGE(mp);
1551                jfs_error(ip->i_sb, "the entry found is not the last entry\n");
1552                return -EIO;
1553        }
1554
1555        BT_MARK_DIRTY(mp, ip);
1556        /*
1557         * acquire tlock of the leaf page containing original entry
1558         */
1559        if (!test_cflag(COMMIT_Nolink, ip)) {
1560                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1561                xtlck = (struct xtlock *) & tlck->lock;
1562        }
1563
1564        /* completely replace extent ? */
1565        xad = &p->xad[index];
1566/*
1567printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1568        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
1569*/
1570        if ((llen = xoff - offsetXAD(xad)) == 0)
1571                goto updateOld;
1572
1573        /*
1574         *      partially replace extent: insert entry for new extent
1575         */
1576//insertNew:
1577        /*
1578         *      if the leaf page is full, insert the new entry and
1579         *      propagate up the router entry for the new page from split
1580         *
1581         * The xtSplitUp() will insert the entry and unpin the leaf page.
1582         */
1583        if (nextindex == le16_to_cpu(p->header.maxentry)) {
1584                /* xtSpliUp() unpins leaf pages */
1585                split.mp = mp;
1586                split.index = index + 1;
1587                split.flag = XAD_NEW;
1588                split.off = xoff;       /* split offset */
1589                split.len = xlen;
1590                split.addr = xaddr;
1591                split.pxdlist = NULL;
1592                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1593                        return rc;
1594
1595                /* get back old page */
1596                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1597                if (rc)
1598                        return rc;
1599                /*
1600                 * if leaf root has been split, original root has been
1601                 * copied to new child page, i.e., original entry now
1602                 * resides on the new child page;
1603                 */
1604                if (p->header.flag & BT_INTERNAL) {
1605                        ASSERT(p->header.nextindex ==
1606                               cpu_to_le16(XTENTRYSTART + 1));
1607                        xad = &p->xad[XTENTRYSTART];
1608                        bn = addressXAD(xad);
1609                        XT_PUTPAGE(mp);
1610
1611                        /* get new child page */
1612                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1613                        if (rc)
1614                                return rc;
1615
1616                        BT_MARK_DIRTY(mp, ip);
1617                        if (!test_cflag(COMMIT_Nolink, ip)) {
1618                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1619                                xtlck = (struct xtlock *) & tlck->lock;
1620                        }
1621                }
1622        }
1623        /*
1624         *      insert the new entry into the leaf page
1625         */
1626        else {
1627                /* insert the new entry: mark the entry NEW */
1628                xad = &p->xad[index + 1];
1629                XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1630
1631                /* advance next available entry index */
1632                le16_add_cpu(&p->header.nextindex, 1);
1633        }
1634
1635        /* get back old XAD */
1636        xad = &p->xad[index];
1637
1638        /*
1639         * truncate/relocate old extent at split offset
1640         */
1641      updateOld:
1642        /* update dmap for old/committed/truncated extent */
1643        rlen = lengthXAD(xad) - llen;
1644        if (!(xad->flag & XAD_NEW)) {
1645                /* free from PWMAP at commit */
1646                if (!test_cflag(COMMIT_Nolink, ip)) {
1647                        mtlck = txMaplock(tid, ip, tlckMAP);
1648                        pxdlock = (struct maplock *) & mtlck->lock;
1649                        pxdlock->flag = mlckFREEPXD;
1650                        PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
1651                        PXDlength(&pxdlock->pxd, rlen);
1652                        pxdlock->index = 1;
1653                }
1654        } else
1655                /* free from WMAP */
1656                dbFree(ip, addressXAD(xad) + llen, (s64) rlen);
1657
1658        if (llen)
1659                /* truncate */
1660                XADlength(xad, llen);
1661        else
1662                /* replace */
1663                XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1664
1665        if (!test_cflag(COMMIT_Nolink, ip)) {
1666                xtlck->lwm.offset = (xtlck->lwm.offset) ?
1667                    min(index, (int)xtlck->lwm.offset) : index;
1668                xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1669                    xtlck->lwm.offset;
1670        }
1671
1672        /* unpin the leaf page */
1673        XT_PUTPAGE(mp);
1674
1675        return rc;
1676}
1677#endif /* _NOTYET */
1678
1679/*
1680 *      xtUpdate()
1681 *
1682 * function: update XAD;
1683 *
1684 *      update extent for allocated_but_not_recorded or
1685 *      compressed extent;
1686 *
1687 * parameter:
1688 *      nxad    - new XAD;
1689 *              logical extent of the specified XAD must be completely
1690 *              contained by an existing XAD;
1691 */
1692int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1693{                               /* new XAD */
1694        int rc = 0;
1695        int cmp;
1696        struct metapage *mp;    /* meta-page buffer */
1697        xtpage_t *p;            /* base B+-tree index page */
1698        s64 bn;
1699        int index0, index, newindex, nextindex;
1700        struct btstack btstack; /* traverse stack */
1701        struct xtsplit split;   /* split information */
1702        xad_t *xad, *lxad, *rxad;
1703        int xflag;
1704        s64 nxoff, xoff;
1705        int nxlen, xlen, lxlen, rxlen;
1706        s64 nxaddr, xaddr;
1707        struct tlock *tlck;
1708        struct xtlock *xtlck = NULL;
1709        int newpage = 0;
1710
1711        /* there must exist extent to be tailgated */
1712        nxoff = offsetXAD(nxad);
1713        nxlen = lengthXAD(nxad);
1714        nxaddr = addressXAD(nxad);
1715
1716        if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1717                return rc;
1718
1719        /* retrieve search result */
1720        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1721
1722        if (cmp != 0) {
1723                XT_PUTPAGE(mp);
1724                jfs_error(ip->i_sb, "Could not find extent\n");
1725                return -EIO;
1726        }
1727
1728        BT_MARK_DIRTY(mp, ip);
1729        /*
1730         * acquire tlock of the leaf page containing original entry
1731         */
1732        if (!test_cflag(COMMIT_Nolink, ip)) {
1733                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1734                xtlck = (struct xtlock *) & tlck->lock;
1735        }
1736
1737        xad = &p->xad[index0];
1738        xflag = xad->flag;
1739        xoff = offsetXAD(xad);
1740        xlen = lengthXAD(xad);
1741        xaddr = addressXAD(xad);
1742
1743        /* nXAD must be completely contained within XAD */
1744        if ((xoff > nxoff) ||
1745            (nxoff + nxlen > xoff + xlen)) {
1746                XT_PUTPAGE(mp);
1747                jfs_error(ip->i_sb,
1748                          "nXAD in not completely contained within XAD\n");
1749                return -EIO;
1750        }
1751
1752        index = index0;
1753        newindex = index + 1;
1754        nextindex = le16_to_cpu(p->header.nextindex);
1755
1756#ifdef  _JFS_WIP_NOCOALESCE
1757        if (xoff < nxoff)
1758                goto updateRight;
1759
1760        /*
1761         * replace XAD with nXAD
1762         */
1763      replace:                  /* (nxoff == xoff) */
1764        if (nxlen == xlen) {
1765                /* replace XAD with nXAD:recorded */
1766                *xad = *nxad;
1767                xad->flag = xflag & ~XAD_NOTRECORDED;
1768
1769                goto out;
1770        } else                  /* (nxlen < xlen) */
1771                goto updateLeft;
1772#endif                          /* _JFS_WIP_NOCOALESCE */
1773
1774/* #ifdef _JFS_WIP_COALESCE */
1775        if (xoff < nxoff)
1776                goto coalesceRight;
1777
1778        /*
1779         * coalesce with left XAD
1780         */
1781//coalesceLeft: /* (xoff == nxoff) */
1782        /* is XAD first entry of page ? */
1783        if (index == XTENTRYSTART)
1784                goto replace;
1785
1786        /* is nXAD logically and physically contiguous with lXAD ? */
1787        lxad = &p->xad[index - 1];
1788        lxlen = lengthXAD(lxad);
1789        if (!(lxad->flag & XAD_NOTRECORDED) &&
1790            (nxoff == offsetXAD(lxad) + lxlen) &&
1791            (nxaddr == addressXAD(lxad) + lxlen) &&
1792            (lxlen + nxlen < MAXXLEN)) {
1793                /* extend right lXAD */
1794                index0 = index - 1;
1795                XADlength(lxad, lxlen + nxlen);
1796
1797                /* If we just merged two extents together, need to make sure the
1798                 * right extent gets logged.  If the left one is marked XAD_NEW,
1799                 * then we know it will be logged.  Otherwise, mark as
1800                 * XAD_EXTENDED
1801                 */
1802                if (!(lxad->flag & XAD_NEW))
1803                        lxad->flag |= XAD_EXTENDED;
1804
1805                if (xlen > nxlen) {
1806                        /* truncate XAD */
1807                        XADoffset(xad, xoff + nxlen);
1808                        XADlength(xad, xlen - nxlen);
1809                        XADaddress(xad, xaddr + nxlen);
1810                        goto out;
1811                } else {        /* (xlen == nxlen) */
1812
1813                        /* remove XAD */
1814                        if (index < nextindex - 1)
1815                                memmove(&p->xad[index], &p->xad[index + 1],
1816                                        (nextindex - index -
1817                                         1) << L2XTSLOTSIZE);
1818
1819                        p->header.nextindex =
1820                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1821                                        1);
1822
1823                        index = index0;
1824                        newindex = index + 1;
1825                        nextindex = le16_to_cpu(p->header.nextindex);
1826                        xoff = nxoff = offsetXAD(lxad);
1827                        xlen = nxlen = lxlen + nxlen;
1828                        xaddr = nxaddr = addressXAD(lxad);
1829                        goto coalesceRight;
1830                }
1831        }
1832
1833        /*
1834         * replace XAD with nXAD
1835         */
1836      replace:                  /* (nxoff == xoff) */
1837        if (nxlen == xlen) {
1838                /* replace XAD with nXAD:recorded */
1839                *xad = *nxad;
1840                xad->flag = xflag & ~XAD_NOTRECORDED;
1841
1842                goto coalesceRight;
1843        } else                  /* (nxlen < xlen) */
1844                goto updateLeft;
1845
1846        /*
1847         * coalesce with right XAD
1848         */
1849      coalesceRight:            /* (xoff <= nxoff) */
1850        /* is XAD last entry of page ? */
1851        if (newindex == nextindex) {
1852                if (xoff == nxoff)
1853                        goto out;
1854                goto updateRight;
1855        }
1856
1857        /* is nXAD logically and physically contiguous with rXAD ? */
1858        rxad = &p->xad[index + 1];
1859        rxlen = lengthXAD(rxad);
1860        if (!(rxad->flag & XAD_NOTRECORDED) &&
1861            (nxoff + nxlen == offsetXAD(rxad)) &&
1862            (nxaddr + nxlen == addressXAD(rxad)) &&
1863            (rxlen + nxlen < MAXXLEN)) {
1864                /* extend left rXAD */
1865                XADoffset(rxad, nxoff);
1866                XADlength(rxad, rxlen + nxlen);
1867                XADaddress(rxad, nxaddr);
1868
1869                /* If we just merged two extents together, need to make sure
1870                 * the left extent gets logged.  If the right one is marked
1871                 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
1872                 * XAD_EXTENDED
1873                 */
1874                if (!(rxad->flag & XAD_NEW))
1875                        rxad->flag |= XAD_EXTENDED;
1876
1877                if (xlen > nxlen)
1878                        /* truncate XAD */
1879                        XADlength(xad, xlen - nxlen);
1880                else {          /* (xlen == nxlen) */
1881
1882                        /* remove XAD */
1883                        memmove(&p->xad[index], &p->xad[index + 1],
1884                                (nextindex - index - 1) << L2XTSLOTSIZE);
1885
1886                        p->header.nextindex =
1887                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1888                                        1);
1889                }
1890
1891                goto out;
1892        } else if (xoff == nxoff)
1893                goto out;
1894
1895        if (xoff >= nxoff) {
1896                XT_PUTPAGE(mp);
1897                jfs_error(ip->i_sb, "xoff >= nxoff\n");
1898                return -EIO;
1899        }
1900/* #endif _JFS_WIP_COALESCE */
1901
1902        /*
1903         * split XAD into (lXAD, nXAD):
1904         *
1905         *          |---nXAD--->
1906         * --|----------XAD----------|--
1907         *   |-lXAD-|
1908         */
1909      updateRight:              /* (xoff < nxoff) */
1910        /* truncate old XAD as lXAD:not_recorded */
1911        xad = &p->xad[index];
1912        XADlength(xad, nxoff - xoff);
1913
1914        /* insert nXAD:recorded */
1915        if (nextindex == le16_to_cpu(p->header.maxentry)) {
1916
1917                /* xtSpliUp() unpins leaf pages */
1918                split.mp = mp;
1919                split.index = newindex;
1920                split.flag = xflag & ~XAD_NOTRECORDED;
1921                split.off = nxoff;
1922                split.len = nxlen;
1923                split.addr = nxaddr;
1924                split.pxdlist = NULL;
1925                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1926                        return rc;
1927
1928                /* get back old page */
1929                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1930                if (rc)
1931                        return rc;
1932                /*
1933                 * if leaf root has been split, original root has been
1934                 * copied to new child page, i.e., original entry now
1935                 * resides on the new child page;
1936                 */
1937                if (p->header.flag & BT_INTERNAL) {
1938                        ASSERT(p->header.nextindex ==
1939                               cpu_to_le16(XTENTRYSTART + 1));
1940                        xad = &p->xad[XTENTRYSTART];
1941                        bn = addressXAD(xad);
1942                        XT_PUTPAGE(mp);
1943
1944                        /* get new child page */
1945                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1946                        if (rc)
1947                                return rc;
1948
1949                        BT_MARK_DIRTY(mp, ip);
1950                        if (!test_cflag(COMMIT_Nolink, ip)) {
1951                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1952                                xtlck = (struct xtlock *) & tlck->lock;
1953                        }
1954                } else {
1955                        /* is nXAD on new page ? */
1956                        if (newindex >
1957                            (le16_to_cpu(p->header.maxentry) >> 1)) {
1958                                newindex =
1959                                    newindex -
1960                                    le16_to_cpu(p->header.nextindex) +
1961                                    XTENTRYSTART;
1962                                newpage = 1;
1963                        }
1964                }
1965        } else {
1966                /* if insert into middle, shift right remaining entries */
1967                if (newindex < nextindex)
1968                        memmove(&p->xad[newindex + 1], &p->xad[newindex],
1969                                (nextindex - newindex) << L2XTSLOTSIZE);
1970
1971                /* insert the entry */
1972                xad = &p->xad[newindex];
1973                *xad = *nxad;
1974                xad->flag = xflag & ~XAD_NOTRECORDED;
1975
1976                /* advance next available entry index. */
1977                p->header.nextindex =
1978                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1979        }
1980
1981        /*
1982         * does nXAD force 3-way split ?
1983         *
1984         *          |---nXAD--->|
1985         * --|----------XAD-------------|--
1986         *   |-lXAD-|           |-rXAD -|
1987         */
1988        if (nxoff + nxlen == xoff + xlen)
1989                goto out;
1990
1991        /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
1992        if (newpage) {
1993                /* close out old page */
1994                if (!test_cflag(COMMIT_Nolink, ip)) {
1995                        xtlck->lwm.offset = (xtlck->lwm.offset) ?
1996                            min(index0, (int)xtlck->lwm.offset) : index0;
1997                        xtlck->lwm.length =
1998                            le16_to_cpu(p->header.nextindex) -
1999                            xtlck->lwm.offset;
2000                }
2001
2002                bn = le64_to_cpu(p->header.next);
2003                XT_PUTPAGE(mp);
2004
2005                /* get new right page */
2006                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2007                if (rc)
2008                        return rc;
2009
2010                BT_MARK_DIRTY(mp, ip);
2011                if (!test_cflag(COMMIT_Nolink, ip)) {
2012                        tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2013                        xtlck = (struct xtlock *) & tlck->lock;
2014                }
2015
2016                index0 = index = newindex;
2017        } else
2018                index++;
2019
2020        newindex = index + 1;
2021        nextindex = le16_to_cpu(p->header.nextindex);
2022        xlen = xlen - (nxoff - xoff);
2023        xoff = nxoff;
2024        xaddr = nxaddr;
2025
2026        /* recompute split pages */
2027        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2028                XT_PUTPAGE(mp);
2029
2030                if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
2031                        return rc;
2032
2033                /* retrieve search result */
2034                XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
2035
2036                if (cmp != 0) {
2037                        XT_PUTPAGE(mp);
2038                        jfs_error(ip->i_sb, "xtSearch failed\n");
2039                        return -EIO;
2040                }
2041
2042                if (index0 != index) {
2043                        XT_PUTPAGE(mp);
2044                        jfs_error(ip->i_sb, "unexpected value of index\n");
2045                        return -EIO;
2046                }
2047        }
2048
2049        /*
2050         * split XAD into (nXAD, rXAD)
2051         *
2052         *          ---nXAD---|
2053         * --|----------XAD----------|--
2054         *                    |-rXAD-|
2055         */
2056      updateLeft:               /* (nxoff == xoff) && (nxlen < xlen) */
2057        /* update old XAD with nXAD:recorded */
2058        xad = &p->xad[index];
2059        *xad = *nxad;
2060        xad->flag = xflag & ~XAD_NOTRECORDED;
2061
2062        /* insert rXAD:not_recorded */
2063        xoff = xoff + nxlen;
2064        xlen = xlen - nxlen;
2065        xaddr = xaddr + nxlen;
2066        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2067/*
2068printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2069*/
2070                /* xtSpliUp() unpins leaf pages */
2071                split.mp = mp;
2072                split.index = newindex;
2073                split.flag = xflag;
2074                split.off = xoff;
2075                split.len = xlen;
2076                split.addr = xaddr;
2077                split.pxdlist = NULL;
2078                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2079                        return rc;
2080
2081                /* get back old page */
2082                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2083                if (rc)
2084                        return rc;
2085
2086                /*
2087                 * if leaf root has been split, original root has been
2088                 * copied to new child page, i.e., original entry now
2089                 * resides on the new child page;
2090                 */
2091                if (p->header.flag & BT_INTERNAL) {
2092                        ASSERT(p->header.nextindex ==
2093                               cpu_to_le16(XTENTRYSTART + 1));
2094                        xad = &p->xad[XTENTRYSTART];
2095                        bn = addressXAD(xad);
2096                        XT_PUTPAGE(mp);
2097
2098                        /* get new child page */
2099                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2100                        if (rc)
2101                                return rc;
2102
2103                        BT_MARK_DIRTY(mp, ip);
2104                        if (!test_cflag(COMMIT_Nolink, ip)) {
2105                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2106                                xtlck = (struct xtlock *) & tlck->lock;
2107                        }
2108                }
2109        } else {
2110                /* if insert into middle, shift right remaining entries */
2111                if (newindex < nextindex)
2112                        memmove(&p->xad[newindex + 1], &p->xad[newindex],
2113                                (nextindex - newindex) << L2XTSLOTSIZE);
2114
2115                /* insert the entry */
2116                xad = &p->xad[newindex];
2117                XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2118
2119                /* advance next available entry index. */
2120                p->header.nextindex =
2121                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2122        }
2123
2124      out:
2125        if (!test_cflag(COMMIT_Nolink, ip)) {
2126                xtlck->lwm.offset = (xtlck->lwm.offset) ?
2127                    min(index0, (int)xtlck->lwm.offset) : index0;
2128                xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2129                    xtlck->lwm.offset;
2130        }
2131
2132        /* unpin the leaf page */
2133        XT_PUTPAGE(mp);
2134
2135        return rc;
2136}
2137
2138
2139/*
2140 *      xtAppend()
2141 *
2142 * function: grow in append mode from contiguous region specified ;
2143 *
2144 * parameter:
2145 *      tid             - transaction id;
2146 *      ip              - file object;
2147 *      xflag           - extent flag:
2148 *      xoff            - extent offset;
2149 *      maxblocks       - max extent length;
2150 *      xlen            - extent length (in/out);
2151 *      xaddrp          - extent address pointer (in/out):
2152 *      flag            -
2153 *
2154 * return:
2155 */
2156int xtAppend(tid_t tid,         /* transaction id */
2157             struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
2158             s32 * xlenp,       /* (in/out) */
2159             s64 * xaddrp,      /* (in/out) */
2160             int flag)
2161{
2162        int rc = 0;
2163        struct metapage *mp;    /* meta-page buffer */
2164        xtpage_t *p;            /* base B+-tree index page */
2165        s64 bn, xaddr;
2166        int index, nextindex;
2167        struct btstack btstack; /* traverse stack */
2168        struct xtsplit split;   /* split information */
2169        xad_t *xad;
2170        int cmp;
2171        struct tlock *tlck;
2172        struct xtlock *xtlck;
2173        int nsplit, nblocks, xlen;
2174        struct pxdlist pxdlist;
2175        pxd_t *pxd;
2176        s64 next;
2177
2178        xaddr = *xaddrp;
2179        xlen = *xlenp;
2180        jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
2181                 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
2182
2183        /*
2184         *      search for the entry location at which to insert:
2185         *
2186         * xtFastSearch() and xtSearch() both returns (leaf page
2187         * pinned, index at which to insert).
2188         * n.b. xtSearch() may return index of maxentry of
2189         * the full page.
2190         */
2191        if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
2192                return rc;
2193
2194        /* retrieve search result */
2195        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2196
2197        if (cmp == 0) {
2198                rc = -EEXIST;
2199                goto out;
2200        }
2201
2202        if (next)
2203                xlen = min(xlen, (int)(next - xoff));
2204//insert:
2205        /*
2206         *      insert entry for new extent
2207         */
2208        xflag |= XAD_NEW;
2209
2210        /*
2211         *      if the leaf page is full, split the page and
2212         *      propagate up the router entry for the new page from split
2213         *
2214         * The xtSplitUp() will insert the entry and unpin the leaf page.
2215         */
2216        nextindex = le16_to_cpu(p->header.nextindex);
2217        if (nextindex < le16_to_cpu(p->header.maxentry))
2218                goto insertLeaf;
2219
2220        /*
2221         * allocate new index blocks to cover index page split(s)
2222         */
2223        nsplit = btstack.nsplit;
2224        split.pxdlist = &pxdlist;
2225        pxdlist.maxnpxd = pxdlist.npxd = 0;
2226        pxd = &pxdlist.pxd[0];
2227        nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2228        for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2229                if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2230                        PXDaddress(pxd, xaddr);
2231                        PXDlength(pxd, nblocks);
2232
2233                        pxdlist.maxnpxd++;
2234
2235                        continue;
2236                }
2237
2238                /* undo allocation */
2239
2240                goto out;
2241        }
2242
2243        xlen = min(xlen, maxblocks);
2244
2245        /*
2246         * allocate data extent requested
2247         */
2248        if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2249                goto out;
2250
2251        split.mp = mp;
2252        split.index = index;
2253        split.flag = xflag;
2254        split.off = xoff;
2255        split.len = xlen;
2256        split.addr = xaddr;
2257        if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2258                /* undo data extent allocation */
2259                dbFree(ip, *xaddrp, (s64) * xlenp);
2260
2261                return rc;
2262        }
2263
2264        *xaddrp = xaddr;
2265        *xlenp = xlen;
2266        return 0;
2267
2268        /*
2269         *      insert the new entry into the leaf page
2270         */
2271      insertLeaf:
2272        /*
2273         * allocate data extent requested
2274         */
2275        if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2276                goto out;
2277
2278        BT_MARK_DIRTY(mp, ip);
2279        /*
2280         * acquire a transaction lock on the leaf page;
2281         *
2282         * action: xad insertion/extension;
2283         */
2284        tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2285        xtlck = (struct xtlock *) & tlck->lock;
2286
2287        /* insert the new entry: mark the entry NEW */
2288        xad = &p->xad[index];
2289        XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2290
2291        /* advance next available entry index */
2292        le16_add_cpu(&p->header.nextindex, 1);
2293
2294        xtlck->lwm.offset =
2295            (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2296        xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2297            xtlck->lwm.offset;
2298
2299        *xaddrp = xaddr;
2300        *xlenp = xlen;
2301
2302      out:
2303        /* unpin the leaf page */
2304        XT_PUTPAGE(mp);
2305
2306        return rc;
2307}
2308#ifdef _STILL_TO_PORT
2309
2310/* - TBD for defragmentaion/reorganization -
2311 *
2312 *      xtDelete()
2313 *
2314 * function:
2315 *      delete the entry with the specified key.
2316 *
2317 *      N.B.: whole extent of the entry is assumed to be deleted.
2318 *
2319 * parameter:
2320 *
2321 * return:
2322 *      ENOENT: if the entry is not found.
2323 *
2324 * exception:
2325 */
2326int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2327{
2328        int rc = 0;
2329        struct btstack btstack;
2330        int cmp;
2331        s64 bn;
2332        struct metapage *mp;
2333        xtpage_t *p;
2334        int index, nextindex;
2335        struct tlock *tlck;
2336        struct xtlock *xtlck;
2337
2338        /*
2339         * find the matching entry; xtSearch() pins the page
2340         */
2341        if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2342                return rc;
2343
2344        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2345        if (cmp) {
2346                /* unpin the leaf page */
2347                XT_PUTPAGE(mp);
2348                return -ENOENT;
2349        }
2350
2351        /*
2352         * delete the entry from the leaf page
2353         */
2354        nextindex = le16_to_cpu(p->header.nextindex);
2355        le16_add_cpu(&p->header.nextindex, -1);
2356
2357        /*
2358         * if the leaf page bocome empty, free the page
2359         */
2360        if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
2361                return (xtDeleteUp(tid, ip, mp, p, &btstack));
2362
2363        BT_MARK_DIRTY(mp, ip);
2364        /*
2365         * acquire a transaction lock on the leaf page;
2366         *
2367         * action:xad deletion;
2368         */
2369        tlck = txLock(tid, ip, mp, tlckXTREE);
2370        xtlck = (struct xtlock *) & tlck->lock;
2371        xtlck->lwm.offset =
2372            (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
2373
2374        /* if delete from middle, shift left/compact the remaining entries */
2375        if (index < nextindex - 1)
2376                memmove(&p->xad[index], &p->xad[index + 1],
2377                        (nextindex - index - 1) * sizeof(xad_t));
2378
2379        XT_PUTPAGE(mp);
2380
2381        return 0;
2382}
2383
2384
2385/* - TBD for defragmentaion/reorganization -
2386 *
2387 *      xtDeleteUp()
2388 *
2389 * function:
2390 *      free empty pages as propagating deletion up the tree
2391 *
2392 * parameter:
2393 *
2394 * return:
2395 */
2396static int
2397xtDeleteUp(tid_t tid, struct inode *ip,
2398           struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
2399{
2400        int rc = 0;
2401        struct metapage *mp;
2402        xtpage_t *p;
2403        int index, nextindex;
2404        s64 xaddr;
2405        int xlen;
2406        struct btframe *parent;
2407        struct tlock *tlck;
2408        struct xtlock *xtlck;
2409
2410        /*
2411         * keep root leaf page which has become empty
2412         */
2413        if (fp->header.flag & BT_ROOT) {
2414                /* keep the root page */
2415                fp->header.flag &= ~BT_INTERNAL;
2416                fp->header.flag |= BT_LEAF;
2417                fp->header.nextindex = cpu_to_le16(XTENTRYSTART);
2418
2419                /* XT_PUTPAGE(fmp); */
2420
2421                return 0;
2422        }
2423
2424        /*
2425         * free non-root leaf page
2426         */
2427        if ((rc = xtRelink(tid, ip, fp))) {
2428                XT_PUTPAGE(fmp);
2429                return rc;
2430        }
2431
2432        xaddr = addressPXD(&fp->header.self);
2433        xlen = lengthPXD(&fp->header.self);
2434        /* free the page extent */
2435        dbFree(ip, xaddr, (s64) xlen);
2436
2437        /* free the buffer page */
2438        discard_metapage(fmp);
2439
2440        /*
2441         * propagate page deletion up the index tree
2442         *
2443         * If the delete from the parent page makes it empty,
2444         * continue all the way up the tree.
2445         * stop if the root page is reached (which is never deleted) or
2446         * if the entry deletion does not empty the page.
2447         */
2448        while ((parent = BT_POP(btstack)) != NULL) {
2449                /* get/pin the parent page <sp> */
2450                XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2451                if (rc)
2452                        return rc;
2453
2454                index = parent->index;
2455
2456                /* delete the entry for the freed child page from parent.
2457                 */
2458                nextindex = le16_to_cpu(p->header.nextindex);
2459
2460                /*
2461                 * the parent has the single entry being deleted:
2462                 * free the parent page which has become empty.
2463                 */
2464                if (nextindex == 1) {
2465                        if (p->header.flag & BT_ROOT) {
2466                                /* keep the root page */
2467                                p->header.flag &= ~BT_INTERNAL;
2468                                p->header.flag |= BT_LEAF;
2469                                p->header.nextindex =
2470                                    cpu_to_le16(XTENTRYSTART);
2471
2472                                /* XT_PUTPAGE(mp); */
2473
2474                                break;
2475                        } else {
2476                                /* free the parent page */
2477                                if ((rc = xtRelink(tid, ip, p)))
2478                                        return rc;
2479
2480                                xaddr = addressPXD(&p->header.self);
2481                                /* free the page extent */
2482                                dbFree(ip, xaddr,
2483                                       (s64) JFS_SBI(ip->i_sb)->nbperpage);
2484
2485                                /* unpin/free the buffer page */
2486                                discard_metapage(mp);
2487
2488                                /* propagate up */
2489                                continue;
2490                        }
2491                }
2492                /*
2493                 * the parent has other entries remaining:
2494                 * delete the router entry from the parent page.
2495                 */
2496                else {
2497                        BT_MARK_DIRTY(mp, ip);
2498                        /*
2499                         * acquire a transaction lock on the leaf page;
2500                         *
2501                         * action:xad deletion;
2502                         */
2503                        tlck = txLock(tid, ip, mp, tlckXTREE);
2504                        xtlck = (struct xtlock *) & tlck->lock;
2505                        xtlck->lwm.offset =
2506                            (xtlck->lwm.offset) ? min(index,
2507                                                      xtlck->lwm.
2508                                                      offset) : index;
2509
2510                        /* if delete from middle,
2511                         * shift left/compact the remaining entries in the page
2512                         */
2513                        if (index < nextindex - 1)
2514                                memmove(&p->xad[index], &p->xad[index + 1],
2515                                        (nextindex - index -
2516                                         1) << L2XTSLOTSIZE);
2517
2518                        le16_add_cpu(&p->header.nextindex, -1);
2519                        jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
2520                                 (ulong) parent->bn, index);
2521                }
2522
2523                /* unpin the parent page */
2524                XT_PUTPAGE(mp);
2525
2526                /* exit propagation up */
2527                break;
2528        }
2529
2530        return 0;
2531}
2532
2533
2534/*
2535 * NAME:        xtRelocate()
2536 *
2537 * FUNCTION:    relocate xtpage or data extent of regular file;
2538 *              This function is mainly used by defragfs utility.
2539 *
2540 * NOTE:        This routine does not have the logic to handle
2541 *              uncommitted allocated extent. The caller should call
2542 *              txCommit() to commit all the allocation before call
2543 *              this routine.
2544 */
2545int
2546xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,  /* old XAD */
2547           s64 nxaddr,          /* new xaddr */
2548           int xtype)
2549{                               /* extent type: XTPAGE or DATAEXT */
2550        int rc = 0;
2551        struct tblock *tblk;
2552        struct tlock *tlck;
2553        struct xtlock *xtlck;
2554        struct metapage *mp, *pmp, *lmp, *rmp;  /* meta-page buffer */
2555        xtpage_t *p, *pp, *rp, *lp;     /* base B+-tree index page */
2556        xad_t *xad;
2557        pxd_t *pxd;
2558        s64 xoff, xsize;
2559        int xlen;
2560        s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
2561        cbuf_t *cp;
2562        s64 offset, nbytes, nbrd, pno;
2563        int nb, npages, nblks;
2564        s64 bn;
2565        int cmp;
2566        int index;
2567        struct pxd_lock *pxdlock;
2568        struct btstack btstack; /* traverse stack */
2569
2570        xtype = xtype & EXTENT_TYPE;
2571
2572        xoff = offsetXAD(oxad);
2573        oxaddr = addressXAD(oxad);
2574        xlen = lengthXAD(oxad);
2575
2576        /* validate extent offset */
2577        offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2578        if (offset >= ip->i_size)
2579                return -ESTALE; /* stale extent */
2580
2581        jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
2582                 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2583
2584        /*
2585         *      1. get and validate the parent xtpage/xad entry
2586         *      covering the source extent to be relocated;
2587         */
2588        if (xtype == DATAEXT) {
2589                /* search in leaf entry */
2590                rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2591                if (rc)
2592                        return rc;
2593
2594                /* retrieve search result */
2595                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2596
2597                if (cmp) {
2598                        XT_PUTPAGE(pmp);
2599                        return -ESTALE;
2600                }
2601
2602                /* validate for exact match with a single entry */
2603                xad = &pp->xad[index];
2604                if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
2605                        XT_PUTPAGE(pmp);
2606                        return -ESTALE;
2607                }
2608        } else {                /* (xtype == XTPAGE) */
2609
2610                /* search in internal entry */
2611                rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
2612                if (rc)
2613                        return rc;
2614
2615                /* retrieve search result */
2616                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2617
2618                if (cmp) {
2619                        XT_PUTPAGE(pmp);
2620                        return -ESTALE;
2621                }
2622
2623                /* xtSearchNode() validated for exact match with a single entry
2624                 */
2625                xad = &pp->xad[index];
2626        }
2627        jfs_info("xtRelocate: parent xad entry validated.");
2628
2629        /*
2630         *      2. relocate the extent
2631         */
2632        if (xtype == DATAEXT) {
2633                /* if the extent is allocated-but-not-recorded
2634                 * there is no real data to be moved in this extent,
2635                 */
2636                if (xad->flag & XAD_NOTRECORDED)
2637                        goto out;
2638                else
2639                        /* release xtpage for cmRead()/xtLookup() */
2640                        XT_PUTPAGE(pmp);
2641
2642                /*
2643                 *      cmRelocate()
2644                 *
2645                 * copy target data pages to be relocated;
2646                 *
2647                 * data extent must start at page boundary and
2648                 * multiple of page size (except the last data extent);
2649                 * read in each page of the source data extent into cbuf,
2650                 * update the cbuf extent descriptor of the page to be
2651                 * homeward bound to new dst data extent
2652                 * copy the data from the old extent to new extent.
2653                 * copy is essential for compressed files to avoid problems
2654                 * that can arise if there was a change in compression
2655                 * algorithms.
2656                 * it is a good strategy because it may disrupt cache
2657                 * policy to keep the pages in memory afterwards.
2658                 */
2659                offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2660                assert((offset & CM_OFFSET) == 0);
2661                nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2662                pno = offset >> CM_L2BSIZE;
2663                npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2664/*
2665                npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2666                          (offset >> CM_L2BSIZE) + 1;
2667*/
2668                sxaddr = oxaddr;
2669                dxaddr = nxaddr;
2670
2671                /* process the request one cache buffer at a time */
2672                for (nbrd = 0; nbrd < nbytes; nbrd += nb,
2673                     offset += nb, pno++, npages--) {
2674                        /* compute page size */
2675                        nb = min(nbytes - nbrd, CM_BSIZE);
2676
2677                        /* get the cache buffer of the page */
2678                        if (rc = cmRead(ip, offset, npages, &cp))
2679                                break;
2680
2681                        assert(addressPXD(&cp->cm_pxd) == sxaddr);
2682                        assert(!cp->cm_modified);
2683
2684                        /* bind buffer with the new extent address */
2685                        nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
2686                        cmSetXD(ip, cp, pno, dxaddr, nblks);
2687
2688                        /* release the cbuf, mark it as modified */
2689                        cmPut(cp, true);
2690
2691                        dxaddr += nblks;
2692                        sxaddr += nblks;
2693                }
2694
2695                /* get back parent page */
2696                if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2697                        return rc;
2698
2699                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2700                jfs_info("xtRelocate: target data extent relocated.");
2701        } else {                /* (xtype == XTPAGE) */
2702
2703                /*
2704                 * read in the target xtpage from the source extent;
2705                 */
2706                XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2707                if (rc) {
2708                        XT_PUTPAGE(pmp);
2709                        return rc;
2710                }
2711
2712                /*
2713                 * read in sibling pages if any to update sibling pointers;
2714                 */
2715                rmp = NULL;
2716                if (p->header.next) {
2717                        nextbn = le64_to_cpu(p->header.next);
2718                        XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2719                        if (rc) {
2720                                XT_PUTPAGE(pmp);
2721                                XT_PUTPAGE(mp);
2722                                return (rc);
2723                        }
2724                }
2725
2726                lmp = NULL;
2727                if (p->header.prev) {
2728                        prevbn = le64_to_cpu(p->header.prev);
2729                        XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2730                        if (rc) {
2731                                XT_PUTPAGE(pmp);
2732                                XT_PUTPAGE(mp);
2733                                if (rmp)
2734                                        XT_PUTPAGE(rmp);
2735                                return (rc);
2736                        }
2737                }
2738
2739                /* at this point, all xtpages to be updated are in memory */
2740
2741                /*
2742                 * update sibling pointers of sibling xtpages if any;
2743                 */
2744                if (lmp) {
2745                        BT_MARK_DIRTY(lmp, ip);
2746                        tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
2747                        lp->header.next = cpu_to_le64(nxaddr);
2748                        XT_PUTPAGE(lmp);
2749                }
2750
2751                if (rmp) {
2752                        BT_MARK_DIRTY(rmp, ip);
2753                        tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
2754                        rp->header.prev = cpu_to_le64(nxaddr);
2755                        XT_PUTPAGE(rmp);
2756                }
2757
2758                /*
2759                 * update the target xtpage to be relocated
2760                 *
2761                 * update the self address of the target page
2762                 * and write to destination extent;
2763                 * redo image covers the whole xtpage since it is new page
2764                 * to the destination extent;
2765                 * update of bmap for the free of source extent
2766                 * of the target xtpage itself:
2767                 * update of bmap for the allocation of destination extent
2768                 * of the target xtpage itself:
2769                 * update of bmap for the extents covered by xad entries in
2770                 * the target xtpage is not necessary since they are not
2771                 * updated;
2772                 * if not committed before this relocation,
2773                 * target page may contain XAD_NEW entries which must
2774                 * be scanned for bmap update (logredo() always
2775                 * scan xtpage REDOPAGE image for bmap update);
2776                 * if committed before this relocation (tlckRELOCATE),
2777                 * scan may be skipped by commit() and logredo();
2778                 */
2779                BT_MARK_DIRTY(mp, ip);
2780                /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
2781                tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
2782                xtlck = (struct xtlock *) & tlck->lock;
2783
2784                /* update the self address in the xtpage header */
2785                pxd = &p->header.self;
2786                PXDaddress(pxd, nxaddr);
2787
2788                /* linelock for the after image of the whole page */
2789                xtlck->lwm.length =
2790                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
2791
2792                /* update the buffer extent descriptor of target xtpage */
2793                xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2794                bmSetXD(mp, nxaddr, xsize);
2795
2796                /* unpin the target page to new homeward bound */
2797                XT_PUTPAGE(mp);
2798                jfs_info("xtRelocate: target xtpage relocated.");
2799        }
2800
2801        /*
2802         *      3. acquire maplock for the source extent to be freed;
2803         *
2804         * acquire a maplock saving the src relocated extent address;
2805         * to free of the extent at commit time;
2806         */
2807      out:
2808        /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
2809         * free PXD of the source data extent (logredo() will update
2810         * bmap for free of source data extent), and update bmap for
2811         * free of the source data extent;
2812         */
2813        if (xtype == DATAEXT)
2814                tlck = txMaplock(tid, ip, tlckMAP);
2815        /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
2816         * for the source xtpage (logredo() will init NoRedoPage
2817         * filter and will also update bmap for free of the source
2818         * xtpage), and update bmap for free of the source xtpage;
2819         * N.B. We use tlckMAP instead of tlkcXTREE because there
2820         *      is no buffer associated with this lock since the buffer
2821         *      has been redirected to the target location.
2822         */
2823        else                    /* (xtype == XTPAGE) */
2824                tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
2825
2826        pxdlock = (struct pxd_lock *) & tlck->lock;
2827        pxdlock->flag = mlckFREEPXD;
2828        PXDaddress(&pxdlock->pxd, oxaddr);
2829        PXDlength(&pxdlock->pxd, xlen);
2830        pxdlock->index = 1;
2831
2832        /*
2833         *      4. update the parent xad entry for relocation;
2834         *
2835         * acquire tlck for the parent entry with XAD_NEW as entry
2836         * update which will write LOG_REDOPAGE and update bmap for
2837         * allocation of XAD_NEW destination extent;
2838         */
2839        jfs_info("xtRelocate: update parent xad entry.");
2840        BT_MARK_DIRTY(pmp, ip);
2841        tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
2842        xtlck = (struct xtlock *) & tlck->lock;
2843
2844        /* update the XAD with the new destination extent; */
2845        xad = &pp->xad[index];
2846        xad->flag |= XAD_NEW;
2847        XADaddress(xad, nxaddr);
2848
2849        xtlck->lwm.offset = min(index, xtlck->lwm.offset);
2850        xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
2851            xtlck->lwm.offset;
2852
2853        /* unpin the parent xtpage */
2854        XT_PUTPAGE(pmp);
2855
2856        return rc;
2857}
2858
2859
2860/*
2861 *      xtSearchNode()
2862 *
2863 * function:    search for the internal xad entry covering specified extent.
2864 *              This function is mainly used by defragfs utility.
2865 *
2866 * parameters:
2867 *      ip      - file object;
2868 *      xad     - extent to find;
2869 *      cmpp    - comparison result:
2870 *      btstack - traverse stack;
2871 *      flag    - search process flag;
2872 *
2873 * returns:
2874 *      btstack contains (bn, index) of search path traversed to the entry.
2875 *      *cmpp is set to result of comparison with the entry returned.
2876 *      the page containing the entry is pinned at exit.
2877 */
2878static int xtSearchNode(struct inode *ip, xad_t * xad,  /* required XAD entry */
2879                        int *cmpp, struct btstack * btstack, int flag)
2880{
2881        int rc = 0;
2882        s64 xoff, xaddr;
2883        int xlen;
2884        int cmp = 1;            /* init for empty page */
2885        s64 bn;                 /* block number */
2886        struct metapage *mp;    /* meta-page buffer */
2887        xtpage_t *p;            /* page */
2888        int base, index, lim;
2889        struct btframe *btsp;
2890        s64 t64;
2891
2892        BT_CLR(btstack);
2893
2894        xoff = offsetXAD(xad);
2895        xlen = lengthXAD(xad);
2896        xaddr = addressXAD(xad);
2897
2898        /*
2899         *      search down tree from root:
2900         *
2901         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
2902         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
2903         *
2904         * if entry with search key K is not found
2905         * internal page search find the entry with largest key Ki
2906         * less than K which point to the child page to search;
2907         * leaf page search find the entry with smallest key Kj
2908         * greater than K so that the returned index is the position of
2909         * the entry to be shifted right for insertion of new entry.
2910         * for empty tree, search key is greater than any key of the tree.
2911         *
2912         * by convention, root bn = 0.
2913         */
2914        for (bn = 0;;) {
2915                /* get/pin the page to search */
2916                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2917                if (rc)
2918                        return rc;
2919                if (p->header.flag & BT_LEAF) {
2920                        XT_PUTPAGE(mp);
2921                        return -ESTALE;
2922                }
2923
2924                lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
2925
2926                /*
2927                 * binary search with search key K on the current page
2928                 */
2929                for (base = XTENTRYSTART; lim; lim >>= 1) {
2930                        index = base + (lim >> 1);
2931
2932                        XT_CMP(cmp, xoff, &p->xad[index], t64);
2933                        if (cmp == 0) {
2934                                /*
2935                                 *      search hit
2936                                 *
2937                                 * verify for exact match;
2938                                 */
2939                                if (xaddr == addressXAD(&p->xad[index]) &&
2940                                    xoff == offsetXAD(&p->xad[index])) {
2941                                        *cmpp = cmp;
2942
2943                                        /* save search result */
2944                                        btsp = btstack->top;
2945                                        btsp->bn = bn;
2946                                        btsp->index = index;
2947                                        btsp->mp = mp;
2948
2949                                        return 0;
2950                                }
2951
2952                                /* descend/search its child page */
2953                                goto next;
2954                        }
2955
2956                        if (cmp > 0) {
2957                                base = index + 1;
2958                                --lim;
2959                        }
2960                }
2961
2962                /*
2963                 *      search miss - non-leaf page:
2964                 *
2965                 * base is the smallest index with key (Kj) greater than
2966                 * search key (K) and may be zero or maxentry index.
2967                 * if base is non-zero, decrement base by one to get the parent
2968                 * entry of the child page to search.
2969                 */
2970                index = base ? base - 1 : base;
2971
2972                /*
2973                 * go down to child page
2974                 */
2975              next:
2976                /* get the child page block number */
2977                bn = addressXAD(&p->xad[index]);
2978
2979                /* unpin the parent page */
2980                XT_PUTPAGE(mp);
2981        }
2982}
2983
2984
2985/*
2986 *      xtRelink()
2987 *
2988 * function:
2989 *      link around a freed page.
2990 *
2991 * Parameter:
2992 *      int             tid,
2993 *      struct inode    *ip,
2994 *      xtpage_t        *p)
2995 *
2996 * returns:
2997 */
2998static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
2999{
3000        int rc = 0;
3001        struct metapage *mp;
3002        s64 nextbn, prevbn;
3003        struct tlock *tlck;
3004
3005        nextbn = le64_to_cpu(p->header.next);
3006        prevbn = le64_to_cpu(p->header.prev);
3007
3008        /* update prev pointer of the next page */
3009        if (nextbn != 0) {
3010                XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
3011                if (rc)
3012                        return rc;
3013
3014                /*
3015                 * acquire a transaction lock on the page;
3016                 *
3017                 * action: update prev pointer;
3018                 */
3019                BT_MARK_DIRTY(mp, ip);
3020                tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3021
3022                /* the page may already have been tlock'd */
3023
3024                p->header.prev = cpu_to_le64(prevbn);
3025
3026                XT_PUTPAGE(mp);
3027        }
3028
3029        /* update next pointer of the previous page */
3030        if (prevbn != 0) {
3031                XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
3032                if (rc)
3033                        return rc;
3034
3035                /*
3036                 * acquire a transaction lock on the page;
3037                 *
3038                 * action: update next pointer;
3039                 */
3040                BT_MARK_DIRTY(mp, ip);
3041                tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3042
3043                /* the page may already have been tlock'd */
3044
3045                p->header.next = le64_to_cpu(nextbn);
3046
3047                XT_PUTPAGE(mp);
3048        }
3049
3050        return 0;
3051}
3052#endif                          /*  _STILL_TO_PORT */
3053
3054
3055/*
3056 *      xtInitRoot()
3057 *
3058 * initialize file root (inline in inode)
3059 */
3060void xtInitRoot(tid_t tid, struct inode *ip)
3061{
3062        xtpage_t *p;
3063
3064        /*
3065         * acquire a transaction lock on the root
3066         *
3067         * action:
3068         */
3069        txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
3070                      tlckXTREE | tlckNEW);
3071        p = &JFS_IP(ip)->i_xtroot;
3072
3073        p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
3074        p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3075
3076        if (S_ISDIR(ip->i_mode))
3077                p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
3078        else {
3079                p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
3080                ip->i_size = 0;
3081        }
3082
3083
3084        return;
3085}
3086
3087
3088/*
3089 * We can run into a deadlock truncating a file with a large number of
3090 * xtree pages (large fragmented file).  A robust fix would entail a
3091 * reservation system where we would reserve a number of metadata pages
3092 * and tlocks which we would be guaranteed without a deadlock.  Without
3093 * this, a partial fix is to limit number of metadata pages we will lock
3094 * in a single transaction.  Currently we will truncate the file so that
3095 * no more than 50 leaf pages will be locked.  The caller of xtTruncate
3096 * will be responsible for ensuring that the current transaction gets
3097 * committed, and that subsequent transactions are created to truncate
3098 * the file further if needed.
3099 */
3100#define MAX_TRUNCATE_LEAVES 50
3101
3102/*
3103 *      xtTruncate()
3104 *
3105 * function:
3106 *      traverse for truncation logging backward bottom up;
3107 *      terminate at the last extent entry at the current subtree
3108 *      root page covering new down size.
3109 *      truncation may occur within the last extent entry.
3110 *
3111 * parameter:
3112 *      int             tid,
3113 *      struct inode    *ip,
3114 *      s64             newsize,
3115 *      int             type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3116 *
3117 * return:
3118 *
3119 * note:
3120 *      PWMAP:
3121 *       1. truncate (non-COMMIT_NOLINK file)
3122 *          by jfs_truncate() or jfs_open(O_TRUNC):
3123 *          xtree is updated;
3124 *       2. truncate index table of directory when last entry removed
3125 *      map update via tlock at commit time;
3126 *      PMAP:
3127 *       Call xtTruncate_pmap instead
3128 *      WMAP:
3129 *       1. remove (free zero link count) on last reference release
3130 *          (pmap has been freed at commit zero link count);
3131 *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3132 *          xtree is updated;
3133 *       map update directly at truncation time;
3134 *
3135 *      if (DELETE)
3136 *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3137 *      else if (TRUNCATE)
3138 *              must write LOG_NOREDOPAGE for deleted index page;
3139 *
3140 * pages may already have been tlocked by anonymous transactions
3141 * during file growth (i.e., write) before truncation;
3142 *
3143 * except last truncated entry, deleted entries remains as is
3144 * in the page (nextindex is updated) for other use
3145 * (e.g., log/update allocation map): this avoid copying the page
3146 * info but delay free of pages;
3147 *
3148 */
3149s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3150{
3151        int rc = 0;
3152        s64 teof;
3153        struct metapage *mp;
3154        xtpage_t *p;
3155        s64 bn;
3156        int index, nextindex;
3157        xad_t *xad;
3158        s64 xoff, xaddr;
3159        int xlen, len, freexlen;
3160        struct btstack btstack;
3161        struct btframe *parent;
3162        struct tblock *tblk = NULL;
3163        struct tlock *tlck = NULL;
3164        struct xtlock *xtlck = NULL;
3165        struct xdlistlock xadlock;      /* maplock for COMMIT_WMAP */
3166        struct pxd_lock *pxdlock;               /* maplock for COMMIT_WMAP */
3167        s64 nfreed;
3168        int freed, log;
3169        int locked_leaves = 0;
3170
3171        /* save object truncation type */
3172        if (tid) {
3173                tblk = tid_to_tblock(tid);
3174                tblk->xflag |= flag;
3175        }
3176
3177        nfreed = 0;
3178
3179        flag &= COMMIT_MAP;
3180        assert(flag != COMMIT_PMAP);
3181
3182        if (flag == COMMIT_PWMAP)
3183                log = 1;
3184        else {
3185                log = 0;
3186                xadlock.flag = mlckFREEXADLIST;
3187                xadlock.index = 1;
3188        }
3189
3190        /*
3191         * if the newsize is not an integral number of pages,
3192         * the file between newsize and next page boundary will
3193         * be cleared.
3194         * if truncating into a file hole, it will cause
3195         * a full block to be allocated for the logical block.
3196         */
3197
3198        /*
3199         * release page blocks of truncated region <teof, eof>
3200         *
3201         * free the data blocks from the leaf index blocks.
3202         * delete the parent index entries corresponding to
3203         * the freed child data/index blocks.
3204         * free the index blocks themselves which aren't needed
3205         * in new sized file.
3206         *
3207         * index blocks are updated only if the blocks are to be
3208         * retained in the new sized file.
3209         * if type is PMAP, the data and index pages are NOT
3210         * freed, and the data and index blocks are NOT freed
3211         * from working map.
3212         * (this will allow continued access of data/index of
3213         * temporary file (zerolink count file truncated to zero-length)).
3214         */
3215        teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
3216            JFS_SBI(ip->i_sb)->l2bsize;
3217
3218        /* clear stack */
3219        BT_CLR(&btstack);
3220
3221        /*
3222         * start with root
3223         *
3224         * root resides in the inode
3225         */
3226        bn = 0;
3227
3228        /*
3229         * first access of each page:
3230         */
3231      getPage:
3232        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3233        if (rc)
3234                return rc;
3235
3236        /* process entries backward from last index */
3237        index = le16_to_cpu(p->header.nextindex) - 1;
3238
3239
3240        /* Since this is the rightmost page at this level, and we may have
3241         * already freed a page that was formerly to the right, let's make
3242         * sure that the next pointer is zero.
3243         */
3244        if (p->header.next) {
3245                if (log)
3246                        /*
3247                         * Make sure this change to the header is logged.
3248                         * If we really truncate this leaf, the flag
3249                         * will be changed to tlckTRUNCATE
3250                         */
3251                        tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
3252                BT_MARK_DIRTY(mp, ip);
3253                p->header.next = 0;
3254        }
3255
3256        if (p->header.flag & BT_INTERNAL)
3257                goto getChild;
3258
3259        /*
3260         *      leaf page
3261         */
3262        freed = 0;
3263
3264        /* does region covered by leaf page precede Teof ? */
3265        xad = &p->xad[index];
3266        xoff = offsetXAD(xad);
3267        xlen = lengthXAD(xad);
3268        if (teof >= xoff + xlen) {
3269                XT_PUTPAGE(mp);
3270                goto getParent;
3271        }
3272
3273        /* (re)acquire tlock of the leaf page */
3274        if (log) {
3275                if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
3276                        /*
3277                         * We need to limit the size of the transaction
3278                         * to avoid exhausting pagecache & tlocks
3279                         */
3280                        XT_PUTPAGE(mp);
3281                        newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
3282                        goto getParent;
3283                }
3284                tlck = txLock(tid, ip, mp, tlckXTREE);
3285                tlck->type = tlckXTREE | tlckTRUNCATE;
3286                xtlck = (struct xtlock *) & tlck->lock;
3287                xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
3288        }
3289        BT_MARK_DIRTY(mp, ip);
3290
3291        /*
3292         * scan backward leaf page entries
3293         */
3294        for (; index >= XTENTRYSTART; index--) {
3295                xad = &p->xad[index];
3296                xoff = offsetXAD(xad);
3297                xlen = lengthXAD(xad);
3298                xaddr = addressXAD(xad);
3299
3300                /*
3301                 * The "data" for a directory is indexed by the block
3302                 * device's address space.  This metadata must be invalidated
3303                 * here
3304                 */
3305                if (S_ISDIR(ip->i_mode) && (teof == 0))
3306                        invalidate_xad_metapages(ip, *xad);
3307                /*
3308                 * entry beyond eof: continue scan of current page
3309                 *          xad
3310                 * ---|---=======------->
3311                 *   eof
3312                 */
3313                if (teof < xoff) {
3314                        nfreed += xlen;
3315                        continue;
3316                }
3317
3318                /*
3319                 * (xoff <= teof): last entry to be deleted from page;
3320                 * If other entries remain in page: keep and update the page.
3321                 */
3322
3323                /*
3324                 * eof == entry_start: delete the entry
3325                 *           xad
3326                 * -------|=======------->
3327                 *       eof
3328                 *
3329                 */
3330                if (teof == xoff) {
3331                        nfreed += xlen;
3332
3333                        if (index == XTENTRYSTART)
3334                                break;
3335
3336                        nextindex = index;
3337                }
3338                /*
3339                 * eof within the entry: truncate the entry.
3340                 *          xad
3341                 * -------===|===------->
3342                 *          eof
3343                 */
3344                else if (teof < xoff + xlen) {
3345                        /* update truncated entry */
3346                        len = teof - xoff;
3347                        freexlen = xlen - len;
3348                        XADlength(xad, len);
3349
3350                        /* save pxd of truncated extent in tlck */
3351                        xaddr += len;
3352                        if (log) {      /* COMMIT_PWMAP */
3353                                xtlck->lwm.offset = (xtlck->lwm.offset) ?
3354                                    min(index, (int)xtlck->lwm.offset) : index;
3355                                xtlck->lwm.length = index + 1 -
3356                                    xtlck->lwm.offset;
3357                                xtlck->twm.offset = index;
3358                                pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
3359                                pxdlock->flag = mlckFREEPXD;
3360                                PXDaddress(&pxdlock->pxd, xaddr);
3361                                PXDlength(&pxdlock->pxd, freexlen);
3362                        }
3363                        /* free truncated extent */
3364                        else {  /* COMMIT_WMAP */
3365
3366                                pxdlock = (struct pxd_lock *) & xadlock;
3367                                pxdlock->flag = mlckFREEPXD;
3368                                PXDaddress(&pxdlock->pxd, xaddr);
3369                                PXDlength(&pxdlock->pxd, freexlen);
3370                                txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
3371
3372                                /* reset map lock */
3373                                xadlock.flag = mlckFREEXADLIST;
3374                        }
3375
3376                        /* current entry is new last entry; */
3377                        nextindex = index + 1;
3378
3379                        nfreed += freexlen;
3380                }
3381                /*
3382                 * eof beyond the entry:
3383                 *          xad
3384                 * -------=======---|--->
3385                 *                 eof
3386                 */
3387                else {          /* (xoff + xlen < teof) */
3388
3389                        nextindex = index + 1;
3390                }
3391
3392                if (nextindex < le16_to_cpu(p->header.nextindex)) {
3393                        if (!log) {     /* COMMIT_WAMP */
3394                                xadlock.xdlist = &p->xad[nextindex];
3395                                xadlock.count =
3396                                    le16_to_cpu(p->header.nextindex) -
3397                                    nextindex;
3398                                txFreeMap(ip, (struct maplock *) & xadlock,
3399                                          NULL, COMMIT_WMAP);
3400                        }
3401                        p->header.nextindex = cpu_to_le16(nextindex);
3402                }
3403
3404                XT_PUTPAGE(mp);
3405
3406                /* assert(freed == 0); */
3407                goto getParent;
3408        }                       /* end scan of leaf page entries */
3409
3410        freed = 1;
3411
3412        /*
3413         * leaf page become empty: free the page if type != PMAP
3414         */
3415        if (log) {              /* COMMIT_PWMAP */
3416                /* txCommit() with tlckFREE:
3417                 * free data extents covered by leaf [XTENTRYSTART:hwm);
3418                 * invalidate leaf if COMMIT_PWMAP;
3419                 * if (TRUNCATE), will write LOG_NOREDOPAGE;
3420                 */
3421                tlck->type = tlckXTREE | tlckFREE;
3422        } else {                /* COMMIT_WAMP */
3423
3424                /* free data extents covered by leaf */
3425                xadlock.xdlist = &p->xad[XTENTRYSTART];
3426                xadlock.count =
3427                    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3428                txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
3429        }
3430
3431        if (p->header.flag & BT_ROOT) {
3432                p->header.flag &= ~BT_INTERNAL;
3433                p->header.flag |= BT_LEAF;
3434                p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3435
3436                XT_PUTPAGE(mp); /* debug */
3437                goto out;
3438        } else {
3439                if (log) {      /* COMMIT_PWMAP */
3440                        /* page will be invalidated at tx completion
3441                         */
3442                        XT_PUTPAGE(mp);
3443                } else {        /* COMMIT_WMAP */
3444
3445                        if (mp->lid)
3446                                lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
3447
3448                        /* invalidate empty leaf page */
3449                        discard_metapage(mp);
3450                }
3451        }
3452
3453        /*
3454         * the leaf page become empty: delete the parent entry
3455         * for the leaf page if the parent page is to be kept
3456         * in the new sized file.
3457         */
3458
3459        /*
3460         * go back up to the parent page
3461         */
3462      getParent:
3463        /* pop/restore parent entry for the current child page */
3464        if ((parent = BT_POP(&btstack)) == NULL)
3465                /* current page must have been root */
3466                goto out;
3467
3468        /* get back the parent page */
3469        bn = parent->bn;
3470        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3471        if (rc)
3472                return rc;
3473
3474        index = parent->index;
3475
3476        /*
3477         * child page was not empty:
3478         */
3479        if (freed == 0) {
3480                /* has any entry deleted from parent ? */
3481                if (index < le16_to_cpu(p->header.nextindex) - 1) {
3482                        /* (re)acquire tlock on the parent page */
3483                        if (log) {      /* COMMIT_PWMAP */
3484                                /* txCommit() with tlckTRUNCATE:
3485                                 * free child extents covered by parent [);
3486                                 */
3487                                tlck = txLock(tid, ip, mp, tlckXTREE);
3488                                xtlck = (struct xtlock *) & tlck->lock;
3489                                if (!(tlck->type & tlckTRUNCATE)) {
3490                                        xtlck->hwm.offset =
3491                                            le16_to_cpu(p->header.
3492                                                        nextindex) - 1;
3493                                        tlck->type =
3494                                            tlckXTREE | tlckTRUNCATE;
3495                                }
3496                        } else {        /* COMMIT_WMAP */
3497
3498                                /* free child extents covered by parent */
3499                                xadlock.xdlist = &p->xad[index + 1];
3500                                xadlock.count =
3501                                    le16_to_cpu(p->header.nextindex) -
3502                                    index - 1;
3503                                txFreeMap(ip, (struct maplock *) & xadlock,
3504                                          NULL, COMMIT_WMAP);
3505                        }
3506                        BT_MARK_DIRTY(mp, ip);
3507
3508                        p->header.nextindex = cpu_to_le16(index + 1);
3509                }
3510                XT_PUTPAGE(mp);
3511                goto getParent;
3512        }
3513
3514        /*
3515         * child page was empty:
3516         */
3517        nfreed += lengthXAD(&p->xad[index]);
3518
3519        /*
3520         * During working map update, child page's tlock must be handled
3521         * before parent's.  This is because the parent's tlock will cause
3522         * the child's disk space to be marked available in the wmap, so
3523         * it's important that the child page be released by that time.
3524         *
3525         * ToDo:  tlocks should be on doubly-linked list, so we can
3526         * quickly remove it and add it to the end.
3527         */
3528
3529        /*
3530         * Move parent page's tlock to the end of the tid's tlock list
3531         */
3532        if (log && mp->lid && (tblk->last != mp->lid) &&
3533            lid_to_tlock(mp->lid)->tid) {
3534                lid_t lid = mp->lid;
3535                struct tlock *prev;
3536
3537                tlck = lid_to_tlock(lid);
3538
3539                if (tblk->next == lid)
3540                        tblk->next = tlck->next;
3541                else {
3542                        for (prev = lid_to_tlock(tblk->next);
3543                             prev->next != lid;
3544                             prev = lid_to_tlock(prev->next)) {
3545                                assert(prev->next);
3546                        }
3547                        prev->next = tlck->next;
3548                }
3549                lid_to_tlock(tblk->last)->next = lid;
3550                tlck->next = 0;
3551                tblk->last = lid;
3552        }
3553
3554        /*
3555         * parent page become empty: free the page
3556         */
3557        if (index == XTENTRYSTART) {
3558                if (log) {      /* COMMIT_PWMAP */
3559                        /* txCommit() with tlckFREE:
3560                         * free child extents covered by parent;
3561                         * invalidate parent if COMMIT_PWMAP;
3562                         */
3563                        tlck = txLock(tid, ip, mp, tlckXTREE);
3564                        xtlck = (struct xtlock *) & tlck->lock;
3565                        xtlck->hwm.offset =
3566                            le16_to_cpu(p->header.nextindex) - 1;
3567                        tlck->type = tlckXTREE | tlckFREE;
3568                } else {        /* COMMIT_WMAP */
3569
3570                        /* free child extents covered by parent */
3571                        xadlock.xdlist = &p->xad[XTENTRYSTART];
3572                        xadlock.count =
3573                            le16_to_cpu(p->header.nextindex) -
3574                            XTENTRYSTART;
3575                        txFreeMap(ip, (struct maplock *) & xadlock, NULL,
3576                                  COMMIT_WMAP);
3577                }
3578                BT_MARK_DIRTY(mp, ip);
3579
3580                if (p->header.flag & BT_ROOT) {
3581                        p->header.flag &= ~BT_INTERNAL;
3582                        p->header.flag |= BT_LEAF;
3583                        p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3584                        if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
3585                                /*
3586                                 * Shrink root down to allow inline
3587                                 * EA (otherwise fsck complains)
3588                                 */
3589                                p->header.maxentry =
3590                                    cpu_to_le16(XTROOTINITSLOT);
3591                                JFS_IP(ip)->mode2 |= INLINEEA;
3592                        }
3593
3594                        XT_PUTPAGE(mp); /* debug */
3595                        goto out;
3596                } else {
3597                        if (log) {      /* COMMIT_PWMAP */
3598                                /* page will be invalidated at tx completion
3599                                 */
3600                                XT_PUTPAGE(mp);
3601                        } else {        /* COMMIT_WMAP */
3602
3603                                if (mp->lid)
3604                                        lid_to_tlock(mp->lid)->flag |=
3605                                                tlckFREELOCK;
3606
3607                                /* invalidate parent page */
3608                                discard_metapage(mp);
3609                        }
3610
3611                        /* parent has become empty and freed:
3612                         * go back up to its parent page
3613                         */
3614                        /* freed = 1; */
3615                        goto getParent;
3616                }
3617        }
3618        /*
3619         * parent page still has entries for front region;
3620         */
3621        else {
3622                /* try truncate region covered by preceding entry
3623                 * (process backward)
3624                 */
3625                index--;
3626
3627                /* go back down to the child page corresponding
3628                 * to the entry
3629                 */
3630                goto getChild;
3631        }
3632
3633        /*
3634         *      internal page: go down to child page of current entry
3635         */
3636      getChild:
3637        /* save current parent entry for the child page */
3638        if (BT_STACK_FULL(&btstack)) {
3639                jfs_error(ip->i_sb, "stack overrun!\n");
3640                XT_PUTPAGE(mp);
3641                return -EIO;
3642        }
3643        BT_PUSH(&btstack, bn, index);
3644
3645        /* get child page */
3646        xad = &p->xad[index];
3647        bn = addressXAD(xad);
3648
3649        /*
3650         * first access of each internal entry:
3651         */
3652        /* release parent page */
3653        XT_PUTPAGE(mp);
3654
3655        /* process the child page */
3656        goto getPage;
3657
3658      out:
3659        /*
3660         * update file resource stat
3661         */
3662        /* set size
3663         */
3664        if (S_ISDIR(ip->i_mode) && !newsize)
3665                ip->i_size = 1; /* fsck hates zero-length directories */
3666        else
3667                ip->i_size = newsize;
3668
3669        /* update quota allocation to reflect freed blocks */
3670        dquot_free_block(ip, nfreed);
3671
3672        /*
3673         * free tlock of invalidated pages
3674         */
3675        if (flag == COMMIT_WMAP)
3676                txFreelock(ip);
3677
3678        return newsize;
3679}
3680
3681
3682/*
3683 *      xtTruncate_pmap()
3684 *
3685 * function:
3686 *      Perform truncate to zero length for deleted file, leaving the
3687 *      xtree and working map untouched.  This allows the file to
3688 *      be accessed via open file handles, while the delete of the file
3689 *      is committed to disk.
3690 *
3691 * parameter:
3692 *      tid_t           tid,
3693 *      struct inode    *ip,
3694 *      s64             committed_size)
3695 *
3696 * return: new committed size
3697 *
3698 * note:
3699 *
3700 *      To avoid deadlock by holding too many transaction locks, the
3701 *      truncation may be broken up into multiple transactions.
3702 *      The committed_size keeps track of part of the file has been
3703 *      freed from the pmaps.
3704 */
3705s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
3706{
3707        s64 bn;
3708        struct btstack btstack;
3709        int cmp;
3710        int index;
3711        int locked_leaves = 0;
3712        struct metapage *mp;
3713        xtpage_t *p;
3714        struct btframe *parent;
3715        int rc;
3716        struct tblock *tblk;
3717        struct tlock *tlck = NULL;
3718        xad_t *xad;
3719        int xlen;
3720        s64 xoff;
3721        struct xtlock *xtlck = NULL;
3722
3723        /* save object truncation type */
3724        tblk = tid_to_tblock(tid);
3725        tblk->xflag |= COMMIT_PMAP;
3726
3727        /* clear stack */
3728        BT_CLR(&btstack);
3729
3730        if (committed_size) {
3731                xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
3732                rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
3733                if (rc)
3734                        return rc;
3735
3736                XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3737
3738                if (cmp != 0) {
3739                        XT_PUTPAGE(mp);
3740                        jfs_error(ip->i_sb, "did not find extent\n");
3741                        return -EIO;
3742                }
3743        } else {
3744                /*
3745                 * start with root
3746                 *
3747                 * root resides in the inode
3748                 */
3749                bn = 0;
3750
3751                /*
3752                 * first access of each page:
3753                 */
3754      getPage:
3755                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3756                if (rc)
3757                        return rc;
3758
3759                /* process entries backward from last index */
3760                index = le16_to_cpu(p->header.nextindex) - 1;
3761
3762                if (p->header.flag & BT_INTERNAL)
3763                        goto getChild;
3764        }
3765
3766        /*
3767         *      leaf page
3768         */
3769
3770        if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
3771                /*
3772                 * We need to limit the size of the transaction
3773                 * to avoid exhausting pagecache & tlocks
3774                 */
3775                xad = &p->xad[index];
3776                xoff = offsetXAD(xad);
3777                xlen = lengthXAD(xad);
3778                XT_PUTPAGE(mp);
3779                return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
3780        }
3781        tlck = txLock(tid, ip, mp, tlckXTREE);
3782        tlck->type = tlckXTREE | tlckFREE;
3783        xtlck = (struct xtlock *) & tlck->lock;
3784        xtlck->hwm.offset = index;
3785
3786
3787        XT_PUTPAGE(mp);
3788
3789        /*
3790         * go back up to the parent page
3791         */
3792      getParent:
3793        /* pop/restore parent entry for the current child page */
3794        if ((parent = BT_POP(&btstack)) == NULL)
3795                /* current page must have been root */
3796                goto out;
3797
3798        /* get back the parent page */
3799        bn = parent->bn;
3800        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3801        if (rc)
3802                return rc;
3803
3804        index = parent->index;
3805
3806        /*
3807         * parent page become empty: free the page
3808         */
3809        if (index == XTENTRYSTART) {
3810                /* txCommit() with tlckFREE:
3811                 * free child extents covered by parent;
3812                 * invalidate parent if COMMIT_PWMAP;
3813                 */
3814                tlck = txLock(tid, ip, mp, tlckXTREE);
3815                xtlck = (struct xtlock *) & tlck->lock;
3816                xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
3817                tlck->type = tlckXTREE | tlckFREE;
3818
3819                XT_PUTPAGE(mp);
3820
3821                if (p->header.flag & BT_ROOT) {
3822
3823                        goto out;
3824                } else {
3825                        goto getParent;
3826                }
3827        }
3828        /*
3829         * parent page still has entries for front region;
3830         */
3831        else
3832                index--;
3833        /*
3834         *      internal page: go down to child page of current entry
3835         */
3836      getChild:
3837        /* save current parent entry for the child page */
3838        if (BT_STACK_FULL(&btstack)) {
3839                jfs_error(ip->i_sb, "stack overrun!\n");
3840                XT_PUTPAGE(mp);
3841                return -EIO;
3842        }
3843        BT_PUSH(&btstack, bn, index);
3844
3845        /* get child page */
3846        xad = &p->xad[index];
3847        bn = addressXAD(xad);
3848
3849        /*
3850         * first access of each internal entry:
3851         */
3852        /* release parent page */
3853        XT_PUTPAGE(mp);
3854
3855        /* process the child page */
3856        goto getPage;
3857
3858      out:
3859
3860        return 0;
3861}
3862
3863#ifdef CONFIG_JFS_STATISTICS
3864int jfs_xtstat_proc_show(struct seq_file *m, void *v)
3865{
3866        seq_printf(m,
3867                       "JFS Xtree statistics\n"
3868                       "====================\n"
3869                       "searches = %d\n"
3870                       "fast searches = %d\n"
3871                       "splits = %d\n",
3872                       xtStat.search,
3873                       xtStat.fastSearch,
3874                       xtStat.split);
3875        return 0;
3876}
3877#endif
3878