linux/fs/jfs/jfs_xtree.c
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2005
   3 *
   4 *   This program is free software;  you can redistribute it and/or modify
   5 *   it under the terms of the GNU General Public License as published by
   6 *   the Free Software Foundation; either version 2 of the License, or
   7 *   (at your option) any later version.
   8 *
   9 *   This program is distributed in the hope that it will be useful,
  10 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  11 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  12 *   the GNU General Public License for more details.
  13 *
  14 *   You should have received a copy of the GNU General Public License
  15 *   along with this program;  if not, write to the Free Software
  16 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17 */
  18/*
  19 *      jfs_xtree.c: extent allocation descriptor B+-tree manager
  20 */
  21
  22#include <linux/fs.h>
  23#include <linux/quotaops.h>
  24#include "jfs_incore.h"
  25#include "jfs_filsys.h"
  26#include "jfs_metapage.h"
  27#include "jfs_dmap.h"
  28#include "jfs_dinode.h"
  29#include "jfs_superblock.h"
  30#include "jfs_debug.h"
  31
  32/*
  33 * xtree local flag
  34 */
  35#define XT_INSERT       0x00000001
  36
  37/*
  38 *      xtree key/entry comparison: extent offset
  39 *
  40 * return:
  41 *      -1: k < start of extent
  42 *       0: start_of_extent <= k <= end_of_extent
  43 *       1: k > end_of_extent
  44 */
  45#define XT_CMP(CMP, K, X, OFFSET64)\
  46{\
  47        OFFSET64 = offsetXAD(X);\
  48        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
  49                ((K) < OFFSET64) ? -1 : 0;\
  50}
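/*
 * Editorial sketch, not part of the original source: the comparison above
 * written as a function, to make the three outcomes explicit.  The name
 * xt_cmp_sketch is hypothetical.  For an xad covering offsets
 * [off, off + len), the result is -1 below the extent, 0 inside it, and
 * 1 at or past its end, exactly as XT_CMP computes.
 */
static inline int xt_cmp_sketch(s64 k, xad_t *x)
{
        s64 off = offsetXAD(x);

        if (k < off)
                return -1;              /* key precedes the extent */
        if (k >= off + lengthXAD(x))
                return 1;               /* key lies past the extent */
        return 0;                       /* key falls within the extent */
}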
  51
  52/* write a xad entry */
  53#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
  54{\
  55        (XAD)->flag = (FLAG);\
  56        XADoffset((XAD), (OFF));\
  57        XADlength((XAD), (LEN));\
  58        XADaddress((XAD), (ADDR));\
  59}
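/*
 * Editorial example, not part of the original source: XT_PUTENTRY packs one
 * mapping into an xad slot.  Recording a freshly allocated 4-block extent
 * for file offset 0x20 at disk address 0x8000 would read
 *
 *      XT_PUTENTRY(xad, XAD_NEW, 0x20, 4, 0x8000);
 *
 * after which offsetXAD(xad) == 0x20, lengthXAD(xad) == 4 and
 * addressXAD(xad) == 0x8000.
 */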
  60
  61#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
  62
  63/* get page buffer for specified block address */
  64/* ToDo: Replace this ugly macro with a function */
  65#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
  66{\
  67        BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
  68        if (!(RC))\
  69        {\
  70                if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
  71                    (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
  72                    (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
  73                {\
  74                        jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
  75                        BT_PUTPAGE(MP);\
  76                        MP = NULL;\
  77                        RC = -EIO;\
  78                }\
  79        }\
  80}
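/*
 * Editorial sketch addressing the ToDo above, not part of the original
 * source: the same logic wrapped in a function.  The name xt_getpage_sketch
 * and its signature are hypothetical; a real conversion would likely fold
 * the validity checks into a shared btree helper rather than reusing the
 * macro as done here.
 */
static inline int xt_getpage_sketch(struct inode *ip, s64 bn, int size,
                                    struct metapage **mpp, xtpage_t **pp)
{
        int rc;

        XT_GETPAGE(ip, bn, *mpp, size, *pp, rc);
        return rc;
}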
  81
  82/* for consistency */
  83#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
  84
  85#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
  86        BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
  87/* xtree entry parameter descriptor */
  88struct xtsplit {
  89        struct metapage *mp;
  90        s16 index;
  91        u8 flag;
  92        s64 off;
  93        s64 addr;
  94        int len;
  95        struct pxdlist *pxdlist;
  96};
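/*
 * Editorial sketch, not part of the original source: how a caller fills this
 * descriptor for a leaf split, mirroring the "split.mp = mp; ..." sequence
 * used by xtInsert() below.  describe_leaf_split is a hypothetical helper
 * name.
 */
static inline void describe_leaf_split(struct xtsplit *split,
                                       struct metapage *mp, int index,
                                       u8 xflag, s64 xoff, s32 xlen, s64 xaddr)
{
        split->mp = mp;                 /* leaf page that is full */
        split->index = index;           /* slot at which the new xad goes */
        split->flag = xflag;
        split->off = xoff;
        split->len = xlen;
        split->addr = xaddr;
        split->pxdlist = NULL;          /* let xtSplitUp() allocate pages */
}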
  97
  98
  99/*
 100 *      statistics
 101 */
 102#ifdef CONFIG_JFS_STATISTICS
 103static struct {
 104        uint search;
 105        uint fastSearch;
 106        uint split;
 107} xtStat;
 108#endif
 109
 110
 111/*
 112 * forward references
 113 */
 114static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
 115                    struct btstack * btstack, int flag);
 116
 117static int xtSplitUp(tid_t tid,
 118                     struct inode *ip,
 119                     struct xtsplit * split, struct btstack * btstack);
 120
 121static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
 122                       struct metapage ** rmpp, s64 * rbnp);
 123
 124static int xtSplitRoot(tid_t tid, struct inode *ip,
 125                       struct xtsplit * split, struct metapage ** rmpp);
 126
 127#ifdef _STILL_TO_PORT
 128static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
 129                      xtpage_t * fp, struct btstack * btstack);
 130
 131static int xtSearchNode(struct inode *ip,
 132                        xad_t * xad,
 133                        int *cmpp, struct btstack * btstack, int flag);
 134
 135static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
 136#endif                          /*  _STILL_TO_PORT */
 137
 138/*
 139 *      xtLookup()
 140 *
 141 * function: map a single page into a physical extent;
 142 */
 143int xtLookup(struct inode *ip, s64 lstart,
 144             s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
 145{
 146        int rc = 0;
 147        struct btstack btstack;
 148        int cmp;
 149        s64 bn;
 150        struct metapage *mp;
 151        xtpage_t *p;
 152        int index;
 153        xad_t *xad;
 154        s64 next, size, xoff, xend;
 155        int xlen;
 156        s64 xaddr;
 157
 158        *paddr = 0;
 159        *plen = llen;
 160
 161        if (!no_check) {
 162                /* is lookup offset beyond eof ? */
 163                size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
 164                    JFS_SBI(ip->i_sb)->l2bsize;
 165                if (lstart >= size) {
 166                        jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
 167                                (ulong) lstart, (ulong) size);
 168                        return 0;
 169                }
 170        }
 171
 172        /*
 173         * search for the xad entry covering the logical extent
 174         */
 175//search:
 176        if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
 177                jfs_err("xtLookup: xtSearch returned %d", rc);
 178                return rc;
 179        }
 180
 181        /*
 182         *      compute the physical extent covering logical extent
 183         *
 184         * N.B. search may have failed (e.g., hole in sparse file),
 185         * and returned the index of the next entry.
 186         */
 187        /* retrieve search result */
 188        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 189
 190        /* is xad found covering start of logical extent ?
 191         * lstart is a page start address,
 192         * i.e., lstart cannot start in a hole;
 193         */
 194        if (cmp) {
 195                if (next)
 196                        *plen = min(next - lstart, llen);
 197                goto out;
 198        }
 199
 200        /*
 201         * lxd covered by xad
 202         */
 203        xad = &p->xad[index];
 204        xoff = offsetXAD(xad);
 205        xlen = lengthXAD(xad);
 206        xend = xoff + xlen;
 207        xaddr = addressXAD(xad);
 208
 209        /* initialize new pxd */
 210        *pflag = xad->flag;
 211        *paddr = xaddr + (lstart - xoff);
 212        /* a page must be fully covered by an xad */
 213        *plen = min(xend - lstart, llen);
 214
 215      out:
 216        XT_PUTPAGE(mp);
 217
 218        return rc;
 219}
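/*
 * Editorial usage sketch, not part of the original source: translating a
 * single file block into a disk address with xtLookup(), in the style of a
 * get_block()-type caller.  The wrapper name is hypothetical.
 */
static inline s64 xt_block_to_disk_sketch(struct inode *ip, s64 lblock)
{
        int xflag;
        s64 xaddr;
        s32 xlen;

        /* llen = 1 block; no_check = 0 validates lblock against i_size */
        if (xtLookup(ip, lblock, 1, &xflag, &xaddr, &xlen, 0))
                return 0;

        /* xaddr stays 0 when the block falls in a hole (search miss) */
        return xaddr;
}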
 220
 221
 222/*
 223 *      xtLookupList()
 224 *
 225 * function: map a single logical extent into a list of physical extents;
 226 *
 227 * parameter:
 228 *      struct inode    *ip,
 229 *      struct lxdlist  *lxdlist,       lxd list (in)
 230 *      struct xadlist  *xadlist,       xad list (in/out)
 231 *      int             flag)
 232 *
 233 * coverage of lxd by xad under assumption of
 234 * . lxd's are ordered and disjoint.
 235 * . xad's are ordered and disjoint.
 236 *
 237 * return:
 238 *      0:      success
 239 *
 240 * note: a page being written (even a single byte) is backed fully,
 241 *      except the last page which is only backed with blocks
 242 *      required to cover the last byte;
 243 *      the extent backing a page is fully contained within an xad;
 244 */
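/*
 * Editorial worked example, not part of the original source.  Suppose the
 * lxd list holds one extent (offset 10, length 20, i.e. offsets 10..29) and
 * the tree holds xads (offset 8, len 16, addr A2) and (offset 30, len 4,
 * addr A3).  xtSearch() lands on the covering xad (8, 16), and the loop
 * below emits a single pxd: offset 10, length min(24 - 10, 20) = 14,
 * address A2 + 2.  Offsets 24..29 fall in a hole and yield no pxd; the next
 * xad starts at 30, past the end of the lxd, so the lxd list is exhausted
 * and the walk exits at mapend.
 */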
 245int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
 246                 struct xadlist * xadlist, int flag)
 247{
 248        int rc = 0;
 249        struct btstack btstack;
 250        int cmp;
 251        s64 bn;
 252        struct metapage *mp;
 253        xtpage_t *p;
 254        int index;
 255        lxd_t *lxd;
 256        xad_t *xad, *pxd;
 257        s64 size, lstart, lend, xstart, xend, pstart;
 258        s64 llen, xlen, plen;
 259        s64 xaddr, paddr;
 260        int nlxd, npxd, maxnpxd;
 261
 262        npxd = xadlist->nxad = 0;
 263        maxnpxd = xadlist->maxnxad;
 264        pxd = xadlist->xad;
 265
 266        nlxd = lxdlist->nlxd;
 267        lxd = lxdlist->lxd;
 268
 269        lstart = offsetLXD(lxd);
 270        llen = lengthLXD(lxd);
 271        lend = lstart + llen;
 272
 273        size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
 274            JFS_SBI(ip->i_sb)->l2bsize;
 275
 276        /*
 277         * search for the xad entry covering the logical extent
 278         */
 279      search:
 280        if (lstart >= size)
 281                return 0;
 282
 283        if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
 284                return rc;
 285
 286        /*
 287         *      compute the physical extent covering logical extent
 288         *
 289         * N.B. search may have failed (e.g., hole in sparse file),
 290         * and returned the index of the next entry.
 291         */
 292//map:
 293        /* retrieve search result */
 294        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 295
 296        /* is xad on the next sibling page ? */
 297        if (index == le16_to_cpu(p->header.nextindex)) {
 298                if (p->header.flag & BT_ROOT)
 299                        goto mapend;
 300
 301                if ((bn = le64_to_cpu(p->header.next)) == 0)
 302                        goto mapend;
 303
 304                XT_PUTPAGE(mp);
 305
 306                /* get next sibling page */
 307                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 308                if (rc)
 309                        return rc;
 310
 311                index = XTENTRYSTART;
 312        }
 313
 314        xad = &p->xad[index];
 315
 316        /*
 317         * is lxd covered by xad ?
 318         */
 319      compare:
 320        xstart = offsetXAD(xad);
 321        xlen = lengthXAD(xad);
 322        xend = xstart + xlen;
 323        xaddr = addressXAD(xad);
 324
 325      compare1:
 326        if (xstart < lstart)
 327                goto compare2;
 328
 329        /* (lstart <= xstart) */
 330
 331        /* lxd is NOT covered by xad */
 332        if (lend <= xstart) {
 333                /*
 334                 * get next lxd
 335                 */
 336                if (--nlxd == 0)
 337                        goto mapend;
 338                lxd++;
 339
 340                lstart = offsetLXD(lxd);
 341                llen = lengthLXD(lxd);
 342                lend = lstart + llen;
 343                if (lstart >= size)
 344                        goto mapend;
 345
 346                /* compare with the current xad */
 347                goto compare1;
 348        }
 349        /* lxd is covered by xad */
 350        else {                  /* (xstart < lend) */
 351
 352                /* initialize new pxd */
 353                pstart = xstart;
 354                plen = min(lend - xstart, xlen);
 355                paddr = xaddr;
 356
 357                goto cover;
 358        }
 359
 360        /* (xstart < lstart) */
 361      compare2:
 362        /* lxd is covered by xad */
 363        if (lstart < xend) {
 364                /* initialize new pxd */
 365                pstart = lstart;
 366                plen = min(xend - lstart, llen);
 367                paddr = xaddr + (lstart - xstart);
 368
 369                goto cover;
 370        }
 371        /* lxd is NOT covered by xad */
 372        else {                  /* (xend <= lstart) */
 373
 374                /*
 375                 * get next xad
 376                 *
 377                 * linear search next xad covering lxd on
 378                 * the current xad page, and then tree search
 379                 */
 380                if (index == le16_to_cpu(p->header.nextindex) - 1) {
 381                        if (p->header.flag & BT_ROOT)
 382                                goto mapend;
 383
 384                        XT_PUTPAGE(mp);
 385                        goto search;
 386                } else {
 387                        index++;
 388                        xad++;
 389
 390                        /* compare with new xad */
 391                        goto compare;
 392                }
 393        }
 394
 395        /*
 396         * lxd is covered by xad and a new pxd has been initialized
 397         * (lstart <= xstart < lend) or (xstart < lstart < xend)
 398         */
 399      cover:
 400        /* finalize pxd corresponding to current xad */
 401        XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);
 402
 403        if (++npxd >= maxnpxd)
 404                goto mapend;
 405        pxd++;
 406
 407        /*
 408         * lxd is fully covered by xad
 409         */
 410        if (lend <= xend) {
 411                /*
 412                 * get next lxd
 413                 */
 414                if (--nlxd == 0)
 415                        goto mapend;
 416                lxd++;
 417
 418                lstart = offsetLXD(lxd);
 419                llen = lengthLXD(lxd);
 420                lend = lstart + llen;
 421                if (lstart >= size)
 422                        goto mapend;
 423
 424                /*
 425                 * test for old xad covering new lxd
 426                 * (old xstart < new lstart)
 427                 */
 428                goto compare2;
 429        }
 430        /*
 431         * lxd is partially covered by xad
 432         */
 433        else {                  /* (xend < lend) */
 434
 435                /*
 436                 * get next xad
 437                 *
 438                 * linear search next xad covering lxd on
 439                 * the current xad page, and then next xad page search
 440                 */
 441                if (index == le16_to_cpu(p->header.nextindex) - 1) {
 442                        if (p->header.flag & BT_ROOT)
 443                                goto mapend;
 444
 445                        if ((bn = le64_to_cpu(p->header.next)) == 0)
 446                                goto mapend;
 447
 448                        XT_PUTPAGE(mp);
 449
 450                        /* get next sibling page */
 451                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 452                        if (rc)
 453                                return rc;
 454
 455                        index = XTENTRYSTART;
 456                        xad = &p->xad[index];
 457                } else {
 458                        index++;
 459                        xad++;
 460                }
 461
 462                /*
 463                 * test for new xad covering old lxd
 464                 * (old lstart < new xstart)
 465                 */
 466                goto compare;
 467        }
 468
 469      mapend:
 470        xadlist->nxad = npxd;
 471
 472//out:
 473        XT_PUTPAGE(mp);
 474
 475        return rc;
 476}
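/*
 * Editorial usage sketch, not part of the original source: mapping one
 * logical range through xtLookupList().  The wrapper name is hypothetical,
 * and LXDoffset()/LXDlength() are assumed to be the setters matching the
 * offsetLXD()/lengthLXD() readers used above.
 */
static inline int xt_map_range_sketch(struct inode *ip, s64 off, s32 len,
                                      struct xadlist *xadlist)
{
        lxd_t lxd;
        struct lxdlist lxdlist;

        LXDoffset(&lxd, off);
        LXDlength(&lxd, len);

        lxdlist.lxd = &lxd;
        lxdlist.nlxd = 1;       /* the only lxdlist fields read above */

        /* caller supplies xadlist->xad and xadlist->maxnxad;
         * xadlist->nxad is filled in on return */
        return xtLookupList(ip, &lxdlist, xadlist, 0);
}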
 477
 478
 479/*
 480 *      xtSearch()
 481 *
 482 * function:    search for the xad entry covering specified offset.
 483 *
 484 * parameters:
 485 *      ip      - file object;
 486 *      xoff    - extent offset;
 487 *      nextp   - address of next extent (if any) for search miss
 488 *      cmpp    - comparison result:
 489 *      btstack - traverse stack;
 490 *      flag    - search process flag (XT_INSERT);
 491 *
 492 * returns:
 493 *      btstack contains (bn, index) of search path traversed to the entry.
 494 *      *cmpp is set to result of comparison with the entry returned.
 495 *      the page containing the entry is pinned at exit.
 496 */
 497static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
 498                    int *cmpp, struct btstack * btstack, int flag)
 499{
 500        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 501        int rc = 0;
 502        int cmp = 1;            /* init for empty page */
 503        s64 bn;                 /* block number */
 504        struct metapage *mp;    /* page buffer */
 505        xtpage_t *p;            /* page */
 506        xad_t *xad;
 507        int base, index, lim, btindex;
 508        struct btframe *btsp;
 509        int nsplit = 0;         /* number of pages to split */
 510        s64 t64;
 511        s64 next = 0;
 512
 513        INCREMENT(xtStat.search);
 514
 515        BT_CLR(btstack);
 516
 517        btstack->nsplit = 0;
 518
 519        /*
 520         *      search down tree from root:
 521         *
 522         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 523 * internal page, child page Pi contains entries with key K, Ki <= K < Kj.
 524         *
 525         * if entry with search key K is not found
 526 * internal page search finds the entry with the largest key Ki
 527 * less than K, which points to the child page to search;
 528 * leaf page search finds the entry with the smallest key Kj
 529         * greater than K so that the returned index is the position of
 530         * the entry to be shifted right for insertion of new entry.
 531         * for empty tree, search key is greater than any key of the tree.
 532         *
 533         * by convention, root bn = 0.
 534         */
 535        for (bn = 0;;) {
 536                /* get/pin the page to search */
 537                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 538                if (rc)
 539                        return rc;
 540
 541                /* try sequential access heuristics with the previous
 542                 * access entry in target leaf page:
 543                 * once search narrowed down into the target leaf,
 544                 * key must either match an entry in the leaf or
 545                 * key entry does not exist in the tree;
 546                 */
 547//fastSearch:
 548                if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
 549                    (p->header.flag & BT_LEAF) &&
 550                    (index = jfs_ip->btindex) <
 551                    le16_to_cpu(p->header.nextindex)) {
 552                        xad = &p->xad[index];
 553                        t64 = offsetXAD(xad);
 554                        if (xoff < t64 + lengthXAD(xad)) {
 555                                if (xoff >= t64) {
 556                                        *cmpp = 0;
 557                                        goto out;
 558                                }
 559
 560                                /* stop sequential access heuristics */
 561                                goto binarySearch;
 562                        } else {        /* (t64 + lengthXAD(xad)) <= xoff */
 563
 564                                /* try next sequential entry */
 565                                index++;
 566                                if (index <
 567                                    le16_to_cpu(p->header.nextindex)) {
 568                                        xad++;
 569                                        t64 = offsetXAD(xad);
 570                                        if (xoff < t64 + lengthXAD(xad)) {
 571                                                if (xoff >= t64) {
 572                                                        *cmpp = 0;
 573                                                        goto out;
 574                                                }
 575
 576                                                /* miss: key falls between
 577                                                 * previous and this entry
 578                                                 */
 579                                                *cmpp = 1;
 580                                                next = t64;
 581                                                goto out;
 582                                        }
 583
 584                                        /* (xoff >= t64 + lengthXAD(xad));
 585                                         * matching entry may be further out:
 586                                         * stop heuristic search
 587                                         */
 588                                        /* stop sequential access heuristics */
 589                                        goto binarySearch;
 590                                }
 591
 592                                /* (index == p->header.nextindex);
 593                                 * miss: key entry does not exist in
 594                                 * the target leaf/tree
 595                                 */
 596                                *cmpp = 1;
 597                                goto out;
 598                        }
 599
 600                        /*
 601                         * if hit, return index of the entry found, and
 602                         * if miss, where new entry with search key is
 603                         * to be inserted;
 604                         */
 605                      out:
 606                        /* compute number of pages to split */
 607                        if (flag & XT_INSERT) {
 608                                if (p->header.nextindex ==      /* little-endian */
 609                                    p->header.maxentry)
 610                                        nsplit++;
 611                                else
 612                                        nsplit = 0;
 613                                btstack->nsplit = nsplit;
 614                        }
 615
 616                        /* save search result */
 617                        btsp = btstack->top;
 618                        btsp->bn = bn;
 619                        btsp->index = index;
 620                        btsp->mp = mp;
 621
 622                        /* update sequential access heuristics */
 623                        jfs_ip->btindex = index;
 624
 625                        if (nextp)
 626                                *nextp = next;
 627
 628                        INCREMENT(xtStat.fastSearch);
 629                        return 0;
 630                }
 631
 632                /* well, ... full search now */
 633              binarySearch:
 634                lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
 635
 636                /*
 637                 * binary search with search key K on the current page
 638                 */
 639                for (base = XTENTRYSTART; lim; lim >>= 1) {
 640                        index = base + (lim >> 1);
 641
 642                        XT_CMP(cmp, xoff, &p->xad[index], t64);
 643                        if (cmp == 0) {
 644                                /*
 645                                 *      search hit
 646                                 */
 647                                /* search hit - leaf page:
 648                                 * return the entry found
 649                                 */
 650                                if (p->header.flag & BT_LEAF) {
 651                                        *cmpp = cmp;
 652
 653                                        /* compute number of pages to split */
 654                                        if (flag & XT_INSERT) {
 655                                                if (p->header.nextindex ==
 656                                                    p->header.maxentry)
 657                                                        nsplit++;
 658                                                else
 659                                                        nsplit = 0;
 660                                                btstack->nsplit = nsplit;
 661                                        }
 662
 663                                        /* save search result */
 664                                        btsp = btstack->top;
 665                                        btsp->bn = bn;
 666                                        btsp->index = index;
 667                                        btsp->mp = mp;
 668
 669                                        /* init sequential access heuristics */
 670                                        btindex = jfs_ip->btindex;
 671                                        if (index == btindex ||
 672                                            index == btindex + 1)
 673                                                jfs_ip->btorder = BT_SEQUENTIAL;
 674                                        else
 675                                                jfs_ip->btorder = BT_RANDOM;
 676                                        jfs_ip->btindex = index;
 677
 678                                        return 0;
 679                                }
 680                                /* search hit - internal page:
 681                                 * descend/search its child page
 682                                 */
 683                                if (index < le16_to_cpu(p->header.nextindex)-1)
 684                                        next = offsetXAD(&p->xad[index + 1]);
 685                                goto next;
 686                        }
 687
 688                        if (cmp > 0) {
 689                                base = index + 1;
 690                                --lim;
 691                        }
 692                }
 693
 694                /*
 695                 *      search miss
 696                 *
 697                 * base is the smallest index with key (Kj) greater than
 698                 * search key (K) and may be zero or maxentry index.
 699                 */
 700                if (base < le16_to_cpu(p->header.nextindex))
 701                        next = offsetXAD(&p->xad[base]);
 702                /*
 703                 * search miss - leaf page:
 704                 *
 705                 * return location of entry (base) where new entry with
 706                 * search key K is to be inserted.
 707                 */
 708                if (p->header.flag & BT_LEAF) {
 709                        *cmpp = cmp;
 710
 711                        /* compute number of pages to split */
 712                        if (flag & XT_INSERT) {
 713                                if (p->header.nextindex ==
 714                                    p->header.maxentry)
 715                                        nsplit++;
 716                                else
 717                                        nsplit = 0;
 718                                btstack->nsplit = nsplit;
 719                        }
 720
 721                        /* save search result */
 722                        btsp = btstack->top;
 723                        btsp->bn = bn;
 724                        btsp->index = base;
 725                        btsp->mp = mp;
 726
 727                        /* init sequential access heuristics */
 728                        btindex = jfs_ip->btindex;
 729                        if (base == btindex || base == btindex + 1)
 730                                jfs_ip->btorder = BT_SEQUENTIAL;
 731                        else
 732                                jfs_ip->btorder = BT_RANDOM;
 733                        jfs_ip->btindex = base;
 734
 735                        if (nextp)
 736                                *nextp = next;
 737
 738                        return 0;
 739                }
 740
 741                /*
 742                 * search miss - non-leaf page:
 743                 *
 744                 * if base is non-zero, decrement base by one to get the parent
 745                 * entry of the child page to search.
 746                 */
 747                index = base ? base - 1 : base;
 748
 749                /*
 750                 * go down to child page
 751                 */
 752              next:
 753                /* update number of pages to split */
 754                if (p->header.nextindex == p->header.maxentry)
 755                        nsplit++;
 756                else
 757                        nsplit = 0;
 758
 759                /* push (bn, index) of the parent page/entry */
 760                if (BT_STACK_FULL(btstack)) {
 761                        jfs_error(ip->i_sb, "stack overrun in xtSearch!");
 762                        XT_PUTPAGE(mp);
 763                        return -EIO;
 764                }
 765                BT_PUSH(btstack, bn, index);
 766
 767                /* get the child page block number */
 768                bn = addressXAD(&p->xad[index]);
 769
 770                /* unpin the parent page */
 771                XT_PUTPAGE(mp);
 772        }
 773}
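/*
 * Editorial sketch, not part of the original source: the binary search above
 * in isolation, over a plain array of xads, to make the base/lim/index
 * invariant easier to follow.  xt_binsearch_sketch is a hypothetical name;
 * base starts at 0 here rather than XTENTRYSTART because there is no page
 * header.  On a miss the smallest index whose key exceeds xoff is returned,
 * exactly as xtSearch() computes 'base'.
 */
static inline int xt_binsearch_sketch(xad_t *xads, int nxad, s64 xoff,
                                      int *cmpp)
{
        int base = 0, index;
        int lim, cmp = 1;
        s64 t64;

        for (lim = nxad; lim; lim >>= 1) {
                index = base + (lim >> 1);
                XT_CMP(cmp, xoff, &xads[index], t64);
                if (cmp == 0) {
                        *cmpp = 0;
                        return index;           /* hit */
                }
                if (cmp > 0) {                  /* key lies above this xad */
                        base = index + 1;
                        --lim;
                }
        }
        *cmpp = cmp;
        return base;                            /* miss: insertion point */
}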
 774
 775/*
 776 *      xtInsert()
 777 *
 778 * function: insert an entry for a new extent (xoff, xlen) into the xtree;
 779 *
 780 * parameter:
 781 *      tid     - transaction id;
 782 *      ip      - file object;
 783 *      xflag   - extent flag (XAD_NOTRECORDED):
 784 *      xoff    - extent offset;
 785 *      xlen    - extent length;
 786 *      xaddrp  - extent address pointer (in/out):
 787 *              if (*xaddrp)
 788 *                      caller allocated data extent at *xaddrp;
 789 *              else
 790 *                      allocate data extent and return its xaddr;
 791 *      flag    -
 792 *
 793 * return:
 794 */
 795int xtInsert(tid_t tid,         /* transaction id */
 796             struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
 797             int flag)
 798{
 799        int rc = 0;
 800        s64 xaddr, hint;
 801        struct metapage *mp;    /* meta-page buffer */
 802        xtpage_t *p;            /* base B+-tree index page */
 803        s64 bn;
 804        int index, nextindex;
 805        struct btstack btstack; /* traverse stack */
 806        struct xtsplit split;   /* split information */
 807        xad_t *xad;
 808        int cmp;
 809        s64 next;
 810        struct tlock *tlck;
 811        struct xtlock *xtlck;
 812
 813        jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
 814
 815        /*
 816         *      search for the entry location at which to insert:
 817         *
 818 * xtFastSearch() and xtSearch() both return (leaf page
 819         * pinned, index at which to insert).
 820         * n.b. xtSearch() may return index of maxentry of
 821         * the full page.
 822         */
 823        if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
 824                return rc;
 825
 826        /* retrieve search result */
 827        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 828
 829        /* This test must follow XT_GETSEARCH since mp must be valid if
 830         * we branch to out: */
 831        if ((cmp == 0) || (next && (xlen > next - xoff))) {
 832                rc = -EEXIST;
 833                goto out;
 834        }
 835
 836        /*
 837         * allocate data extent requested
 838         *
 839         * allocation hint: last xad
 840         */
 841        if ((xaddr = *xaddrp) == 0) {
 842                if (index > XTENTRYSTART) {
 843                        xad = &p->xad[index - 1];
 844                        hint = addressXAD(xad) + lengthXAD(xad) - 1;
 845                } else
 846                        hint = 0;
 847                if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
 848                        goto out;
 849                if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
 850                        DQUOT_FREE_BLOCK(ip, xlen);
 851                        goto out;
 852                }
 853        }
 854
 855        /*
 856         *      insert entry for new extent
 857         */
 858        xflag |= XAD_NEW;
 859
 860        /*
 861         *      if the leaf page is full, split the page and
 862         *      propagate up the router entry for the new page from split
 863         *
 864         * The xtSplitUp() will insert the entry and unpin the leaf page.
 865         */
 866        nextindex = le16_to_cpu(p->header.nextindex);
 867        if (nextindex == le16_to_cpu(p->header.maxentry)) {
 868                split.mp = mp;
 869                split.index = index;
 870                split.flag = xflag;
 871                split.off = xoff;
 872                split.len = xlen;
 873                split.addr = xaddr;
 874                split.pxdlist = NULL;
 875                if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
 876                        /* undo data extent allocation */
 877                        if (*xaddrp == 0) {
 878                                dbFree(ip, xaddr, (s64) xlen);
 879                                DQUOT_FREE_BLOCK(ip, xlen);
 880                        }
 881                        return rc;
 882                }
 883
 884                *xaddrp = xaddr;
 885                return 0;
 886        }
 887
 888        /*
 889         *      insert the new entry into the leaf page
 890         */
 891        /*
 892         * acquire a transaction lock on the leaf page;
 893         *
 894         * action: xad insertion/extension;
 895         */
 896        BT_MARK_DIRTY(mp, ip);
 897
 898        /* if insert into middle, shift right remaining entries. */
 899        if (index < nextindex)
 900                memmove(&p->xad[index + 1], &p->xad[index],
 901                        (nextindex - index) * sizeof(xad_t));
 902
 903        /* insert the new entry: mark the entry NEW */
 904        xad = &p->xad[index];
 905        XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
 906
 907        /* advance next available entry index */
 908        p->header.nextindex =
 909            cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
 910
 911        /* Don't log it if there are no links to the file */
 912        if (!test_cflag(COMMIT_Nolink, ip)) {
 913                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 914                xtlck = (struct xtlock *) & tlck->lock;
 915                xtlck->lwm.offset =
 916                    (xtlck->lwm.offset) ? min(index,
 917                                              (int)xtlck->lwm.offset) : index;
 918                xtlck->lwm.length =
 919                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
 920        }
 921
 922        *xaddrp = xaddr;
 923
 924      out:
 925        /* unpin the leaf page */
 926        XT_PUTPAGE(mp);
 927
 928        return rc;
 929}
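/*
 * Editorial usage sketch, not part of the original source: recording an
 * extent the caller has already allocated with dbAlloc().  The wrapper name
 * is hypothetical; tid must belong to an open transaction.
 */
static inline int xt_record_extent_sketch(tid_t tid, struct inode *ip,
                                          s64 xoff, s32 xlen, s64 xaddr)
{
        /* a non-zero *xaddrp tells xtInsert() the data extent already
         * exists, so it only inserts the mapping (no dbAlloc/quota charge)
         */
        return xtInsert(tid, ip, 0, xoff, xlen, &xaddr, 0);
}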
 930
 931
 932/*
 933 *      xtSplitUp()
 934 *
 935 * function:
 936 *      split full pages while propagating the insertion up the tree
 937 *
 938 * parameter:
 939 *      tid     - transaction id;
 940 *      ip      - file object;
 941 *      split   - entry parameter descriptor;
 942 *      btstack - traverse stack from xtSearch()
 943 *
 944 * return:
 945 */
 946static int
 947xtSplitUp(tid_t tid,
 948          struct inode *ip, struct xtsplit * split, struct btstack * btstack)
 949{
 950        int rc = 0;
 951        struct metapage *smp;
 952        xtpage_t *sp;           /* split page */
 953        struct metapage *rmp;
 954        s64 rbn;                /* new right page block number */
 955        struct metapage *rcmp;
 956        xtpage_t *rcp;          /* right child page */
 957        s64 rcbn;               /* right child page block number */
 958        int skip;               /* index of entry of insertion */
 959        int nextindex;          /* next available entry index of p */
 960        struct btframe *parent; /* parent page entry on traverse stack */
 961        xad_t *xad;
 962        s64 xaddr;
 963        int xlen;
 964        int nsplit;             /* number of pages split */
 965        struct pxdlist pxdlist;
 966        pxd_t *pxd;
 967        struct tlock *tlck;
 968        struct xtlock *xtlck;
 969
 970        smp = split->mp;
 971        sp = XT_PAGE(ip, smp);
 972
 973        /* is inode xtree root extension/inline EA area free ? */
 974        if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
 975            (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
 976            (JFS_IP(ip)->mode2 & INLINEEA)) {
 977                sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
 978                JFS_IP(ip)->mode2 &= ~INLINEEA;
 979
 980                BT_MARK_DIRTY(smp, ip);
 981                /*
 982                 * acquire a transaction lock on the leaf page;
 983                 *
 984                 * action: xad insertion/extension;
 985                 */
 986
 987                /* if insert into middle, shift right remaining entries. */
 988                skip = split->index;
 989                nextindex = le16_to_cpu(sp->header.nextindex);
 990                if (skip < nextindex)
 991                        memmove(&sp->xad[skip + 1], &sp->xad[skip],
 992                                (nextindex - skip) * sizeof(xad_t));
 993
 994                /* insert the new entry: mark the entry NEW */
 995                xad = &sp->xad[skip];
 996                XT_PUTENTRY(xad, split->flag, split->off, split->len,
 997                            split->addr);
 998
 999                /* advance next available entry index */
1000                sp->header.nextindex =
1001                    cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);
1002
1003                /* Don't log it if there are no links to the file */
1004                if (!test_cflag(COMMIT_Nolink, ip)) {
1005                        tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1006                        xtlck = (struct xtlock *) & tlck->lock;
1007                        xtlck->lwm.offset = (xtlck->lwm.offset) ?
1008                            min(skip, (int)xtlck->lwm.offset) : skip;
1009                        xtlck->lwm.length =
1010                            le16_to_cpu(sp->header.nextindex) -
1011                            xtlck->lwm.offset;
1012                }
1013
1014                return 0;
1015        }
1016
1017        /*
1018         * allocate new index blocks to cover index page split(s)
1019         *
1020         * allocation hint: ?
1021         */
1022        if (split->pxdlist == NULL) {
1023                nsplit = btstack->nsplit;
1024                split->pxdlist = &pxdlist;
1025                pxdlist.maxnpxd = pxdlist.npxd = 0;
1026                pxd = &pxdlist.pxd[0];
1027                xlen = JFS_SBI(ip->i_sb)->nbperpage;
1028                for (; nsplit > 0; nsplit--, pxd++) {
1029                        if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
1030                            == 0) {
1031                                PXDaddress(pxd, xaddr);
1032                                PXDlength(pxd, xlen);
1033
1034                                pxdlist.maxnpxd++;
1035
1036                                continue;
1037                        }
1038
1039                        /* undo allocation */
1040
1041                        XT_PUTPAGE(smp);
1042                        return rc;
1043                }
1044        }
1045
1046        /*
1047         * Split leaf page <sp> into <sp> and a new right page <rp>.
1048         *
1049         * The split routines insert the new entry into the leaf page,
1050         * and acquire txLock as appropriate.
1051         * return <rp> pinned and its block number <rpbn>.
1052         */
1053        rc = (sp->header.flag & BT_ROOT) ?
1054            xtSplitRoot(tid, ip, split, &rmp) :
1055            xtSplitPage(tid, ip, split, &rmp, &rbn);
1056
1057        XT_PUTPAGE(smp);
1058
1059        if (rc)
1060                return -EIO;
1061        /*
1062         * propagate up the router entry for the leaf page just split
1063         *
1064         * insert a router entry for the new page into the parent page,
1065         * propagate the insert/split up the tree by walking back the stack
1066         * of (bn of parent page, index of child page entry in parent page)
1067         * that were traversed during the search for the page that split.
1068         *
1069         * the propagation of insert/split up the tree stops if the root
1070         * splits or the page inserted into doesn't have to split to hold
1071         * the new entry.
1072         *
1073         * the parent entry for the split page remains the same, and
1074         * a new entry is inserted at its right with the first key and
1075         * block number of the new right page.
1076         *
1077         * There are a maximum of 3 pages pinned at any time:
1078         * right child, left parent and right parent (when the parent splits)
1079         * to keep the child page pinned while working on the parent.
1080         * make sure that all pins are released at exit.
1081         */
1082        while ((parent = BT_POP(btstack)) != NULL) {
1083                /* parent page specified by stack frame <parent> */
1084
1085                /* keep current child pages <rcp> pinned */
1086                rcmp = rmp;
1087                rcbn = rbn;
1088                rcp = XT_PAGE(ip, rcmp);
1089
1090                /*
1091                 * insert router entry in parent for new right child page <rp>
1092                 */
1093                /* get/pin the parent page <sp> */
1094                XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1095                if (rc) {
1096                        XT_PUTPAGE(rcmp);
1097                        return rc;
1098                }
1099
1100                /*
1101                 * The new key entry goes ONE AFTER the index of parent entry,
1102                 * because the split was to the right.
1103                 */
1104                skip = parent->index + 1;
1105
1106                /*
1107                 * split or shift right remaining entries of the parent page
1108                 */
1109                nextindex = le16_to_cpu(sp->header.nextindex);
1110                /*
1111                 * parent page is full - split the parent page
1112                 */
1113                if (nextindex == le16_to_cpu(sp->header.maxentry)) {
1114                        /* init for parent page split */
1115                        split->mp = smp;
1116                        split->index = skip;    /* index at insert */
1117                        split->flag = XAD_NEW;
1118                        split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
1119                        split->len = JFS_SBI(ip->i_sb)->nbperpage;
1120                        split->addr = rcbn;
1121
1122                        /* unpin previous right child page */
1123                        XT_PUTPAGE(rcmp);
1124
1125                        /* The split routines insert the new entry,
1126                         * and acquire txLock as appropriate.
1127                         * return <rp> pinned and its block number <rpbn>.
1128                         */
1129                        rc = (sp->header.flag & BT_ROOT) ?
1130                            xtSplitRoot(tid, ip, split, &rmp) :
1131                            xtSplitPage(tid, ip, split, &rmp, &rbn);
1132                        if (rc) {
1133                                XT_PUTPAGE(smp);
1134                                return rc;
1135                        }
1136
1137                        XT_PUTPAGE(smp);
1138                        /* keep new child page <rp> pinned */
1139                }
1140                /*
1141                 * parent page is not full - insert in parent page
1142                 */
1143                else {
1144                        /*
1145                         * insert router entry in parent for the right child
1146                         * page from the first entry of the right child page:
1147                         */
1148                        /*
1149                         * acquire a transaction lock on the parent page;
1150                         *
1151                         * action: router xad insertion;
1152                         */
1153                        BT_MARK_DIRTY(smp, ip);
1154
1155                        /*
1156                         * if insert into middle, shift right remaining entries
1157                         */
1158                        if (skip < nextindex)
1159                                memmove(&sp->xad[skip + 1], &sp->xad[skip],
1160                                        (nextindex -
1161                                         skip) << L2XTSLOTSIZE);
1162
1163                        /* insert the router entry */
1164                        xad = &sp->xad[skip];
1165                        XT_PUTENTRY(xad, XAD_NEW,
1166                                    offsetXAD(&rcp->xad[XTENTRYSTART]),
1167                                    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
1168
1169                        /* advance next available entry index. */
1170                        sp->header.nextindex =
1171                            cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
1172                                        1);
1173
1174                        /* Don't log it if there are no links to the file */
1175                        if (!test_cflag(COMMIT_Nolink, ip)) {
1176                                tlck = txLock(tid, ip, smp,
1177                                              tlckXTREE | tlckGROW);
1178                                xtlck = (struct xtlock *) & tlck->lock;
1179                                xtlck->lwm.offset = (xtlck->lwm.offset) ?
1180                                    min(skip, (int)xtlck->lwm.offset) : skip;
1181                                xtlck->lwm.length =
1182                                    le16_to_cpu(sp->header.nextindex) -
1183                                    xtlck->lwm.offset;
1184                        }
1185
1186                        /* unpin parent page */
1187                        XT_PUTPAGE(smp);
1188
1189                        /* exit propagate up */
1190                        break;
1191                }
1192        }
1193
1194        /* unpin current right page */
1195        XT_PUTPAGE(rmp);
1196
1197        return 0;
1198}
1199
1200
1201/*
1202 *      xtSplitPage()
1203 *
1204 * function:
1205 *      split a full non-root page into
1206 *      original/split/left page and new right page
1207 *      i.e., the original/split page remains as left page.
1208 *
1209 * parameter:
1210 *      int             tid,
1211 *      struct inode    *ip,
1212 *      struct xtsplit  *split,
1213 *      struct metapage **rmpp,
1214 *      u64             *rbnp,
1215 *
1216 * return:
1217 *      0 on success; *rmpp returns the new right page (pinned), *rbnp its block number.
1218 */
1219static int
1220xtSplitPage(tid_t tid, struct inode *ip,
1221            struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
1222{
1223        int rc = 0;
1224        struct metapage *smp;
1225        xtpage_t *sp;
1226        struct metapage *rmp;
1227        xtpage_t *rp;           /* new right page allocated */
1228        s64 rbn;                /* new right page block number */
1229        struct metapage *mp;
1230        xtpage_t *p;
1231        s64 nextbn;
1232        int skip, maxentry, middle, righthalf, n;
1233        xad_t *xad;
1234        struct pxdlist *pxdlist;
1235        pxd_t *pxd;
1236        struct tlock *tlck;
1237        struct xtlock *sxtlck = NULL, *rxtlck = NULL;
1238        int quota_allocation = 0;
1239
1240        smp = split->mp;
1241        sp = XT_PAGE(ip, smp);
1242
1243        INCREMENT(xtStat.split);
1244
1245        pxdlist = split->pxdlist;
1246        pxd = &pxdlist->pxd[pxdlist->npxd];
1247        pxdlist->npxd++;
1248        rbn = addressPXD(pxd);
1249
1250        /* Allocate blocks to quota. */
1251        if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
1252                rc = -EDQUOT;
1253                goto clean_up;
1254        }
1255
1256        quota_allocation += lengthPXD(pxd);
1257
1258        /*
1259         * allocate the new right page for the split
1260         */
1261        rmp = get_metapage(ip, rbn, PSIZE, 1);
1262        if (rmp == NULL) {
1263                rc = -EIO;
1264                goto clean_up;
1265        }
1266
1267        jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1268
1269        BT_MARK_DIRTY(rmp, ip);
1270        /*
1271         * action: new page;
1272         */
1273
1274        rp = (xtpage_t *) rmp->data;
1275        rp->header.self = *pxd;
1276        rp->header.flag = sp->header.flag & BT_TYPE;
1277        rp->header.maxentry = sp->header.maxentry;      /* little-endian */
1278        rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1279
1280        BT_MARK_DIRTY(smp, ip);
1281        /* Don't log it if there are no links to the file */
1282        if (!test_cflag(COMMIT_Nolink, ip)) {
1283                /*
1284                 * acquire a transaction lock on the new right page;
1285                 */
1286                tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1287                rxtlck = (struct xtlock *) & tlck->lock;
1288                rxtlck->lwm.offset = XTENTRYSTART;
1289                /*
1290                 * acquire a transaction lock on the split page
1291                 */
1292                tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1293                sxtlck = (struct xtlock *) & tlck->lock;
1294        }
1295
1296        /*
1297         * initialize/update sibling pointers of <sp> and <rp>
1298         */
1299        nextbn = le64_to_cpu(sp->header.next);
1300        rp->header.next = cpu_to_le64(nextbn);
1301        rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1302        sp->header.next = cpu_to_le64(rbn);
1303
1304        skip = split->index;
1305
1306        /*
1307         *      sequential append at tail (after last entry of last page)
1308         *
1309         * if splitting the last page on a level because of appending
1310 * an entry to it (skip is maxentry), it's likely that the access is
1311         * sequential. adding an empty page on the side of the level is less
1312         * work and can push the fill factor much higher than normal.
1313         * if we're wrong it's no big deal -  we will do the split the right
1314         * way next time.
1315         * (it may look like it's equally easy to do a similar hack for
1316         * reverse sorted data, that is, split the tree left, but it's not.
1317         * Be my guest.)
1318         */
1319        if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1320                /*
1321                 * acquire a transaction lock on the new/right page;
1322                 *
1323                 * action: xad insertion;
1324                 */
1325                /* insert entry at the first entry of the new right page */
1326                xad = &rp->xad[XTENTRYSTART];
1327                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1328                            split->addr);
1329
1330                rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1331
1332                if (!test_cflag(COMMIT_Nolink, ip)) {
1333                        /* rxtlck->lwm.offset = XTENTRYSTART; */
1334                        rxtlck->lwm.length = 1;
1335                }
1336
1337                *rmpp = rmp;
1338                *rbnp = rbn;
1339
1340                jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1341                return 0;
1342        }
1343
1344        /*
1345         *      non-sequential insert (at possibly middle page)
1346         */
1347
1348        /*
1349         * update previous pointer of old next/right page of <sp>
1350         */
1351        if (nextbn != 0) {
1352                XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1353                if (rc) {
1354                        XT_PUTPAGE(rmp);
1355                        goto clean_up;
1356                }
1357
1358                BT_MARK_DIRTY(mp, ip);
1359                /*
1360                 * acquire a transaction lock on the next page;
1361                 *
1362                 * action:sibling pointer update;
1363                 */
1364                if (!test_cflag(COMMIT_Nolink, ip))
1365                        tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1366
1367                p->header.prev = cpu_to_le64(rbn);
1368
1369                /* sibling page may have been updated previously, or
1370                 * it may be updated later;
1371                 */
1372
1373                XT_PUTPAGE(mp);
1374        }
1375
1376        /*
1377         * split the data between the split and new/right pages
1378         */
1379        maxentry = le16_to_cpu(sp->header.maxentry);
1380        middle = maxentry >> 1;
1381        righthalf = maxentry - middle;
1382
1383        /*
1384         * skip index in old split/left page - insert into left page:
1385         */
1386        if (skip <= middle) {
1387                /* move right half of split page to the new right page */
1388                memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1389                        righthalf << L2XTSLOTSIZE);
1390
1391                /* shift right tail of left half to make room for new entry */
1392                if (skip < middle)
1393                        memmove(&sp->xad[skip + 1], &sp->xad[skip],
1394                                (middle - skip) << L2XTSLOTSIZE);
1395
1396                /* insert new entry */
1397                xad = &sp->xad[skip];
1398                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1399                            split->addr);
1400
1401                /* update page header */
1402                sp->header.nextindex = cpu_to_le16(middle + 1);
1403                if (!test_cflag(COMMIT_Nolink, ip)) {
1404                        sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1405                            min(skip, (int)sxtlck->lwm.offset) : skip;
1406                }
1407
1408                rp->header.nextindex =
1409                    cpu_to_le16(XTENTRYSTART + righthalf);
1410        }
1411        /*
1412         * skip index in new right page - insert into right page:
1413         */
1414        else {
1415                /* move left head of right half to right page */
1416                n = skip - middle;
1417                memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1418                        n << L2XTSLOTSIZE);
1419
1420                /* insert new entry */
1421                n += XTENTRYSTART;
1422                xad = &rp->xad[n];
1423                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1424                            split->addr);
1425
1426                /* move right tail of right half to right page */
1427                if (skip < maxentry)
1428                        memmove(&rp->xad[n + 1], &sp->xad[skip],
1429                                (maxentry - skip) << L2XTSLOTSIZE);
1430
1431                /* update page header */
1432                sp->header.nextindex = cpu_to_le16(middle);
1433                if (!test_cflag(COMMIT_Nolink, ip)) {
1434                        sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1435                            min(middle, (int)sxtlck->lwm.offset) : middle;
1436                }
1437
1438                rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1439                                                   righthalf + 1);
1440        }
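        /*
         * Editorial worked example, not part of the original source: with
         * the usual PSIZE of 4096, a full page has maxentry = 256 slots, so
         * middle = 128 and righthalf = 128.  Inserting at skip = 5 leaves
         * the split page with nextindex = middle + 1 = 129 and the right
         * page with nextindex = XTENTRYSTART + righthalf = 130; inserting
         * at skip = 200 leaves nextindex = 128 on the left and 131 on the
         * right.
         */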
1441
1442        if (!test_cflag(COMMIT_Nolink, ip)) {
1443                sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1444                    sxtlck->lwm.offset;
1445
1446                /* rxtlck->lwm.offset = XTENTRYSTART; */
1447                rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1448                    XTENTRYSTART;
1449        }
1450
1451        *rmpp = rmp;
1452        *rbnp = rbn;
1453
1454        jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1455        return rc;
1456
1457      clean_up:
1458
1459        /* Rollback quota allocation. */
1460        if (quota_allocation)
1461                DQUOT_FREE_BLOCK(ip, quota_allocation);
1462
1463        return (rc);
1464}
1465
1466
1467/*
1468 *      xtSplitRoot()
1469 *
1470 * function:
1471 *      split the full root page into original/root/split page and new
1472 *      right page
1473 *      i.e., root remains fixed in tree anchor (inode) and the root is
1474 *      copied to a single new right child page since root page <<
1475 *      non-root page, and the split root page contains a single entry
1476 *      for the new right child page.
1477 *
1478 * parameter:
1479 *      int             tid,
1480 *      struct inode    *ip,
1481 *      struct xtsplit  *split,
1482 *      struct metapage **rmpp)
1483 *
1484 * return:
1485 *      0 on success with *rmpp set to the new right child page; -errno on error.
1486 */
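/*
 * Editor's illustration (not part of the original source): shape of the
 * tree before and after the root split performed below.
 *
 *   before:  inode i_xtroot (full root page)     [e0 e1 ... eN]
 *
 *   after:   inode i_xtroot (internal, one router entry at offset 0)
 *                 |
 *                 v
 *            new right child page                [e0 e1 ... eN + new entry]
 */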
1487static int
1488xtSplitRoot(tid_t tid,
1489            struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1490{
1491        xtpage_t *sp;
1492        struct metapage *rmp;
1493        xtpage_t *rp;
1494        s64 rbn;
1495        int skip, nextindex;
1496        xad_t *xad;
1497        pxd_t *pxd;
1498        struct pxdlist *pxdlist;
1499        struct tlock *tlck;
1500        struct xtlock *xtlck;
1501
1502        sp = &JFS_IP(ip)->i_xtroot;
1503
1504        INCREMENT(xtStat.split);
1505
1506        /*
1507         *      allocate a single (right) child page
1508         */
1509        pxdlist = split->pxdlist;
1510        pxd = &pxdlist->pxd[pxdlist->npxd];
1511        pxdlist->npxd++;
1512        rbn = addressPXD(pxd);
1513        rmp = get_metapage(ip, rbn, PSIZE, 1);
1514        if (rmp == NULL)
1515                return -EIO;
1516
1517        /* Allocate blocks to quota. */
1518        if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
1519                release_metapage(rmp);
1520                return -EDQUOT;
1521        }
1522
1523        jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1524
1525        /*
1526         * acquire a transaction lock on the new right page;
1527         *
1528         * action: new page;
1529         */
1530        BT_MARK_DIRTY(rmp, ip);
1531
1532        rp = (xtpage_t *) rmp->data;
1533        rp->header.flag =
1534            (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1535        rp->header.self = *pxd;
1536        rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1537        rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
1538
1539        /* initialize sibling pointers */
1540        rp->header.next = 0;
1541        rp->header.prev = 0;
1542
1543        /*
1544         * copy the in-line root page into new right page extent
1545         */
1546        nextindex = le16_to_cpu(sp->header.maxentry);
1547        memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1548                (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1549
1550        /*
1551         * insert the new entry into the new right/child page
1552         * (skip index in the new right page will not change)
1553         */
1554        skip = split->index;
1555        /* if insert into middle, shift right remaining entries */
1556        if (skip != nextindex)
1557                memmove(&rp->xad[skip + 1], &rp->xad[skip],
1558                        (nextindex - skip) * sizeof(xad_t));
1559
1560        xad = &rp->xad[skip];
1561        XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1562
1563        /* update page header */
1564        rp->header.nextindex = cpu_to_le16(nextindex + 1);
1565
1566        if (!test_cflag(COMMIT_Nolink, ip)) {
1567                tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1568                xtlck = (struct xtlock *) & tlck->lock;
1569                xtlck->lwm.offset = XTENTRYSTART;
1570                xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1571                    XTENTRYSTART;
1572        }
1573
1574        /*
1575         *      reset the root
1576         *
1577         * init root with the single entry for the new right page
1578         * set the 1st entry offset to 0, which forces the left-most key
1579         * at any level of the tree to be less than any search key.
1580         */
1581        /*
1582         * acquire a transaction lock on the root page (in-memory inode);
1583         *
1584         * action: root split;
1585         */
1586        BT_MARK_DIRTY(split->mp, ip);
1587
1588        xad = &sp->xad[XTENTRYSTART];
1589        XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1590
1591        /* update page header of root */
1592        sp->header.flag &= ~BT_LEAF;
1593        sp->header.flag |= BT_INTERNAL;
1594
1595        sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1596
1597        if (!test_cflag(COMMIT_Nolink, ip)) {
1598                tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1599                xtlck = (struct xtlock *) & tlck->lock;
1600                xtlck->lwm.offset = XTENTRYSTART;
1601                xtlck->lwm.length = 1;
1602        }
1603
1604        *rmpp = rmp;
1605
1606        jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1607        return 0;
1608}
1609
1610
1611/*
1612 *      xtExtend()
1613 *
1614 * function: extend in-place;
1615 *
1616 * note: existing extent may or may not have been committed.
1617 * caller is responsible for pager buffer cache update, and
1618 * working block allocation map update;
1619 * update pmap: alloc whole extended extent;
1620 */
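/*
 * Editor's note: a minimal, hypothetical caller sketch; the variable names
 * and the surrounding transaction handling are assumptions, not taken from
 * this file.  Given nblocks newly allocated blocks physically contiguous
 * with an existing extent that ends at logical block xoff, the extent could
 * be grown in place roughly as follows:
 *
 *	int rc;
 *	tid_t tid = txBegin(ip->i_sb, 0);
 *	rc = xtExtend(tid, ip, xoff, nblocks, 0);
 *	if (!rc)
 *		rc = txCommit(tid, 1, &ip, 0);
 *	txEnd(tid);
 */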
1621int xtExtend(tid_t tid,         /* transaction id */
1622             struct inode *ip, s64 xoff,        /* delta extent offset */
1623             s32 xlen,          /* delta extent length */
1624             int flag)
1625{
1626        int rc = 0;
1627        int cmp;
1628        struct metapage *mp;    /* meta-page buffer */
1629        xtpage_t *p;            /* base B+-tree index page */
1630        s64 bn;
1631        int index, nextindex, len;
1632        struct btstack btstack; /* traverse stack */
1633        struct xtsplit split;   /* split information */
1634        xad_t *xad;
1635        s64 xaddr;
1636        struct tlock *tlck;
1637        struct xtlock *xtlck = NULL;
1638
1639        jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1640
1641        /* there must exist an extent to be extended */
1642        if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1643                return rc;
1644
1645        /* retrieve search result */
1646        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1647
1648        if (cmp != 0) {
1649                XT_PUTPAGE(mp);
1650                jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
1651                return -EIO;
1652        }
1653
1654        /* extension must be contiguous */
1655        xad = &p->xad[index];
1656        if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1657                XT_PUTPAGE(mp);
1658                jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
1659                return -EIO;
1660        }
1661
1662        /*
1663         * acquire a transaction lock on the leaf page;
1664         *
1665         * action: xad insertion/extension;
1666         */
1667        BT_MARK_DIRTY(mp, ip);
1668        if (!test_cflag(COMMIT_Nolink, ip)) {
1669                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1670                xtlck = (struct xtlock *) & tlck->lock;
1671        }
1672
1673        /* extend will overflow extent ? */
1674        xlen = lengthXAD(xad) + xlen;
1675        if ((len = xlen - MAXXLEN) <= 0)
1676                goto extendOld;
1677
1678        /*
1679         *      extent overflow: insert entry for new extent
1680         */
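	/*
	 * (editor's note) the combined length would exceed MAXXLEN: the
	 * existing XAD is capped at MAXXLEN blocks below, and the remaining
	 * 'len' blocks are described by a new XAD that starts MAXXLEN blocks
	 * into the extent.
	 */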
1681//insertNew:
1682        xoff = offsetXAD(xad) + MAXXLEN;
1683        xaddr = addressXAD(xad) + MAXXLEN;
1684        nextindex = le16_to_cpu(p->header.nextindex);
1685
1686        /*
1687         *      if the leaf page is full, insert the new entry and
1688         *      propagate up the router entry for the new page from split
1689         *
1690         * The xtSplitUp() will insert the entry and unpin the leaf page.
1691         */
1692        if (nextindex == le16_to_cpu(p->header.maxentry)) {
1693                /* xtSplitUp() unpins leaf pages */
1694                split.mp = mp;
1695                split.index = index + 1;
1696                split.flag = XAD_NEW;
1697                split.off = xoff;       /* split offset */
1698                split.len = len;
1699                split.addr = xaddr;
1700                split.pxdlist = NULL;
1701                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1702                        return rc;
1703
1704                /* get back old page */
1705                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1706                if (rc)
1707                        return rc;
1708                /*
1709                 * if leaf root has been split, original root has been
1710                 * copied to new child page, i.e., original entry now
1711                 * resides on the new child page;
1712                 */
1713                if (p->header.flag & BT_INTERNAL) {
1714                        ASSERT(p->header.nextindex ==
1715                               cpu_to_le16(XTENTRYSTART + 1));
1716                        xad = &p->xad[XTENTRYSTART];
1717                        bn = addressXAD(xad);
1718                        XT_PUTPAGE(mp);
1719
1720                        /* get new child page */
1721                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1722                        if (rc)
1723                                return rc;
1724
1725                        BT_MARK_DIRTY(mp, ip);
1726                        if (!test_cflag(COMMIT_Nolink, ip)) {
1727                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1728                                xtlck = (struct xtlock *) & tlck->lock;
1729                        }
1730                }
1731        }
1732        /*
1733         *      insert the new entry into the leaf page
1734         */
1735        else {
1736                /* insert the new entry: mark the entry NEW */
1737                xad = &p->xad[index + 1];
1738                XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1739
1740                /* advance next available entry index */
1741                p->header.nextindex =
1742                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1743        }
1744
1745        /* get back old entry */
1746        xad = &p->xad[index];
1747        xlen = MAXXLEN;
1748
1749        /*
1750         * extend old extent
1751         */
1752      extendOld:
1753        XADlength(xad, xlen);
1754        if (!(xad->flag & XAD_NEW))
1755                xad->flag |= XAD_EXTENDED;
1756
1757        if (!test_cflag(COMMIT_Nolink, ip)) {
1758                xtlck->lwm.offset =
1759                    (xtlck->lwm.offset) ? min(index,
1760                                              (int)xtlck->lwm.offset) : index;
1761                xtlck->lwm.length =
1762                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1763        }
1764
1765        /* unpin the leaf page */
1766        XT_PUTPAGE(mp);
1767
1768        return rc;
1769}
1770
1771#ifdef _NOTYET
1772/*
1773 *      xtTailgate()
1774 *
1775 * function: split existing 'tail' extent
1776 *      (split offset >= start offset of tail extent), and
1777 *      relocate and extend the split tail half;
1778 *
1779 * note: existing extent may or may not have been committed.
1780 * caller is responsible for pager buffer cache update, and
1781 * working block allocation map update;
1782 * update pmap: free old split tail extent, alloc new extent;
1783 */
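/*
 * Editor's sketch (illustrative only) of the tailgate described above:
 *
 *   before:  |------------ old tail extent ------------|
 *                          ^xoff
 *   after:   |---- llen ---|--- new extent: xlen @ xaddr --->
 *
 * If llen == 0 the old extent is replaced outright rather than truncated,
 * and the old blocks beyond xoff are freed (or queued for freeing at commit).
 */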
1784int xtTailgate(tid_t tid,               /* transaction id */
1785               struct inode *ip, s64 xoff,      /* split/new extent offset */
1786               s32 xlen,        /* new extent length */
1787               s64 xaddr,       /* new extent address */
1788               int flag)
1789{
1790        int rc = 0;
1791        int cmp;
1792        struct metapage *mp;    /* meta-page buffer */
1793        xtpage_t *p;            /* base B+-tree index page */
1794        s64 bn;
1795        int index, nextindex, llen, rlen;
1796        struct btstack btstack; /* traverse stack */
1797        struct xtsplit split;   /* split information */
1798        xad_t *xad;
1799        struct tlock *tlck;
1800        struct xtlock *xtlck = NULL;
1801        struct tlock *mtlck;
1802        struct maplock *pxdlock;
1803
1804/*
1805printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1806        (ulong)xoff, xlen, (ulong)xaddr);
1807*/
1808
1809        /* there must exist an extent to be tailgated */
1810        if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
1811                return rc;
1812
1813        /* retrieve search result */
1814        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1815
1816        if (cmp != 0) {
1817                XT_PUTPAGE(mp);
1818                jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
1819                return -EIO;
1820        }
1821
1822        /* entry found must be last entry */
1823        nextindex = le16_to_cpu(p->header.nextindex);
1824        if (index != nextindex - 1) {
1825                XT_PUTPAGE(mp);
1826                jfs_error(ip->i_sb,
1827                          "xtTailgate: the entry found is not the last entry");
1828                return -EIO;
1829        }
1830
1831        BT_MARK_DIRTY(mp, ip);
1832        /*
1833         * acquire tlock of the leaf page containing original entry
1834         */
1835        if (!test_cflag(COMMIT_Nolink, ip)) {
1836                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1837                xtlck = (struct xtlock *) & tlck->lock;
1838        }
1839
1840        /* completely replace extent ? */
1841        xad = &p->xad[index];
1842/*
1843printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1844        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
1845*/
1846        if ((llen = xoff - offsetXAD(xad)) == 0)
1847                goto updateOld;
1848
1849        /*
1850         *      partially replace extent: insert entry for new extent
1851         */
1852//insertNew:
1853        /*
1854         *      if the leaf page is full, insert the new entry and
1855         *      propagate up the router entry for the new page from split
1856         *
1857         * The xtSplitUp() will insert the entry and unpin the leaf page.
1858         */
1859        if (nextindex == le16_to_cpu(p->header.maxentry)) {
1860                /* xtSplitUp() unpins leaf pages */
1861                split.mp = mp;
1862                split.index = index + 1;
1863                split.flag = XAD_NEW;
1864                split.off = xoff;       /* split offset */
1865                split.len = xlen;
1866                split.addr = xaddr;
1867                split.pxdlist = NULL;
1868                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1869                        return rc;
1870
1871                /* get back old page */
1872                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1873                if (rc)
1874                        return rc;
1875                /*
1876                 * if leaf root has been split, original root has been
1877                 * copied to new child page, i.e., original entry now
1878                 * resides on the new child page;
1879                 */
1880                if (p->header.flag & BT_INTERNAL) {
1881                        ASSERT(p->header.nextindex ==
1882                               cpu_to_le16(XTENTRYSTART + 1));
1883                        xad = &p->xad[XTENTRYSTART];
1884                        bn = addressXAD(xad);
1885                        XT_PUTPAGE(mp);
1886
1887                        /* get new child page */
1888                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1889                        if (rc)
1890                                return rc;
1891
1892                        BT_MARK_DIRTY(mp, ip);
1893                        if (!test_cflag(COMMIT_Nolink, ip)) {
1894                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1895                                xtlck = (struct xtlock *) & tlck->lock;
1896                        }
1897                }
1898        }
1899        /*
1900         *      insert the new entry into the leaf page
1901         */
1902        else {
1903                /* insert the new entry: mark the entry NEW */
1904                xad = &p->xad[index + 1];
1905                XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1906
1907                /* advance next available entry index */
1908                p->header.nextindex =
1909                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1910        }
1911
1912        /* get back old XAD */
1913        xad = &p->xad[index];
1914
1915        /*
1916         * truncate/relocate old extent at split offset
1917         */
1918      updateOld:
1919        /* update dmap for old/committed/truncated extent */
1920        rlen = lengthXAD(xad) - llen;
1921        if (!(xad->flag & XAD_NEW)) {
1922                /* free from PWMAP at commit */
1923                if (!test_cflag(COMMIT_Nolink, ip)) {
1924                        mtlck = txMaplock(tid, ip, tlckMAP);
1925                        pxdlock = (struct maplock *) & mtlck->lock;
1926                        pxdlock->flag = mlckFREEPXD;
1927                        PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
1928                        PXDlength(&pxdlock->pxd, rlen);
1929                        pxdlock->index = 1;
1930                }
1931        } else
1932                /* free from WMAP */
1933                dbFree(ip, addressXAD(xad) + llen, (s64) rlen);
1934
1935        if (llen)
1936                /* truncate */
1937                XADlength(xad, llen);
1938        else
1939                /* replace */
1940                XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1941
1942        if (!test_cflag(COMMIT_Nolink, ip)) {
1943                xtlck->lwm.offset = (xtlck->lwm.offset) ?
1944                    min(index, (int)xtlck->lwm.offset) : index;
1945                xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1946                    xtlck->lwm.offset;
1947        }
1948
1949        /* unpin the leaf page */
1950        XT_PUTPAGE(mp);
1951
1952        return rc;
1953}
1954#endif /* _NOTYET */
1955
1956/*
1957 *      xtUpdate()
1958 *
1959 * function: update XAD;
1960 *
1961 *      update extent for allocated_but_not_recorded or
1962 *      compressed extent;
1963 *
1964 * parameter:
1965 *      nxad    - new XAD;
1966 *              logical extent of the specified XAD must be completely
1967 *              contained by an existing XAD;
1968 */
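/*
 * Editor's example (illustrative, not from the original source): with an
 * existing XAD covering logical blocks [100..149] (xoff = 100, xlen = 50),
 *
 *   nxoff = 100, nxlen = 50 -> plain replace (nXAD recorded);
 *   nxoff = 100, nxlen = 10 -> updateLeft: nXAD recorded, rXAD [110..149]
 *                              re-inserted with the original XAD's flags;
 *   nxoff = 120, nxlen = 10 -> 3-way split: lXAD [100..119], nXAD [120..129],
 *                              rXAD [130..149];
 *
 * coalescing with a recorded, contiguous left/right neighbour is attempted
 * first when the offsets allow it.
 */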
1969int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1970{                               /* new XAD */
1971        int rc = 0;
1972        int cmp;
1973        struct metapage *mp;    /* meta-page buffer */
1974        xtpage_t *p;            /* base B+-tree index page */
1975        s64 bn;
1976        int index0, index, newindex, nextindex;
1977        struct btstack btstack; /* traverse stack */
1978        struct xtsplit split;   /* split information */
1979        xad_t *xad, *lxad, *rxad;
1980        int xflag;
1981        s64 nxoff, xoff;
1982        int nxlen, xlen, lxlen, rxlen;
1983        s64 nxaddr, xaddr;
1984        struct tlock *tlck;
1985        struct xtlock *xtlck = NULL;
1986        int newpage = 0;
1987
1988        /* there must exist an extent to be updated */
1989        nxoff = offsetXAD(nxad);
1990        nxlen = lengthXAD(nxad);
1991        nxaddr = addressXAD(nxad);
1992
1993        if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1994                return rc;
1995
1996        /* retrieve search result */
1997        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1998
1999        if (cmp != 0) {
2000                XT_PUTPAGE(mp);
2001                jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
2002                return -EIO;
2003        }
2004
2005        BT_MARK_DIRTY(mp, ip);
2006        /*
2007         * acquire tlock of the leaf page containing original entry
2008         */
2009        if (!test_cflag(COMMIT_Nolink, ip)) {
2010                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2011                xtlck = (struct xtlock *) & tlck->lock;
2012        }
2013
2014        xad = &p->xad[index0];
2015        xflag = xad->flag;
2016        xoff = offsetXAD(xad);
2017        xlen = lengthXAD(xad);
2018        xaddr = addressXAD(xad);
2019
2020        /* nXAD must be completely contained within XAD */
2021        if ((xoff > nxoff) ||
2022            (nxoff + nxlen > xoff + xlen)) {
2023                XT_PUTPAGE(mp);
2024                jfs_error(ip->i_sb,
2025                          "xtUpdate: nXAD is not completely contained within XAD");
2026                return -EIO;
2027        }
2028
2029        index = index0;
2030        newindex = index + 1;
2031        nextindex = le16_to_cpu(p->header.nextindex);
2032
2033#ifdef  _JFS_WIP_NOCOALESCE
2034        if (xoff < nxoff)
2035                goto updateRight;
2036
2037        /*
2038         * replace XAD with nXAD
2039         */
2040      replace:                  /* (nxoff == xoff) */
2041        if (nxlen == xlen) {
2042                /* replace XAD with nXAD:recorded */
2043                *xad = *nxad;
2044                xad->flag = xflag & ~XAD_NOTRECORDED;
2045
2046                goto out;
2047        } else                  /* (nxlen < xlen) */
2048                goto updateLeft;
2049#endif                          /* _JFS_WIP_NOCOALESCE */
2050
2051/* #ifdef _JFS_WIP_COALESCE */
2052        if (xoff < nxoff)
2053                goto coalesceRight;
2054
2055        /*
2056         * coalesce with left XAD
2057         */
2058//coalesceLeft: /* (xoff == nxoff) */
2059        /* is XAD first entry of page ? */
2060        if (index == XTENTRYSTART)
2061                goto replace;
2062
2063        /* is nXAD logically and physically contiguous with lXAD ? */
2064        lxad = &p->xad[index - 1];
2065        lxlen = lengthXAD(lxad);
2066        if (!(lxad->flag & XAD_NOTRECORDED) &&
2067            (nxoff == offsetXAD(lxad) + lxlen) &&
2068            (nxaddr == addressXAD(lxad) + lxlen) &&
2069            (lxlen + nxlen < MAXXLEN)) {
2070                /* extend right lXAD */
2071                index0 = index - 1;
2072                XADlength(lxad, lxlen + nxlen);
2073
2074                /* If we just merged two extents together, need to make sure the
2075                 * right extent gets logged.  If the left one is marked XAD_NEW,
2076                 * then we know it will be logged.  Otherwise, mark as
2077                 * XAD_EXTENDED
2078                 */
2079                if (!(lxad->flag & XAD_NEW))
2080                        lxad->flag |= XAD_EXTENDED;
2081
2082                if (xlen > nxlen) {
2083                        /* truncate XAD */
2084                        XADoffset(xad, xoff + nxlen);
2085                        XADlength(xad, xlen - nxlen);
2086                        XADaddress(xad, xaddr + nxlen);
2087                        goto out;
2088                } else {        /* (xlen == nxlen) */
2089
2090                        /* remove XAD */
2091                        if (index < nextindex - 1)
2092                                memmove(&p->xad[index], &p->xad[index + 1],
2093                                        (nextindex - index -
2094                                         1) << L2XTSLOTSIZE);
2095
2096                        p->header.nextindex =
2097                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2098                                        1);
2099
2100                        index = index0;
2101                        newindex = index + 1;
2102                        nextindex = le16_to_cpu(p->header.nextindex);
2103                        xoff = nxoff = offsetXAD(lxad);
2104                        xlen = nxlen = lxlen + nxlen;
2105                        xaddr = nxaddr = addressXAD(lxad);
2106                        goto coalesceRight;
2107                }
2108        }
2109
2110        /*
2111         * replace XAD with nXAD
2112         */
2113      replace:                  /* (nxoff == xoff) */
2114        if (nxlen == xlen) {
2115                /* replace XAD with nXAD:recorded */
2116                *xad = *nxad;
2117                xad->flag = xflag & ~XAD_NOTRECORDED;
2118
2119                goto coalesceRight;
2120        } else                  /* (nxlen < xlen) */
2121                goto updateLeft;
2122
2123        /*
2124         * coalesce with right XAD
2125         */
2126      coalesceRight:            /* (xoff <= nxoff) */
2127        /* is XAD last entry of page ? */
2128        if (newindex == nextindex) {
2129                if (xoff == nxoff)
2130                        goto out;
2131                goto updateRight;
2132        }
2133
2134        /* is nXAD logically and physically contiguous with rXAD ? */
2135        rxad = &p->xad[index + 1];
2136        rxlen = lengthXAD(rxad);
2137        if (!(rxad->flag & XAD_NOTRECORDED) &&
2138            (nxoff + nxlen == offsetXAD(rxad)) &&
2139            (nxaddr + nxlen == addressXAD(rxad)) &&
2140            (rxlen + nxlen < MAXXLEN)) {
2141                /* extend left rXAD */
2142                XADoffset(rxad, nxoff);
2143                XADlength(rxad, rxlen + nxlen);
2144                XADaddress(rxad, nxaddr);
2145
2146                /* If we just merged two extents together, need to make sure
2147                 * the left extent gets logged.  If the right one is marked
2148                 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
2149                 * XAD_EXTENDED
2150                 */
2151                if (!(rxad->flag & XAD_NEW))
2152                        rxad->flag |= XAD_EXTENDED;
2153
2154                if (xlen > nxlen)
2155                        /* truncate XAD */
2156                        XADlength(xad, xlen - nxlen);
2157                else {          /* (xlen == nxlen) */
2158
2159                        /* remove XAD */
2160                        memmove(&p->xad[index], &p->xad[index + 1],
2161                                (nextindex - index - 1) << L2XTSLOTSIZE);
2162
2163                        p->header.nextindex =
2164                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2165                                        1);
2166                }
2167
2168                goto out;
2169        } else if (xoff == nxoff)
2170                goto out;
2171
2172        if (xoff >= nxoff) {
2173                XT_PUTPAGE(mp);
2174                jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
2175                return -EIO;
2176        }
2177/* #endif _JFS_WIP_COALESCE */
2178
2179        /*
2180         * split XAD into (lXAD, nXAD):
2181         *
2182         *          |---nXAD--->
2183         * --|----------XAD----------|--
2184         *   |-lXAD-|
2185         */
2186      updateRight:              /* (xoff < nxoff) */
2187        /* truncate old XAD as lXAD:not_recorded */
2188        xad = &p->xad[index];
2189        XADlength(xad, nxoff - xoff);
2190
2191        /* insert nXAD:recorded */
2192        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2193
2194                /* xtSplitUp() unpins leaf pages */
2195                split.mp = mp;
2196                split.index = newindex;
2197                split.flag = xflag & ~XAD_NOTRECORDED;
2198                split.off = nxoff;
2199                split.len = nxlen;
2200                split.addr = nxaddr;
2201                split.pxdlist = NULL;
2202                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2203                        return rc;
2204
2205                /* get back old page */
2206                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2207                if (rc)
2208                        return rc;
2209                /*
2210                 * if leaf root has been split, original root has been
2211                 * copied to new child page, i.e., original entry now
2212                 * resides on the new child page;
2213                 */
2214                if (p->header.flag & BT_INTERNAL) {
2215                        ASSERT(p->header.nextindex ==
2216                               cpu_to_le16(XTENTRYSTART + 1));
2217                        xad = &p->xad[XTENTRYSTART];
2218                        bn = addressXAD(xad);
2219                        XT_PUTPAGE(mp);
2220
2221                        /* get new child page */
2222                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2223                        if (rc)
2224                                return rc;
2225
2226                        BT_MARK_DIRTY(mp, ip);
2227                        if (!test_cflag(COMMIT_Nolink, ip)) {
2228                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2229                                xtlck = (struct xtlock *) & tlck->lock;
2230                        }
2231                } else {
2232                        /* is nXAD on new page ? */
2233                        if (newindex >
2234                            (le16_to_cpu(p->header.maxentry) >> 1)) {
2235                                newindex =
2236                                    newindex -
2237                                    le16_to_cpu(p->header.nextindex) +
2238                                    XTENTRYSTART;
2239                                newpage = 1;
2240                        }
2241                }
2242        } else {
2243                /* if insert into middle, shift right remaining entries */
2244                if (newindex < nextindex)
2245                        memmove(&p->xad[newindex + 1], &p->xad[newindex],
2246                                (nextindex - newindex) << L2XTSLOTSIZE);
2247
2248                /* insert the entry */
2249                xad = &p->xad[newindex];
2250                *xad = *nxad;
2251                xad->flag = xflag & ~XAD_NOTRECORDED;
2252
2253                /* advance next available entry index. */
2254                p->header.nextindex =
2255                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2256        }
2257
2258        /*
2259         * does nXAD force 3-way split ?
2260         *
2261         *          |---nXAD--->|
2262         * --|----------XAD-------------|--
2263         *   |-lXAD-|           |-rXAD -|
2264         */
2265        if (nxoff + nxlen == xoff + xlen)
2266                goto out;
2267
2268        /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
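	/*
	 * (editor's note) newpage was set above when the inserted nXAD landed
	 * on the new right sibling produced by xtSplitUp(); move to that page
	 * via header.next before carving the trailing rXAD out of the old XAD.
	 */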
2269        if (newpage) {
2270                /* close out old page */
2271                if (!test_cflag(COMMIT_Nolink, ip)) {
2272                        xtlck->lwm.offset = (xtlck->lwm.offset) ?
2273                            min(index0, (int)xtlck->lwm.offset) : index0;
2274                        xtlck->lwm.length =
2275                            le16_to_cpu(p->header.nextindex) -
2276                            xtlck->lwm.offset;
2277                }
2278
2279                bn = le64_to_cpu(p->header.next);
2280                XT_PUTPAGE(mp);
2281
2282                /* get new right page */
2283                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2284                if (rc)
2285                        return rc;
2286
2287                BT_MARK_DIRTY(mp, ip);
2288                if (!test_cflag(COMMIT_Nolink, ip)) {
2289                        tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2290                        xtlck = (struct xtlock *) & tlck->lock;
2291                }
2292
2293                index0 = index = newindex;
2294        } else
2295                index++;
2296
2297        newindex = index + 1;
2298        nextindex = le16_to_cpu(p->header.nextindex);
2299        xlen = xlen - (nxoff - xoff);
2300        xoff = nxoff;
2301        xaddr = nxaddr;
2302
2303        /* recompute split pages */
2304        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2305                XT_PUTPAGE(mp);
2306
2307                if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
2308                        return rc;
2309
2310                /* retrieve search result */
2311                XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
2312
2313                if (cmp != 0) {
2314                        XT_PUTPAGE(mp);
2315                        jfs_error(ip->i_sb, "xtUpdate: xtSearch failed");
2316                        return -EIO;
2317                }
2318
2319                if (index0 != index) {
2320                        XT_PUTPAGE(mp);
2321                        jfs_error(ip->i_sb,
2322                                  "xtUpdate: unexpected value of index");
2323                        return -EIO;
2324                }
2325        }
2326
2327        /*
2328         * split XAD into (nXAD, rXAD)
2329         *
2330         *          ---nXAD---|
2331         * --|----------XAD----------|--
2332         *                    |-rXAD-|
2333         */
2334      updateLeft:               /* (nxoff == xoff) && (nxlen < xlen) */
2335        /* update old XAD with nXAD:recorded */
2336        xad = &p->xad[index];
2337        *xad = *nxad;
2338        xad->flag = xflag & ~XAD_NOTRECORDED;
2339
2340        /* insert rXAD:not_recorded */
2341        xoff = xoff + nxlen;
2342        xlen = xlen - nxlen;
2343        xaddr = xaddr + nxlen;
2344        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2345/*
2346printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2347*/
2348                /* xtSplitUp() unpins leaf pages */
2349                split.mp = mp;
2350                split.index = newindex;
2351                split.flag = xflag;
2352                split.off = xoff;
2353                split.len = xlen;
2354                split.addr = xaddr;
2355                split.pxdlist = NULL;
2356                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2357                        return rc;
2358
2359                /* get back old page */
2360                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2361                if (rc)
2362                        return rc;
2363
2364                /*
2365                 * if leaf root has been split, original root has been
2366                 * copied to new child page, i.e., original entry now
2367                 * resides on the new child page;
2368                 */
2369                if (p->header.flag & BT_INTERNAL) {
2370                        ASSERT(p->header.nextindex ==
2371                               cpu_to_le16(XTENTRYSTART + 1));
2372                        xad = &p->xad[XTENTRYSTART];
2373                        bn = addressXAD(xad);
2374                        XT_PUTPAGE(mp);
2375
2376                        /* get new child page */
2377                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2378                        if (rc)
2379                                return rc;
2380
2381                        BT_MARK_DIRTY(mp, ip);
2382                        if (!test_cflag(COMMIT_Nolink, ip)) {
2383                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2384                                xtlck = (struct xtlock *) & tlck->lock;
2385                        }
2386                }
2387        } else {
2388                /* if insert into middle, shift right remaining entries */
2389                if (newindex < nextindex)
2390                        memmove(&p->xad[newindex + 1], &p->xad[newindex],
2391                                (nextindex - newindex) << L2XTSLOTSIZE);
2392
2393                /* insert the entry */
2394                xad = &p->xad[newindex];
2395                XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2396
2397                /* advance next available entry index. */
2398                p->header.nextindex =
2399                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2400        }
2401
2402      out:
2403        if (!test_cflag(COMMIT_Nolink, ip)) {
2404                xtlck->lwm.offset = (xtlck->lwm.offset) ?
2405                    min(index0, (int)xtlck->lwm.offset) : index0;
2406                xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2407                    xtlck->lwm.offset;
2408        }
2409
2410        /* unpin the leaf page */
2411        XT_PUTPAGE(mp);
2412
2413        return rc;
2414}
2415
2416
2417/*
2418 *      xtAppend()
2419 *
2420 * function: grow in append mode from the contiguous region specified;
2421 *
2422 * parameter:
2423 *      tid             - transaction id;
2424 *      ip              - file object;
2425 *      xflag           - extent flag;
2426 *      xoff            - extent offset;
2427 *      maxblocks       - max extent length;
2428 *      xlen            - extent length (in/out);
2429 *      xaddrp          - extent address pointer (in/out);
2430 *      flag            -
2431 *
2432 * return: 0 - success; -EEXIST - extent already exists at xoff; otherwise -errno;
2433 */
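/*
 * Editor's note: a minimal, hypothetical usage sketch; the variable names
 * are assumptions.  xlen and xaddr are in/out parameters: the caller passes
 * the requested length and a candidate starting block, and on success they
 * describe the extent actually allocated and appended:
 *
 *	s32 xlen = nblocks;
 *	s64 xaddr = hint;
 *	rc = xtAppend(tid, ip, 0, xoff, maxblocks, &xlen, &xaddr, 0);
 */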
2434int xtAppend(tid_t tid,         /* transaction id */
2435             struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
2436             s32 * xlenp,       /* (in/out) */
2437             s64 * xaddrp,      /* (in/out) */
2438             int flag)
2439{
2440        int rc = 0;
2441        struct metapage *mp;    /* meta-page buffer */
2442        xtpage_t *p;            /* base B+-tree index page */
2443        s64 bn, xaddr;
2444        int index, nextindex;
2445        struct btstack btstack; /* traverse stack */
2446        struct xtsplit split;   /* split information */
2447        xad_t *xad;
2448        int cmp;
2449        struct tlock *tlck;
2450        struct xtlock *xtlck;
2451        int nsplit, nblocks, xlen;
2452        struct pxdlist pxdlist;
2453        pxd_t *pxd;
2454        s64 next;
2455
2456        xaddr = *xaddrp;
2457        xlen = *xlenp;
2458        jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
2459                 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
2460
2461        /*
2462         *      search for the entry location at which to insert:
2463         *
2464         * xtFastSearch() and xtSearch() both return (leaf page
2465         * pinned, index at which to insert).
2466         * n.b. xtSearch() may return index of maxentry of
2467         * the full page.
2468         */
2469        if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
2470                return rc;
2471
2472        /* retrieve search result */
2473        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2474
2475        if (cmp == 0) {
2476                rc = -EEXIST;
2477                goto out;
2478        }
2479
2480        if (next)
2481                xlen = min(xlen, (int)(next - xoff));
2482//insert:
2483        /*
2484         *      insert entry for new extent
2485         */
2486        xflag |= XAD_NEW;
2487
2488        /*
2489         *      if the leaf page is full, split the page and
2490         *      propagate up the router entry for the new page from split
2491         *
2492         * The xtSplitUp() will insert the entry and unpin the leaf page.
2493         */
2494        nextindex = le16_to_cpu(p->header.nextindex);
2495        if (nextindex < le16_to_cpu(p->header.maxentry))
2496                goto insertLeaf;
2497
2498        /*
2499         * allocate new index blocks to cover index page split(s)
2500         */
2501        nsplit = btstack.nsplit;
2502        split.pxdlist = &pxdlist;
2503        pxdlist.maxnpxd = pxdlist.npxd = 0;
2504        pxd = &pxdlist.pxd[0];
2505        nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2506        for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2507                if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2508                        PXDaddress(pxd, xaddr);
2509                        PXDlength(pxd, nblocks);
2510
2511                        pxdlist.maxnpxd++;
2512
2513                        continue;
2514                }
2515
2516                /* undo allocation */
2517
2518                goto out;
2519        }
2520
2521        xlen = min(xlen, maxblocks);
2522
2523        /*
2524         * allocate data extent requested
2525         */
2526        if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2527                goto out;
2528
2529        split.mp = mp;
2530        split.index = index;
2531        split.flag = xflag;
2532        split.off = xoff;
2533        split.len = xlen;
2534        split.addr = xaddr;
2535        if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2536                /* undo data extent allocation */
2537                dbFree(ip, *xaddrp, (s64) * xlenp);
2538
2539                return rc;
2540        }
2541
2542        *xaddrp = xaddr;
2543        *xlenp = xlen;
2544        return 0;
2545
2546        /*
2547         *      insert the new entry into the leaf page
2548         */
2549      insertLeaf:
2550        /*
2551         * allocate data extent requested
2552         */
2553        if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2554                goto out;
2555
2556        BT_MARK_DIRTY(mp, ip);
2557        /*
2558         * acquire a transaction lock on the leaf page;
2559         *
2560         * action: xad insertion/extension;
2561         */
2562        tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2563        xtlck = (struct xtlock *) & tlck->lock;
2564
2565        /* insert the new entry: mark the entry NEW */
2566        xad = &p->xad[index];
2567        XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2568
2569        /* advance next available entry index */
2570        p->header.nextindex =
2571            cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2572
2573        xtlck->lwm.offset =
2574            (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2575        xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2576            xtlck->lwm.offset;
2577
2578        *xaddrp = xaddr;
2579        *xlenp = xlen;
2580
2581      out:
2582        /* unpin the leaf page */
2583        XT_PUTPAGE(mp);
2584
2585        return rc;
2586}
2587#ifdef _STILL_TO_PORT
2588
2589/* - TBD for defragmentation/reorganization -
2590 *
2591 *      xtDelete()
2592 *
2593 * function:
2594 *      delete the entry with the specified key.
2595 *
2596 *      N.B.: whole extent of the entry is assumed to be deleted.
2597 *
2598 * parameter:
2599 *
2600 * return:
2601 *      ENOENT: if the entry is not found.
2602 *
2603 * exception:
2604 */
2605int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2606{
2607        int rc = 0;
2608        struct btstack btstack;
2609        int cmp;
2610        s64 bn;
2611        struct metapage *mp;
2612        xtpage_t *p;
2613        int index, nextindex;
2614        struct tlock *tlck;
2615        struct xtlock *xtlck;
2616
2617        /*
2618         * find the matching entry; xtSearch() pins the page
2619         */
2620        if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2621                return rc;
2622
2623        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2624        if (cmp) {
2625                /* unpin the leaf page */
2626                XT_PUTPAGE(mp);
2627                return -ENOENT;
2628        }
2629
2630        /*
2631         * delete the entry from the leaf page
2632         */
2633        nextindex = le16_to_cpu(p->header.nextindex);
2634        p->header.nextindex =
2635            cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1);
2636
2637        /*
2638         * if the leaf page becomes empty, free the page
2639         */
2640        if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
2641                return (xtDeleteUp(tid, ip, mp, p, &btstack));
2642
2643        BT_MARK_DIRTY(mp, ip);
2644        /*
2645         * acquire a transaction lock on the leaf page;
2646         *
2647         * action:xad deletion;
2648         */
2649        tlck = txLock(tid, ip, mp, tlckXTREE);
2650        xtlck = (struct xtlock *) & tlck->lock;
2651        xtlck->lwm.offset =
2652            (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
2653
2654        /* if delete from middle, shift left/compact the remaining entries */
2655        if (index < nextindex - 1)
2656                memmove(&p->xad[index], &p->xad[index + 1],
2657                        (nextindex - index - 1) * sizeof(xad_t));
2658
2659        XT_PUTPAGE(mp);
2660
2661        return 0;
2662}
2663
2664
2665/* - TBD for defragmentation/reorganization -
2666 *
2667 *      xtDeleteUp()
2668 *
2669 * function:
2670 *      free empty pages as propagating deletion up the tree
2671 *
2672 * parameter:
2673 *
2674 * return:
2675 */
2676static int
2677xtDeleteUp(tid_t tid, struct inode *ip,
2678           struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
2679{
2680        int rc = 0;
2681        struct metapage *mp;
2682        xtpage_t *p;
2683        int index, nextindex;
2684        s64 xaddr;
2685        int xlen;
2686        struct btframe *parent;
2687        struct tlock *tlck;
2688        struct xtlock *xtlck;
2689
2690        /*
2691         * keep root leaf page which has become empty
2692         */
2693        if (fp->header.flag & BT_ROOT) {
2694                /* keep the root page */
2695                fp->header.flag &= ~BT_INTERNAL;
2696                fp->header.flag |= BT_LEAF;
2697                fp->header.nextindex = cpu_to_le16(XTENTRYSTART);
2698
2699                /* XT_PUTPAGE(fmp); */
2700
2701                return 0;
2702        }
2703
2704        /*
2705         * free non-root leaf page
2706         */
2707        if ((rc = xtRelink(tid, ip, fp))) {
2708                XT_PUTPAGE(fmp);
2709                return rc;
2710        }
2711
2712        xaddr = addressPXD(&fp->header.self);
2713        xlen = lengthPXD(&fp->header.self);
2714        /* free the page extent */
2715        dbFree(ip, xaddr, (s64) xlen);
2716
2717        /* free the buffer page */
2718        discard_metapage(fmp);
2719
2720        /*
2721         * propagate page deletion up the index tree
2722         *
2723         * If the delete from the parent page makes it empty,
2724         * continue all the way up the tree.
2725         * stop if the root page is reached (which is never deleted) or
2726         * if the entry deletion does not empty the page.
2727         */
2728        while ((parent = BT_POP(btstack)) != NULL) {
2729                /* get/pin the parent page <sp> */
2730                XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2731                if (rc)
2732                        return rc;
2733
2734                index = parent->index;
2735
2736                /* delete the entry for the freed child page from parent.
2737                 */
2738                nextindex = le16_to_cpu(p->header.nextindex);
2739
2740                /*
2741                 * the parent has the single entry being deleted:
2742                 * free the parent page which has become empty.
2743                 */
2744                if (nextindex == XTENTRYSTART + 1) {
2745                        if (p->header.flag & BT_ROOT) {
2746                                /* keep the root page */
2747                                p->header.flag &= ~BT_INTERNAL;
2748                                p->header.flag |= BT_LEAF;
2749                                p->header.nextindex =
2750                                    cpu_to_le16(XTENTRYSTART);
2751
2752                                /* XT_PUTPAGE(mp); */
2753
2754                                break;
2755                        } else {
2756                                /* free the parent page */
2757                                if ((rc = xtRelink(tid, ip, p)))
2758                                        return rc;
2759
2760                                xaddr = addressPXD(&p->header.self);
2761                                /* free the page extent */
2762                                dbFree(ip, xaddr,
2763                                       (s64) JFS_SBI(ip->i_sb)->nbperpage);
2764
2765                                /* unpin/free the buffer page */
2766                                discard_metapage(mp);
2767
2768                                /* propagate up */
2769                                continue;
2770                        }
2771                }
2772                /*
2773                 * the parent has other entries remaining:
2774                 * delete the router entry from the parent page.
2775                 */
2776                else {
2777                        BT_MARK_DIRTY(mp, ip);
2778                        /*
2779                         * acquire a transaction lock on the leaf page;
2780                         *
2781                         * action:xad deletion;
2782                         */
2783                        tlck = txLock(tid, ip, mp, tlckXTREE);
2784                        xtlck = (struct xtlock *) & tlck->lock;
2785                        xtlck->lwm.offset =
2786                            (xtlck->lwm.offset) ? min(index,
2787                                                      xtlck->lwm.
2788                                                      offset) : index;
2789
2790                        /* if delete from middle,
2791                         * shift left/compact the remaining entries in the page
2792                         */
2793                        if (index < nextindex - 1)
2794                                memmove(&p->xad[index], &p->xad[index + 1],
2795                                        (nextindex - index -
2796                                         1) << L2XTSLOTSIZE);
2797
2798                        p->header.nextindex =
2799                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2800                                        1);
2801                        jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
2802                                 (ulong) parent->bn, index);
2803                }
2804
2805                /* unpin the parent page */
2806                XT_PUTPAGE(mp);
2807
2808                /* exit propagation up */
2809                break;
2810        }
2811
2812        return 0;
2813}
2814
2815
2816/*
2817 * NAME:        xtRelocate()
2818 *
2819 * FUNCTION:    relocate xtpage or data extent of regular file;
2820 *              This function is mainly used by defragfs utility.
2821 *
2822 * NOTE:        This routine does not have the logic to handle
2823 *              uncommitted allocated extents. The caller should call
2824 *              txCommit() to commit all allocations before calling
2825 *              this routine.
2826 */
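/*
 * Editor's summary of the two cases handled below (illustrative only):
 *   DATAEXT - copy each cache page of the source data extent via
 *             cmRead()/cmSetXD()/cmPut(), rebinding it to the destination
 *             address, then reacquire the parent leaf page;
 *   XTPAGE  - read in the target index page and any siblings, update the
 *             siblings' next/prev pointers to the new address, and rewrite
 *             the target page's self address for the destination extent.
 */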
2827int
2828xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,  /* old XAD */
2829           s64 nxaddr,          /* new xaddr */
2830           int xtype)
2831{                               /* extent type: XTPAGE or DATAEXT */
2832        int rc = 0;
2833        struct tblock *tblk;
2834        struct tlock *tlck;
2835        struct xtlock *xtlck;
2836        struct metapage *mp, *pmp, *lmp, *rmp;  /* meta-page buffer */
2837        xtpage_t *p, *pp, *rp, *lp;     /* base B+-tree index page */
2838        xad_t *xad;
2839        pxd_t *pxd;
2840        s64 xoff, xsize;
2841        int xlen;
2842        s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
2843        cbuf_t *cp;
2844        s64 offset, nbytes, nbrd, pno;
2845        int nb, npages, nblks;
2846        s64 bn;
2847        int cmp;
2848        int index;
2849        struct pxd_lock *pxdlock;
2850        struct btstack btstack; /* traverse stack */
2851
2852        xtype = xtype & EXTENT_TYPE;
2853
2854        xoff = offsetXAD(oxad);
2855        oxaddr = addressXAD(oxad);
2856        xlen = lengthXAD(oxad);
2857
2858        /* validate extent offset */
2859        offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2860        if (offset >= ip->i_size)
2861                return -ESTALE; /* stale extent */
2862
2863        jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
2864                 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2865
2866        /*
2867         *      1. get and validate the parent xtpage/xad entry
2868         *      covering the source extent to be relocated;
2869         */
2870        if (xtype == DATAEXT) {
2871                /* search in leaf entry */
2872                rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2873                if (rc)
2874                        return rc;
2875
2876                /* retrieve search result */
2877                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2878
2879                if (cmp) {
2880                        XT_PUTPAGE(pmp);
2881                        return -ESTALE;
2882                }
2883
2884                /* validate for exact match with a single entry */
2885                xad = &pp->xad[index];
2886                if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
2887                        XT_PUTPAGE(pmp);
2888                        return -ESTALE;
2889                }
2890        } else {                /* (xtype == XTPAGE) */
2891
2892                /* search in internal entry */
2893                rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
2894                if (rc)
2895                        return rc;
2896
2897                /* retrieve search result */
2898                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2899
2900                if (cmp) {
2901                        XT_PUTPAGE(pmp);
2902                        return -ESTALE;
2903                }
2904
2905                /* xtSearchNode() validated for exact match with a single entry
2906                 */
2907                xad = &pp->xad[index];
2908        }
2909        jfs_info("xtRelocate: parent xad entry validated.");
2910
2911        /*
2912         *      2. relocate the extent
2913         */
2914        if (xtype == DATAEXT) {
2915                /* if the extent is allocated-but-not-recorded
2916                 * there is no real data to be moved in this extent,
2917                 */
2918                if (xad->flag & XAD_NOTRECORDED)
2919                        goto out;
2920                else
2921                        /* release xtpage for cmRead()/xtLookup() */
2922                        XT_PUTPAGE(pmp);
2923
2924                /*
2925                 *      cmRelocate()
2926                 *
2927                 * copy target data pages to be relocated;
2928                 *
2929                 * data extent must start at a page boundary and be a
2930                 * multiple of page size (except the last data extent);
2931                 * read in each page of the source data extent into cbuf,
2932                 * update the cbuf extent descriptor of the page to be
2933                 * homeward bound to new dst data extent
2934                 * copy the data from the old extent to new extent.
2935                 * copy is essential for compressed files to avoid problems
2936                 * that can arise if there was a change in compression
2937                 * algorithms.
2938                 * copying is also a reasonable strategy because keeping the
2939                 * pages in memory afterwards may disrupt cache policy.
2940                 */
2941                offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2942                assert((offset & CM_OFFSET) == 0);
2943                nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2944                pno = offset >> CM_L2BSIZE;
2945                npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2946/*
2947                npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2948                          (offset >> CM_L2BSIZE) + 1;
2949*/
2950                sxaddr = oxaddr;
2951                dxaddr = nxaddr;
2952
2953                /* process the request one cache buffer at a time */
2954                for (nbrd = 0; nbrd < nbytes; nbrd += nb,
2955                     offset += nb, pno++, npages--) {
2956                        /* compute page size */
2957                        nb = min(nbytes - nbrd, CM_BSIZE);
2958
2959                        /* get the cache buffer of the page */
2960                        if ((rc = cmRead(ip, offset, npages, &cp)))
2961                                break;
2962
2963                        assert(addressPXD(&cp->cm_pxd) == sxaddr);
2964                        assert(!cp->cm_modified);
2965
2966                        /* bind buffer with the new extent address */
2967                        nblks = nb >> JFS_SBI(ip->i_sb)->l2bsize;
2968                        cmSetXD(ip, cp, pno, dxaddr, nblks);
2969
2970                        /* release the cbuf, mark it as modified */
2971                        cmPut(cp, true);
2972
2973                        dxaddr += nblks;
2974                        sxaddr += nblks;
2975                }
2976
2977                /* get back parent page */
2978                if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2979                        return rc;
2980
2981                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2982                jfs_info("xtRelocate: target data extent relocated.");
2983        } else {                /* (xtype == XTPAGE) */
2984
2985                /*
2986                 * read in the target xtpage from the source extent;
2987                 */
2988                XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2989                if (rc) {
2990                        XT_PUTPAGE(pmp);
2991                        return rc;
2992                }
2993
2994                /*
2995                 * read in sibling pages if any to update sibling pointers;
2996                 */
2997                rmp = NULL;
2998                if (p->header.next) {
2999                        nextbn = le64_to_cpu(p->header.next);
3000                        XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
3001                        if (rc) {
3002                                XT_PUTPAGE(pmp);
3003                                XT_PUTPAGE(mp);
3004                                return (rc);
3005                        }
3006                }
3007
3008                lmp = NULL;
3009                if (p->header.prev) {
3010                        prevbn = le64_to_cpu(p->header.prev);
3011                        XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
3012                        if (rc) {
3013                                XT_PUTPAGE(pmp);
3014                                XT_PUTPAGE(mp);
3015                                if (rmp)
3016                                        XT_PUTPAGE(rmp);
3017                                return (rc);
3018                        }
3019                }
3020
3021                /* at this point, all xtpages to be updated are in memory */
3022
3023                /*
3024                 * update sibling pointers of sibling xtpages if any;
3025                 */
3026                if (lmp) {
3027                        BT_MARK_DIRTY(lmp, ip);
3028                        tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
3029                        lp->header.next = cpu_to_le64(nxaddr);
3030                        XT_PUTPAGE(lmp);
3031                }
3032
3033                if (rmp) {
3034                        BT_MARK_DIRTY(rmp, ip);
3035                        tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
3036                        rp->header.prev = cpu_to_le64(nxaddr);
3037                        XT_PUTPAGE(rmp);
3038                }
3039
3040                /*
3041                 * update the target xtpage to be relocated
3042                 *
3043                 * update the self address of the target page
3044                 * and write to destination extent;
3045                 * the redo image covers the whole xtpage since it is a new
3046                 * page to the destination extent;
3047                 * update of bmap for the free of the source extent
3048                 * of the target xtpage itself;
3049                 * update of bmap for the allocation of the destination
3050                 * extent of the target xtpage itself;
3051                 * update of bmap for the extents covered by xad entries in
3052                 * the target xtpage is not necessary since they are not
3053                 * updated;
3054                 * if not committed before this relocation,
3055                 * the target page may contain XAD_NEW entries which must
3056                 * be scanned for bmap update (logredo() always
3057                 * scans the xtpage REDOPAGE image for bmap update);
3058                 * if committed before this relocation (tlckRELOCATE),
3059                 * scan may be skipped by commit() and logredo();
3060                 */
3061                BT_MARK_DIRTY(mp, ip);
3062                /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
3063                tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
3064                xtlck = (struct xtlock *) & tlck->lock;
3065
3066                /* update the self address in the xtpage header */
3067                pxd = &p->header.self;
3068                PXDaddress(pxd, nxaddr);
3069
3070                /* linelock for the after image of the whole page */
3071                xtlck->lwm.length =
3072                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
3073
3074                /* update the buffer extent descriptor of target xtpage */
3075                xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
3076                bmSetXD(mp, nxaddr, xsize);
3077
3078                /* unpin the target page to new homeward bound */
3079                XT_PUTPAGE(mp);
3080                jfs_info("xtRelocate: target xtpage relocated.");
3081        }
3082
3083        /*
3084         *      3. acquire maplock for the source extent to be freed;
3085         *
3086         * acquire a maplock saving the src relocated extent address
3087         * to free the extent at commit time;
3088         */
3089      out:
3090        /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
3091         * free PXD of the source data extent (logredo() will update
3092         * bmap for free of source data extent), and update bmap for
3093         * free of the source data extent;
3094         */
3095        if (xtype == DATAEXT)
3096                tlck = txMaplock(tid, ip, tlckMAP);
3097        /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
3098         * for the source xtpage (logredo() will init NoRedoPage
3099         * filter and will also update bmap for free of the source
3100         * xtpage), and update bmap for free of the source xtpage;
3101         * N.B. We use tlckMAP instead of tlckXTREE because there
3102         *      is no buffer associated with this lock since the buffer
3103         *      has been redirected to the target location.
3104         */
3105        else                    /* (xtype == XTPAGE) */
3106                tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
3107
3108        pxdlock = (struct pxd_lock *) & tlck->lock;
3109        pxdlock->flag = mlckFREEPXD;
3110        PXDaddress(&pxdlock->pxd, oxaddr);
3111        PXDlength(&pxdlock->pxd, xlen);
3112        pxdlock->index = 1;
3113
3114        /*
3115         *      4. update the parent xad entry for relocation;
3116         *
3117         * acquire tlck for the parent entry with XAD_NEW as entry
3118         * update which will write LOG_REDOPAGE and update bmap for
3119         * allocation of XAD_NEW destination extent;
3120         */
3121        jfs_info("xtRelocate: update parent xad entry.");
3122        BT_MARK_DIRTY(pmp, ip);
3123        tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
3124        xtlck = (struct xtlock *) & tlck->lock;
3125
3126        /* update the XAD with the new destination extent; */
3127        xad = &pp->xad[index];
3128        xad->flag |= XAD_NEW;
3129        XADaddress(xad, nxaddr);
3130
3131        xtlck->lwm.offset = (xtlck->lwm.offset) ? min(index, (int)xtlck->lwm.offset) : index;
3132        xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
3133            xtlck->lwm.offset;
3134
3135        /* unpin the parent xtpage */
3136        XT_PUTPAGE(pmp);
3137
3138        return rc;
3139}
3140
3141
3142/*
3143 *      xtSearchNode()
3144 *
3145 * function:    search for the internal xad entry covering the specified
3146 *              extent.  This function is mainly used by the defragfs utility.
3147 *
3148 * parameters:
3149 *      ip      - file object;
3150 *      xad     - extent to find;
3151 *      cmpp    - comparison result:
3152 *      btstack - traverse stack;
3153 *      flag    - search process flag;
3154 *
3155 * returns:
3156 *      btstack contains (bn, index) of search path traversed to the entry.
3157 *      *cmpp is set to result of comparison with the entry returned.
3158 *      the page containing the entry is pinned at exit.
3159 */
3160static int xtSearchNode(struct inode *ip, xad_t * xad,  /* required XAD entry */
3161                        int *cmpp, struct btstack * btstack, int flag)
3162{
3163        int rc = 0;
3164        s64 xoff, xaddr;
3165        int xlen;
3166        int cmp = 1;            /* init for empty page */
3167        s64 bn;                 /* block number */
3168        struct metapage *mp;    /* meta-page buffer */
3169        xtpage_t *p;            /* page */
3170        int base, index, lim;
3171        struct btframe *btsp;
3172        s64 t64;
3173
3174        BT_CLR(btstack);
3175
3176        xoff = offsetXAD(xad);
3177        xlen = lengthXAD(xad);
3178        xaddr = addressXAD(xad);
3179
3180        /*
3181         *      search down tree from root:
3182         *
3183         * between two consecutive entries <Ki, Pi> and <Kj, Pj> of an
3184         * internal page, child page Pi contains entries with key K, Ki <= K < Kj.
3185         *
3186         * if an entry with search key K is not found,
3187         * the internal page search finds the entry with the largest key Ki
3188         * less than K, which points to the child page to search;
3189         * the leaf page search finds the entry with the smallest key Kj
3190         * greater than K, so that the returned index is the position of
3191         * the entry to be shifted right for insertion of the new entry.
3192         * for an empty tree, the search key is greater than any key of the tree.
3193         *
3194         * by convention, root bn = 0.
3195         */
3196        for (bn = 0;;) {
3197                /* get/pin the page to search */
3198                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3199                if (rc)
3200                        return rc;
3201                if (p->header.flag & BT_LEAF) {
3202                        XT_PUTPAGE(mp);
3203                        return -ESTALE;
3204                }
3205
3206                lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3207
3208                /*
3209                 * binary search with search key K on the current page
3210                 */
3211                for (base = XTENTRYSTART; lim; lim >>= 1) {
3212                        index = base + (lim >> 1);
3213
3214                        XT_CMP(cmp, xoff, &p->xad[index], t64);
3215                        if (cmp == 0) {
3216                                /*
3217                                 *      search hit
3218                                 *
3219                                 * verify for exact match;
3220                                 */
3221                                if (xaddr == addressXAD(&p->xad[index]) &&
3222                                    xoff == offsetXAD(&p->xad[index])) {
3223                                        *cmpp = cmp;
3224
3225                                        /* save search result */
3226                                        btsp = btstack->top;
3227                                        btsp->bn = bn;
3228                                        btsp->index = index;
3229                                        btsp->mp = mp;
3230
3231                                        return 0;
3232                                }
3233
3234                                /* descend/search its child page */
3235                                goto next;
3236                        }
3237
3238                        if (cmp > 0) {
3239                                base = index + 1;
3240                                --lim;
3241                        }
3242                }
3243
3244                /*
3245                 *      search miss - non-leaf page:
3246                 *
3247                 * base is the smallest index with key (Kj) greater than
3248                 * search key (K) and may be zero or maxentry index.
3249                 * if base is non-zero, decrement base by one to get the parent
3250                 * entry of the child page to search.
3251                 */
3252                index = base ? base - 1 : base;
3253
3254                /*
3255                 * go down to child page
3256                 */
3257              next:
3258                /* get the child page block number */
3259                bn = addressXAD(&p->xad[index]);
3260
3261                /* unpin the parent page */
3262                XT_PUTPAGE(mp);
3263        }
3264}
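
/*
 * Illustration only: a hypothetical caller of xtSearchNode() (names prefixed
 * with example_ are not part of JFS).  It shows the contract stated above:
 * on success btstack.top holds (bn, index) of the matching internal entry and
 * the page containing it is left pinned, so it must be released with
 * XT_PUTPAGE() when the caller is done.  This is a sketch of the pattern used
 * by the relocation path, not the actual defragfs code.
 */
#if 0
static int example_find_internal_entry(struct inode *ip, xad_t * xad)
{
        struct btstack btstack;
        struct metapage *mp;
        xtpage_t *p;
        s64 bn;
        int cmp, index, rc;

        if ((rc = xtSearchNode(ip, xad, &cmp, &btstack, 0)))
                return rc;

        /* page containing the entry is pinned at exit */
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
        if (cmp) {
                /* defensive: no exact match for the required XAD */
                XT_PUTPAGE(mp);
                return -ESTALE;
        }

        /* ... operate on &p->xad[index] ... */

        XT_PUTPAGE(mp);
        return 0;
}
#endif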
3265
3266
3267/*
3268 *      xtRelink()
3269 *
3270 * function:
3271 *      link around a freed page.
3272 *
3273 * Parameter:
3274 *      int             tid,
3275 *      struct inode    *ip,
3276 *      xtpage_t        *p)
3277 *
3278 * returns:
3279 */
3280static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
3281{
3282        int rc = 0;
3283        struct metapage *mp;
3284        s64 nextbn, prevbn;
3285        struct tlock *tlck;
3286
3287        nextbn = le64_to_cpu(p->header.next);
3288        prevbn = le64_to_cpu(p->header.prev);
3289
3290        /* update prev pointer of the next page */
3291        if (nextbn != 0) {
3292                XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
3293                if (rc)
3294                        return rc;
3295
3296                /*
3297                 * acquire a transaction lock on the page;
3298                 *
3299                 * action: update prev pointer;
3300                 */
3301                BT_MARK_DIRTY(mp, ip);
3302                tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3303
3304                /* the page may already have been tlock'd */
3305
3306                p->header.prev = cpu_to_le64(prevbn);
3307
3308                XT_PUTPAGE(mp);
3309        }
3310
3311        /* update next pointer of the previous page */
3312        if (prevbn != 0) {
3313                XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
3314                if (rc)
3315                        return rc;
3316
3317                /*
3318                 * acquire a transaction lock on the page;
3319                 *
3320                 * action: update next pointer;
3321                 */
3322                BT_MARK_DIRTY(mp, ip);
3323                tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3324
3325                /* the page may already have been tlock'd */
3326
3327                p->header.next = cpu_to_le64(nextbn);
3328
3329                XT_PUTPAGE(mp);
3330        }
3331
3332        return 0;
3333}
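
/*
 * Illustration only (hypothetical helper, not part of this file): xtRelink()
 * is meant to be called while the doomed page 'p' is still pinned under
 * transaction 'tid', so that its siblings can be patched to point around it
 * before the page's own extent is freed at commit time.
 */
#if 0
static int example_unlink_xtpage(tid_t tid, struct inode *ip, xtpage_t * p)
{
        int rc;

        /* patch prev/next pointers of the sibling pages, if any */
        if ((rc = xtRelink(tid, ip, p)))
                return rc;

        /* freeing the page's own extent (e.g., via a maplock) is the
         * caller's responsibility and is outside this sketch.
         */
        return 0;
}
#endif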
3334#endif                          /*  _STILL_TO_PORT */
3335
3336
3337/*
3338 *      xtInitRoot()
3339 *
3340 * initialize file root (inline in inode)
3341 */
3342void xtInitRoot(tid_t tid, struct inode *ip)
3343{
3344        xtpage_t *p;
3345
3346        /*
3347         * acquire a transaction lock on the root
3348         *
3349         * action:
3350         */
3351        txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
3352                      tlckXTREE | tlckNEW);
3353        p = &JFS_IP(ip)->i_xtroot;
3354
3355        p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
3356        p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3357
3358        if (S_ISDIR(ip->i_mode))
3359                p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
3360        else {
3361                p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
3362                ip->i_size = 0;
3363        }
3364
3365
3366        return;
3367}
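
/*
 * Illustration only (hypothetical creation path, not the actual jfs_create()
 * code): after a new inode is set up inside a transaction, xtInitRoot()
 * initializes the empty inline root; the file's first extent can then be
 * added with xtInsert().  Passing *xaddrp == 0 asks xtInsert() to allocate
 * the blocks itself.
 */
#if 0
static int example_init_and_insert(tid_t tid, struct inode *ip)
{
        s64 xaddr = 0;  /* 0: let xtInsert() allocate the extent */
        int xlen = 1;   /* one filesystem block at file offset 0 */

        /* initialize the inline root in the inode */
        xtInitRoot(tid, ip);

        /* record the first extent of the file */
        return xtInsert(tid, ip, 0, 0, xlen, &xaddr, 0);
}
#endif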
3368
3369
3370/*
3371 * We can run into a deadlock truncating a file with a large number of
3372 * xtree pages (large fragmented file).  A robust fix would entail a
3373 * reservation system where we would reserve a number of metadata pages
3374 * and tlocks which we would be guaranteed without a deadlock.  Without
3375 * this, a partial fix is to limit number of metadata pages we will lock
3376 * in a single transaction.  Currently we will truncate the file so that
3377 * no more than 50 leaf pages will be locked.  The caller of xtTruncate
3378 * will be responsible for ensuring that the current transaction gets
3379 * committed, and that subsequent transactions are created to truncate
3380 * the file further if needed.
3381 */
3382#define MAX_TRUNCATE_LEAVES 50
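
/*
 * Illustration only: a minimal, hypothetical sketch of the caller loop the
 * comment above requires (the real callers live elsewhere in JFS, not here).
 * It assumes the jfs_txnmgr interfaces txBegin(), txCommit() and txEnd(), and
 * that the caller already holds the required inode and commit locks.
 * xtTruncate() may stop after MAX_TRUNCATE_LEAVES leaf pages and return a
 * size larger than the requested one, so the loop repeats with a fresh
 * transaction until the requested size is reached.
 */
#if 0
static int example_truncate_loop(struct inode *ip, loff_t length)
{
        s64 newsize;
        tid_t tid;

        do {
                tid = txBegin(ip->i_sb, 0);
                newsize = xtTruncate(tid, ip, length,
                                     COMMIT_TRUNCATE | COMMIT_PWMAP);
                if (newsize < 0) {
                        txEnd(tid);
                        return newsize;         /* error from xtTruncate() */
                }
                txCommit(tid, 1, &ip, 0);
                txEnd(tid);
        } while (newsize > length);

        return 0;
}
#endif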
3383
3384/*
3385 *      xtTruncate()
3386 *
3387 * function:
3388 *      traverse for truncation logging backward bottom up;
3389 *      terminate at the last extent entry at the current subtree
3390 *      root page covering new down size.
3391 *      truncation may occur within the last extent entry.
3392 *
3393 * parameter:
3394 *      int             tid,
3395 *      struct inode    *ip,
3396 *      s64             newsize,
3397 *      int             type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3398 *
3399 * return:
3400 *
3401 * note:
3402 *      PWMAP:
3403 *       1. truncate (non-COMMIT_NOLINK file)
3404 *          by jfs_truncate() or jfs_open(O_TRUNC):
3405 *          xtree is updated;
3406 *       2. truncate index table of directory when last entry removed
3407 *      map update via tlock at commit time;
3408 *      PMAP:
3409 *       Call xtTruncate_pmap instead
3410 *      WMAP:
3411 *       1. remove (free zero link count) on last reference release
3412 *          (pmap has been freed at commit zero link count);
3413 *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3414 *          xtree is updated;
3415 *       map update directly at truncation time;
3416 *
3417 *      if (DELETE)
3418 *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3419 *      else if (TRUNCATE)
3420 *              must write LOG_NOREDOPAGE for deleted index page;
3421 *
3422 * pages may already have been tlocked by anonymous transactions
3423 * during file growth (i.e., write) before truncation;
3424 *
3425 * except for the last truncated entry, deleted entries remain as is
3426 * in the page (nextindex is updated) for other use
3427 * (e.g., log/update allocation map): this avoids copying the page
3428 * info but delays the freeing of pages;
3429 *
3430 */
3431s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3432{
3433        int rc = 0;
3434        s64 teof;
3435        struct metapage *mp;
3436        xtpage_t *p;
3437        s64 bn;
3438        int index, nextindex;
3439        xad_t *xad;
3440        s64 xoff, xaddr;
3441        int xlen, len, freexlen;
3442        struct btstack btstack;
3443        struct btframe *parent;
3444        struct tblock *tblk = NULL;
3445        struct tlock *tlck = NULL;
3446        struct xtlock *xtlck = NULL;
3447        struct xdlistlock xadlock;      /* maplock for COMMIT_WMAP */
3448        struct pxd_lock *pxdlock;               /* maplock for COMMIT_WMAP */
3449        s64 nfreed;
3450        int freed, log;
3451        int locked_leaves = 0;
3452
3453        /* save object truncation type */
3454        if (tid) {
3455                tblk = tid_to_tblock(tid);
3456                tblk->xflag |= flag;
3457        }
3458
3459        nfreed = 0;
3460
3461        flag &= COMMIT_MAP;
3462        assert(flag != COMMIT_PMAP);
3463
3464        if (flag == COMMIT_PWMAP)
3465                log = 1;
3466        else {
3467                log = 0;
3468                xadlock.flag = mlckFREEXADLIST;
3469                xadlock.index = 1;
3470        }
3471
3472        /*
3473         * if the newsize is not an integral number of pages,
3474         * the file between newsize and next page boundary will
3475         * be cleared.
3476         * if truncating into a file hole, it will cause
3477         * a full block to be allocated for the logical block.
3478         */
3479
3480        /*
3481         * release page blocks of truncated region <teof, eof>
3482         *
3483         * free the data blocks from the leaf index blocks.
3484         * delete the parent index entries corresponding to
3485         * the freed child data/index blocks.
3486         * free the index blocks themselves which aren't needed
3487         * in new sized file.
3488         *
3489         * index blocks are updated only if the blocks are to be
3490         * retained in the new sized file.
3491         * if type is PMAP, the data and index pages are NOT
3492         * freed, and the data and index blocks are NOT freed
3493         * from working map.
3494         * (this will allow continued access of data/index of
3495         * temporary file (zerolink count file truncated to zero-length)).
3496         */
3497        teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
3498            JFS_SBI(ip->i_sb)->l2bsize;
3499
3500        /* clear stack */
3501        BT_CLR(&btstack);
3502
3503        /*
3504         * start with root
3505         *
3506         * root resides in the inode
3507         */
3508        bn = 0;
3509
3510        /*
3511         * first access of each page:
3512         */
3513      getPage:
3514        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3515        if (rc)
3516                return rc;
3517
3518        /* process entries backward from last index */
3519        index = le16_to_cpu(p->header.nextindex) - 1;
3520
3521
3522        /* Since this is the rightmost page at this level, and we may have
3523         * already freed a page that was formerly to the right, let's make
3524         * sure that the next pointer is zero.
3525         */
3526        if (p->header.next) {
3527                if (log)
3528                        /*
3529                         * Make sure this change to the header is logged.
3530                         * If we really truncate this leaf, the flag
3531                         * will be changed to tlckTRUNCATE
3532                         */
3533                        tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
3534                BT_MARK_DIRTY(mp, ip);
3535                p->header.next = 0;
3536        }
3537
3538        if (p->header.flag & BT_INTERNAL)
3539                goto getChild;
3540
3541        /*
3542         *      leaf page
3543         */
3544        freed = 0;
3545
3546        /* does region covered by leaf page precede Teof ? */
3547        xad = &p->xad[index];
3548        xoff = offsetXAD(xad);
3549        xlen = lengthXAD(xad);
3550        if (teof >= xoff + xlen) {
3551                XT_PUTPAGE(mp);
3552                goto getParent;
3553        }
3554
3555        /* (re)acquire tlock of the leaf page */
3556        if (log) {
3557                if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
3558                        /*
3559                         * We need to limit the size of the transaction
3560                         * to avoid exhausting pagecache & tlocks
3561                         */
3562                        XT_PUTPAGE(mp);
3563                        newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
3564                        goto getParent;
3565                }
3566                tlck = txLock(tid, ip, mp, tlckXTREE);
3567                tlck->type = tlckXTREE | tlckTRUNCATE;
3568                xtlck = (struct xtlock *) & tlck->lock;
3569                xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
3570        }
3571        BT_MARK_DIRTY(mp, ip);
3572
3573        /*
3574         * scan backward leaf page entries
3575         */
3576        for (; index >= XTENTRYSTART; index--) {
3577                xad = &p->xad[index];
3578                xoff = offsetXAD(xad);
3579                xlen = lengthXAD(xad);
3580                xaddr = addressXAD(xad);
3581
3582                /*
3583                 * The "data" for a directory is indexed by the block
3584                 * device's address space.  This metadata must be invalidated
3585                 * here
3586                 */
3587                if (S_ISDIR(ip->i_mode) && (teof == 0))
3588                        invalidate_xad_metapages(ip, *xad);
3589                /*
3590                 * entry beyond eof: continue scan of current page
3591                 *          xad
3592                 * ---|---=======------->
3593                 *   eof
3594                 */
3595                if (teof < xoff) {
3596                        nfreed += xlen;
3597                        continue;
3598                }
3599
3600                /*
3601                 * (xoff <= teof): last entry to be deleted from page;
3602                 * If other entries remain in page: keep and update the page.
3603                 */
3604
3605                /*
3606                 * eof == entry_start: delete the entry
3607                 *           xad
3608                 * -------|=======------->
3609                 *       eof
3610                 *
3611                 */
3612                if (teof == xoff) {
3613                        nfreed += xlen;
3614
3615                        if (index == XTENTRYSTART)
3616                                break;
3617
3618                        nextindex = index;
3619                }
3620                /*
3621                 * eof within the entry: truncate the entry.
3622                 *          xad
3623                 * -------===|===------->
3624                 *          eof
3625                 */
3626                else if (teof < xoff + xlen) {
3627                        /* update truncated entry */
3628                        len = teof - xoff;
3629                        freexlen = xlen - len;
3630                        XADlength(xad, len);
3631
3632                        /* save pxd of truncated extent in tlck */
3633                        xaddr += len;
3634                        if (log) {      /* COMMIT_PWMAP */
3635                                xtlck->lwm.offset = (xtlck->lwm.offset) ?
3636                                    min(index, (int)xtlck->lwm.offset) : index;
3637                                xtlck->lwm.length = index + 1 -
3638                                    xtlck->lwm.offset;
3639                                xtlck->twm.offset = index;
3640                                pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
3641                                pxdlock->flag = mlckFREEPXD;
3642                                PXDaddress(&pxdlock->pxd, xaddr);
3643                                PXDlength(&pxdlock->pxd, freexlen);
3644                        }
3645                        /* free truncated extent */
3646                        else {  /* COMMIT_WMAP */
3647
3648                                pxdlock = (struct pxd_lock *) & xadlock;
3649                                pxdlock->flag = mlckFREEPXD;
3650                                PXDaddress(&pxdlock->pxd, xaddr);
3651                                PXDlength(&pxdlock->pxd, freexlen);
3652                                txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
3653
3654                                /* reset map lock */
3655                                xadlock.flag = mlckFREEXADLIST;
3656                        }
3657
3658                        /* current entry is new last entry; */
3659                        nextindex = index + 1;
3660
3661                        nfreed += freexlen;
3662                }
3663                /*
3664                 * eof beyond the entry:
3665                 *          xad
3666                 * -------=======---|--->
3667                 *                 eof
3668                 */
3669                else {          /* (xoff + xlen < teof) */
3670
3671                        nextindex = index + 1;
3672                }
3673
3674                if (nextindex < le16_to_cpu(p->header.nextindex)) {
3675                        if (!log) {     /* COMMIT_WMAP */
3676                                xadlock.xdlist = &p->xad[nextindex];
3677                                xadlock.count =
3678                                    le16_to_cpu(p->header.nextindex) -
3679                                    nextindex;
3680                                txFreeMap(ip, (struct maplock *) & xadlock,
3681                                          NULL, COMMIT_WMAP);
3682                        }
3683                        p->header.nextindex = cpu_to_le16(nextindex);
3684                }
3685
3686                XT_PUTPAGE(mp);
3687
3688                /* assert(freed == 0); */
3689                goto getParent;
3690        }                       /* end scan of leaf page entries */
3691
3692        freed = 1;
3693
3694        /*
3695         * leaf page becomes empty: free the page if type != PMAP
3696         */
3697        if (log) {              /* COMMIT_PWMAP */
3698                /* txCommit() with tlckFREE:
3699                 * free data extents covered by leaf [XTENTRYSTART:hwm);
3700                 * invalidate leaf if COMMIT_PWMAP;
3701                 * if (TRUNCATE), will write LOG_NOREDOPAGE;
3702                 */
3703                tlck->type = tlckXTREE | tlckFREE;
3704        } else {                /* COMMIT_WMAP */
3705
3706                /* free data extents covered by leaf */
3707                xadlock.xdlist = &p->xad[XTENTRYSTART];
3708                xadlock.count =
3709                    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3710                txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
3711        }
3712
3713        if (p->header.flag & BT_ROOT) {
3714                p->header.flag &= ~BT_INTERNAL;
3715                p->header.flag |= BT_LEAF;
3716                p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3717
3718                XT_PUTPAGE(mp); /* debug */
3719                goto out;
3720        } else {
3721                if (log) {      /* COMMIT_PWMAP */
3722                        /* page will be invalidated at tx completion
3723                         */
3724                        XT_PUTPAGE(mp);
3725                } else {        /* COMMIT_WMAP */
3726
3727                        if (mp->lid)
3728                                lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
3729
3730                        /* invalidate empty leaf page */
3731                        discard_metapage(mp);
3732                }
3733        }
3734
3735        /*
3736         * the leaf page has become empty: delete the parent entry
3737         * for the leaf page if the parent page is to be kept
3738         * in the new sized file.
3739         */
3740
3741        /*
3742         * go back up to the parent page
3743         */
3744      getParent:
3745        /* pop/restore parent entry for the current child page */
3746        if ((parent = BT_POP(&btstack)) == NULL)
3747                /* current page must have been root */
3748                goto out;
3749
3750        /* get back the parent page */
3751        bn = parent->bn;
3752        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3753        if (rc)
3754                return rc;
3755
3756        index = parent->index;
3757
3758        /*
3759         * child page was not empty:
3760         */
3761        if (freed == 0) {
3762                /* has any entry been deleted from the parent? */
3763                if (index < le16_to_cpu(p->header.nextindex) - 1) {
3764                        /* (re)acquire tlock on the parent page */
3765                        if (log) {      /* COMMIT_PWMAP */
3766                                /* txCommit() with tlckTRUNCATE:
3767                                 * free child extents covered by parent [);
3768                                 * free child extents covered by parent [index + 1, nextindex);
3769                                tlck = txLock(tid, ip, mp, tlckXTREE);
3770                                xtlck = (struct xtlock *) & tlck->lock;
3771                                if (!(tlck->type & tlckTRUNCATE)) {
3772                                        xtlck->hwm.offset =
3773                                            le16_to_cpu(p->header.
3774                                                        nextindex) - 1;
3775                                        tlck->type =
3776                                            tlckXTREE | tlckTRUNCATE;
3777                                }
3778                        } else {        /* COMMIT_WMAP */
3779
3780                                /* free child extents covered by parent */
3781                                xadlock.xdlist = &p->xad[index + 1];
3782                                xadlock.count =
3783                                    le16_to_cpu(p->header.nextindex) -
3784                                    index - 1;
3785                                txFreeMap(ip, (struct maplock *) & xadlock,
3786                                          NULL, COMMIT_WMAP);
3787                        }
3788                        BT_MARK_DIRTY(mp, ip);
3789
3790                        p->header.nextindex = cpu_to_le16(index + 1);
3791                }
3792                XT_PUTPAGE(mp);
3793                goto getParent;
3794        }
3795
3796        /*
3797         * child page was empty:
3798         */
3799        nfreed += lengthXAD(&p->xad[index]);
3800
3801        /*
3802         * During working map update, child page's tlock must be handled
3803         * before parent's.  This is because the parent's tlock will cause
3804         * the child's disk space to be marked available in the wmap, so
3805         * it's important that the child page be released by that time.
3806         *
3807         * ToDo:  tlocks should be on doubly-linked list, so we can
3808         * quickly remove it and add it to the end.
3809         */
3810
3811        /*
3812         * Move parent page's tlock to the end of the tid's tlock list
3813         */
3814        if (log && mp->lid && (tblk->last != mp->lid) &&
3815            lid_to_tlock(mp->lid)->tid) {
3816                lid_t lid = mp->lid;
3817                struct tlock *prev;
3818
3819                tlck = lid_to_tlock(lid);
3820
3821                if (tblk->next == lid)
3822                        tblk->next = tlck->next;
3823                else {
3824                        for (prev = lid_to_tlock(tblk->next);
3825                             prev->next != lid;
3826                             prev = lid_to_tlock(prev->next)) {
3827                                assert(prev->next);
3828                        }
3829                        prev->next = tlck->next;
3830                }
3831                lid_to_tlock(tblk->last)->next = lid;
3832                tlck->next = 0;
3833                tblk->last = lid;
3834        }
3835
3836        /*
3837         * parent page becomes empty: free the page
3838         */
3839        if (index == XTENTRYSTART) {
3840                if (log) {      /* COMMIT_PWMAP */
3841                        /* txCommit() with tlckFREE:
3842                         * free child extents covered by parent;
3843                         * invalidate parent if COMMIT_PWMAP;
3844                         */
3845                        tlck = txLock(tid, ip, mp, tlckXTREE);
3846                        xtlck = (struct xtlock *) & tlck->lock;
3847                        xtlck->hwm.offset =
3848                            le16_to_cpu(p->header.nextindex) - 1;
3849                        tlck->type = tlckXTREE | tlckFREE;
3850                } else {        /* COMMIT_WMAP */
3851
3852                        /* free child extents covered by parent */
3853                        xadlock.xdlist = &p->xad[XTENTRYSTART];
3854                        xadlock.count =
3855                            le16_to_cpu(p->header.nextindex) -
3856                            XTENTRYSTART;
3857                        txFreeMap(ip, (struct maplock *) & xadlock, NULL,
3858                                  COMMIT_WMAP);
3859                }
3860                BT_MARK_DIRTY(mp, ip);
3861
3862                if (p->header.flag & BT_ROOT) {
3863                        p->header.flag &= ~BT_INTERNAL;
3864                        p->header.flag |= BT_LEAF;
3865                        p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3866                        if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
3867                                /*
3868                                 * Shrink root down to allow inline
3869                                 * EA (otherwise fsck complains)
3870                                 */
3871                                p->header.maxentry =
3872                                    cpu_to_le16(XTROOTINITSLOT);
3873                                JFS_IP(ip)->mode2 |= INLINEEA;
3874                        }
3875
3876                        XT_PUTPAGE(mp); /* debug */
3877                        goto out;
3878                } else {
3879                        if (log) {      /* COMMIT_PWMAP */
3880                                /* page will be invalidated at tx completion
3881                                 */
3882                                XT_PUTPAGE(mp);
3883                        } else {        /* COMMIT_WMAP */
3884
3885                                if (mp->lid)
3886                                        lid_to_tlock(mp->lid)->flag |=
3887                                                tlckFREELOCK;
3888
3889                                /* invalidate parent page */
3890                                discard_metapage(mp);
3891                        }
3892
3893                        /* parent has become empty and freed:
3894                         * go back up to its parent page
3895                         */
3896                        /* freed = 1; */
3897                        goto getParent;
3898                }
3899        }
3900        /*
3901         * parent page still has entries for front region;
3902         */
3903        else {
3904                /* try truncate region covered by preceding entry
3905                 * (process backward)
3906                 */
3907                index--;
3908
3909                /* go back down to the child page corresponding
3910                 * to the entry
3911                 */
3912                goto getChild;
3913        }
3914
3915        /*
3916         *      internal page: go down to child page of current entry
3917         */
3918      getChild:
3919        /* save current parent entry for the child page */
3920        if (BT_STACK_FULL(&btstack)) {
3921                jfs_error(ip->i_sb, "stack overrun in xtTruncate!");
3922                XT_PUTPAGE(mp);
3923                return -EIO;
3924        }
3925        BT_PUSH(&btstack, bn, index);
3926
3927        /* get child page */
3928        xad = &p->xad[index];
3929        bn = addressXAD(xad);
3930
3931        /*
3932         * first access of each internal entry:
3933         */
3934        /* release parent page */
3935        XT_PUTPAGE(mp);
3936
3937        /* process the child page */
3938        goto getPage;
3939
3940      out:
3941        /*
3942         * update file resource stat
3943         */
3944        /* set size
3945         */
3946        if (S_ISDIR(ip->i_mode) && !newsize)
3947                ip->i_size = 1; /* fsck hates zero-length directories */
3948        else
3949                ip->i_size = newsize;
3950
3951        /* update quota allocation to reflect freed blocks */
3952        DQUOT_FREE_BLOCK(ip, nfreed);
3953
3954        /*
3955         * free tlock of invalidated pages
3956         */
3957        if (flag == COMMIT_WMAP)
3958                txFreelock(ip);
3959
3960        return newsize;
3961}
3962
3963
3964/*
3965 *      xtTruncate_pmap()
3966 *
3967 * function:
3968 *      Perform truncate to zero length for a deleted file, leaving
3969 *      the xtree and working map untouched.  This allows the file to
3970 *      be accessed via open file handles, while the delete of the file
3971 *      is committed to disk.
3972 *
3973 * parameter:
3974 *      tid_t           tid,
3975 *      struct inode    *ip,
3976 *      s64             committed_size)
3977 *
3978 * return: new committed size
3979 *
3980 * note:
3981 *
3982 *      To avoid deadlock by holding too many transaction locks, the
3983 *      truncation may be broken up into multiple transactions.
3984 *      The committed_size keeps track of how much of the file has been
3985 *      freed from the pmaps.
3986 */
3987s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
3988{
3989        s64 bn;
3990        struct btstack btstack;
3991        int cmp;
3992        int index;
3993        int locked_leaves = 0;
3994        struct metapage *mp;
3995        xtpage_t *p;
3996        struct btframe *parent;
3997        int rc;
3998        struct tblock *tblk;
3999        struct tlock *tlck = NULL;
4000        xad_t *xad;
4001        int xlen;
4002        s64 xoff;
4003        struct xtlock *xtlck = NULL;
4004
4005        /* save object truncation type */
4006        tblk = tid_to_tblock(tid);
4007        tblk->xflag |= COMMIT_PMAP;
4008
4009        /* clear stack */
4010        BT_CLR(&btstack);
4011
4012        if (committed_size) {
4013                xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
4014                rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
4015                if (rc)
4016                        return rc;
4017
4018                XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4019
4020                if (cmp != 0) {
4021                        XT_PUTPAGE(mp);
4022                        jfs_error(ip->i_sb,
4023                                  "xtTruncate_pmap: did not find extent");
4024                        return -EIO;
4025                }
4026        } else {
4027                /*
4028                 * start with root
4029                 *
4030                 * root resides in the inode
4031                 */
4032                bn = 0;
4033
4034                /*
4035                 * first access of each page:
4036                 */
4037      getPage:
4038                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4039                if (rc)
4040                        return rc;
4041
4042                /* process entries backward from last index */
4043                index = le16_to_cpu(p->header.nextindex) - 1;
4044
4045                if (p->header.flag & BT_INTERNAL)
4046                        goto getChild;
4047        }
4048
4049        /*
4050         *      leaf page
4051         */
4052
4053        if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
4054                /*
4055                 * We need to limit the size of the transaction
4056                 * to avoid exhausting pagecache & tlocks
4057                 */
4058                xad = &p->xad[index];
4059                xoff = offsetXAD(xad);
4060                xlen = lengthXAD(xad);
4061                XT_PUTPAGE(mp);
4062                return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
4063        }
4064        tlck = txLock(tid, ip, mp, tlckXTREE);
4065        tlck->type = tlckXTREE | tlckFREE;
4066        xtlck = (struct xtlock *) & tlck->lock;
4067        xtlck->hwm.offset = index;
4068
4069
4070        XT_PUTPAGE(mp);
4071
4072        /*
4073         * go back up to the parent page
4074         */
4075      getParent:
4076        /* pop/restore parent entry for the current child page */
4077        if ((parent = BT_POP(&btstack)) == NULL)
4078                /* current page must have been root */
4079                goto out;
4080
4081        /* get back the parent page */
4082        bn = parent->bn;
4083        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4084        if (rc)
4085                return rc;
4086
4087        index = parent->index;
4088
4089        /*
4090         * parent page becomes empty: free the page
4091         */
4092        if (index == XTENTRYSTART) {
4093                /* txCommit() with tlckFREE:
4094                 * free child extents covered by parent;
4095                 * invalidate parent if COMMIT_PWMAP;
4096                 */
4097                tlck = txLock(tid, ip, mp, tlckXTREE);
4098                xtlck = (struct xtlock *) & tlck->lock;
4099                xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
4100                tlck->type = tlckXTREE | tlckFREE;
4101
4102                XT_PUTPAGE(mp);
4103
4104                if (p->header.flag & BT_ROOT) {
4105
4106                        goto out;
4107                } else {
4108                        goto getParent;
4109                }
4110        }
4111        /*
4112         * parent page still has entries for front region;
4113         */
4114        else
4115                index--;
4116        /*
4117         *      internal page: go down to child page of current entry
4118         */
4119      getChild:
4120        /* save current parent entry for the child page */
4121        if (BT_STACK_FULL(&btstack)) {
4122                jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!");
4123                XT_PUTPAGE(mp);
4124                return -EIO;
4125        }
4126        BT_PUSH(&btstack, bn, index);
4127
4128        /* get child page */
4129        xad = &p->xad[index];
4130        bn = addressXAD(xad);
4131
4132        /*
4133         * first access of each internal entry:
4134         */
4135        /* release parent page */
4136        XT_PUTPAGE(mp);
4137
4138        /* process the child page */
4139        goto getPage;
4140
4141      out:
4142
4143        return 0;
4144}
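
/*
 * Illustration only: a hypothetical driver loop for xtTruncate_pmap() (not
 * the actual JFS caller).  As the header comment above notes, the truncation
 * may be split across several transactions; the value returned by one call
 * is fed back in as committed_size until 0 is returned.  txBegin(),
 * txCommit() and txEnd() from jfs_txnmgr are assumed.
 */
#if 0
static int example_pmap_truncate(struct inode *ip)
{
        s64 committed = 0;
        tid_t tid;

        do {
                tid = txBegin(ip->i_sb, 0);
                committed = xtTruncate_pmap(tid, ip, committed);
                if (committed < 0) {
                        txEnd(tid);
                        return committed;       /* error from xtTruncate_pmap() */
                }
                txCommit(tid, 1, &ip, 0);
                txEnd(tid);
        } while (committed > 0);

        return 0;
}
#endif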
4145
4146#ifdef CONFIG_JFS_STATISTICS
4147int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
4148                    int *eof, void *data)
4149{
4150        int len = 0;
4151        off_t begin;
4152
4153        len += sprintf(buffer,
4154                       "JFS Xtree statistics\n"
4155                       "====================\n"
4156                       "searches = %d\n"
4157                       "fast searches = %d\n"
4158                       "splits = %d\n",
4159                       xtStat.search,
4160                       xtStat.fastSearch,
4161                       xtStat.split);
4162
4163        begin = offset;
4164        *start = buffer + begin;
4165        len -= begin;
4166
4167        if (len > length)
4168                len = length;
4169        else
4170                *eof = 1;
4171
4172        if (len < 0)
4173                len = 0;
4174
4175        return len;
4176}
4177#endif
4178