linux/arch/sparc64/kernel/ldc.c
<<
>>
Prefs
   1/* ldc.c: Logical Domain Channel link-layer protocol driver.
   2 *
   3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/module.h>
   8#include <linux/slab.h>
   9#include <linux/spinlock.h>
  10#include <linux/delay.h>
  11#include <linux/errno.h>
  12#include <linux/string.h>
  13#include <linux/scatterlist.h>
  14#include <linux/interrupt.h>
  15#include <linux/list.h>
  16#include <linux/init.h>
  17
  18#include <asm/hypervisor.h>
  19#include <asm/iommu.h>
  20#include <asm/page.h>
  21#include <asm/ldc.h>
  22#include <asm/mdesc.h>
  23
  24#define DRV_MODULE_NAME         "ldc"
  25#define PFX DRV_MODULE_NAME     ": "
  26#define DRV_MODULE_VERSION      "1.0"
  27#define DRV_MODULE_RELDATE      "June 25, 2007"
  28
  29static char version[] __devinitdata =
  30        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  31#define LDC_PACKET_SIZE         64
  32
  33/* Packet header layout for unreliable and reliable mode frames.
  34 * When in RAW mode, packets are simply straight 64-byte payloads
  35 * with no headers.
  36 */
  37struct ldc_packet {
  38        u8                      type;
  39#define LDC_CTRL                0x01
  40#define LDC_DATA                0x02
  41#define LDC_ERR                 0x10
  42
  43        u8                      stype;
  44#define LDC_INFO                0x01
  45#define LDC_ACK                 0x02
  46#define LDC_NACK                0x04
  47
  48        u8                      ctrl;
  49#define LDC_VERS                0x01 /* Link Version            */
  50#define LDC_RTS                 0x02 /* Request To Send         */
  51#define LDC_RTR                 0x03 /* Ready To Receive        */
  52#define LDC_RDX                 0x04 /* Ready for Data eXchange */
  53#define LDC_CTRL_MSK            0x0f
  54
  55        u8                      env;
  56#define LDC_LEN                 0x3f
  57#define LDC_FRAG_MASK           0xc0
  58#define LDC_START               0x40
  59#define LDC_STOP                0x80
  60
  61        u32                     seqid;
  62
  63        union {
  64                u8              u_data[LDC_PACKET_SIZE - 8];
  65                struct {
  66                        u32     pad;
  67                        u32     ackid;
  68                        u8      r_data[LDC_PACKET_SIZE - 8 - 8];
  69                } r;
  70        } u;
  71};
  72
  73struct ldc_version {
  74        u16 major;
  75        u16 minor;
  76};
  77
  78/* Ordered from largest major to lowest.  */
  79static struct ldc_version ver_arr[] = {
  80        { .major = 1, .minor = 0 },
  81};
  82
  83#define LDC_DEFAULT_MTU                 (4 * LDC_PACKET_SIZE)
  84#define LDC_DEFAULT_NUM_ENTRIES         (PAGE_SIZE / LDC_PACKET_SIZE)
  85
  86struct ldc_channel;
  87
  88struct ldc_mode_ops {
  89        int (*write)(struct ldc_channel *, const void *, unsigned int);
  90        int (*read)(struct ldc_channel *, void *, unsigned int);
  91};
  92
  93static const struct ldc_mode_ops raw_ops;
  94static const struct ldc_mode_ops nonraw_ops;
  95static const struct ldc_mode_ops stream_ops;
  96
  97int ldom_domaining_enabled;
  98
  99struct ldc_iommu {
 100        /* Protects arena alloc/free.  */
 101        spinlock_t                      lock;
 102        struct iommu_arena              arena;
 103        struct ldc_mtable_entry         *page_table;
 104};
 105
 106struct ldc_channel {
 107        /* Protects all operations that depend upon channel state.  */
 108        spinlock_t                      lock;
 109
 110        unsigned long                   id;
 111
 112        u8                              *mssbuf;
 113        u32                             mssbuf_len;
 114        u32                             mssbuf_off;
 115
 116        struct ldc_packet               *tx_base;
 117        unsigned long                   tx_head;
 118        unsigned long                   tx_tail;
 119        unsigned long                   tx_num_entries;
 120        unsigned long                   tx_ra;
 121
 122        unsigned long                   tx_acked;
 123
 124        struct ldc_packet               *rx_base;
 125        unsigned long                   rx_head;
 126        unsigned long                   rx_tail;
 127        unsigned long                   rx_num_entries;
 128        unsigned long                   rx_ra;
 129
 130        u32                             rcv_nxt;
 131        u32                             snd_nxt;
 132
 133        unsigned long                   chan_state;
 134
 135        struct ldc_channel_config       cfg;
 136        void                            *event_arg;
 137
 138        const struct ldc_mode_ops       *mops;
 139
 140        struct ldc_iommu                iommu;
 141
 142        struct ldc_version              ver;
 143
 144        u8                              hs_state;
 145#define LDC_HS_CLOSED                   0x00
 146#define LDC_HS_OPEN                     0x01
 147#define LDC_HS_GOTVERS                  0x02
 148#define LDC_HS_SENTRTR                  0x03
 149#define LDC_HS_GOTRTR                   0x04
 150#define LDC_HS_COMPLETE                 0x10
 151
 152        u8                              flags;
 153#define LDC_FLAG_ALLOCED_QUEUES         0x01
 154#define LDC_FLAG_REGISTERED_QUEUES      0x02
 155#define LDC_FLAG_REGISTERED_IRQS        0x04
 156#define LDC_FLAG_RESET                  0x10
 157
 158        u8                              mss;
 159        u8                              state;
 160
 161#define LDC_IRQ_NAME_MAX                32
 162        char                            rx_irq_name[LDC_IRQ_NAME_MAX];
 163        char                            tx_irq_name[LDC_IRQ_NAME_MAX];
 164
 165        struct hlist_head               mh_list;
 166
 167        struct hlist_node               list;
 168};
 169
 170#define ldcdbg(TYPE, f, a...) \
 171do {    if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
 172                printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
 173} while (0)
 174
 175static const char *state_to_str(u8 state)
 176{
 177        switch (state) {
 178        case LDC_STATE_INVALID:
 179                return "INVALID";
 180        case LDC_STATE_INIT:
 181                return "INIT";
 182        case LDC_STATE_BOUND:
 183                return "BOUND";
 184        case LDC_STATE_READY:
 185                return "READY";
 186        case LDC_STATE_CONNECTED:
 187                return "CONNECTED";
 188        default:
 189                return "<UNKNOWN>";
 190        }
 191}
 192
 193static void ldc_set_state(struct ldc_channel *lp, u8 state)
 194{
 195        ldcdbg(STATE, "STATE (%s) --> (%s)\n",
 196               state_to_str(lp->state),
 197               state_to_str(state));
 198
 199        lp->state = state;
 200}
 201
 202static unsigned long __advance(unsigned long off, unsigned long num_entries)
 203{
 204        off += LDC_PACKET_SIZE;
 205        if (off == (num_entries * LDC_PACKET_SIZE))
 206                off = 0;
 207
 208        return off;
 209}
 210
 211static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
 212{
 213        return __advance(off, lp->rx_num_entries);
 214}
 215
 216static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
 217{
 218        return __advance(off, lp->tx_num_entries);
 219}
 220
 221static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
 222                                                  unsigned long *new_tail)
 223{
 224        struct ldc_packet *p;
 225        unsigned long t;
 226
 227        t = tx_advance(lp, lp->tx_tail);
 228        if (t == lp->tx_head)
 229                return NULL;
 230
 231        *new_tail = t;
 232
 233        p = lp->tx_base;
 234        return p + (lp->tx_tail / LDC_PACKET_SIZE);
 235}
 236
 237/* When we are in reliable or stream mode, have to track the next packet
 238 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 239 * to be careful not to stomp over the queue past that point.  During
 240 * the handshake, we don't have TX data packets pending in the queue
 241 * and that's why handshake_get_tx_packet() need not be mindful of
 242 * lp->tx_acked.
 243 */
 244static unsigned long head_for_data(struct ldc_channel *lp)
 245{
 246        if (lp->cfg.mode == LDC_MODE_STREAM)
 247                return lp->tx_acked;
 248        return lp->tx_head;
 249}
 250
 251static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
 252{
 253        unsigned long limit, tail, new_tail, diff;
 254        unsigned int mss;
 255
 256        limit = head_for_data(lp);
 257        tail = lp->tx_tail;
 258        new_tail = tx_advance(lp, tail);
 259        if (new_tail == limit)
 260                return 0;
 261
 262        if (limit > new_tail)
 263                diff = limit - new_tail;
 264        else
 265                diff = (limit +
 266                        ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
 267        diff /= LDC_PACKET_SIZE;
 268        mss = lp->mss;
 269
 270        if (diff * mss < size)
 271                return 0;
 272
 273        return 1;
 274}
 275
 276static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
 277                                             unsigned long *new_tail)
 278{
 279        struct ldc_packet *p;
 280        unsigned long h, t;
 281
 282        h = head_for_data(lp);
 283        t = tx_advance(lp, lp->tx_tail);
 284        if (t == h)
 285                return NULL;
 286
 287        *new_tail = t;
 288
 289        p = lp->tx_base;
 290        return p + (lp->tx_tail / LDC_PACKET_SIZE);
 291}
 292
 293static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
 294{
 295        unsigned long orig_tail = lp->tx_tail;
 296        int limit = 1000;
 297
 298        lp->tx_tail = tail;
 299        while (limit-- > 0) {
 300                unsigned long err;
 301
 302                err = sun4v_ldc_tx_set_qtail(lp->id, tail);
 303                if (!err)
 304                        return 0;
 305
 306                if (err != HV_EWOULDBLOCK) {
 307                        lp->tx_tail = orig_tail;
 308                        return -EINVAL;
 309                }
 310                udelay(1);
 311        }
 312
 313        lp->tx_tail = orig_tail;
 314        return -EBUSY;
 315}
 316
 317/* This just updates the head value in the hypervisor using
 318 * a polling loop with a timeout.  The caller takes care of
 319 * upating software state representing the head change, if any.
 320 */
 321static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
 322{
 323        int limit = 1000;
 324
 325        while (limit-- > 0) {
 326                unsigned long err;
 327
 328                err = sun4v_ldc_rx_set_qhead(lp->id, head);
 329                if (!err)
 330                        return 0;
 331
 332                if (err != HV_EWOULDBLOCK)
 333                        return -EINVAL;
 334
 335                udelay(1);
 336        }
 337
 338        return -EBUSY;
 339}
 340
 341static int send_tx_packet(struct ldc_channel *lp,
 342                          struct ldc_packet *p,
 343                          unsigned long new_tail)
 344{
 345        BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
 346
 347        return set_tx_tail(lp, new_tail);
 348}
 349
 350static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
 351                                                 u8 stype, u8 ctrl,
 352                                                 void *data, int dlen,
 353                                                 unsigned long *new_tail)
 354{
 355        struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
 356
 357        if (p) {
 358                memset(p, 0, sizeof(*p));
 359                p->type = LDC_CTRL;
 360                p->stype = stype;
 361                p->ctrl = ctrl;
 362                if (data)
 363                        memcpy(p->u.u_data, data, dlen);
 364        }
 365        return p;
 366}
 367
 368static int start_handshake(struct ldc_channel *lp)
 369{
 370        struct ldc_packet *p;
 371        struct ldc_version *ver;
 372        unsigned long new_tail;
 373
 374        ver = &ver_arr[0];
 375
 376        ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
 377               ver->major, ver->minor);
 378
 379        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 380                                   ver, sizeof(*ver), &new_tail);
 381        if (p) {
 382                int err = send_tx_packet(lp, p, new_tail);
 383                if (!err)
 384                        lp->flags &= ~LDC_FLAG_RESET;
 385                return err;
 386        }
 387        return -EBUSY;
 388}
 389
 390static int send_version_nack(struct ldc_channel *lp,
 391                             u16 major, u16 minor)
 392{
 393        struct ldc_packet *p;
 394        struct ldc_version ver;
 395        unsigned long new_tail;
 396
 397        ver.major = major;
 398        ver.minor = minor;
 399
 400        p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
 401                                   &ver, sizeof(ver), &new_tail);
 402        if (p) {
 403                ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
 404                       ver.major, ver.minor);
 405
 406                return send_tx_packet(lp, p, new_tail);
 407        }
 408        return -EBUSY;
 409}
 410
 411static int send_version_ack(struct ldc_channel *lp,
 412                            struct ldc_version *vp)
 413{
 414        struct ldc_packet *p;
 415        unsigned long new_tail;
 416
 417        p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
 418                                   vp, sizeof(*vp), &new_tail);
 419        if (p) {
 420                ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
 421                       vp->major, vp->minor);
 422
 423                return send_tx_packet(lp, p, new_tail);
 424        }
 425        return -EBUSY;
 426}
 427
 428static int send_rts(struct ldc_channel *lp)
 429{
 430        struct ldc_packet *p;
 431        unsigned long new_tail;
 432
 433        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
 434                                   &new_tail);
 435        if (p) {
 436                p->env = lp->cfg.mode;
 437                p->seqid = 0;
 438                lp->rcv_nxt = 0;
 439
 440                ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
 441                       p->env, p->seqid);
 442
 443                return send_tx_packet(lp, p, new_tail);
 444        }
 445        return -EBUSY;
 446}
 447
 448static int send_rtr(struct ldc_channel *lp)
 449{
 450        struct ldc_packet *p;
 451        unsigned long new_tail;
 452
 453        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
 454                                   &new_tail);
 455        if (p) {
 456                p->env = lp->cfg.mode;
 457                p->seqid = 0;
 458
 459                ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
 460                       p->env, p->seqid);
 461
 462                return send_tx_packet(lp, p, new_tail);
 463        }
 464        return -EBUSY;
 465}
 466
 467static int send_rdx(struct ldc_channel *lp)
 468{
 469        struct ldc_packet *p;
 470        unsigned long new_tail;
 471
 472        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
 473                                   &new_tail);
 474        if (p) {
 475                p->env = 0;
 476                p->seqid = ++lp->snd_nxt;
 477                p->u.r.ackid = lp->rcv_nxt;
 478
 479                ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
 480                       p->env, p->seqid, p->u.r.ackid);
 481
 482                return send_tx_packet(lp, p, new_tail);
 483        }
 484        return -EBUSY;
 485}
 486
 487static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
 488{
 489        struct ldc_packet *p;
 490        unsigned long new_tail;
 491        int err;
 492
 493        p = data_get_tx_packet(lp, &new_tail);
 494        if (!p)
 495                return -EBUSY;
 496        memset(p, 0, sizeof(*p));
 497        p->type = data_pkt->type;
 498        p->stype = LDC_NACK;
 499        p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
 500        p->seqid = lp->snd_nxt + 1;
 501        p->u.r.ackid = lp->rcv_nxt;
 502
 503        ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
 504               p->type, p->ctrl, p->seqid, p->u.r.ackid);
 505
 506        err = send_tx_packet(lp, p, new_tail);
 507        if (!err)
 508                lp->snd_nxt++;
 509
 510        return err;
 511}
 512
 513static int ldc_abort(struct ldc_channel *lp)
 514{
 515        unsigned long hv_err;
 516
 517        ldcdbg(STATE, "ABORT\n");
 518
 519        /* We report but do not act upon the hypervisor errors because
 520         * there really isn't much we can do if they fail at this point.
 521         */
 522        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
 523        if (hv_err)
 524                printk(KERN_ERR PFX "ldc_abort: "
 525                       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 526                       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
 527
 528        hv_err = sun4v_ldc_tx_get_state(lp->id,
 529                                        &lp->tx_head,
 530                                        &lp->tx_tail,
 531                                        &lp->chan_state);
 532        if (hv_err)
 533                printk(KERN_ERR PFX "ldc_abort: "
 534                       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
 535                       lp->id, hv_err);
 536
 537        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
 538        if (hv_err)
 539                printk(KERN_ERR PFX "ldc_abort: "
 540                       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 541                       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
 542
 543        /* Refetch the RX queue state as well, because we could be invoked
 544         * here in the queue processing context.
 545         */
 546        hv_err = sun4v_ldc_rx_get_state(lp->id,
 547                                        &lp->rx_head,
 548                                        &lp->rx_tail,
 549                                        &lp->chan_state);
 550        if (hv_err)
 551                printk(KERN_ERR PFX "ldc_abort: "
 552                       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
 553                       lp->id, hv_err);
 554
 555        return -ECONNRESET;
 556}
 557
 558static struct ldc_version *find_by_major(u16 major)
 559{
 560        struct ldc_version *ret = NULL;
 561        int i;
 562
 563        for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
 564                struct ldc_version *v = &ver_arr[i];
 565                if (v->major <= major) {
 566                        ret = v;
 567                        break;
 568                }
 569        }
 570        return ret;
 571}
 572
 573static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
 574{
 575        struct ldc_version *vap;
 576        int err;
 577
 578        ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
 579               vp->major, vp->minor);
 580
 581        if (lp->hs_state == LDC_HS_GOTVERS) {
 582                lp->hs_state = LDC_HS_OPEN;
 583                memset(&lp->ver, 0, sizeof(lp->ver));
 584        }
 585
 586        vap = find_by_major(vp->major);
 587        if (!vap) {
 588                err = send_version_nack(lp, 0, 0);
 589        } else if (vap->major != vp->major) {
 590                err = send_version_nack(lp, vap->major, vap->minor);
 591        } else {
 592                struct ldc_version ver = *vp;
 593                if (ver.minor > vap->minor)
 594                        ver.minor = vap->minor;
 595                err = send_version_ack(lp, &ver);
 596                if (!err) {
 597                        lp->ver = ver;
 598                        lp->hs_state = LDC_HS_GOTVERS;
 599                }
 600        }
 601        if (err)
 602                return ldc_abort(lp);
 603
 604        return 0;
 605}
 606
 607static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
 608{
 609        ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
 610               vp->major, vp->minor);
 611
 612        if (lp->hs_state == LDC_HS_GOTVERS) {
 613                if (lp->ver.major != vp->major ||
 614                    lp->ver.minor != vp->minor)
 615                        return ldc_abort(lp);
 616        } else {
 617                lp->ver = *vp;
 618                lp->hs_state = LDC_HS_GOTVERS;
 619        }
 620        if (send_rts(lp))
 621                return ldc_abort(lp);
 622        return 0;
 623}
 624
 625static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
 626{
 627        struct ldc_version *vap;
 628
 629        if ((vp->major == 0 && vp->minor == 0) ||
 630            !(vap = find_by_major(vp->major))) {
 631                return ldc_abort(lp);
 632        } else {
 633                struct ldc_packet *p;
 634                unsigned long new_tail;
 635
 636                p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 637                                           vap, sizeof(*vap),
 638                                           &new_tail);
 639                if (p)
 640                        return send_tx_packet(lp, p, new_tail);
 641                else
 642                        return ldc_abort(lp);
 643        }
 644}
 645
 646static int process_version(struct ldc_channel *lp,
 647                           struct ldc_packet *p)
 648{
 649        struct ldc_version *vp;
 650
 651        vp = (struct ldc_version *) p->u.u_data;
 652
 653        switch (p->stype) {
 654        case LDC_INFO:
 655                return process_ver_info(lp, vp);
 656
 657        case LDC_ACK:
 658                return process_ver_ack(lp, vp);
 659
 660        case LDC_NACK:
 661                return process_ver_nack(lp, vp);
 662
 663        default:
 664                return ldc_abort(lp);
 665        }
 666}
 667
 668static int process_rts(struct ldc_channel *lp,
 669                       struct ldc_packet *p)
 670{
 671        ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
 672               p->stype, p->seqid, p->env);
 673
 674        if (p->stype     != LDC_INFO       ||
 675            lp->hs_state != LDC_HS_GOTVERS ||
 676            p->env       != lp->cfg.mode)
 677                return ldc_abort(lp);
 678
 679        lp->snd_nxt = p->seqid;
 680        lp->rcv_nxt = p->seqid;
 681        lp->hs_state = LDC_HS_SENTRTR;
 682        if (send_rtr(lp))
 683                return ldc_abort(lp);
 684
 685        return 0;
 686}
 687
 688static int process_rtr(struct ldc_channel *lp,
 689                       struct ldc_packet *p)
 690{
 691        ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
 692               p->stype, p->seqid, p->env);
 693
 694        if (p->stype     != LDC_INFO ||
 695            p->env       != lp->cfg.mode)
 696                return ldc_abort(lp);
 697
 698        lp->snd_nxt = p->seqid;
 699        lp->hs_state = LDC_HS_COMPLETE;
 700        ldc_set_state(lp, LDC_STATE_CONNECTED);
 701        send_rdx(lp);
 702
 703        return LDC_EVENT_UP;
 704}
 705
 706static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
 707{
 708        return lp->rcv_nxt + 1 == seqid;
 709}
 710
 711static int process_rdx(struct ldc_channel *lp,
 712                       struct ldc_packet *p)
 713{
 714        ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
 715               p->stype, p->seqid, p->env, p->u.r.ackid);
 716
 717        if (p->stype != LDC_INFO ||
 718            !(rx_seq_ok(lp, p->seqid)))
 719                return ldc_abort(lp);
 720
 721        lp->rcv_nxt = p->seqid;
 722
 723        lp->hs_state = LDC_HS_COMPLETE;
 724        ldc_set_state(lp, LDC_STATE_CONNECTED);
 725
 726        return LDC_EVENT_UP;
 727}
 728
 729static int process_control_frame(struct ldc_channel *lp,
 730                                 struct ldc_packet *p)
 731{
 732        switch (p->ctrl) {
 733        case LDC_VERS:
 734                return process_version(lp, p);
 735
 736        case LDC_RTS:
 737                return process_rts(lp, p);
 738
 739        case LDC_RTR:
 740                return process_rtr(lp, p);
 741
 742        case LDC_RDX:
 743                return process_rdx(lp, p);
 744
 745        default:
 746                return ldc_abort(lp);
 747        }
 748}
 749
 750static int process_error_frame(struct ldc_channel *lp,
 751                               struct ldc_packet *p)
 752{
 753        return ldc_abort(lp);
 754}
 755
 756static int process_data_ack(struct ldc_channel *lp,
 757                            struct ldc_packet *ack)
 758{
 759        unsigned long head = lp->tx_acked;
 760        u32 ackid = ack->u.r.ackid;
 761
 762        while (1) {
 763                struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
 764
 765                head = tx_advance(lp, head);
 766
 767                if (p->seqid == ackid) {
 768                        lp->tx_acked = head;
 769                        return 0;
 770                }
 771                if (head == lp->tx_tail)
 772                        return ldc_abort(lp);
 773        }
 774
 775        return 0;
 776}
 777
 778static void send_events(struct ldc_channel *lp, unsigned int event_mask)
 779{
 780        if (event_mask & LDC_EVENT_RESET)
 781                lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
 782        if (event_mask & LDC_EVENT_UP)
 783                lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
 784        if (event_mask & LDC_EVENT_DATA_READY)
 785                lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
 786}
 787
 788static irqreturn_t ldc_rx(int irq, void *dev_id)
 789{
 790        struct ldc_channel *lp = dev_id;
 791        unsigned long orig_state, hv_err, flags;
 792        unsigned int event_mask;
 793
 794        spin_lock_irqsave(&lp->lock, flags);
 795
 796        orig_state = lp->chan_state;
 797        hv_err = sun4v_ldc_rx_get_state(lp->id,
 798                                        &lp->rx_head,
 799                                        &lp->rx_tail,
 800                                        &lp->chan_state);
 801
 802        ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 803               orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
 804
 805        event_mask = 0;
 806
 807        if (lp->cfg.mode == LDC_MODE_RAW &&
 808            lp->chan_state == LDC_CHANNEL_UP) {
 809                lp->hs_state = LDC_HS_COMPLETE;
 810                ldc_set_state(lp, LDC_STATE_CONNECTED);
 811
 812                event_mask |= LDC_EVENT_UP;
 813
 814                orig_state = lp->chan_state;
 815        }
 816
 817        /* If we are in reset state, flush the RX queue and ignore
 818         * everything.
 819         */
 820        if (lp->flags & LDC_FLAG_RESET) {
 821                (void) __set_rx_head(lp, lp->rx_tail);
 822                goto out;
 823        }
 824
 825        /* Once we finish the handshake, we let the ldc_read()
 826         * paths do all of the control frame and state management.
 827         * Just trigger the callback.
 828         */
 829        if (lp->hs_state == LDC_HS_COMPLETE) {
 830handshake_complete:
 831                if (lp->chan_state != orig_state) {
 832                        unsigned int event = LDC_EVENT_RESET;
 833
 834                        if (lp->chan_state == LDC_CHANNEL_UP)
 835                                event = LDC_EVENT_UP;
 836
 837                        event_mask |= event;
 838                }
 839                if (lp->rx_head != lp->rx_tail)
 840                        event_mask |= LDC_EVENT_DATA_READY;
 841
 842                goto out;
 843        }
 844
 845        if (lp->chan_state != orig_state)
 846                goto out;
 847
 848        while (lp->rx_head != lp->rx_tail) {
 849                struct ldc_packet *p;
 850                unsigned long new;
 851                int err;
 852
 853                p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
 854
 855                switch (p->type) {
 856                case LDC_CTRL:
 857                        err = process_control_frame(lp, p);
 858                        if (err > 0)
 859                                event_mask |= err;
 860                        break;
 861
 862                case LDC_DATA:
 863                        event_mask |= LDC_EVENT_DATA_READY;
 864                        err = 0;
 865                        break;
 866
 867                case LDC_ERR:
 868                        err = process_error_frame(lp, p);
 869                        break;
 870
 871                default:
 872                        err = ldc_abort(lp);
 873                        break;
 874                }
 875
 876                if (err < 0)
 877                        break;
 878
 879                new = lp->rx_head;
 880                new += LDC_PACKET_SIZE;
 881                if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
 882                        new = 0;
 883                lp->rx_head = new;
 884
 885                err = __set_rx_head(lp, new);
 886                if (err < 0) {
 887                        (void) ldc_abort(lp);
 888                        break;
 889                }
 890                if (lp->hs_state == LDC_HS_COMPLETE)
 891                        goto handshake_complete;
 892        }
 893
 894out:
 895        spin_unlock_irqrestore(&lp->lock, flags);
 896
 897        send_events(lp, event_mask);
 898
 899        return IRQ_HANDLED;
 900}
 901
 902static irqreturn_t ldc_tx(int irq, void *dev_id)
 903{
 904        struct ldc_channel *lp = dev_id;
 905        unsigned long flags, hv_err, orig_state;
 906        unsigned int event_mask = 0;
 907
 908        spin_lock_irqsave(&lp->lock, flags);
 909
 910        orig_state = lp->chan_state;
 911        hv_err = sun4v_ldc_tx_get_state(lp->id,
 912                                        &lp->tx_head,
 913                                        &lp->tx_tail,
 914                                        &lp->chan_state);
 915
 916        ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 917               orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
 918
 919        if (lp->cfg.mode == LDC_MODE_RAW &&
 920            lp->chan_state == LDC_CHANNEL_UP) {
 921                lp->hs_state = LDC_HS_COMPLETE;
 922                ldc_set_state(lp, LDC_STATE_CONNECTED);
 923
 924                event_mask |= LDC_EVENT_UP;
 925        }
 926
 927        spin_unlock_irqrestore(&lp->lock, flags);
 928
 929        send_events(lp, event_mask);
 930
 931        return IRQ_HANDLED;
 932}
 933
 934/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
 935 * XXX that addition and removal from the ldc_channel_list has
 936 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 937 * XXX totally pointless as another thread can slip into ldc_alloc()
 938 * XXX and add a channel with the same ID.  There also needs to be
 939 * XXX a spinlock for ldc_channel_list.
 940 */
 941static HLIST_HEAD(ldc_channel_list);
 942
 943static int __ldc_channel_exists(unsigned long id)
 944{
 945        struct ldc_channel *lp;
 946        struct hlist_node *n;
 947
 948        hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
 949                if (lp->id == id)
 950                        return 1;
 951        }
 952        return 0;
 953}
 954
 955static int alloc_queue(const char *name, unsigned long num_entries,
 956                       struct ldc_packet **base, unsigned long *ra)
 957{
 958        unsigned long size, order;
 959        void *q;
 960
 961        size = num_entries * LDC_PACKET_SIZE;
 962        order = get_order(size);
 963
 964        q = (void *) __get_free_pages(GFP_KERNEL, order);
 965        if (!q) {
 966                printk(KERN_ERR PFX "Alloc of %s queue failed with "
 967                       "size=%lu order=%lu\n", name, size, order);
 968                return -ENOMEM;
 969        }
 970
 971        memset(q, 0, PAGE_SIZE << order);
 972
 973        *base = q;
 974        *ra = __pa(q);
 975
 976        return 0;
 977}
 978
 979static void free_queue(unsigned long num_entries, struct ldc_packet *q)
 980{
 981        unsigned long size, order;
 982
 983        if (!q)
 984                return;
 985
 986        size = num_entries * LDC_PACKET_SIZE;
 987        order = get_order(size);
 988
 989        free_pages((unsigned long)q, order);
 990}
 991
 992/* XXX Make this configurable... XXX */
 993#define LDC_IOTABLE_SIZE        (8 * 1024)
 994
 995static int ldc_iommu_init(struct ldc_channel *lp)
 996{
 997        unsigned long sz, num_tsb_entries, tsbsize, order;
 998        struct ldc_iommu *iommu = &lp->iommu;
 999        struct ldc_mtable_entry *table;
1000        unsigned long hv_err;
1001        int err;
1002
1003        num_tsb_entries = LDC_IOTABLE_SIZE;
1004        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1005
1006        spin_lock_init(&iommu->lock);
1007
1008        sz = num_tsb_entries / 8;
1009        sz = (sz + 7UL) & ~7UL;
1010        iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1011        if (!iommu->arena.map) {
1012                printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1013                return -ENOMEM;
1014        }
1015
1016        iommu->arena.limit = num_tsb_entries;
1017
1018        order = get_order(tsbsize);
1019
1020        table = (struct ldc_mtable_entry *)
1021                __get_free_pages(GFP_KERNEL, order);
1022        err = -ENOMEM;
1023        if (!table) {
1024                printk(KERN_ERR PFX "Alloc of MTE table failed, "
1025                       "size=%lu order=%lu\n", tsbsize, order);
1026                goto out_free_map;
1027        }
1028
1029        memset(table, 0, PAGE_SIZE << order);
1030
1031        iommu->page_table = table;
1032
1033        hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1034                                         num_tsb_entries);
1035        err = -EINVAL;
1036        if (hv_err)
1037                goto out_free_table;
1038
1039        return 0;
1040
1041out_free_table:
1042        free_pages((unsigned long) table, order);
1043        iommu->page_table = NULL;
1044
1045out_free_map:
1046        kfree(iommu->arena.map);
1047        iommu->arena.map = NULL;
1048
1049        return err;
1050}
1051
1052static void ldc_iommu_release(struct ldc_channel *lp)
1053{
1054        struct ldc_iommu *iommu = &lp->iommu;
1055        unsigned long num_tsb_entries, tsbsize, order;
1056
1057        (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1058
1059        num_tsb_entries = iommu->arena.limit;
1060        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1061        order = get_order(tsbsize);
1062
1063        free_pages((unsigned long) iommu->page_table, order);
1064        iommu->page_table = NULL;
1065
1066        kfree(iommu->arena.map);
1067        iommu->arena.map = NULL;
1068}
1069
1070struct ldc_channel *ldc_alloc(unsigned long id,
1071                              const struct ldc_channel_config *cfgp,
1072                              void *event_arg)
1073{
1074        struct ldc_channel *lp;
1075        const struct ldc_mode_ops *mops;
1076        unsigned long dummy1, dummy2, hv_err;
1077        u8 mss, *mssbuf;
1078        int err;
1079
1080        err = -ENODEV;
1081        if (!ldom_domaining_enabled)
1082                goto out_err;
1083
1084        err = -EINVAL;
1085        if (!cfgp)
1086                goto out_err;
1087
1088        switch (cfgp->mode) {
1089        case LDC_MODE_RAW:
1090                mops = &raw_ops;
1091                mss = LDC_PACKET_SIZE;
1092                break;
1093
1094        case LDC_MODE_UNRELIABLE:
1095                mops = &nonraw_ops;
1096                mss = LDC_PACKET_SIZE - 8;
1097                break;
1098
1099        case LDC_MODE_STREAM:
1100                mops = &stream_ops;
1101                mss = LDC_PACKET_SIZE - 8 - 8;
1102                break;
1103
1104        default:
1105                goto out_err;
1106        }
1107
1108        if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1109                goto out_err;
1110
1111        hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1112        err = -ENODEV;
1113        if (hv_err == HV_ECHANNEL)
1114                goto out_err;
1115
1116        err = -EEXIST;
1117        if (__ldc_channel_exists(id))
1118                goto out_err;
1119
1120        mssbuf = NULL;
1121
1122        lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1123        err = -ENOMEM;
1124        if (!lp)
1125                goto out_err;
1126
1127        spin_lock_init(&lp->lock);
1128
1129        lp->id = id;
1130
1131        err = ldc_iommu_init(lp);
1132        if (err)
1133                goto out_free_ldc;
1134
1135        lp->mops = mops;
1136        lp->mss = mss;
1137
1138        lp->cfg = *cfgp;
1139        if (!lp->cfg.mtu)
1140                lp->cfg.mtu = LDC_DEFAULT_MTU;
1141
1142        if (lp->cfg.mode == LDC_MODE_STREAM) {
1143                mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1144                if (!mssbuf) {
1145                        err = -ENOMEM;
1146                        goto out_free_iommu;
1147                }
1148                lp->mssbuf = mssbuf;
1149        }
1150
1151        lp->event_arg = event_arg;
1152
1153        /* XXX allow setting via ldc_channel_config to override defaults
1154         * XXX or use some formula based upon mtu
1155         */
1156        lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1157        lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1158
1159        err = alloc_queue("TX", lp->tx_num_entries,
1160                          &lp->tx_base, &lp->tx_ra);
1161        if (err)
1162                goto out_free_mssbuf;
1163
1164        err = alloc_queue("RX", lp->rx_num_entries,
1165                          &lp->rx_base, &lp->rx_ra);
1166        if (err)
1167                goto out_free_txq;
1168
1169        lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1170
1171        lp->hs_state = LDC_HS_CLOSED;
1172        ldc_set_state(lp, LDC_STATE_INIT);
1173
1174        INIT_HLIST_NODE(&lp->list);
1175        hlist_add_head(&lp->list, &ldc_channel_list);
1176
1177        INIT_HLIST_HEAD(&lp->mh_list);
1178
1179        return lp;
1180
1181out_free_txq:
1182        free_queue(lp->tx_num_entries, lp->tx_base);
1183
1184out_free_mssbuf:
1185        if (mssbuf)
1186                kfree(mssbuf);
1187
1188out_free_iommu:
1189        ldc_iommu_release(lp);
1190
1191out_free_ldc:
1192        kfree(lp);
1193
1194out_err:
1195        return ERR_PTR(err);
1196}
1197EXPORT_SYMBOL(ldc_alloc);
1198
1199void ldc_free(struct ldc_channel *lp)
1200{
1201        if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1202                free_irq(lp->cfg.rx_irq, lp);
1203                free_irq(lp->cfg.tx_irq, lp);
1204        }
1205
1206        if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1207                sun4v_ldc_tx_qconf(lp->id, 0, 0);
1208                sun4v_ldc_rx_qconf(lp->id, 0, 0);
1209                lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1210        }
1211        if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1212                free_queue(lp->tx_num_entries, lp->tx_base);
1213                free_queue(lp->rx_num_entries, lp->rx_base);
1214                lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1215        }
1216
1217        hlist_del(&lp->list);
1218
1219        if (lp->mssbuf)
1220                kfree(lp->mssbuf);
1221
1222        ldc_iommu_release(lp);
1223
1224        kfree(lp);
1225}
1226EXPORT_SYMBOL(ldc_free);
1227
1228/* Bind the channel.  This registers the LDC queues with
1229 * the hypervisor and puts the channel into a pseudo-listening
1230 * state.  This does not initiate a handshake, ldc_connect() does
1231 * that.
1232 */
1233int ldc_bind(struct ldc_channel *lp, const char *name)
1234{
1235        unsigned long hv_err, flags;
1236        int err = -EINVAL;
1237
1238        spin_lock_irqsave(&lp->lock, flags);
1239
1240        if (!name)
1241                goto out_err;
1242
1243        if (lp->state != LDC_STATE_INIT)
1244                goto out_err;
1245
1246        snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1247        snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1248
1249        err = request_irq(lp->cfg.rx_irq, ldc_rx,
1250                          IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1251                          lp->rx_irq_name, lp);
1252        if (err)
1253                goto out_err;
1254
1255        err = request_irq(lp->cfg.tx_irq, ldc_tx,
1256                          IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1257                          lp->tx_irq_name, lp);
1258        if (err)
1259                goto out_free_rx_irq;
1260
1261
1262        lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1263
1264        err = -ENODEV;
1265        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1266        if (hv_err)
1267                goto out_free_tx_irq;
1268
1269        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1270        if (hv_err)
1271                goto out_free_tx_irq;
1272
1273        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1274        if (hv_err)
1275                goto out_unmap_tx;
1276
1277        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1278        if (hv_err)
1279                goto out_unmap_tx;
1280
1281        lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1282
1283        hv_err = sun4v_ldc_tx_get_state(lp->id,
1284                                        &lp->tx_head,
1285                                        &lp->tx_tail,
1286                                        &lp->chan_state);
1287        err = -EBUSY;
1288        if (hv_err)
1289                goto out_unmap_rx;
1290
1291        lp->tx_acked = lp->tx_head;
1292
1293        lp->hs_state = LDC_HS_OPEN;
1294        ldc_set_state(lp, LDC_STATE_BOUND);
1295
1296        spin_unlock_irqrestore(&lp->lock, flags);
1297
1298        return 0;
1299
1300out_unmap_rx:
1301        lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1302        sun4v_ldc_rx_qconf(lp->id, 0, 0);
1303
1304out_unmap_tx:
1305        sun4v_ldc_tx_qconf(lp->id, 0, 0);
1306
1307out_free_tx_irq:
1308        lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1309        free_irq(lp->cfg.tx_irq, lp);
1310
1311out_free_rx_irq:
1312        free_irq(lp->cfg.rx_irq, lp);
1313
1314out_err:
1315        spin_unlock_irqrestore(&lp->lock, flags);
1316
1317        return err;
1318}
1319EXPORT_SYMBOL(ldc_bind);
1320
1321int ldc_connect(struct ldc_channel *lp)
1322{
1323        unsigned long flags;
1324        int err;
1325
1326        if (lp->cfg.mode == LDC_MODE_RAW)
1327                return -EINVAL;
1328
1329        spin_lock_irqsave(&lp->lock, flags);
1330
1331        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1332            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1333            lp->hs_state != LDC_HS_OPEN)
1334                err = -EINVAL;
1335        else
1336                err = start_handshake(lp);
1337
1338        spin_unlock_irqrestore(&lp->lock, flags);
1339
1340        return err;
1341}
1342EXPORT_SYMBOL(ldc_connect);
1343
1344int ldc_disconnect(struct ldc_channel *lp)
1345{
1346        unsigned long hv_err, flags;
1347        int err;
1348
1349        if (lp->cfg.mode == LDC_MODE_RAW)
1350                return -EINVAL;
1351
1352        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1353            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1354                return -EINVAL;
1355
1356        spin_lock_irqsave(&lp->lock, flags);
1357
1358        err = -ENODEV;
1359        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1360        if (hv_err)
1361                goto out_err;
1362
1363        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1364        if (hv_err)
1365                goto out_err;
1366
1367        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1368        if (hv_err)
1369                goto out_err;
1370
1371        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1372        if (hv_err)
1373                goto out_err;
1374
1375        ldc_set_state(lp, LDC_STATE_BOUND);
1376        lp->hs_state = LDC_HS_OPEN;
1377        lp->flags |= LDC_FLAG_RESET;
1378
1379        spin_unlock_irqrestore(&lp->lock, flags);
1380
1381        return 0;
1382
1383out_err:
1384        sun4v_ldc_tx_qconf(lp->id, 0, 0);
1385        sun4v_ldc_rx_qconf(lp->id, 0, 0);
1386        free_irq(lp->cfg.tx_irq, lp);
1387        free_irq(lp->cfg.rx_irq, lp);
1388        lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1389                       LDC_FLAG_REGISTERED_QUEUES);
1390        ldc_set_state(lp, LDC_STATE_INIT);
1391
1392        spin_unlock_irqrestore(&lp->lock, flags);
1393
1394        return err;
1395}
1396EXPORT_SYMBOL(ldc_disconnect);
1397
1398int ldc_state(struct ldc_channel *lp)
1399{
1400        return lp->state;
1401}
1402EXPORT_SYMBOL(ldc_state);
1403
1404static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1405{
1406        struct ldc_packet *p;
1407        unsigned long new_tail;
1408        int err;
1409
1410        if (size > LDC_PACKET_SIZE)
1411                return -EMSGSIZE;
1412
1413        p = data_get_tx_packet(lp, &new_tail);
1414        if (!p)
1415                return -EAGAIN;
1416
1417        memcpy(p, buf, size);
1418
1419        err = send_tx_packet(lp, p, new_tail);
1420        if (!err)
1421                err = size;
1422
1423        return err;
1424}
1425
1426static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1427{
1428        struct ldc_packet *p;
1429        unsigned long hv_err, new;
1430        int err;
1431
1432        if (size < LDC_PACKET_SIZE)
1433                return -EINVAL;
1434
1435        hv_err = sun4v_ldc_rx_get_state(lp->id,
1436                                        &lp->rx_head,
1437                                        &lp->rx_tail,
1438                                        &lp->chan_state);
1439        if (hv_err)
1440                return ldc_abort(lp);
1441
1442        if (lp->chan_state == LDC_CHANNEL_DOWN ||
1443            lp->chan_state == LDC_CHANNEL_RESETTING)
1444                return -ECONNRESET;
1445
1446        if (lp->rx_head == lp->rx_tail)
1447                return 0;
1448
1449        p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1450        memcpy(buf, p, LDC_PACKET_SIZE);
1451
1452        new = rx_advance(lp, lp->rx_head);
1453        lp->rx_head = new;
1454
1455        err = __set_rx_head(lp, new);
1456        if (err < 0)
1457                err = -ECONNRESET;
1458        else
1459                err = LDC_PACKET_SIZE;
1460
1461        return err;
1462}
1463
1464static const struct ldc_mode_ops raw_ops = {
1465        .write          =       write_raw,
1466        .read           =       read_raw,
1467};
1468
1469static int write_nonraw(struct ldc_channel *lp, const void *buf,
1470                        unsigned int size)
1471{
1472        unsigned long hv_err, tail;
1473        unsigned int copied;
1474        u32 seq;
1475        int err;
1476
1477        hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1478                                        &lp->chan_state);
1479        if (unlikely(hv_err))
1480                return -EBUSY;
1481
1482        if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1483                return ldc_abort(lp);
1484
1485        if (!tx_has_space_for(lp, size))
1486                return -EAGAIN;
1487
1488        seq = lp->snd_nxt;
1489        copied = 0;
1490        tail = lp->tx_tail;
1491        while (copied < size) {
1492                struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1493                u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1494                            p->u.u_data :
1495                            p->u.r.r_data);
1496                int data_len;
1497
1498                p->type = LDC_DATA;
1499                p->stype = LDC_INFO;
1500                p->ctrl = 0;
1501
1502                data_len = size - copied;
1503                if (data_len > lp->mss)
1504                        data_len = lp->mss;
1505
1506                BUG_ON(data_len > LDC_LEN);
1507
1508                p->env = (data_len |
1509                          (copied == 0 ? LDC_START : 0) |
1510                          (data_len == size - copied ? LDC_STOP : 0));
1511
1512                p->seqid = ++seq;
1513
1514                ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1515                       p->type,
1516                       p->stype,
1517                       p->ctrl,
1518                       p->env,
1519                       p->seqid);
1520
1521                memcpy(data, buf, data_len);
1522                buf += data_len;
1523                copied += data_len;
1524
1525                tail = tx_advance(lp, tail);
1526        }
1527
1528        err = set_tx_tail(lp, tail);
1529        if (!err) {
1530                lp->snd_nxt = seq;
1531                err = size;
1532        }
1533
1534        return err;
1535}
1536
1537static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1538                      struct ldc_packet *first_frag)
1539{
1540        int err;
1541
1542        if (first_frag)
1543                lp->rcv_nxt = first_frag->seqid - 1;
1544
1545        err = send_data_nack(lp, p);
1546        if (err)
1547                return err;
1548
1549        err = __set_rx_head(lp, lp->rx_tail);
1550        if (err < 0)
1551                return ldc_abort(lp);
1552
1553        return 0;
1554}
1555
1556static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1557{
1558        if (p->stype & LDC_ACK) {
1559                int err = process_data_ack(lp, p);
1560                if (err)
1561                        return err;
1562        }
1563        if (p->stype & LDC_NACK)
1564                return ldc_abort(lp);
1565
1566        return 0;
1567}
1568
1569static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1570{
1571        unsigned long dummy;
1572        int limit = 1000;
1573
1574        ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1575               cur_head, lp->rx_head, lp->rx_tail);
1576        while (limit-- > 0) {
1577                unsigned long hv_err;
1578
1579                hv_err = sun4v_ldc_rx_get_state(lp->id,
1580                                                &dummy,
1581                                                &lp->rx_tail,
1582                                                &lp->chan_state);
1583                if (hv_err)
1584                        return ldc_abort(lp);
1585
1586                if (lp->chan_state == LDC_CHANNEL_DOWN ||
1587                    lp->chan_state == LDC_CHANNEL_RESETTING)
1588                        return -ECONNRESET;
1589
1590                if (cur_head != lp->rx_tail) {
1591                        ldcdbg(DATA, "DATA WAIT DONE "
1592                               "head[%lx] tail[%lx] chan_state[%lx]\n",
1593                               dummy, lp->rx_tail, lp->chan_state);
1594                        return 0;
1595                }
1596
1597                udelay(1);
1598        }
1599        return -EAGAIN;
1600}
1601
1602static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1603{
1604        int err = __set_rx_head(lp, head);
1605
1606        if (err < 0)
1607                return ldc_abort(lp);
1608
1609        lp->rx_head = head;
1610        return 0;
1611}
1612
1613static void send_data_ack(struct ldc_channel *lp)
1614{
1615        unsigned long new_tail;
1616        struct ldc_packet *p;
1617
1618        p = data_get_tx_packet(lp, &new_tail);
1619        if (likely(p)) {
1620                int err;
1621
1622                memset(p, 0, sizeof(*p));
1623                p->type = LDC_DATA;
1624                p->stype = LDC_ACK;
1625                p->ctrl = 0;
1626                p->seqid = lp->snd_nxt + 1;
1627                p->u.r.ackid = lp->rcv_nxt;
1628
1629                err = send_tx_packet(lp, p, new_tail);
1630                if (!err)
1631                        lp->snd_nxt++;
1632        }
1633}
1634
1635static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1636{
1637        struct ldc_packet *first_frag;
1638        unsigned long hv_err, new;
1639        int err, copied;
1640
1641        hv_err = sun4v_ldc_rx_get_state(lp->id,
1642                                        &lp->rx_head,
1643                                        &lp->rx_tail,
1644                                        &lp->chan_state);
1645        if (hv_err)
1646                return ldc_abort(lp);
1647
1648        if (lp->chan_state == LDC_CHANNEL_DOWN ||
1649            lp->chan_state == LDC_CHANNEL_RESETTING)
1650                return -ECONNRESET;
1651
1652        if (lp->rx_head == lp->rx_tail)
1653                return 0;
1654
1655        first_frag = NULL;
1656        copied = err = 0;
1657        new = lp->rx_head;
1658        while (1) {
1659                struct ldc_packet *p;
1660                int pkt_len;
1661
1662                BUG_ON(new == lp->rx_tail);
1663                p = lp->rx_base + (new / LDC_PACKET_SIZE);
1664
1665                ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1666                       "rcv_nxt[%08x]\n",
1667                       p->type,
1668                       p->stype,
1669                       p->ctrl,
1670                       p->env,
1671                       p->seqid,
1672                       p->u.r.ackid,
1673                       lp->rcv_nxt);
1674
1675                if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1676                        err = rx_bad_seq(lp, p, first_frag);
1677                        copied = 0;
1678                        break;
1679                }
1680
1681                if (p->type & LDC_CTRL) {
1682                        err = process_control_frame(lp, p);
1683                        if (err < 0)
1684                                break;
1685                        err = 0;
1686                }
1687
1688                lp->rcv_nxt = p->seqid;
1689
1690                if (!(p->type & LDC_DATA)) {
1691                        new = rx_advance(lp, new);
1692                        goto no_data;
1693                }
1694                if (p->stype & (LDC_ACK | LDC_NACK)) {
1695                        err = data_ack_nack(lp, p);
1696                        if (err)
1697                                break;
1698                }
1699                if (!(p->stype & LDC_INFO)) {
1700                        new = rx_advance(lp, new);
1701                        err = rx_set_head(lp, new);
1702                        if (err)
1703                                break;
1704                        goto no_data;
1705                }
1706
1707                pkt_len = p->env & LDC_LEN;
1708
1709                /* Every initial packet starts with the START bit set.
1710                 *
1711                 * Singleton packets will have both START+STOP set.
1712                 *
1713                 * Fragments will have START set in the first frame, STOP
1714                 * set in the last frame, and neither bit set in middle
1715                 * frames of the packet.
1716                 *
1717                 * Therefore if we are at the beginning of a packet and
1718                 * we don't see START, or we are in the middle of a fragmented
1719                 * packet and do see START, we are unsynchronized and should
1720                 * flush the RX queue.
1721                 */
1722                if ((first_frag == NULL && !(p->env & LDC_START)) ||
1723                    (first_frag != NULL &&  (p->env & LDC_START))) {
1724                        if (!first_frag)
1725                                new = rx_advance(lp, new);
1726
1727                        err = rx_set_head(lp, new);
1728                        if (err)
1729                                break;
1730
1731                        if (!first_frag)
1732                                goto no_data;
1733                }
1734                if (!first_frag)
1735                        first_frag = p;
1736
1737                if (pkt_len > size - copied) {
1738                        /* User didn't give us a big enough buffer,
1739                         * what to do?  This is a pretty serious error.
1740                         *
1741                         * Since we haven't updated the RX ring head to
1742                         * consume any of the packets, signal the error
1743                         * to the user and just leave the RX ring alone.
1744                         *
1745                         * This seems the best behavior because this allows
1746                         * a user of the LDC layer to start with a small
1747                         * RX buffer for ldc_read() calls and use -EMSGSIZE
1748                         * as a cue to enlarge it's read buffer.
1749                         */
1750                        err = -EMSGSIZE;
1751                        break;
1752                }
1753
1754                /* Ok, we are gonna eat this one.  */
1755                new = rx_advance(lp, new);
1756
1757                memcpy(buf,
1758                       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1759                        p->u.u_data : p->u.r.r_data), pkt_len);
1760                buf += pkt_len;
1761                copied += pkt_len;
1762
1763                if (p->env & LDC_STOP)
1764                        break;
1765
1766no_data:
1767                if (new == lp->rx_tail) {
1768                        err = rx_data_wait(lp, new);
1769                        if (err)
1770                                break;
1771                }
1772        }
1773
1774        if (!err)
1775                err = rx_set_head(lp, new);
1776
1777        if (err && first_frag)
1778                lp->rcv_nxt = first_frag->seqid - 1;
1779
1780        if (!err) {
1781                err = copied;
1782                if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1783                        send_data_ack(lp);
1784        }
1785
1786        return err;
1787}
1788
1789static const struct ldc_mode_ops nonraw_ops = {
1790        .write          =       write_nonraw,
1791        .read           =       read_nonraw,
1792};
1793
1794static int write_stream(struct ldc_channel *lp, const void *buf,
1795                        unsigned int size)
1796{
1797        if (size > lp->cfg.mtu)
1798                size = lp->cfg.mtu;
1799        return write_nonraw(lp, buf, size);
1800}
1801
1802static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1803{
1804        if (!lp->mssbuf_len) {
1805                int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1806                if (err < 0)
1807                        return err;
1808
1809                lp->mssbuf_len = err;
1810                lp->mssbuf_off = 0;
1811        }
1812
1813        if (size > lp->mssbuf_len)
1814                size = lp->mssbuf_len;
1815        memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1816
1817        lp->mssbuf_off += size;
1818        lp->mssbuf_len -= size;
1819
1820        return size;
1821}
1822
1823static const struct ldc_mode_ops stream_ops = {
1824        .write          =       write_stream,
1825        .read           =       read_stream,
1826};
1827
1828int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1829{
1830        unsigned long flags;
1831        int err;
1832
1833        if (!buf)
1834                return -EINVAL;
1835
1836        if (!size)
1837                return 0;
1838
1839        spin_lock_irqsave(&lp->lock, flags);
1840
1841        if (lp->hs_state != LDC_HS_COMPLETE)
1842                err = -ENOTCONN;
1843        else
1844                err = lp->mops->write(lp, buf, size);
1845
1846        spin_unlock_irqrestore(&lp->lock, flags);
1847
1848        return err;
1849}
1850EXPORT_SYMBOL(ldc_write);
1851
1852int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1853{
1854        unsigned long flags;
1855        int err;
1856
1857        if (!buf)
1858                return -EINVAL;
1859
1860        if (!size)
1861                return 0;
1862
1863        spin_lock_irqsave(&lp->lock, flags);
1864
1865        if (lp->hs_state != LDC_HS_COMPLETE)
1866                err = -ENOTCONN;
1867        else
1868                err = lp->mops->read(lp, buf, size);
1869
1870        spin_unlock_irqrestore(&lp->lock, flags);
1871
1872        return err;
1873}
1874EXPORT_SYMBOL(ldc_read);
1875
1876static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1877{
1878        struct iommu_arena *arena = &iommu->arena;
1879        unsigned long n, i, start, end, limit;
1880        int pass;
1881
1882        limit = arena->limit;
1883        start = arena->hint;
1884        pass = 0;
1885
1886again:
1887        n = find_next_zero_bit(arena->map, limit, start);
1888        end = n + npages;
1889        if (unlikely(end >= limit)) {
1890                if (likely(pass < 1)) {
1891                        limit = start;
1892                        start = 0;
1893                        pass++;
1894                        goto again;
1895                } else {
1896                        /* Scanned the whole thing, give up. */
1897                        return -1;
1898                }
1899        }
1900
1901        for (i = n; i < end; i++) {
1902                if (test_bit(i, arena->map)) {
1903                        start = i + 1;
1904                        goto again;
1905                }
1906        }
1907
1908        for (i = n; i < end; i++)
1909                __set_bit(i, arena->map);
1910
1911        arena->hint = end;
1912
1913        return n;
1914}
1915
1916#define COOKIE_PGSZ_CODE        0xf000000000000000ULL
1917#define COOKIE_PGSZ_CODE_SHIFT  60ULL
1918
1919static u64 pagesize_code(void)
1920{
1921        switch (PAGE_SIZE) {
1922        default:
1923        case (8ULL * 1024ULL):
1924                return 0;
1925        case (64ULL * 1024ULL):
1926                return 1;
1927        case (512ULL * 1024ULL):
1928                return 2;
1929        case (4ULL * 1024ULL * 1024ULL):
1930                return 3;
1931        case (32ULL * 1024ULL * 1024ULL):
1932                return 4;
1933        case (256ULL * 1024ULL * 1024ULL):
1934                return 5;
1935        }
1936}
1937
1938static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1939{
1940        return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1941                (index << PAGE_SHIFT) |
1942                page_offset);
1943}
1944
1945static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1946{
1947        u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1948
1949        cookie &= ~COOKIE_PGSZ_CODE;
1950
1951        *shift = szcode * 3;
1952
1953        return (cookie >> (13ULL + (szcode * 3ULL)));
1954}
1955
1956static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1957                                             unsigned long npages)
1958{
1959        long entry;
1960
1961        entry = arena_alloc(iommu, npages);
1962        if (unlikely(entry < 0))
1963                return NULL;
1964
1965        return iommu->page_table + entry;
1966}
1967
1968static u64 perm_to_mte(unsigned int map_perm)
1969{
1970        u64 mte_base;
1971
1972        mte_base = pagesize_code();
1973
1974        if (map_perm & LDC_MAP_SHADOW) {
1975                if (map_perm & LDC_MAP_R)
1976                        mte_base |= LDC_MTE_COPY_R;
1977                if (map_perm & LDC_MAP_W)
1978                        mte_base |= LDC_MTE_COPY_W;
1979        }
1980        if (map_perm & LDC_MAP_DIRECT) {
1981                if (map_perm & LDC_MAP_R)
1982                        mte_base |= LDC_MTE_READ;
1983                if (map_perm & LDC_MAP_W)
1984                        mte_base |= LDC_MTE_WRITE;
1985                if (map_perm & LDC_MAP_X)
1986                        mte_base |= LDC_MTE_EXEC;
1987        }
1988        if (map_perm & LDC_MAP_IO) {
1989                if (map_perm & LDC_MAP_R)
1990                        mte_base |= LDC_MTE_IOMMU_R;
1991                if (map_perm & LDC_MAP_W)
1992                        mte_base |= LDC_MTE_IOMMU_W;
1993        }
1994
1995        return mte_base;
1996}
1997
1998static int pages_in_region(unsigned long base, long len)
1999{
2000        int count = 0;
2001
2002        do {
2003                unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2004
2005                len -= (new - base);
2006                base = new;
2007                count++;
2008        } while (len > 0);
2009
2010        return count;
2011}
2012
2013struct cookie_state {
2014        struct ldc_mtable_entry         *page_table;
2015        struct ldc_trans_cookie         *cookies;
2016        u64                             mte_base;
2017        u64                             prev_cookie;
2018        u32                             pte_idx;
2019        u32                             nc;
2020};
2021
2022static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2023                         unsigned long off, unsigned long len)
2024{
2025        do {
2026                unsigned long tlen, new = pa + PAGE_SIZE;
2027                u64 this_cookie;
2028
2029                sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2030
2031                tlen = PAGE_SIZE;
2032                if (off)
2033                        tlen = PAGE_SIZE - off;
2034                if (tlen > len)
2035                        tlen = len;
2036
2037                this_cookie = make_cookie(sp->pte_idx,
2038                                          pagesize_code(), off);
2039
2040                off = 0;
2041
2042                if (this_cookie == sp->prev_cookie) {
2043                        sp->cookies[sp->nc - 1].cookie_size += tlen;
2044                } else {
2045                        sp->cookies[sp->nc].cookie_addr = this_cookie;
2046                        sp->cookies[sp->nc].cookie_size = tlen;
2047                        sp->nc++;
2048                }
2049                sp->prev_cookie = this_cookie + tlen;
2050
2051                sp->pte_idx++;
2052
2053                len -= tlen;
2054                pa = new;
2055        } while (len > 0);
2056}
2057
2058static int sg_count_one(struct scatterlist *sg)
2059{
2060        unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2061        long len = sg->length;
2062
2063        if ((sg->offset | len) & (8UL - 1))
2064                return -EFAULT;
2065
2066        return pages_in_region(base + sg->offset, len);
2067}
2068
2069static int sg_count_pages(struct scatterlist *sg, int num_sg)
2070{
2071        int count;
2072        int i;
2073
2074        count = 0;
2075        for (i = 0; i < num_sg; i++) {
2076                int err = sg_count_one(sg + i);
2077                if (err < 0)
2078                        return err;
2079                count += err;
2080        }
2081
2082        return count;
2083}
2084
2085int ldc_map_sg(struct ldc_channel *lp,
2086               struct scatterlist *sg, int num_sg,
2087               struct ldc_trans_cookie *cookies, int ncookies,
2088               unsigned int map_perm)
2089{
2090        unsigned long i, npages, flags;
2091        struct ldc_mtable_entry *base;
2092        struct cookie_state state;
2093        struct ldc_iommu *iommu;
2094        int err;
2095
2096        if (map_perm & ~LDC_MAP_ALL)
2097                return -EINVAL;
2098
2099        err = sg_count_pages(sg, num_sg);
2100        if (err < 0)
2101                return err;
2102
2103        npages = err;
2104        if (err > ncookies)
2105                return -EMSGSIZE;
2106
2107        iommu = &lp->iommu;
2108
2109        spin_lock_irqsave(&iommu->lock, flags);
2110        base = alloc_npages(iommu, npages);
2111        spin_unlock_irqrestore(&iommu->lock, flags);
2112
2113        if (!base)
2114                return -ENOMEM;
2115
2116        state.page_table = iommu->page_table;
2117        state.cookies = cookies;
2118        state.mte_base = perm_to_mte(map_perm);
2119        state.prev_cookie = ~(u64)0;
2120        state.pte_idx = (base - iommu->page_table);
2121        state.nc = 0;
2122
2123        for (i = 0; i < num_sg; i++)
2124                fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2125                             sg[i].offset, sg[i].length);
2126
2127        return state.nc;
2128}
2129EXPORT_SYMBOL(ldc_map_sg);
2130
2131int ldc_map_single(struct ldc_channel *lp,
2132                   void *buf, unsigned int len,
2133                   struct ldc_trans_cookie *cookies, int ncookies,
2134                   unsigned int map_perm)
2135{
2136        unsigned long npages, pa, flags;
2137        struct ldc_mtable_entry *base;
2138        struct cookie_state state;
2139        struct ldc_iommu *iommu;
2140
2141        if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2142                return -EINVAL;
2143
2144        pa = __pa(buf);
2145        if ((pa | len) & (8UL - 1))
2146                return -EFAULT;
2147
2148        npages = pages_in_region(pa, len);
2149
2150        iommu = &lp->iommu;
2151
2152        spin_lock_irqsave(&iommu->lock, flags);
2153        base = alloc_npages(iommu, npages);
2154        spin_unlock_irqrestore(&iommu->lock, flags);
2155
2156        if (!base)
2157                return -ENOMEM;
2158
2159        state.page_table = iommu->page_table;
2160        state.cookies = cookies;
2161        state.mte_base = perm_to_mte(map_perm);
2162        state.prev_cookie = ~(u64)0;
2163        state.pte_idx = (base - iommu->page_table);
2164        state.nc = 0;
2165        fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2166        BUG_ON(state.nc != 1);
2167
2168        return state.nc;
2169}
2170EXPORT_SYMBOL(ldc_map_single);
2171
2172static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2173                        u64 cookie, u64 size)
2174{
2175        struct iommu_arena *arena = &iommu->arena;
2176        unsigned long i, shift, index, npages;
2177        struct ldc_mtable_entry *base;
2178
2179        npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2180        index = cookie_to_index(cookie, &shift);
2181        base = iommu->page_table + index;
2182
2183        BUG_ON(index > arena->limit ||
2184               (index + npages) > arena->limit);
2185
2186        for (i = 0; i < npages; i++) {
2187                if (base->cookie)
2188                        sun4v_ldc_revoke(id, cookie + (i << shift),
2189                                         base->cookie);
2190                base->mte = 0;
2191                __clear_bit(index + i, arena->map);
2192        }
2193}
2194
2195void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2196               int ncookies)
2197{
2198        struct ldc_iommu *iommu = &lp->iommu;
2199        unsigned long flags;
2200        int i;
2201
2202        spin_lock_irqsave(&iommu->lock, flags);
2203        for (i = 0; i < ncookies; i++) {
2204                u64 addr = cookies[i].cookie_addr;
2205                u64 size = cookies[i].cookie_size;
2206
2207                free_npages(lp->id, iommu, addr, size);
2208        }
2209        spin_unlock_irqrestore(&iommu->lock, flags);
2210}
2211EXPORT_SYMBOL(ldc_unmap);
2212
2213int ldc_copy(struct ldc_channel *lp, int copy_dir,
2214             void *buf, unsigned int len, unsigned long offset,
2215             struct ldc_trans_cookie *cookies, int ncookies)
2216{
2217        unsigned int orig_len;
2218        unsigned long ra;
2219        int i;
2220
2221        if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2222                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2223                       lp->id, copy_dir);
2224                return -EINVAL;
2225        }
2226
2227        ra = __pa(buf);
2228        if ((ra | len | offset) & (8UL - 1)) {
2229                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2230                       "ra[%lx] len[%x] offset[%lx]\n",
2231                       lp->id, ra, len, offset);
2232                return -EFAULT;
2233        }
2234
2235        if (lp->hs_state != LDC_HS_COMPLETE ||
2236            (lp->flags & LDC_FLAG_RESET)) {
2237                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2238                       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2239                return -ECONNRESET;
2240        }
2241
2242        orig_len = len;
2243        for (i = 0; i < ncookies; i++) {
2244                unsigned long cookie_raddr = cookies[i].cookie_addr;
2245                unsigned long this_len = cookies[i].cookie_size;
2246                unsigned long actual_len;
2247
2248                if (unlikely(offset)) {
2249                        unsigned long this_off = offset;
2250
2251                        if (this_off > this_len)
2252                                this_off = this_len;
2253
2254                        offset -= this_off;
2255                        this_len -= this_off;
2256                        if (!this_len)
2257                                continue;
2258                        cookie_raddr += this_off;
2259                }
2260
2261                if (this_len > len)
2262                        this_len = len;
2263
2264                while (1) {
2265                        unsigned long hv_err;
2266
2267                        hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2268                                                cookie_raddr, ra,
2269                                                this_len, &actual_len);
2270                        if (unlikely(hv_err)) {
2271                                printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2272                                       "HV error %lu\n",
2273                                       lp->id, hv_err);
2274                                if (lp->hs_state != LDC_HS_COMPLETE ||
2275                                    (lp->flags & LDC_FLAG_RESET))
2276                                        return -ECONNRESET;
2277                                else
2278                                        return -EFAULT;
2279                        }
2280
2281                        cookie_raddr += actual_len;
2282                        ra += actual_len;
2283                        len -= actual_len;
2284                        if (actual_len == this_len)
2285                                break;
2286
2287                        this_len -= actual_len;
2288                }
2289
2290                if (!len)
2291                        break;
2292        }
2293
2294        /* It is caller policy what to do about short copies.
2295         * For example, a networking driver can declare the
2296         * packet a runt and drop it.
2297         */
2298
2299        return orig_len - len;
2300}
2301EXPORT_SYMBOL(ldc_copy);
2302
2303void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2304                          struct ldc_trans_cookie *cookies, int *ncookies,
2305                          unsigned int map_perm)
2306{
2307        void *buf;
2308        int err;
2309
2310        if (len & (8UL - 1))
2311                return ERR_PTR(-EINVAL);
2312
2313        buf = kzalloc(len, GFP_KERNEL);
2314        if (!buf)
2315                return ERR_PTR(-ENOMEM);
2316
2317        err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2318        if (err < 0) {
2319                kfree(buf);
2320                return ERR_PTR(err);
2321        }
2322        *ncookies = err;
2323
2324        return buf;
2325}
2326EXPORT_SYMBOL(ldc_alloc_exp_dring);
2327
/* Undo ldc_alloc_exp_dring(): drop the mappings named by @cookies
 * first, then free the buffer itself.  @len is not used.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	/* Unmap before freeing so no mapping outlives the memory. */
	ldc_unmap(lp, cookies, ncookies);

	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
2335
2336static int __init ldc_init(void)
2337{
2338        unsigned long major, minor;
2339        struct mdesc_handle *hp;
2340        const u64 *v;
2341        int err;
2342        u64 mp;
2343
2344        hp = mdesc_grab();
2345        if (!hp)
2346                return -ENODEV;
2347
2348        mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2349        err = -ENODEV;
2350        if (mp == MDESC_NODE_NULL)
2351                goto out;
2352
2353        v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2354        if (!v)
2355                goto out;
2356
2357        major = 1;
2358        minor = 0;
2359        if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2360                printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2361                goto out;
2362        }
2363
2364        printk(KERN_INFO "%s", version);
2365
2366        if (!*v) {
2367                printk(KERN_INFO PFX "Domaining disabled.\n");
2368                goto out;
2369        }
2370        ldom_domaining_enabled = 1;
2371        err = 0;
2372
2373out:
2374        mdesc_release(hp);
2375        return err;
2376}
2377
2378core_initcall(ldc_init);
2379