linux/arch/sparc/kernel/ldc.c
<<
>>
Prefs
   1/* ldc.c: Logical Domain Channel link-layer protocol driver.
   2 *
   3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/module.h>
   8#include <linux/slab.h>
   9#include <linux/spinlock.h>
  10#include <linux/delay.h>
  11#include <linux/errno.h>
  12#include <linux/string.h>
  13#include <linux/scatterlist.h>
  14#include <linux/interrupt.h>
  15#include <linux/list.h>
  16#include <linux/init.h>
  17#include <linux/bitmap.h>
  18
  19#include <asm/hypervisor.h>
  20#include <asm/iommu.h>
  21#include <asm/page.h>
  22#include <asm/ldc.h>
  23#include <asm/mdesc.h>
  24
  25#define DRV_MODULE_NAME         "ldc"
  26#define PFX DRV_MODULE_NAME     ": "
  27#define DRV_MODULE_VERSION      "1.1"
  28#define DRV_MODULE_RELDATE      "July 22, 2008"
  29
  30static char version[] __devinitdata =
  31        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  32#define LDC_PACKET_SIZE         64
  33
  34/* Packet header layout for unreliable and reliable mode frames.
  35 * When in RAW mode, packets are simply straight 64-byte payloads
  36 * with no headers.
  37 */
  38struct ldc_packet {
  39        u8                      type;
  40#define LDC_CTRL                0x01
  41#define LDC_DATA                0x02
  42#define LDC_ERR                 0x10
  43
  44        u8                      stype;
  45#define LDC_INFO                0x01
  46#define LDC_ACK                 0x02
  47#define LDC_NACK                0x04
  48
  49        u8                      ctrl;
  50#define LDC_VERS                0x01 /* Link Version            */
  51#define LDC_RTS                 0x02 /* Request To Send         */
  52#define LDC_RTR                 0x03 /* Ready To Receive        */
  53#define LDC_RDX                 0x04 /* Ready for Data eXchange */
  54#define LDC_CTRL_MSK            0x0f
  55
  56        u8                      env;
  57#define LDC_LEN                 0x3f
  58#define LDC_FRAG_MASK           0xc0
  59#define LDC_START               0x40
  60#define LDC_STOP                0x80
  61
  62        u32                     seqid;
  63
  64        union {
  65                u8              u_data[LDC_PACKET_SIZE - 8];
  66                struct {
  67                        u32     pad;
  68                        u32     ackid;
  69                        u8      r_data[LDC_PACKET_SIZE - 8 - 8];
  70                } r;
  71        } u;
  72};
  73
  74struct ldc_version {
  75        u16 major;
  76        u16 minor;
  77};
  78
  79/* Ordered from largest major to lowest.  */
  80static struct ldc_version ver_arr[] = {
  81        { .major = 1, .minor = 0 },
  82};
  83
  84#define LDC_DEFAULT_MTU                 (4 * LDC_PACKET_SIZE)
  85#define LDC_DEFAULT_NUM_ENTRIES         (PAGE_SIZE / LDC_PACKET_SIZE)
  86
  87struct ldc_channel;
  88
  89struct ldc_mode_ops {
  90        int (*write)(struct ldc_channel *, const void *, unsigned int);
  91        int (*read)(struct ldc_channel *, void *, unsigned int);
  92};
  93
  94static const struct ldc_mode_ops raw_ops;
  95static const struct ldc_mode_ops nonraw_ops;
  96static const struct ldc_mode_ops stream_ops;
  97
  98int ldom_domaining_enabled;
  99
 100struct ldc_iommu {
 101        /* Protects arena alloc/free.  */
 102        spinlock_t                      lock;
 103        struct iommu_arena              arena;
 104        struct ldc_mtable_entry         *page_table;
 105};
 106
 107struct ldc_channel {
 108        /* Protects all operations that depend upon channel state.  */
 109        spinlock_t                      lock;
 110
 111        unsigned long                   id;
 112
 113        u8                              *mssbuf;
 114        u32                             mssbuf_len;
 115        u32                             mssbuf_off;
 116
 117        struct ldc_packet               *tx_base;
 118        unsigned long                   tx_head;
 119        unsigned long                   tx_tail;
 120        unsigned long                   tx_num_entries;
 121        unsigned long                   tx_ra;
 122
 123        unsigned long                   tx_acked;
 124
 125        struct ldc_packet               *rx_base;
 126        unsigned long                   rx_head;
 127        unsigned long                   rx_tail;
 128        unsigned long                   rx_num_entries;
 129        unsigned long                   rx_ra;
 130
 131        u32                             rcv_nxt;
 132        u32                             snd_nxt;
 133
 134        unsigned long                   chan_state;
 135
 136        struct ldc_channel_config       cfg;
 137        void                            *event_arg;
 138
 139        const struct ldc_mode_ops       *mops;
 140
 141        struct ldc_iommu                iommu;
 142
 143        struct ldc_version              ver;
 144
 145        u8                              hs_state;
 146#define LDC_HS_CLOSED                   0x00
 147#define LDC_HS_OPEN                     0x01
 148#define LDC_HS_GOTVERS                  0x02
 149#define LDC_HS_SENTRTR                  0x03
 150#define LDC_HS_GOTRTR                   0x04
 151#define LDC_HS_COMPLETE                 0x10
 152
 153        u8                              flags;
 154#define LDC_FLAG_ALLOCED_QUEUES         0x01
 155#define LDC_FLAG_REGISTERED_QUEUES      0x02
 156#define LDC_FLAG_REGISTERED_IRQS        0x04
 157#define LDC_FLAG_RESET                  0x10
 158
 159        u8                              mss;
 160        u8                              state;
 161
 162#define LDC_IRQ_NAME_MAX                32
 163        char                            rx_irq_name[LDC_IRQ_NAME_MAX];
 164        char                            tx_irq_name[LDC_IRQ_NAME_MAX];
 165
 166        struct hlist_head               mh_list;
 167
 168        struct hlist_node               list;
 169};
 170
 171#define ldcdbg(TYPE, f, a...) \
 172do {    if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
 173                printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
 174} while (0)
 175
 176static const char *state_to_str(u8 state)
 177{
 178        switch (state) {
 179        case LDC_STATE_INVALID:
 180                return "INVALID";
 181        case LDC_STATE_INIT:
 182                return "INIT";
 183        case LDC_STATE_BOUND:
 184                return "BOUND";
 185        case LDC_STATE_READY:
 186                return "READY";
 187        case LDC_STATE_CONNECTED:
 188                return "CONNECTED";
 189        default:
 190                return "<UNKNOWN>";
 191        }
 192}
 193
 194static void ldc_set_state(struct ldc_channel *lp, u8 state)
 195{
 196        ldcdbg(STATE, "STATE (%s) --> (%s)\n",
 197               state_to_str(lp->state),
 198               state_to_str(state));
 199
 200        lp->state = state;
 201}
 202
 203static unsigned long __advance(unsigned long off, unsigned long num_entries)
 204{
 205        off += LDC_PACKET_SIZE;
 206        if (off == (num_entries * LDC_PACKET_SIZE))
 207                off = 0;
 208
 209        return off;
 210}
 211
 212static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
 213{
 214        return __advance(off, lp->rx_num_entries);
 215}
 216
 217static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
 218{
 219        return __advance(off, lp->tx_num_entries);
 220}
 221
 222static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
 223                                                  unsigned long *new_tail)
 224{
 225        struct ldc_packet *p;
 226        unsigned long t;
 227
 228        t = tx_advance(lp, lp->tx_tail);
 229        if (t == lp->tx_head)
 230                return NULL;
 231
 232        *new_tail = t;
 233
 234        p = lp->tx_base;
 235        return p + (lp->tx_tail / LDC_PACKET_SIZE);
 236}
 237
 238/* When we are in reliable or stream mode, have to track the next packet
 239 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 240 * to be careful not to stomp over the queue past that point.  During
 241 * the handshake, we don't have TX data packets pending in the queue
 242 * and that's why handshake_get_tx_packet() need not be mindful of
 243 * lp->tx_acked.
 244 */
 245static unsigned long head_for_data(struct ldc_channel *lp)
 246{
 247        if (lp->cfg.mode == LDC_MODE_STREAM)
 248                return lp->tx_acked;
 249        return lp->tx_head;
 250}
 251
 252static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
 253{
 254        unsigned long limit, tail, new_tail, diff;
 255        unsigned int mss;
 256
 257        limit = head_for_data(lp);
 258        tail = lp->tx_tail;
 259        new_tail = tx_advance(lp, tail);
 260        if (new_tail == limit)
 261                return 0;
 262
 263        if (limit > new_tail)
 264                diff = limit - new_tail;
 265        else
 266                diff = (limit +
 267                        ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
 268        diff /= LDC_PACKET_SIZE;
 269        mss = lp->mss;
 270
 271        if (diff * mss < size)
 272                return 0;
 273
 274        return 1;
 275}
 276
 277static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
 278                                             unsigned long *new_tail)
 279{
 280        struct ldc_packet *p;
 281        unsigned long h, t;
 282
 283        h = head_for_data(lp);
 284        t = tx_advance(lp, lp->tx_tail);
 285        if (t == h)
 286                return NULL;
 287
 288        *new_tail = t;
 289
 290        p = lp->tx_base;
 291        return p + (lp->tx_tail / LDC_PACKET_SIZE);
 292}
 293
 294static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
 295{
 296        unsigned long orig_tail = lp->tx_tail;
 297        int limit = 1000;
 298
 299        lp->tx_tail = tail;
 300        while (limit-- > 0) {
 301                unsigned long err;
 302
 303                err = sun4v_ldc_tx_set_qtail(lp->id, tail);
 304                if (!err)
 305                        return 0;
 306
 307                if (err != HV_EWOULDBLOCK) {
 308                        lp->tx_tail = orig_tail;
 309                        return -EINVAL;
 310                }
 311                udelay(1);
 312        }
 313
 314        lp->tx_tail = orig_tail;
 315        return -EBUSY;
 316}
 317
 318/* This just updates the head value in the hypervisor using
 319 * a polling loop with a timeout.  The caller takes care of
 320 * upating software state representing the head change, if any.
 321 */
 322static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
 323{
 324        int limit = 1000;
 325
 326        while (limit-- > 0) {
 327                unsigned long err;
 328
 329                err = sun4v_ldc_rx_set_qhead(lp->id, head);
 330                if (!err)
 331                        return 0;
 332
 333                if (err != HV_EWOULDBLOCK)
 334                        return -EINVAL;
 335
 336                udelay(1);
 337        }
 338
 339        return -EBUSY;
 340}
 341
 342static int send_tx_packet(struct ldc_channel *lp,
 343                          struct ldc_packet *p,
 344                          unsigned long new_tail)
 345{
 346        BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
 347
 348        return set_tx_tail(lp, new_tail);
 349}
 350
 351static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
 352                                                 u8 stype, u8 ctrl,
 353                                                 void *data, int dlen,
 354                                                 unsigned long *new_tail)
 355{
 356        struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
 357
 358        if (p) {
 359                memset(p, 0, sizeof(*p));
 360                p->type = LDC_CTRL;
 361                p->stype = stype;
 362                p->ctrl = ctrl;
 363                if (data)
 364                        memcpy(p->u.u_data, data, dlen);
 365        }
 366        return p;
 367}
 368
 369static int start_handshake(struct ldc_channel *lp)
 370{
 371        struct ldc_packet *p;
 372        struct ldc_version *ver;
 373        unsigned long new_tail;
 374
 375        ver = &ver_arr[0];
 376
 377        ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
 378               ver->major, ver->minor);
 379
 380        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 381                                   ver, sizeof(*ver), &new_tail);
 382        if (p) {
 383                int err = send_tx_packet(lp, p, new_tail);
 384                if (!err)
 385                        lp->flags &= ~LDC_FLAG_RESET;
 386                return err;
 387        }
 388        return -EBUSY;
 389}
 390
 391static int send_version_nack(struct ldc_channel *lp,
 392                             u16 major, u16 minor)
 393{
 394        struct ldc_packet *p;
 395        struct ldc_version ver;
 396        unsigned long new_tail;
 397
 398        ver.major = major;
 399        ver.minor = minor;
 400
 401        p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
 402                                   &ver, sizeof(ver), &new_tail);
 403        if (p) {
 404                ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
 405                       ver.major, ver.minor);
 406
 407                return send_tx_packet(lp, p, new_tail);
 408        }
 409        return -EBUSY;
 410}
 411
 412static int send_version_ack(struct ldc_channel *lp,
 413                            struct ldc_version *vp)
 414{
 415        struct ldc_packet *p;
 416        unsigned long new_tail;
 417
 418        p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
 419                                   vp, sizeof(*vp), &new_tail);
 420        if (p) {
 421                ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
 422                       vp->major, vp->minor);
 423
 424                return send_tx_packet(lp, p, new_tail);
 425        }
 426        return -EBUSY;
 427}
 428
 429static int send_rts(struct ldc_channel *lp)
 430{
 431        struct ldc_packet *p;
 432        unsigned long new_tail;
 433
 434        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
 435                                   &new_tail);
 436        if (p) {
 437                p->env = lp->cfg.mode;
 438                p->seqid = 0;
 439                lp->rcv_nxt = 0;
 440
 441                ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
 442                       p->env, p->seqid);
 443
 444                return send_tx_packet(lp, p, new_tail);
 445        }
 446        return -EBUSY;
 447}
 448
 449static int send_rtr(struct ldc_channel *lp)
 450{
 451        struct ldc_packet *p;
 452        unsigned long new_tail;
 453
 454        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
 455                                   &new_tail);
 456        if (p) {
 457                p->env = lp->cfg.mode;
 458                p->seqid = 0;
 459
 460                ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
 461                       p->env, p->seqid);
 462
 463                return send_tx_packet(lp, p, new_tail);
 464        }
 465        return -EBUSY;
 466}
 467
 468static int send_rdx(struct ldc_channel *lp)
 469{
 470        struct ldc_packet *p;
 471        unsigned long new_tail;
 472
 473        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
 474                                   &new_tail);
 475        if (p) {
 476                p->env = 0;
 477                p->seqid = ++lp->snd_nxt;
 478                p->u.r.ackid = lp->rcv_nxt;
 479
 480                ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
 481                       p->env, p->seqid, p->u.r.ackid);
 482
 483                return send_tx_packet(lp, p, new_tail);
 484        }
 485        return -EBUSY;
 486}
 487
 488static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
 489{
 490        struct ldc_packet *p;
 491        unsigned long new_tail;
 492        int err;
 493
 494        p = data_get_tx_packet(lp, &new_tail);
 495        if (!p)
 496                return -EBUSY;
 497        memset(p, 0, sizeof(*p));
 498        p->type = data_pkt->type;
 499        p->stype = LDC_NACK;
 500        p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
 501        p->seqid = lp->snd_nxt + 1;
 502        p->u.r.ackid = lp->rcv_nxt;
 503
 504        ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
 505               p->type, p->ctrl, p->seqid, p->u.r.ackid);
 506
 507        err = send_tx_packet(lp, p, new_tail);
 508        if (!err)
 509                lp->snd_nxt++;
 510
 511        return err;
 512}
 513
 514static int ldc_abort(struct ldc_channel *lp)
 515{
 516        unsigned long hv_err;
 517
 518        ldcdbg(STATE, "ABORT\n");
 519
 520        /* We report but do not act upon the hypervisor errors because
 521         * there really isn't much we can do if they fail at this point.
 522         */
 523        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
 524        if (hv_err)
 525                printk(KERN_ERR PFX "ldc_abort: "
 526                       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 527                       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
 528
 529        hv_err = sun4v_ldc_tx_get_state(lp->id,
 530                                        &lp->tx_head,
 531                                        &lp->tx_tail,
 532                                        &lp->chan_state);
 533        if (hv_err)
 534                printk(KERN_ERR PFX "ldc_abort: "
 535                       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
 536                       lp->id, hv_err);
 537
 538        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
 539        if (hv_err)
 540                printk(KERN_ERR PFX "ldc_abort: "
 541                       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 542                       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
 543
 544        /* Refetch the RX queue state as well, because we could be invoked
 545         * here in the queue processing context.
 546         */
 547        hv_err = sun4v_ldc_rx_get_state(lp->id,
 548                                        &lp->rx_head,
 549                                        &lp->rx_tail,
 550                                        &lp->chan_state);
 551        if (hv_err)
 552                printk(KERN_ERR PFX "ldc_abort: "
 553                       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
 554                       lp->id, hv_err);
 555
 556        return -ECONNRESET;
 557}
 558
 559static struct ldc_version *find_by_major(u16 major)
 560{
 561        struct ldc_version *ret = NULL;
 562        int i;
 563
 564        for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
 565                struct ldc_version *v = &ver_arr[i];
 566                if (v->major <= major) {
 567                        ret = v;
 568                        break;
 569                }
 570        }
 571        return ret;
 572}
 573
 574static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
 575{
 576        struct ldc_version *vap;
 577        int err;
 578
 579        ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
 580               vp->major, vp->minor);
 581
 582        if (lp->hs_state == LDC_HS_GOTVERS) {
 583                lp->hs_state = LDC_HS_OPEN;
 584                memset(&lp->ver, 0, sizeof(lp->ver));
 585        }
 586
 587        vap = find_by_major(vp->major);
 588        if (!vap) {
 589                err = send_version_nack(lp, 0, 0);
 590        } else if (vap->major != vp->major) {
 591                err = send_version_nack(lp, vap->major, vap->minor);
 592        } else {
 593                struct ldc_version ver = *vp;
 594                if (ver.minor > vap->minor)
 595                        ver.minor = vap->minor;
 596                err = send_version_ack(lp, &ver);
 597                if (!err) {
 598                        lp->ver = ver;
 599                        lp->hs_state = LDC_HS_GOTVERS;
 600                }
 601        }
 602        if (err)
 603                return ldc_abort(lp);
 604
 605        return 0;
 606}
 607
 608static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
 609{
 610        ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
 611               vp->major, vp->minor);
 612
 613        if (lp->hs_state == LDC_HS_GOTVERS) {
 614                if (lp->ver.major != vp->major ||
 615                    lp->ver.minor != vp->minor)
 616                        return ldc_abort(lp);
 617        } else {
 618                lp->ver = *vp;
 619                lp->hs_state = LDC_HS_GOTVERS;
 620        }
 621        if (send_rts(lp))
 622                return ldc_abort(lp);
 623        return 0;
 624}
 625
 626static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
 627{
 628        struct ldc_version *vap;
 629        struct ldc_packet *p;
 630        unsigned long new_tail;
 631
 632        if (vp->major == 0 && vp->minor == 0)
 633                return ldc_abort(lp);
 634
 635        vap = find_by_major(vp->major);
 636        if (!vap)
 637                return ldc_abort(lp);
 638
 639        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 640                                           vap, sizeof(*vap),
 641                                           &new_tail);
 642        if (!p)
 643                return ldc_abort(lp);
 644
 645        return send_tx_packet(lp, p, new_tail);
 646}
 647
 648static int process_version(struct ldc_channel *lp,
 649                           struct ldc_packet *p)
 650{
 651        struct ldc_version *vp;
 652
 653        vp = (struct ldc_version *) p->u.u_data;
 654
 655        switch (p->stype) {
 656        case LDC_INFO:
 657                return process_ver_info(lp, vp);
 658
 659        case LDC_ACK:
 660                return process_ver_ack(lp, vp);
 661
 662        case LDC_NACK:
 663                return process_ver_nack(lp, vp);
 664
 665        default:
 666                return ldc_abort(lp);
 667        }
 668}
 669
 670static int process_rts(struct ldc_channel *lp,
 671                       struct ldc_packet *p)
 672{
 673        ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
 674               p->stype, p->seqid, p->env);
 675
 676        if (p->stype     != LDC_INFO       ||
 677            lp->hs_state != LDC_HS_GOTVERS ||
 678            p->env       != lp->cfg.mode)
 679                return ldc_abort(lp);
 680
 681        lp->snd_nxt = p->seqid;
 682        lp->rcv_nxt = p->seqid;
 683        lp->hs_state = LDC_HS_SENTRTR;
 684        if (send_rtr(lp))
 685                return ldc_abort(lp);
 686
 687        return 0;
 688}
 689
 690static int process_rtr(struct ldc_channel *lp,
 691                       struct ldc_packet *p)
 692{
 693        ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
 694               p->stype, p->seqid, p->env);
 695
 696        if (p->stype     != LDC_INFO ||
 697            p->env       != lp->cfg.mode)
 698                return ldc_abort(lp);
 699
 700        lp->snd_nxt = p->seqid;
 701        lp->hs_state = LDC_HS_COMPLETE;
 702        ldc_set_state(lp, LDC_STATE_CONNECTED);
 703        send_rdx(lp);
 704
 705        return LDC_EVENT_UP;
 706}
 707
 708static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
 709{
 710        return lp->rcv_nxt + 1 == seqid;
 711}
 712
 713static int process_rdx(struct ldc_channel *lp,
 714                       struct ldc_packet *p)
 715{
 716        ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
 717               p->stype, p->seqid, p->env, p->u.r.ackid);
 718
 719        if (p->stype != LDC_INFO ||
 720            !(rx_seq_ok(lp, p->seqid)))
 721                return ldc_abort(lp);
 722
 723        lp->rcv_nxt = p->seqid;
 724
 725        lp->hs_state = LDC_HS_COMPLETE;
 726        ldc_set_state(lp, LDC_STATE_CONNECTED);
 727
 728        return LDC_EVENT_UP;
 729}
 730
 731static int process_control_frame(struct ldc_channel *lp,
 732                                 struct ldc_packet *p)
 733{
 734        switch (p->ctrl) {
 735        case LDC_VERS:
 736                return process_version(lp, p);
 737
 738        case LDC_RTS:
 739                return process_rts(lp, p);
 740
 741        case LDC_RTR:
 742                return process_rtr(lp, p);
 743
 744        case LDC_RDX:
 745                return process_rdx(lp, p);
 746
 747        default:
 748                return ldc_abort(lp);
 749        }
 750}
 751
 752static int process_error_frame(struct ldc_channel *lp,
 753                               struct ldc_packet *p)
 754{
 755        return ldc_abort(lp);
 756}
 757
 758static int process_data_ack(struct ldc_channel *lp,
 759                            struct ldc_packet *ack)
 760{
 761        unsigned long head = lp->tx_acked;
 762        u32 ackid = ack->u.r.ackid;
 763
 764        while (1) {
 765                struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
 766
 767                head = tx_advance(lp, head);
 768
 769                if (p->seqid == ackid) {
 770                        lp->tx_acked = head;
 771                        return 0;
 772                }
 773                if (head == lp->tx_tail)
 774                        return ldc_abort(lp);
 775        }
 776
 777        return 0;
 778}
 779
 780static void send_events(struct ldc_channel *lp, unsigned int event_mask)
 781{
 782        if (event_mask & LDC_EVENT_RESET)
 783                lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
 784        if (event_mask & LDC_EVENT_UP)
 785                lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
 786        if (event_mask & LDC_EVENT_DATA_READY)
 787                lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
 788}
 789
 790static irqreturn_t ldc_rx(int irq, void *dev_id)
 791{
 792        struct ldc_channel *lp = dev_id;
 793        unsigned long orig_state, hv_err, flags;
 794        unsigned int event_mask;
 795
 796        spin_lock_irqsave(&lp->lock, flags);
 797
 798        orig_state = lp->chan_state;
 799        hv_err = sun4v_ldc_rx_get_state(lp->id,
 800                                        &lp->rx_head,
 801                                        &lp->rx_tail,
 802                                        &lp->chan_state);
 803
 804        ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 805               orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
 806
 807        event_mask = 0;
 808
 809        if (lp->cfg.mode == LDC_MODE_RAW &&
 810            lp->chan_state == LDC_CHANNEL_UP) {
 811                lp->hs_state = LDC_HS_COMPLETE;
 812                ldc_set_state(lp, LDC_STATE_CONNECTED);
 813
 814                event_mask |= LDC_EVENT_UP;
 815
 816                orig_state = lp->chan_state;
 817        }
 818
 819        /* If we are in reset state, flush the RX queue and ignore
 820         * everything.
 821         */
 822        if (lp->flags & LDC_FLAG_RESET) {
 823                (void) __set_rx_head(lp, lp->rx_tail);
 824                goto out;
 825        }
 826
 827        /* Once we finish the handshake, we let the ldc_read()
 828         * paths do all of the control frame and state management.
 829         * Just trigger the callback.
 830         */
 831        if (lp->hs_state == LDC_HS_COMPLETE) {
 832handshake_complete:
 833                if (lp->chan_state != orig_state) {
 834                        unsigned int event = LDC_EVENT_RESET;
 835
 836                        if (lp->chan_state == LDC_CHANNEL_UP)
 837                                event = LDC_EVENT_UP;
 838
 839                        event_mask |= event;
 840                }
 841                if (lp->rx_head != lp->rx_tail)
 842                        event_mask |= LDC_EVENT_DATA_READY;
 843
 844                goto out;
 845        }
 846
 847        if (lp->chan_state != orig_state)
 848                goto out;
 849
 850        while (lp->rx_head != lp->rx_tail) {
 851                struct ldc_packet *p;
 852                unsigned long new;
 853                int err;
 854
 855                p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
 856
 857                switch (p->type) {
 858                case LDC_CTRL:
 859                        err = process_control_frame(lp, p);
 860                        if (err > 0)
 861                                event_mask |= err;
 862                        break;
 863
 864                case LDC_DATA:
 865                        event_mask |= LDC_EVENT_DATA_READY;
 866                        err = 0;
 867                        break;
 868
 869                case LDC_ERR:
 870                        err = process_error_frame(lp, p);
 871                        break;
 872
 873                default:
 874                        err = ldc_abort(lp);
 875                        break;
 876                }
 877
 878                if (err < 0)
 879                        break;
 880
 881                new = lp->rx_head;
 882                new += LDC_PACKET_SIZE;
 883                if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
 884                        new = 0;
 885                lp->rx_head = new;
 886
 887                err = __set_rx_head(lp, new);
 888                if (err < 0) {
 889                        (void) ldc_abort(lp);
 890                        break;
 891                }
 892                if (lp->hs_state == LDC_HS_COMPLETE)
 893                        goto handshake_complete;
 894        }
 895
 896out:
 897        spin_unlock_irqrestore(&lp->lock, flags);
 898
 899        send_events(lp, event_mask);
 900
 901        return IRQ_HANDLED;
 902}
 903
 904static irqreturn_t ldc_tx(int irq, void *dev_id)
 905{
 906        struct ldc_channel *lp = dev_id;
 907        unsigned long flags, hv_err, orig_state;
 908        unsigned int event_mask = 0;
 909
 910        spin_lock_irqsave(&lp->lock, flags);
 911
 912        orig_state = lp->chan_state;
 913        hv_err = sun4v_ldc_tx_get_state(lp->id,
 914                                        &lp->tx_head,
 915                                        &lp->tx_tail,
 916                                        &lp->chan_state);
 917
 918        ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 919               orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
 920
 921        if (lp->cfg.mode == LDC_MODE_RAW &&
 922            lp->chan_state == LDC_CHANNEL_UP) {
 923                lp->hs_state = LDC_HS_COMPLETE;
 924                ldc_set_state(lp, LDC_STATE_CONNECTED);
 925
 926                event_mask |= LDC_EVENT_UP;
 927        }
 928
 929        spin_unlock_irqrestore(&lp->lock, flags);
 930
 931        send_events(lp, event_mask);
 932
 933        return IRQ_HANDLED;
 934}
 935
 936/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
 937 * XXX that addition and removal from the ldc_channel_list has
 938 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 939 * XXX totally pointless as another thread can slip into ldc_alloc()
 940 * XXX and add a channel with the same ID.  There also needs to be
 941 * XXX a spinlock for ldc_channel_list.
 942 */
 943static HLIST_HEAD(ldc_channel_list);
 944
 945static int __ldc_channel_exists(unsigned long id)
 946{
 947        struct ldc_channel *lp;
 948        struct hlist_node *n;
 949
 950        hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
 951                if (lp->id == id)
 952                        return 1;
 953        }
 954        return 0;
 955}
 956
 957static int alloc_queue(const char *name, unsigned long num_entries,
 958                       struct ldc_packet **base, unsigned long *ra)
 959{
 960        unsigned long size, order;
 961        void *q;
 962
 963        size = num_entries * LDC_PACKET_SIZE;
 964        order = get_order(size);
 965
 966        q = (void *) __get_free_pages(GFP_KERNEL, order);
 967        if (!q) {
 968                printk(KERN_ERR PFX "Alloc of %s queue failed with "
 969                       "size=%lu order=%lu\n", name, size, order);
 970                return -ENOMEM;
 971        }
 972
 973        memset(q, 0, PAGE_SIZE << order);
 974
 975        *base = q;
 976        *ra = __pa(q);
 977
 978        return 0;
 979}
 980
 981static void free_queue(unsigned long num_entries, struct ldc_packet *q)
 982{
 983        unsigned long size, order;
 984
 985        if (!q)
 986                return;
 987
 988        size = num_entries * LDC_PACKET_SIZE;
 989        order = get_order(size);
 990
 991        free_pages((unsigned long)q, order);
 992}
 993
/* Number of entries in each channel's map table; ldc_iommu_init()
 * sizes both the table and the arena bitmap from this value.
 *
 * XXX Make this configurable... XXX
 */
#define LDC_IOTABLE_SIZE        (8 * 1024)
 996
 997static int ldc_iommu_init(struct ldc_channel *lp)
 998{
 999        unsigned long sz, num_tsb_entries, tsbsize, order;
1000        struct ldc_iommu *iommu = &lp->iommu;
1001        struct ldc_mtable_entry *table;
1002        unsigned long hv_err;
1003        int err;
1004
1005        num_tsb_entries = LDC_IOTABLE_SIZE;
1006        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1007
1008        spin_lock_init(&iommu->lock);
1009
1010        sz = num_tsb_entries / 8;
1011        sz = (sz + 7UL) & ~7UL;
1012        iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1013        if (!iommu->arena.map) {
1014                printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1015                return -ENOMEM;
1016        }
1017
1018        iommu->arena.limit = num_tsb_entries;
1019
1020        order = get_order(tsbsize);
1021
1022        table = (struct ldc_mtable_entry *)
1023                __get_free_pages(GFP_KERNEL, order);
1024        err = -ENOMEM;
1025        if (!table) {
1026                printk(KERN_ERR PFX "Alloc of MTE table failed, "
1027                       "size=%lu order=%lu\n", tsbsize, order);
1028                goto out_free_map;
1029        }
1030
1031        memset(table, 0, PAGE_SIZE << order);
1032
1033        iommu->page_table = table;
1034
1035        hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1036                                         num_tsb_entries);
1037        err = -EINVAL;
1038        if (hv_err)
1039                goto out_free_table;
1040
1041        return 0;
1042
1043out_free_table:
1044        free_pages((unsigned long) table, order);
1045        iommu->page_table = NULL;
1046
1047out_free_map:
1048        kfree(iommu->arena.map);
1049        iommu->arena.map = NULL;
1050
1051        return err;
1052}
1053
1054static void ldc_iommu_release(struct ldc_channel *lp)
1055{
1056        struct ldc_iommu *iommu = &lp->iommu;
1057        unsigned long num_tsb_entries, tsbsize, order;
1058
1059        (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1060
1061        num_tsb_entries = iommu->arena.limit;
1062        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1063        order = get_order(tsbsize);
1064
1065        free_pages((unsigned long) iommu->page_table, order);
1066        iommu->page_table = NULL;
1067
1068        kfree(iommu->arena.map);
1069        iommu->arena.map = NULL;
1070}
1071
1072struct ldc_channel *ldc_alloc(unsigned long id,
1073                              const struct ldc_channel_config *cfgp,
1074                              void *event_arg)
1075{
1076        struct ldc_channel *lp;
1077        const struct ldc_mode_ops *mops;
1078        unsigned long dummy1, dummy2, hv_err;
1079        u8 mss, *mssbuf;
1080        int err;
1081
1082        err = -ENODEV;
1083        if (!ldom_domaining_enabled)
1084                goto out_err;
1085
1086        err = -EINVAL;
1087        if (!cfgp)
1088                goto out_err;
1089
1090        switch (cfgp->mode) {
1091        case LDC_MODE_RAW:
1092                mops = &raw_ops;
1093                mss = LDC_PACKET_SIZE;
1094                break;
1095
1096        case LDC_MODE_UNRELIABLE:
1097                mops = &nonraw_ops;
1098                mss = LDC_PACKET_SIZE - 8;
1099                break;
1100
1101        case LDC_MODE_STREAM:
1102                mops = &stream_ops;
1103                mss = LDC_PACKET_SIZE - 8 - 8;
1104                break;
1105
1106        default:
1107                goto out_err;
1108        }
1109
1110        if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1111                goto out_err;
1112
1113        hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1114        err = -ENODEV;
1115        if (hv_err == HV_ECHANNEL)
1116                goto out_err;
1117
1118        err = -EEXIST;
1119        if (__ldc_channel_exists(id))
1120                goto out_err;
1121
1122        mssbuf = NULL;
1123
1124        lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1125        err = -ENOMEM;
1126        if (!lp)
1127                goto out_err;
1128
1129        spin_lock_init(&lp->lock);
1130
1131        lp->id = id;
1132
1133        err = ldc_iommu_init(lp);
1134        if (err)
1135                goto out_free_ldc;
1136
1137        lp->mops = mops;
1138        lp->mss = mss;
1139
1140        lp->cfg = *cfgp;
1141        if (!lp->cfg.mtu)
1142                lp->cfg.mtu = LDC_DEFAULT_MTU;
1143
1144        if (lp->cfg.mode == LDC_MODE_STREAM) {
1145                mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1146                if (!mssbuf) {
1147                        err = -ENOMEM;
1148                        goto out_free_iommu;
1149                }
1150                lp->mssbuf = mssbuf;
1151        }
1152
1153        lp->event_arg = event_arg;
1154
1155        /* XXX allow setting via ldc_channel_config to override defaults
1156         * XXX or use some formula based upon mtu
1157         */
1158        lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1159        lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1160
1161        err = alloc_queue("TX", lp->tx_num_entries,
1162                          &lp->tx_base, &lp->tx_ra);
1163        if (err)
1164                goto out_free_mssbuf;
1165
1166        err = alloc_queue("RX", lp->rx_num_entries,
1167                          &lp->rx_base, &lp->rx_ra);
1168        if (err)
1169                goto out_free_txq;
1170
1171        lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1172
1173        lp->hs_state = LDC_HS_CLOSED;
1174        ldc_set_state(lp, LDC_STATE_INIT);
1175
1176        INIT_HLIST_NODE(&lp->list);
1177        hlist_add_head(&lp->list, &ldc_channel_list);
1178
1179        INIT_HLIST_HEAD(&lp->mh_list);
1180
1181        return lp;
1182
1183out_free_txq:
1184        free_queue(lp->tx_num_entries, lp->tx_base);
1185
1186out_free_mssbuf:
1187        kfree(mssbuf);
1188
1189out_free_iommu:
1190        ldc_iommu_release(lp);
1191
1192out_free_ldc:
1193        kfree(lp);
1194
1195out_err:
1196        return ERR_PTR(err);
1197}
1198EXPORT_SYMBOL(ldc_alloc);
1199
1200void ldc_free(struct ldc_channel *lp)
1201{
1202        if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1203                free_irq(lp->cfg.rx_irq, lp);
1204                free_irq(lp->cfg.tx_irq, lp);
1205        }
1206
1207        if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1208                sun4v_ldc_tx_qconf(lp->id, 0, 0);
1209                sun4v_ldc_rx_qconf(lp->id, 0, 0);
1210                lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1211        }
1212        if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1213                free_queue(lp->tx_num_entries, lp->tx_base);
1214                free_queue(lp->rx_num_entries, lp->rx_base);
1215                lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1216        }
1217
1218        hlist_del(&lp->list);
1219
1220        kfree(lp->mssbuf);
1221
1222        ldc_iommu_release(lp);
1223
1224        kfree(lp);
1225}
1226EXPORT_SYMBOL(ldc_free);
1227
1228/* Bind the channel.  This registers the LDC queues with
1229 * the hypervisor and puts the channel into a pseudo-listening
1230 * state.  This does not initiate a handshake, ldc_connect() does
1231 * that.
1232 */
1233int ldc_bind(struct ldc_channel *lp, const char *name)
1234{
1235        unsigned long hv_err, flags;
1236        int err = -EINVAL;
1237
1238        if (!name ||
1239            (lp->state != LDC_STATE_INIT))
1240                return -EINVAL;
1241
1242        snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1243        snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1244
1245        err = request_irq(lp->cfg.rx_irq, ldc_rx,
1246                          IRQF_SAMPLE_RANDOM | IRQF_DISABLED,
1247                          lp->rx_irq_name, lp);
1248        if (err)
1249                return err;
1250
1251        err = request_irq(lp->cfg.tx_irq, ldc_tx,
1252                          IRQF_SAMPLE_RANDOM | IRQF_DISABLED,
1253                          lp->tx_irq_name, lp);
1254        if (err) {
1255                free_irq(lp->cfg.rx_irq, lp);
1256                return err;
1257        }
1258
1259
1260        spin_lock_irqsave(&lp->lock, flags);
1261
1262        enable_irq(lp->cfg.rx_irq);
1263        enable_irq(lp->cfg.tx_irq);
1264
1265        lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1266
1267        err = -ENODEV;
1268        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1269        if (hv_err)
1270                goto out_free_irqs;
1271
1272        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1273        if (hv_err)
1274                goto out_free_irqs;
1275
1276        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1277        if (hv_err)
1278                goto out_unmap_tx;
1279
1280        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1281        if (hv_err)
1282                goto out_unmap_tx;
1283
1284        lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1285
1286        hv_err = sun4v_ldc_tx_get_state(lp->id,
1287                                        &lp->tx_head,
1288                                        &lp->tx_tail,
1289                                        &lp->chan_state);
1290        err = -EBUSY;
1291        if (hv_err)
1292                goto out_unmap_rx;
1293
1294        lp->tx_acked = lp->tx_head;
1295
1296        lp->hs_state = LDC_HS_OPEN;
1297        ldc_set_state(lp, LDC_STATE_BOUND);
1298
1299        spin_unlock_irqrestore(&lp->lock, flags);
1300
1301        return 0;
1302
1303out_unmap_rx:
1304        lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1305        sun4v_ldc_rx_qconf(lp->id, 0, 0);
1306
1307out_unmap_tx:
1308        sun4v_ldc_tx_qconf(lp->id, 0, 0);
1309
1310out_free_irqs:
1311        lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1312        free_irq(lp->cfg.tx_irq, lp);
1313        free_irq(lp->cfg.rx_irq, lp);
1314
1315        spin_unlock_irqrestore(&lp->lock, flags);
1316
1317        return err;
1318}
1319EXPORT_SYMBOL(ldc_bind);
1320
1321int ldc_connect(struct ldc_channel *lp)
1322{
1323        unsigned long flags;
1324        int err;
1325
1326        if (lp->cfg.mode == LDC_MODE_RAW)
1327                return -EINVAL;
1328
1329        spin_lock_irqsave(&lp->lock, flags);
1330
1331        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1332            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1333            lp->hs_state != LDC_HS_OPEN)
1334                err = -EINVAL;
1335        else
1336                err = start_handshake(lp);
1337
1338        spin_unlock_irqrestore(&lp->lock, flags);
1339
1340        return err;
1341}
1342EXPORT_SYMBOL(ldc_connect);
1343
1344int ldc_disconnect(struct ldc_channel *lp)
1345{
1346        unsigned long hv_err, flags;
1347        int err;
1348
1349        if (lp->cfg.mode == LDC_MODE_RAW)
1350                return -EINVAL;
1351
1352        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1353            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1354                return -EINVAL;
1355
1356        spin_lock_irqsave(&lp->lock, flags);
1357
1358        err = -ENODEV;
1359        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1360        if (hv_err)
1361                goto out_err;
1362
1363        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1364        if (hv_err)
1365                goto out_err;
1366
1367        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1368        if (hv_err)
1369                goto out_err;
1370
1371        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1372        if (hv_err)
1373                goto out_err;
1374
1375        ldc_set_state(lp, LDC_STATE_BOUND);
1376        lp->hs_state = LDC_HS_OPEN;
1377        lp->flags |= LDC_FLAG_RESET;
1378
1379        spin_unlock_irqrestore(&lp->lock, flags);
1380
1381        return 0;
1382
1383out_err:
1384        sun4v_ldc_tx_qconf(lp->id, 0, 0);
1385        sun4v_ldc_rx_qconf(lp->id, 0, 0);
1386        free_irq(lp->cfg.tx_irq, lp);
1387        free_irq(lp->cfg.rx_irq, lp);
1388        lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1389                       LDC_FLAG_REGISTERED_QUEUES);
1390        ldc_set_state(lp, LDC_STATE_INIT);
1391
1392        spin_unlock_irqrestore(&lp->lock, flags);
1393
1394        return err;
1395}
1396EXPORT_SYMBOL(ldc_disconnect);
1397
1398int ldc_state(struct ldc_channel *lp)
1399{
1400        return lp->state;
1401}
1402EXPORT_SYMBOL(ldc_state);
1403
1404static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1405{
1406        struct ldc_packet *p;
1407        unsigned long new_tail;
1408        int err;
1409
1410        if (size > LDC_PACKET_SIZE)
1411                return -EMSGSIZE;
1412
1413        p = data_get_tx_packet(lp, &new_tail);
1414        if (!p)
1415                return -EAGAIN;
1416
1417        memcpy(p, buf, size);
1418
1419        err = send_tx_packet(lp, p, new_tail);
1420        if (!err)
1421                err = size;
1422
1423        return err;
1424}
1425
1426static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1427{
1428        struct ldc_packet *p;
1429        unsigned long hv_err, new;
1430        int err;
1431
1432        if (size < LDC_PACKET_SIZE)
1433                return -EINVAL;
1434
1435        hv_err = sun4v_ldc_rx_get_state(lp->id,
1436                                        &lp->rx_head,
1437                                        &lp->rx_tail,
1438                                        &lp->chan_state);
1439        if (hv_err)
1440                return ldc_abort(lp);
1441
1442        if (lp->chan_state == LDC_CHANNEL_DOWN ||
1443            lp->chan_state == LDC_CHANNEL_RESETTING)
1444                return -ECONNRESET;
1445
1446        if (lp->rx_head == lp->rx_tail)
1447                return 0;
1448
1449        p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1450        memcpy(buf, p, LDC_PACKET_SIZE);
1451
1452        new = rx_advance(lp, lp->rx_head);
1453        lp->rx_head = new;
1454
1455        err = __set_rx_head(lp, new);
1456        if (err < 0)
1457                err = -ECONNRESET;
1458        else
1459                err = LDC_PACKET_SIZE;
1460
1461        return err;
1462}
1463
/* Read/write handlers selected by ldc_alloc() for LDC_MODE_RAW. */
static const struct ldc_mode_ops raw_ops = {
        .write          =       write_raw,
        .read           =       read_raw,
};
1468
1469static int write_nonraw(struct ldc_channel *lp, const void *buf,
1470                        unsigned int size)
1471{
1472        unsigned long hv_err, tail;
1473        unsigned int copied;
1474        u32 seq;
1475        int err;
1476
1477        hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1478                                        &lp->chan_state);
1479        if (unlikely(hv_err))
1480                return -EBUSY;
1481
1482        if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1483                return ldc_abort(lp);
1484
1485        if (!tx_has_space_for(lp, size))
1486                return -EAGAIN;
1487
1488        seq = lp->snd_nxt;
1489        copied = 0;
1490        tail = lp->tx_tail;
1491        while (copied < size) {
1492                struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1493                u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1494                            p->u.u_data :
1495                            p->u.r.r_data);
1496                int data_len;
1497
1498                p->type = LDC_DATA;
1499                p->stype = LDC_INFO;
1500                p->ctrl = 0;
1501
1502                data_len = size - copied;
1503                if (data_len > lp->mss)
1504                        data_len = lp->mss;
1505
1506                BUG_ON(data_len > LDC_LEN);
1507
1508                p->env = (data_len |
1509                          (copied == 0 ? LDC_START : 0) |
1510                          (data_len == size - copied ? LDC_STOP : 0));
1511
1512                p->seqid = ++seq;
1513
1514                ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1515                       p->type,
1516                       p->stype,
1517                       p->ctrl,
1518                       p->env,
1519                       p->seqid);
1520
1521                memcpy(data, buf, data_len);
1522                buf += data_len;
1523                copied += data_len;
1524
1525                tail = tx_advance(lp, tail);
1526        }
1527
1528        err = set_tx_tail(lp, tail);
1529        if (!err) {
1530                lp->snd_nxt = seq;
1531                err = size;
1532        }
1533
1534        return err;
1535}
1536
1537static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1538                      struct ldc_packet *first_frag)
1539{
1540        int err;
1541
1542        if (first_frag)
1543                lp->rcv_nxt = first_frag->seqid - 1;
1544
1545        err = send_data_nack(lp, p);
1546        if (err)
1547                return err;
1548
1549        err = __set_rx_head(lp, lp->rx_tail);
1550        if (err < 0)
1551                return ldc_abort(lp);
1552
1553        return 0;
1554}
1555
1556static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1557{
1558        if (p->stype & LDC_ACK) {
1559                int err = process_data_ack(lp, p);
1560                if (err)
1561                        return err;
1562        }
1563        if (p->stype & LDC_NACK)
1564                return ldc_abort(lp);
1565
1566        return 0;
1567}
1568
1569static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1570{
1571        unsigned long dummy;
1572        int limit = 1000;
1573
1574        ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1575               cur_head, lp->rx_head, lp->rx_tail);
1576        while (limit-- > 0) {
1577                unsigned long hv_err;
1578
1579                hv_err = sun4v_ldc_rx_get_state(lp->id,
1580                                                &dummy,
1581                                                &lp->rx_tail,
1582                                                &lp->chan_state);
1583                if (hv_err)
1584                        return ldc_abort(lp);
1585
1586                if (lp->chan_state == LDC_CHANNEL_DOWN ||
1587                    lp->chan_state == LDC_CHANNEL_RESETTING)
1588                        return -ECONNRESET;
1589
1590                if (cur_head != lp->rx_tail) {
1591                        ldcdbg(DATA, "DATA WAIT DONE "
1592                               "head[%lx] tail[%lx] chan_state[%lx]\n",
1593                               dummy, lp->rx_tail, lp->chan_state);
1594                        return 0;
1595                }
1596
1597                udelay(1);
1598        }
1599        return -EAGAIN;
1600}
1601
1602static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1603{
1604        int err = __set_rx_head(lp, head);
1605
1606        if (err < 0)
1607                return ldc_abort(lp);
1608
1609        lp->rx_head = head;
1610        return 0;
1611}
1612
1613static void send_data_ack(struct ldc_channel *lp)
1614{
1615        unsigned long new_tail;
1616        struct ldc_packet *p;
1617
1618        p = data_get_tx_packet(lp, &new_tail);
1619        if (likely(p)) {
1620                int err;
1621
1622                memset(p, 0, sizeof(*p));
1623                p->type = LDC_DATA;
1624                p->stype = LDC_ACK;
1625                p->ctrl = 0;
1626                p->seqid = lp->snd_nxt + 1;
1627                p->u.r.ackid = lp->rcv_nxt;
1628
1629                err = send_tx_packet(lp, p, new_tail);
1630                if (!err)
1631                        lp->snd_nxt++;
1632        }
1633}
1634
/* Read one (possibly fragmented) message from the RX queue into
 * @buf, which holds up to @size bytes.
 *
 * Packets are walked starting at the RX head.  Control frames are
 * handed to process_control_frame(), ACK/NACK bits to
 * data_ack_nack(), and LDC_INFO data payloads are copied into @buf
 * until a packet with LDC_STOP is seen.
 *
 * Returns the number of bytes copied, 0 if the queue is empty (or a
 * bad sequence number was handled without error), or a negative
 * error.  -EMSGSIZE means @buf was too small; in that case the RX
 * head has not been advanced past the message.  When an error is
 * returned after a fragment run was started, lp->rcv_nxt is rewound
 * to just before the first fragment.  For modes other than
 * LDC_MODE_UNRELIABLE an ACK is sent after data was copied.
 */
static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
{
        struct ldc_packet *first_frag;
        unsigned long hv_err, new;
        int err, copied;

        hv_err = sun4v_ldc_rx_get_state(lp->id,
                                        &lp->rx_head,
                                        &lp->rx_tail,
                                        &lp->chan_state);
        if (hv_err)
                return ldc_abort(lp);

        if (lp->chan_state == LDC_CHANNEL_DOWN ||
            lp->chan_state == LDC_CHANNEL_RESETTING)
                return -ECONNRESET;

        if (lp->rx_head == lp->rx_tail)
                return 0;

        first_frag = NULL;
        copied = err = 0;
        /* 'new' is the tentative head; it is only committed to the
         * hypervisor via rx_set_head().
         */
        new = lp->rx_head;
        while (1) {
                struct ldc_packet *p;
                int pkt_len;

                BUG_ON(new == lp->rx_tail);
                p = lp->rx_base + (new / LDC_PACKET_SIZE);

                ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
                       "rcv_nxt[%08x]\n",
                       p->type,
                       p->stype,
                       p->ctrl,
                       p->env,
                       p->seqid,
                       p->u.r.ackid,
                       lp->rcv_nxt);

                if (unlikely(!rx_seq_ok(lp, p->seqid))) {
                        /* Discard anything gathered so far. */
                        err = rx_bad_seq(lp, p, first_frag);
                        copied = 0;
                        break;
                }

                if (p->type & LDC_CTRL) {
                        err = process_control_frame(lp, p);
                        if (err < 0)
                                break;
                        err = 0;
                }

                lp->rcv_nxt = p->seqid;

                /* Not a data packet: skip it. */
                if (!(p->type & LDC_DATA)) {
                        new = rx_advance(lp, new);
                        goto no_data;
                }
                if (p->stype & (LDC_ACK | LDC_NACK)) {
                        err = data_ack_nack(lp, p);
                        if (err)
                                break;
                }
                /* Data packet without a payload: consume it now. */
                if (!(p->stype & LDC_INFO)) {
                        new = rx_advance(lp, new);
                        err = rx_set_head(lp, new);
                        if (err)
                                break;
                        goto no_data;
                }

                pkt_len = p->env & LDC_LEN;

                /* Every initial packet starts with the START bit set.
                 *
                 * Singleton packets will have both START+STOP set.
                 *
                 * Fragments will have START set in the first frame, STOP
                 * set in the last frame, and neither bit set in middle
                 * frames of the packet.
                 *
                 * Therefore if we are at the beginning of a packet and
                 * we don't see START, or we are in the middle of a fragmented
                 * packet and do see START, we are unsynchronized and should
                 * flush the RX queue.
                 */
                if ((first_frag == NULL && !(p->env & LDC_START)) ||
                    (first_frag != NULL &&  (p->env & LDC_START))) {
                        if (!first_frag)
                                new = rx_advance(lp, new);

                        err = rx_set_head(lp, new);
                        if (err)
                                break;

                        if (!first_frag)
                                goto no_data;
                }
                if (!first_frag)
                        first_frag = p;

                if (pkt_len > size - copied) {
                        /* User didn't give us a big enough buffer,
                         * what to do?  This is a pretty serious error.
                         *
                         * Since we haven't updated the RX ring head to
                         * consume any of the packets, signal the error
                         * to the user and just leave the RX ring alone.
                         *
                         * This seems the best behavior because this allows
                         * a user of the LDC layer to start with a small
                         * RX buffer for ldc_read() calls and use -EMSGSIZE
                         * as a cue to enlarge its read buffer.
                         */
                        err = -EMSGSIZE;
                        break;
                }

                /* Ok, we are gonna eat this one.  */
                new = rx_advance(lp, new);

                memcpy(buf,
                       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
                        p->u.u_data : p->u.r.r_data), pkt_len);
                buf += pkt_len;
                copied += pkt_len;

                if (p->env & LDC_STOP)
                        break;

no_data:
                /* Caught up with the tail: wait briefly for more. */
                if (new == lp->rx_tail) {
                        err = rx_data_wait(lp, new);
                        if (err)
                                break;
                }
        }

        /* Commit the consumed packets. */
        if (!err)
                err = rx_set_head(lp, new);

        if (err && first_frag)
                lp->rcv_nxt = first_frag->seqid - 1;

        if (!err) {
                err = copied;
                if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
                        send_data_ack(lp);
        }

        return err;
}
1788
/* Read/write handlers selected by ldc_alloc() for LDC_MODE_UNRELIABLE. */
static const struct ldc_mode_ops nonraw_ops = {
        .write          =       write_nonraw,
        .read           =       read_nonraw,
};
1793
1794static int write_stream(struct ldc_channel *lp, const void *buf,
1795                        unsigned int size)
1796{
1797        if (size > lp->cfg.mtu)
1798                size = lp->cfg.mtu;
1799        return write_nonraw(lp, buf, size);
1800}
1801
1802static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1803{
1804        if (!lp->mssbuf_len) {
1805                int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1806                if (err < 0)
1807                        return err;
1808
1809                lp->mssbuf_len = err;
1810                lp->mssbuf_off = 0;
1811        }
1812
1813        if (size > lp->mssbuf_len)
1814                size = lp->mssbuf_len;
1815        memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1816
1817        lp->mssbuf_off += size;
1818        lp->mssbuf_len -= size;
1819
1820        return size;
1821}
1822
/* Read/write handlers selected by ldc_alloc() for LDC_MODE_STREAM. */
static const struct ldc_mode_ops stream_ops = {
        .write          =       write_stream,
        .read           =       read_stream,
};
1827
1828int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1829{
1830        unsigned long flags;
1831        int err;
1832
1833        if (!buf)
1834                return -EINVAL;
1835
1836        if (!size)
1837                return 0;
1838
1839        spin_lock_irqsave(&lp->lock, flags);
1840
1841        if (lp->hs_state != LDC_HS_COMPLETE)
1842                err = -ENOTCONN;
1843        else
1844                err = lp->mops->write(lp, buf, size);
1845
1846        spin_unlock_irqrestore(&lp->lock, flags);
1847
1848        return err;
1849}
1850EXPORT_SYMBOL(ldc_write);
1851
1852int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1853{
1854        unsigned long flags;
1855        int err;
1856
1857        if (!buf)
1858                return -EINVAL;
1859
1860        if (!size)
1861                return 0;
1862
1863        spin_lock_irqsave(&lp->lock, flags);
1864
1865        if (lp->hs_state != LDC_HS_COMPLETE)
1866                err = -ENOTCONN;
1867        else
1868                err = lp->mops->read(lp, buf, size);
1869
1870        spin_unlock_irqrestore(&lp->lock, flags);
1871
1872        return err;
1873}
1874EXPORT_SYMBOL(ldc_read);
1875
1876static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1877{
1878        struct iommu_arena *arena = &iommu->arena;
1879        unsigned long n, start, end, limit;
1880        int pass;
1881
1882        limit = arena->limit;
1883        start = arena->hint;
1884        pass = 0;
1885
1886again:
1887        n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0);
1888        end = n + npages;
1889        if (unlikely(end >= limit)) {
1890                if (likely(pass < 1)) {
1891                        limit = start;
1892                        start = 0;
1893                        pass++;
1894                        goto again;
1895                } else {
1896                        /* Scanned the whole thing, give up. */
1897                        return -1;
1898                }
1899        }
1900        bitmap_set(arena->map, n, npages);
1901
1902        arena->hint = end;
1903
1904        return n;
1905}
1906
1907#define COOKIE_PGSZ_CODE        0xf000000000000000ULL
1908#define COOKIE_PGSZ_CODE_SHIFT  60ULL
1909
1910static u64 pagesize_code(void)
1911{
1912        switch (PAGE_SIZE) {
1913        default:
1914        case (8ULL * 1024ULL):
1915                return 0;
1916        case (64ULL * 1024ULL):
1917                return 1;
1918        case (512ULL * 1024ULL):
1919                return 2;
1920        case (4ULL * 1024ULL * 1024ULL):
1921                return 3;
1922        case (32ULL * 1024ULL * 1024ULL):
1923                return 4;
1924        case (256ULL * 1024ULL * 1024ULL):
1925                return 5;
1926        }
1927}
1928
1929static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1930{
1931        return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1932                (index << PAGE_SHIFT) |
1933                page_offset);
1934}
1935
1936static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1937{
1938        u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1939
1940        cookie &= ~COOKIE_PGSZ_CODE;
1941
1942        *shift = szcode * 3;
1943
1944        return (cookie >> (13ULL + (szcode * 3ULL)));
1945}
1946
1947static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1948                                             unsigned long npages)
1949{
1950        long entry;
1951
1952        entry = arena_alloc(iommu, npages);
1953        if (unlikely(entry < 0))
1954                return NULL;
1955
1956        return iommu->page_table + entry;
1957}
1958
1959static u64 perm_to_mte(unsigned int map_perm)
1960{
1961        u64 mte_base;
1962
1963        mte_base = pagesize_code();
1964
1965        if (map_perm & LDC_MAP_SHADOW) {
1966                if (map_perm & LDC_MAP_R)
1967                        mte_base |= LDC_MTE_COPY_R;
1968                if (map_perm & LDC_MAP_W)
1969                        mte_base |= LDC_MTE_COPY_W;
1970        }
1971        if (map_perm & LDC_MAP_DIRECT) {
1972                if (map_perm & LDC_MAP_R)
1973                        mte_base |= LDC_MTE_READ;
1974                if (map_perm & LDC_MAP_W)
1975                        mte_base |= LDC_MTE_WRITE;
1976                if (map_perm & LDC_MAP_X)
1977                        mte_base |= LDC_MTE_EXEC;
1978        }
1979        if (map_perm & LDC_MAP_IO) {
1980                if (map_perm & LDC_MAP_R)
1981                        mte_base |= LDC_MTE_IOMMU_R;
1982                if (map_perm & LDC_MAP_W)
1983                        mte_base |= LDC_MTE_IOMMU_W;
1984        }
1985
1986        return mte_base;
1987}
1988
1989static int pages_in_region(unsigned long base, long len)
1990{
1991        int count = 0;
1992
1993        do {
1994                unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
1995
1996                len -= (new - base);
1997                base = new;
1998                count++;
1999        } while (len > 0);
2000
2001        return count;
2002}
2003
/* Working state carried across fill_cookies() calls while one mapping
 * request is turned into map table entries and cookies.
 */
struct cookie_state {
	/* Map table whose entries are written, indexed by pte_idx. */
	struct ldc_mtable_entry		*page_table;
	/* Caller's output array; nc entries have been filled so far. */
	struct ldc_trans_cookie		*cookies;
	/* Bits OR'ed with each page's physical address to form its MTE. */
	u64				mte_base;
	/* Cookie address just past the end of the last cookie emitted,
	 * used to merge contiguous pages; starts as all ones so the
	 * first page never matches.
	 */
	u64				prev_cookie;
	/* Index of the next map table entry to fill. */
	u32				pte_idx;
	/* Number of cookies produced. */
	u32				nc;
};
2012
2013static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2014                         unsigned long off, unsigned long len)
2015{
2016        do {
2017                unsigned long tlen, new = pa + PAGE_SIZE;
2018                u64 this_cookie;
2019
2020                sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2021
2022                tlen = PAGE_SIZE;
2023                if (off)
2024                        tlen = PAGE_SIZE - off;
2025                if (tlen > len)
2026                        tlen = len;
2027
2028                this_cookie = make_cookie(sp->pte_idx,
2029                                          pagesize_code(), off);
2030
2031                off = 0;
2032
2033                if (this_cookie == sp->prev_cookie) {
2034                        sp->cookies[sp->nc - 1].cookie_size += tlen;
2035                } else {
2036                        sp->cookies[sp->nc].cookie_addr = this_cookie;
2037                        sp->cookies[sp->nc].cookie_size = tlen;
2038                        sp->nc++;
2039                }
2040                sp->prev_cookie = this_cookie + tlen;
2041
2042                sp->pte_idx++;
2043
2044                len -= tlen;
2045                pa = new;
2046        } while (len > 0);
2047}
2048
2049static int sg_count_one(struct scatterlist *sg)
2050{
2051        unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2052        long len = sg->length;
2053
2054        if ((sg->offset | len) & (8UL - 1))
2055                return -EFAULT;
2056
2057        return pages_in_region(base + sg->offset, len);
2058}
2059
2060static int sg_count_pages(struct scatterlist *sg, int num_sg)
2061{
2062        int count;
2063        int i;
2064
2065        count = 0;
2066        for (i = 0; i < num_sg; i++) {
2067                int err = sg_count_one(sg + i);
2068                if (err < 0)
2069                        return err;
2070                count += err;
2071        }
2072
2073        return count;
2074}
2075
2076int ldc_map_sg(struct ldc_channel *lp,
2077               struct scatterlist *sg, int num_sg,
2078               struct ldc_trans_cookie *cookies, int ncookies,
2079               unsigned int map_perm)
2080{
2081        unsigned long i, npages, flags;
2082        struct ldc_mtable_entry *base;
2083        struct cookie_state state;
2084        struct ldc_iommu *iommu;
2085        int err;
2086
2087        if (map_perm & ~LDC_MAP_ALL)
2088                return -EINVAL;
2089
2090        err = sg_count_pages(sg, num_sg);
2091        if (err < 0)
2092                return err;
2093
2094        npages = err;
2095        if (err > ncookies)
2096                return -EMSGSIZE;
2097
2098        iommu = &lp->iommu;
2099
2100        spin_lock_irqsave(&iommu->lock, flags);
2101        base = alloc_npages(iommu, npages);
2102        spin_unlock_irqrestore(&iommu->lock, flags);
2103
2104        if (!base)
2105                return -ENOMEM;
2106
2107        state.page_table = iommu->page_table;
2108        state.cookies = cookies;
2109        state.mte_base = perm_to_mte(map_perm);
2110        state.prev_cookie = ~(u64)0;
2111        state.pte_idx = (base - iommu->page_table);
2112        state.nc = 0;
2113
2114        for (i = 0; i < num_sg; i++)
2115                fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2116                             sg[i].offset, sg[i].length);
2117
2118        return state.nc;
2119}
2120EXPORT_SYMBOL(ldc_map_sg);
2121
2122int ldc_map_single(struct ldc_channel *lp,
2123                   void *buf, unsigned int len,
2124                   struct ldc_trans_cookie *cookies, int ncookies,
2125                   unsigned int map_perm)
2126{
2127        unsigned long npages, pa, flags;
2128        struct ldc_mtable_entry *base;
2129        struct cookie_state state;
2130        struct ldc_iommu *iommu;
2131
2132        if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2133                return -EINVAL;
2134
2135        pa = __pa(buf);
2136        if ((pa | len) & (8UL - 1))
2137                return -EFAULT;
2138
2139        npages = pages_in_region(pa, len);
2140
2141        iommu = &lp->iommu;
2142
2143        spin_lock_irqsave(&iommu->lock, flags);
2144        base = alloc_npages(iommu, npages);
2145        spin_unlock_irqrestore(&iommu->lock, flags);
2146
2147        if (!base)
2148                return -ENOMEM;
2149
2150        state.page_table = iommu->page_table;
2151        state.cookies = cookies;
2152        state.mte_base = perm_to_mte(map_perm);
2153        state.prev_cookie = ~(u64)0;
2154        state.pte_idx = (base - iommu->page_table);
2155        state.nc = 0;
2156        fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2157        BUG_ON(state.nc != 1);
2158
2159        return state.nc;
2160}
2161EXPORT_SYMBOL(ldc_map_single);
2162
2163static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2164                        u64 cookie, u64 size)
2165{
2166        struct iommu_arena *arena = &iommu->arena;
2167        unsigned long i, shift, index, npages;
2168        struct ldc_mtable_entry *base;
2169
2170        npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2171        index = cookie_to_index(cookie, &shift);
2172        base = iommu->page_table + index;
2173
2174        BUG_ON(index > arena->limit ||
2175               (index + npages) > arena->limit);
2176
2177        for (i = 0; i < npages; i++) {
2178                if (base->cookie)
2179                        sun4v_ldc_revoke(id, cookie + (i << shift),
2180                                         base->cookie);
2181                base->mte = 0;
2182                __clear_bit(index + i, arena->map);
2183        }
2184}
2185
2186void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2187               int ncookies)
2188{
2189        struct ldc_iommu *iommu = &lp->iommu;
2190        unsigned long flags;
2191        int i;
2192
2193        spin_lock_irqsave(&iommu->lock, flags);
2194        for (i = 0; i < ncookies; i++) {
2195                u64 addr = cookies[i].cookie_addr;
2196                u64 size = cookies[i].cookie_size;
2197
2198                free_npages(lp->id, iommu, addr, size);
2199        }
2200        spin_unlock_irqrestore(&iommu->lock, flags);
2201}
2202EXPORT_SYMBOL(ldc_unmap);
2203
2204int ldc_copy(struct ldc_channel *lp, int copy_dir,
2205             void *buf, unsigned int len, unsigned long offset,
2206             struct ldc_trans_cookie *cookies, int ncookies)
2207{
2208        unsigned int orig_len;
2209        unsigned long ra;
2210        int i;
2211
2212        if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2213                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2214                       lp->id, copy_dir);
2215                return -EINVAL;
2216        }
2217
2218        ra = __pa(buf);
2219        if ((ra | len | offset) & (8UL - 1)) {
2220                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2221                       "ra[%lx] len[%x] offset[%lx]\n",
2222                       lp->id, ra, len, offset);
2223                return -EFAULT;
2224        }
2225
2226        if (lp->hs_state != LDC_HS_COMPLETE ||
2227            (lp->flags & LDC_FLAG_RESET)) {
2228                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2229                       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2230                return -ECONNRESET;
2231        }
2232
2233        orig_len = len;
2234        for (i = 0; i < ncookies; i++) {
2235                unsigned long cookie_raddr = cookies[i].cookie_addr;
2236                unsigned long this_len = cookies[i].cookie_size;
2237                unsigned long actual_len;
2238
2239                if (unlikely(offset)) {
2240                        unsigned long this_off = offset;
2241
2242                        if (this_off > this_len)
2243                                this_off = this_len;
2244
2245                        offset -= this_off;
2246                        this_len -= this_off;
2247                        if (!this_len)
2248                                continue;
2249                        cookie_raddr += this_off;
2250                }
2251
2252                if (this_len > len)
2253                        this_len = len;
2254
2255                while (1) {
2256                        unsigned long hv_err;
2257
2258                        hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2259                                                cookie_raddr, ra,
2260                                                this_len, &actual_len);
2261                        if (unlikely(hv_err)) {
2262                                printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2263                                       "HV error %lu\n",
2264                                       lp->id, hv_err);
2265                                if (lp->hs_state != LDC_HS_COMPLETE ||
2266                                    (lp->flags & LDC_FLAG_RESET))
2267                                        return -ECONNRESET;
2268                                else
2269                                        return -EFAULT;
2270                        }
2271
2272                        cookie_raddr += actual_len;
2273                        ra += actual_len;
2274                        len -= actual_len;
2275                        if (actual_len == this_len)
2276                                break;
2277
2278                        this_len -= actual_len;
2279                }
2280
2281                if (!len)
2282                        break;
2283        }
2284
2285        /* It is caller policy what to do about short copies.
2286         * For example, a networking driver can declare the
2287         * packet a runt and drop it.
2288         */
2289
2290        return orig_len - len;
2291}
2292EXPORT_SYMBOL(ldc_copy);
2293
2294void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2295                          struct ldc_trans_cookie *cookies, int *ncookies,
2296                          unsigned int map_perm)
2297{
2298        void *buf;
2299        int err;
2300
2301        if (len & (8UL - 1))
2302                return ERR_PTR(-EINVAL);
2303
2304        buf = kzalloc(len, GFP_KERNEL);
2305        if (!buf)
2306                return ERR_PTR(-ENOMEM);
2307
2308        err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2309        if (err < 0) {
2310                kfree(buf);
2311                return ERR_PTR(err);
2312        }
2313        *ncookies = err;
2314
2315        return buf;
2316}
2317EXPORT_SYMBOL(ldc_alloc_exp_dring);
2318
/* Counterpart of ldc_alloc_exp_dring(): unmap the @ncookies cookies
 * in @cookies and then free @buf.  @len is accepted but not used.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	/* Drop the mapping first so the memory is no longer exported
	 * when it is handed back to the allocator.
	 */
	ldc_unmap(lp, cookies, ncookies);
	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
2326
2327static int __init ldc_init(void)
2328{
2329        unsigned long major, minor;
2330        struct mdesc_handle *hp;
2331        const u64 *v;
2332        int err;
2333        u64 mp;
2334
2335        hp = mdesc_grab();
2336        if (!hp)
2337                return -ENODEV;
2338
2339        mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2340        err = -ENODEV;
2341        if (mp == MDESC_NODE_NULL)
2342                goto out;
2343
2344        v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2345        if (!v)
2346                goto out;
2347
2348        major = 1;
2349        minor = 0;
2350        if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2351                printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2352                goto out;
2353        }
2354
2355        printk(KERN_INFO "%s", version);
2356
2357        if (!*v) {
2358                printk(KERN_INFO PFX "Domaining disabled.\n");
2359                goto out;
2360        }
2361        ldom_domaining_enabled = 1;
2362        err = 0;
2363
2364out:
2365        mdesc_release(hp);
2366        return err;
2367}
2368
2369core_initcall(ldc_init);
2370