linux/arch/sparc/kernel/ldc.c
<<
>>
Prefs
   1/* ldc.c: Logical Domain Channel link-layer protocol driver.
   2 *
   3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
   4 */
   5
   6#include <linux/kernel.h>
   7#include <linux/export.h>
   8#include <linux/slab.h>
   9#include <linux/spinlock.h>
  10#include <linux/delay.h>
  11#include <linux/errno.h>
  12#include <linux/string.h>
  13#include <linux/scatterlist.h>
  14#include <linux/interrupt.h>
  15#include <linux/list.h>
  16#include <linux/init.h>
  17#include <linux/bitmap.h>
  18#include <linux/iommu-common.h>
  19
  20#include <asm/hypervisor.h>
  21#include <asm/iommu.h>
  22#include <asm/page.h>
  23#include <asm/ldc.h>
  24#include <asm/mdesc.h>
  25
  26#define DRV_MODULE_NAME         "ldc"
  27#define PFX DRV_MODULE_NAME     ": "
  28#define DRV_MODULE_VERSION      "1.1"
  29#define DRV_MODULE_RELDATE      "July 22, 2008"
  30
  31#define COOKIE_PGSZ_CODE        0xf000000000000000ULL
  32#define COOKIE_PGSZ_CODE_SHIFT  60ULL
  33
  34
  35static char version[] =
  36        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  37#define LDC_PACKET_SIZE         64
  38
  39/* Packet header layout for unreliable and reliable mode frames.
  40 * When in RAW mode, packets are simply straight 64-byte payloads
  41 * with no headers.
  42 */
  43struct ldc_packet {
  44        u8                      type;
  45#define LDC_CTRL                0x01
  46#define LDC_DATA                0x02
  47#define LDC_ERR                 0x10
  48
  49        u8                      stype;
  50#define LDC_INFO                0x01
  51#define LDC_ACK                 0x02
  52#define LDC_NACK                0x04
  53
  54        u8                      ctrl;
  55#define LDC_VERS                0x01 /* Link Version            */
  56#define LDC_RTS                 0x02 /* Request To Send         */
  57#define LDC_RTR                 0x03 /* Ready To Receive        */
  58#define LDC_RDX                 0x04 /* Ready for Data eXchange */
  59#define LDC_CTRL_MSK            0x0f
  60
  61        u8                      env;
  62#define LDC_LEN                 0x3f
  63#define LDC_FRAG_MASK           0xc0
  64#define LDC_START               0x40
  65#define LDC_STOP                0x80
  66
  67        u32                     seqid;
  68
  69        union {
  70                u8              u_data[LDC_PACKET_SIZE - 8];
  71                struct {
  72                        u32     pad;
  73                        u32     ackid;
  74                        u8      r_data[LDC_PACKET_SIZE - 8 - 8];
  75                } r;
  76        } u;
  77};
  78
  79struct ldc_version {
  80        u16 major;
  81        u16 minor;
  82};
  83
  84/* Ordered from largest major to lowest.  */
  85static struct ldc_version ver_arr[] = {
  86        { .major = 1, .minor = 0 },
  87};
  88
  89#define LDC_DEFAULT_MTU                 (4 * LDC_PACKET_SIZE)
  90#define LDC_DEFAULT_NUM_ENTRIES         (PAGE_SIZE / LDC_PACKET_SIZE)
  91
  92struct ldc_channel;
  93
  94struct ldc_mode_ops {
  95        int (*write)(struct ldc_channel *, const void *, unsigned int);
  96        int (*read)(struct ldc_channel *, void *, unsigned int);
  97};
  98
  99static const struct ldc_mode_ops raw_ops;
 100static const struct ldc_mode_ops nonraw_ops;
 101static const struct ldc_mode_ops stream_ops;
 102
 103int ldom_domaining_enabled;
 104
 105struct ldc_iommu {
 106        /* Protects ldc_unmap.  */
 107        spinlock_t                      lock;
 108        struct ldc_mtable_entry         *page_table;
 109        struct iommu_map_table          iommu_map_table;
 110};
 111
 112struct ldc_channel {
 113        /* Protects all operations that depend upon channel state.  */
 114        spinlock_t                      lock;
 115
 116        unsigned long                   id;
 117
 118        u8                              *mssbuf;
 119        u32                             mssbuf_len;
 120        u32                             mssbuf_off;
 121
 122        struct ldc_packet               *tx_base;
 123        unsigned long                   tx_head;
 124        unsigned long                   tx_tail;
 125        unsigned long                   tx_num_entries;
 126        unsigned long                   tx_ra;
 127
 128        unsigned long                   tx_acked;
 129
 130        struct ldc_packet               *rx_base;
 131        unsigned long                   rx_head;
 132        unsigned long                   rx_tail;
 133        unsigned long                   rx_num_entries;
 134        unsigned long                   rx_ra;
 135
 136        u32                             rcv_nxt;
 137        u32                             snd_nxt;
 138
 139        unsigned long                   chan_state;
 140
 141        struct ldc_channel_config       cfg;
 142        void                            *event_arg;
 143
 144        const struct ldc_mode_ops       *mops;
 145
 146        struct ldc_iommu                iommu;
 147
 148        struct ldc_version              ver;
 149
 150        u8                              hs_state;
 151#define LDC_HS_CLOSED                   0x00
 152#define LDC_HS_OPEN                     0x01
 153#define LDC_HS_GOTVERS                  0x02
 154#define LDC_HS_SENTRTR                  0x03
 155#define LDC_HS_GOTRTR                   0x04
 156#define LDC_HS_COMPLETE                 0x10
 157
 158        u8                              flags;
 159#define LDC_FLAG_ALLOCED_QUEUES         0x01
 160#define LDC_FLAG_REGISTERED_QUEUES      0x02
 161#define LDC_FLAG_REGISTERED_IRQS        0x04
 162#define LDC_FLAG_RESET                  0x10
 163
 164        u8                              mss;
 165        u8                              state;
 166
 167#define LDC_IRQ_NAME_MAX                32
 168        char                            rx_irq_name[LDC_IRQ_NAME_MAX];
 169        char                            tx_irq_name[LDC_IRQ_NAME_MAX];
 170
 171        struct hlist_head               mh_list;
 172
 173        struct hlist_node               list;
 174};
 175
 176#define ldcdbg(TYPE, f, a...) \
 177do {    if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
 178                printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
 179} while (0)
 180
 181static const char *state_to_str(u8 state)
 182{
 183        switch (state) {
 184        case LDC_STATE_INVALID:
 185                return "INVALID";
 186        case LDC_STATE_INIT:
 187                return "INIT";
 188        case LDC_STATE_BOUND:
 189                return "BOUND";
 190        case LDC_STATE_READY:
 191                return "READY";
 192        case LDC_STATE_CONNECTED:
 193                return "CONNECTED";
 194        default:
 195                return "<UNKNOWN>";
 196        }
 197}
 198
 199static void ldc_set_state(struct ldc_channel *lp, u8 state)
 200{
 201        ldcdbg(STATE, "STATE (%s) --> (%s)\n",
 202               state_to_str(lp->state),
 203               state_to_str(state));
 204
 205        lp->state = state;
 206}
 207
 208static unsigned long __advance(unsigned long off, unsigned long num_entries)
 209{
 210        off += LDC_PACKET_SIZE;
 211        if (off == (num_entries * LDC_PACKET_SIZE))
 212                off = 0;
 213
 214        return off;
 215}
 216
 217static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
 218{
 219        return __advance(off, lp->rx_num_entries);
 220}
 221
 222static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
 223{
 224        return __advance(off, lp->tx_num_entries);
 225}
 226
 227static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
 228                                                  unsigned long *new_tail)
 229{
 230        struct ldc_packet *p;
 231        unsigned long t;
 232
 233        t = tx_advance(lp, lp->tx_tail);
 234        if (t == lp->tx_head)
 235                return NULL;
 236
 237        *new_tail = t;
 238
 239        p = lp->tx_base;
 240        return p + (lp->tx_tail / LDC_PACKET_SIZE);
 241}
 242
 243/* When we are in reliable or stream mode, have to track the next packet
 244 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 245 * to be careful not to stomp over the queue past that point.  During
 246 * the handshake, we don't have TX data packets pending in the queue
 247 * and that's why handshake_get_tx_packet() need not be mindful of
 248 * lp->tx_acked.
 249 */
 250static unsigned long head_for_data(struct ldc_channel *lp)
 251{
 252        if (lp->cfg.mode == LDC_MODE_STREAM)
 253                return lp->tx_acked;
 254        return lp->tx_head;
 255}
 256
 257static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
 258{
 259        unsigned long limit, tail, new_tail, diff;
 260        unsigned int mss;
 261
 262        limit = head_for_data(lp);
 263        tail = lp->tx_tail;
 264        new_tail = tx_advance(lp, tail);
 265        if (new_tail == limit)
 266                return 0;
 267
 268        if (limit > new_tail)
 269                diff = limit - new_tail;
 270        else
 271                diff = (limit +
 272                        ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
 273        diff /= LDC_PACKET_SIZE;
 274        mss = lp->mss;
 275
 276        if (diff * mss < size)
 277                return 0;
 278
 279        return 1;
 280}
 281
 282static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
 283                                             unsigned long *new_tail)
 284{
 285        struct ldc_packet *p;
 286        unsigned long h, t;
 287
 288        h = head_for_data(lp);
 289        t = tx_advance(lp, lp->tx_tail);
 290        if (t == h)
 291                return NULL;
 292
 293        *new_tail = t;
 294
 295        p = lp->tx_base;
 296        return p + (lp->tx_tail / LDC_PACKET_SIZE);
 297}
 298
 299static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
 300{
 301        unsigned long orig_tail = lp->tx_tail;
 302        int limit = 1000;
 303
 304        lp->tx_tail = tail;
 305        while (limit-- > 0) {
 306                unsigned long err;
 307
 308                err = sun4v_ldc_tx_set_qtail(lp->id, tail);
 309                if (!err)
 310                        return 0;
 311
 312                if (err != HV_EWOULDBLOCK) {
 313                        lp->tx_tail = orig_tail;
 314                        return -EINVAL;
 315                }
 316                udelay(1);
 317        }
 318
 319        lp->tx_tail = orig_tail;
 320        return -EBUSY;
 321}
 322
 323/* This just updates the head value in the hypervisor using
 324 * a polling loop with a timeout.  The caller takes care of
 325 * upating software state representing the head change, if any.
 326 */
 327static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
 328{
 329        int limit = 1000;
 330
 331        while (limit-- > 0) {
 332                unsigned long err;
 333
 334                err = sun4v_ldc_rx_set_qhead(lp->id, head);
 335                if (!err)
 336                        return 0;
 337
 338                if (err != HV_EWOULDBLOCK)
 339                        return -EINVAL;
 340
 341                udelay(1);
 342        }
 343
 344        return -EBUSY;
 345}
 346
 347static int send_tx_packet(struct ldc_channel *lp,
 348                          struct ldc_packet *p,
 349                          unsigned long new_tail)
 350{
 351        BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
 352
 353        return set_tx_tail(lp, new_tail);
 354}
 355
 356static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
 357                                                 u8 stype, u8 ctrl,
 358                                                 void *data, int dlen,
 359                                                 unsigned long *new_tail)
 360{
 361        struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
 362
 363        if (p) {
 364                memset(p, 0, sizeof(*p));
 365                p->type = LDC_CTRL;
 366                p->stype = stype;
 367                p->ctrl = ctrl;
 368                if (data)
 369                        memcpy(p->u.u_data, data, dlen);
 370        }
 371        return p;
 372}
 373
 374static int start_handshake(struct ldc_channel *lp)
 375{
 376        struct ldc_packet *p;
 377        struct ldc_version *ver;
 378        unsigned long new_tail;
 379
 380        ver = &ver_arr[0];
 381
 382        ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
 383               ver->major, ver->minor);
 384
 385        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 386                                   ver, sizeof(*ver), &new_tail);
 387        if (p) {
 388                int err = send_tx_packet(lp, p, new_tail);
 389                if (!err)
 390                        lp->flags &= ~LDC_FLAG_RESET;
 391                return err;
 392        }
 393        return -EBUSY;
 394}
 395
 396static int send_version_nack(struct ldc_channel *lp,
 397                             u16 major, u16 minor)
 398{
 399        struct ldc_packet *p;
 400        struct ldc_version ver;
 401        unsigned long new_tail;
 402
 403        ver.major = major;
 404        ver.minor = minor;
 405
 406        p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
 407                                   &ver, sizeof(ver), &new_tail);
 408        if (p) {
 409                ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
 410                       ver.major, ver.minor);
 411
 412                return send_tx_packet(lp, p, new_tail);
 413        }
 414        return -EBUSY;
 415}
 416
 417static int send_version_ack(struct ldc_channel *lp,
 418                            struct ldc_version *vp)
 419{
 420        struct ldc_packet *p;
 421        unsigned long new_tail;
 422
 423        p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
 424                                   vp, sizeof(*vp), &new_tail);
 425        if (p) {
 426                ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
 427                       vp->major, vp->minor);
 428
 429                return send_tx_packet(lp, p, new_tail);
 430        }
 431        return -EBUSY;
 432}
 433
 434static int send_rts(struct ldc_channel *lp)
 435{
 436        struct ldc_packet *p;
 437        unsigned long new_tail;
 438
 439        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
 440                                   &new_tail);
 441        if (p) {
 442                p->env = lp->cfg.mode;
 443                p->seqid = 0;
 444                lp->rcv_nxt = 0;
 445
 446                ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
 447                       p->env, p->seqid);
 448
 449                return send_tx_packet(lp, p, new_tail);
 450        }
 451        return -EBUSY;
 452}
 453
 454static int send_rtr(struct ldc_channel *lp)
 455{
 456        struct ldc_packet *p;
 457        unsigned long new_tail;
 458
 459        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
 460                                   &new_tail);
 461        if (p) {
 462                p->env = lp->cfg.mode;
 463                p->seqid = 0;
 464
 465                ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
 466                       p->env, p->seqid);
 467
 468                return send_tx_packet(lp, p, new_tail);
 469        }
 470        return -EBUSY;
 471}
 472
 473static int send_rdx(struct ldc_channel *lp)
 474{
 475        struct ldc_packet *p;
 476        unsigned long new_tail;
 477
 478        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
 479                                   &new_tail);
 480        if (p) {
 481                p->env = 0;
 482                p->seqid = ++lp->snd_nxt;
 483                p->u.r.ackid = lp->rcv_nxt;
 484
 485                ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
 486                       p->env, p->seqid, p->u.r.ackid);
 487
 488                return send_tx_packet(lp, p, new_tail);
 489        }
 490        return -EBUSY;
 491}
 492
 493static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
 494{
 495        struct ldc_packet *p;
 496        unsigned long new_tail;
 497        int err;
 498
 499        p = data_get_tx_packet(lp, &new_tail);
 500        if (!p)
 501                return -EBUSY;
 502        memset(p, 0, sizeof(*p));
 503        p->type = data_pkt->type;
 504        p->stype = LDC_NACK;
 505        p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
 506        p->seqid = lp->snd_nxt + 1;
 507        p->u.r.ackid = lp->rcv_nxt;
 508
 509        ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
 510               p->type, p->ctrl, p->seqid, p->u.r.ackid);
 511
 512        err = send_tx_packet(lp, p, new_tail);
 513        if (!err)
 514                lp->snd_nxt++;
 515
 516        return err;
 517}
 518
 519static int ldc_abort(struct ldc_channel *lp)
 520{
 521        unsigned long hv_err;
 522
 523        ldcdbg(STATE, "ABORT\n");
 524
 525        /* We report but do not act upon the hypervisor errors because
 526         * there really isn't much we can do if they fail at this point.
 527         */
 528        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
 529        if (hv_err)
 530                printk(KERN_ERR PFX "ldc_abort: "
 531                       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 532                       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
 533
 534        hv_err = sun4v_ldc_tx_get_state(lp->id,
 535                                        &lp->tx_head,
 536                                        &lp->tx_tail,
 537                                        &lp->chan_state);
 538        if (hv_err)
 539                printk(KERN_ERR PFX "ldc_abort: "
 540                       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
 541                       lp->id, hv_err);
 542
 543        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
 544        if (hv_err)
 545                printk(KERN_ERR PFX "ldc_abort: "
 546                       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
 547                       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
 548
 549        /* Refetch the RX queue state as well, because we could be invoked
 550         * here in the queue processing context.
 551         */
 552        hv_err = sun4v_ldc_rx_get_state(lp->id,
 553                                        &lp->rx_head,
 554                                        &lp->rx_tail,
 555                                        &lp->chan_state);
 556        if (hv_err)
 557                printk(KERN_ERR PFX "ldc_abort: "
 558                       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
 559                       lp->id, hv_err);
 560
 561        return -ECONNRESET;
 562}
 563
 564static struct ldc_version *find_by_major(u16 major)
 565{
 566        struct ldc_version *ret = NULL;
 567        int i;
 568
 569        for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
 570                struct ldc_version *v = &ver_arr[i];
 571                if (v->major <= major) {
 572                        ret = v;
 573                        break;
 574                }
 575        }
 576        return ret;
 577}
 578
 579static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
 580{
 581        struct ldc_version *vap;
 582        int err;
 583
 584        ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
 585               vp->major, vp->minor);
 586
 587        if (lp->hs_state == LDC_HS_GOTVERS) {
 588                lp->hs_state = LDC_HS_OPEN;
 589                memset(&lp->ver, 0, sizeof(lp->ver));
 590        }
 591
 592        vap = find_by_major(vp->major);
 593        if (!vap) {
 594                err = send_version_nack(lp, 0, 0);
 595        } else if (vap->major != vp->major) {
 596                err = send_version_nack(lp, vap->major, vap->minor);
 597        } else {
 598                struct ldc_version ver = *vp;
 599                if (ver.minor > vap->minor)
 600                        ver.minor = vap->minor;
 601                err = send_version_ack(lp, &ver);
 602                if (!err) {
 603                        lp->ver = ver;
 604                        lp->hs_state = LDC_HS_GOTVERS;
 605                }
 606        }
 607        if (err)
 608                return ldc_abort(lp);
 609
 610        return 0;
 611}
 612
 613static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
 614{
 615        ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
 616               vp->major, vp->minor);
 617
 618        if (lp->hs_state == LDC_HS_GOTVERS) {
 619                if (lp->ver.major != vp->major ||
 620                    lp->ver.minor != vp->minor)
 621                        return ldc_abort(lp);
 622        } else {
 623                lp->ver = *vp;
 624                lp->hs_state = LDC_HS_GOTVERS;
 625        }
 626        if (send_rts(lp))
 627                return ldc_abort(lp);
 628        return 0;
 629}
 630
 631static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
 632{
 633        struct ldc_version *vap;
 634        struct ldc_packet *p;
 635        unsigned long new_tail;
 636
 637        if (vp->major == 0 && vp->minor == 0)
 638                return ldc_abort(lp);
 639
 640        vap = find_by_major(vp->major);
 641        if (!vap)
 642                return ldc_abort(lp);
 643
 644        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 645                                           vap, sizeof(*vap),
 646                                           &new_tail);
 647        if (!p)
 648                return ldc_abort(lp);
 649
 650        return send_tx_packet(lp, p, new_tail);
 651}
 652
 653static int process_version(struct ldc_channel *lp,
 654                           struct ldc_packet *p)
 655{
 656        struct ldc_version *vp;
 657
 658        vp = (struct ldc_version *) p->u.u_data;
 659
 660        switch (p->stype) {
 661        case LDC_INFO:
 662                return process_ver_info(lp, vp);
 663
 664        case LDC_ACK:
 665                return process_ver_ack(lp, vp);
 666
 667        case LDC_NACK:
 668                return process_ver_nack(lp, vp);
 669
 670        default:
 671                return ldc_abort(lp);
 672        }
 673}
 674
 675static int process_rts(struct ldc_channel *lp,
 676                       struct ldc_packet *p)
 677{
 678        ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
 679               p->stype, p->seqid, p->env);
 680
 681        if (p->stype     != LDC_INFO       ||
 682            lp->hs_state != LDC_HS_GOTVERS ||
 683            p->env       != lp->cfg.mode)
 684                return ldc_abort(lp);
 685
 686        lp->snd_nxt = p->seqid;
 687        lp->rcv_nxt = p->seqid;
 688        lp->hs_state = LDC_HS_SENTRTR;
 689        if (send_rtr(lp))
 690                return ldc_abort(lp);
 691
 692        return 0;
 693}
 694
 695static int process_rtr(struct ldc_channel *lp,
 696                       struct ldc_packet *p)
 697{
 698        ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
 699               p->stype, p->seqid, p->env);
 700
 701        if (p->stype     != LDC_INFO ||
 702            p->env       != lp->cfg.mode)
 703                return ldc_abort(lp);
 704
 705        lp->snd_nxt = p->seqid;
 706        lp->hs_state = LDC_HS_COMPLETE;
 707        ldc_set_state(lp, LDC_STATE_CONNECTED);
 708        send_rdx(lp);
 709
 710        return LDC_EVENT_UP;
 711}
 712
 713static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
 714{
 715        return lp->rcv_nxt + 1 == seqid;
 716}
 717
 718static int process_rdx(struct ldc_channel *lp,
 719                       struct ldc_packet *p)
 720{
 721        ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
 722               p->stype, p->seqid, p->env, p->u.r.ackid);
 723
 724        if (p->stype != LDC_INFO ||
 725            !(rx_seq_ok(lp, p->seqid)))
 726                return ldc_abort(lp);
 727
 728        lp->rcv_nxt = p->seqid;
 729
 730        lp->hs_state = LDC_HS_COMPLETE;
 731        ldc_set_state(lp, LDC_STATE_CONNECTED);
 732
 733        return LDC_EVENT_UP;
 734}
 735
 736static int process_control_frame(struct ldc_channel *lp,
 737                                 struct ldc_packet *p)
 738{
 739        switch (p->ctrl) {
 740        case LDC_VERS:
 741                return process_version(lp, p);
 742
 743        case LDC_RTS:
 744                return process_rts(lp, p);
 745
 746        case LDC_RTR:
 747                return process_rtr(lp, p);
 748
 749        case LDC_RDX:
 750                return process_rdx(lp, p);
 751
 752        default:
 753                return ldc_abort(lp);
 754        }
 755}
 756
 757static int process_error_frame(struct ldc_channel *lp,
 758                               struct ldc_packet *p)
 759{
 760        return ldc_abort(lp);
 761}
 762
 763static int process_data_ack(struct ldc_channel *lp,
 764                            struct ldc_packet *ack)
 765{
 766        unsigned long head = lp->tx_acked;
 767        u32 ackid = ack->u.r.ackid;
 768
 769        while (1) {
 770                struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
 771
 772                head = tx_advance(lp, head);
 773
 774                if (p->seqid == ackid) {
 775                        lp->tx_acked = head;
 776                        return 0;
 777                }
 778                if (head == lp->tx_tail)
 779                        return ldc_abort(lp);
 780        }
 781
 782        return 0;
 783}
 784
 785static void send_events(struct ldc_channel *lp, unsigned int event_mask)
 786{
 787        if (event_mask & LDC_EVENT_RESET)
 788                lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
 789        if (event_mask & LDC_EVENT_UP)
 790                lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
 791        if (event_mask & LDC_EVENT_DATA_READY)
 792                lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
 793}
 794
 795static irqreturn_t ldc_rx(int irq, void *dev_id)
 796{
 797        struct ldc_channel *lp = dev_id;
 798        unsigned long orig_state, flags;
 799        unsigned int event_mask;
 800
 801        spin_lock_irqsave(&lp->lock, flags);
 802
 803        orig_state = lp->chan_state;
 804
 805        /* We should probably check for hypervisor errors here and
 806         * reset the LDC channel if we get one.
 807         */
 808        sun4v_ldc_rx_get_state(lp->id,
 809                               &lp->rx_head,
 810                               &lp->rx_tail,
 811                               &lp->chan_state);
 812
 813        ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 814               orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
 815
 816        event_mask = 0;
 817
 818        if (lp->cfg.mode == LDC_MODE_RAW &&
 819            lp->chan_state == LDC_CHANNEL_UP) {
 820                lp->hs_state = LDC_HS_COMPLETE;
 821                ldc_set_state(lp, LDC_STATE_CONNECTED);
 822
 823                event_mask |= LDC_EVENT_UP;
 824
 825                orig_state = lp->chan_state;
 826        }
 827
 828        /* If we are in reset state, flush the RX queue and ignore
 829         * everything.
 830         */
 831        if (lp->flags & LDC_FLAG_RESET) {
 832                (void) __set_rx_head(lp, lp->rx_tail);
 833                goto out;
 834        }
 835
 836        /* Once we finish the handshake, we let the ldc_read()
 837         * paths do all of the control frame and state management.
 838         * Just trigger the callback.
 839         */
 840        if (lp->hs_state == LDC_HS_COMPLETE) {
 841handshake_complete:
 842                if (lp->chan_state != orig_state) {
 843                        unsigned int event = LDC_EVENT_RESET;
 844
 845                        if (lp->chan_state == LDC_CHANNEL_UP)
 846                                event = LDC_EVENT_UP;
 847
 848                        event_mask |= event;
 849                }
 850                if (lp->rx_head != lp->rx_tail)
 851                        event_mask |= LDC_EVENT_DATA_READY;
 852
 853                goto out;
 854        }
 855
 856        if (lp->chan_state != orig_state)
 857                goto out;
 858
 859        while (lp->rx_head != lp->rx_tail) {
 860                struct ldc_packet *p;
 861                unsigned long new;
 862                int err;
 863
 864                p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
 865
 866                switch (p->type) {
 867                case LDC_CTRL:
 868                        err = process_control_frame(lp, p);
 869                        if (err > 0)
 870                                event_mask |= err;
 871                        break;
 872
 873                case LDC_DATA:
 874                        event_mask |= LDC_EVENT_DATA_READY;
 875                        err = 0;
 876                        break;
 877
 878                case LDC_ERR:
 879                        err = process_error_frame(lp, p);
 880                        break;
 881
 882                default:
 883                        err = ldc_abort(lp);
 884                        break;
 885                }
 886
 887                if (err < 0)
 888                        break;
 889
 890                new = lp->rx_head;
 891                new += LDC_PACKET_SIZE;
 892                if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
 893                        new = 0;
 894                lp->rx_head = new;
 895
 896                err = __set_rx_head(lp, new);
 897                if (err < 0) {
 898                        (void) ldc_abort(lp);
 899                        break;
 900                }
 901                if (lp->hs_state == LDC_HS_COMPLETE)
 902                        goto handshake_complete;
 903        }
 904
 905out:
 906        spin_unlock_irqrestore(&lp->lock, flags);
 907
 908        send_events(lp, event_mask);
 909
 910        return IRQ_HANDLED;
 911}
 912
 913static irqreturn_t ldc_tx(int irq, void *dev_id)
 914{
 915        struct ldc_channel *lp = dev_id;
 916        unsigned long flags, orig_state;
 917        unsigned int event_mask = 0;
 918
 919        spin_lock_irqsave(&lp->lock, flags);
 920
 921        orig_state = lp->chan_state;
 922
 923        /* We should probably check for hypervisor errors here and
 924         * reset the LDC channel if we get one.
 925         */
 926        sun4v_ldc_tx_get_state(lp->id,
 927                               &lp->tx_head,
 928                               &lp->tx_tail,
 929                               &lp->chan_state);
 930
 931        ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
 932               orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
 933
 934        if (lp->cfg.mode == LDC_MODE_RAW &&
 935            lp->chan_state == LDC_CHANNEL_UP) {
 936                lp->hs_state = LDC_HS_COMPLETE;
 937                ldc_set_state(lp, LDC_STATE_CONNECTED);
 938
 939                event_mask |= LDC_EVENT_UP;
 940        }
 941
 942        spin_unlock_irqrestore(&lp->lock, flags);
 943
 944        send_events(lp, event_mask);
 945
 946        return IRQ_HANDLED;
 947}
 948
 949/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
 950 * XXX that addition and removal from the ldc_channel_list has
 951 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 952 * XXX totally pointless as another thread can slip into ldc_alloc()
 953 * XXX and add a channel with the same ID.  There also needs to be
 954 * XXX a spinlock for ldc_channel_list.
 955 */
 956static HLIST_HEAD(ldc_channel_list);
 957
 958static int __ldc_channel_exists(unsigned long id)
 959{
 960        struct ldc_channel *lp;
 961
 962        hlist_for_each_entry(lp, &ldc_channel_list, list) {
 963                if (lp->id == id)
 964                        return 1;
 965        }
 966        return 0;
 967}
 968
 969static int alloc_queue(const char *name, unsigned long num_entries,
 970                       struct ldc_packet **base, unsigned long *ra)
 971{
 972        unsigned long size, order;
 973        void *q;
 974
 975        size = num_entries * LDC_PACKET_SIZE;
 976        order = get_order(size);
 977
 978        q = (void *) __get_free_pages(GFP_KERNEL, order);
 979        if (!q) {
 980                printk(KERN_ERR PFX "Alloc of %s queue failed with "
 981                       "size=%lu order=%lu\n", name, size, order);
 982                return -ENOMEM;
 983        }
 984
 985        memset(q, 0, PAGE_SIZE << order);
 986
 987        *base = q;
 988        *ra = __pa(q);
 989
 990        return 0;
 991}
 992
 993static void free_queue(unsigned long num_entries, struct ldc_packet *q)
 994{
 995        unsigned long size, order;
 996
 997        if (!q)
 998                return;
 999
1000        size = num_entries * LDC_PACKET_SIZE;
1001        order = get_order(size);
1002
1003        free_pages((unsigned long)q, order);
1004}
1005
1006static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
1007{
1008        u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1009        /* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
1010
1011        cookie &= ~COOKIE_PGSZ_CODE;
1012
1013        return (cookie >> (13ULL + (szcode * 3ULL)));
1014}
1015
1016static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
1017                      unsigned long entry, unsigned long npages)
1018{
1019        struct ldc_mtable_entry *base;
1020        unsigned long i, shift;
1021
1022        shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1023        base = iommu->page_table + entry;
1024        for (i = 0; i < npages; i++) {
1025                if (base->cookie)
1026                        sun4v_ldc_revoke(id, cookie + (i << shift),
1027                                         base->cookie);
1028                base->mte = 0;
1029        }
1030}
1031
1032/* XXX Make this configurable... XXX */
1033#define LDC_IOTABLE_SIZE        (8 * 1024)
1034
1035static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
1036{
1037        unsigned long sz, num_tsb_entries, tsbsize, order;
1038        struct ldc_iommu *ldc_iommu = &lp->iommu;
1039        struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1040        struct ldc_mtable_entry *table;
1041        unsigned long hv_err;
1042        int err;
1043
1044        num_tsb_entries = LDC_IOTABLE_SIZE;
1045        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1046        spin_lock_init(&ldc_iommu->lock);
1047
1048        sz = num_tsb_entries / 8;
1049        sz = (sz + 7UL) & ~7UL;
1050        iommu->map = kzalloc(sz, GFP_KERNEL);
1051        if (!iommu->map) {
1052                printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1053                return -ENOMEM;
1054        }
1055        iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
1056                            NULL, false /* no large pool */,
1057                            1 /* npools */,
1058                            true /* skip span boundary check */);
1059
1060        order = get_order(tsbsize);
1061
1062        table = (struct ldc_mtable_entry *)
1063                __get_free_pages(GFP_KERNEL, order);
1064        err = -ENOMEM;
1065        if (!table) {
1066                printk(KERN_ERR PFX "Alloc of MTE table failed, "
1067                       "size=%lu order=%lu\n", tsbsize, order);
1068                goto out_free_map;
1069        }
1070
1071        memset(table, 0, PAGE_SIZE << order);
1072
1073        ldc_iommu->page_table = table;
1074
1075        hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1076                                         num_tsb_entries);
1077        err = -EINVAL;
1078        if (hv_err)
1079                goto out_free_table;
1080
1081        return 0;
1082
1083out_free_table:
1084        free_pages((unsigned long) table, order);
1085        ldc_iommu->page_table = NULL;
1086
1087out_free_map:
1088        kfree(iommu->map);
1089        iommu->map = NULL;
1090
1091        return err;
1092}
1093
1094static void ldc_iommu_release(struct ldc_channel *lp)
1095{
1096        struct ldc_iommu *ldc_iommu = &lp->iommu;
1097        struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1098        unsigned long num_tsb_entries, tsbsize, order;
1099
1100        (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1101
1102        num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1103        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1104        order = get_order(tsbsize);
1105
1106        free_pages((unsigned long) ldc_iommu->page_table, order);
1107        ldc_iommu->page_table = NULL;
1108
1109        kfree(iommu->map);
1110        iommu->map = NULL;
1111}
1112
1113struct ldc_channel *ldc_alloc(unsigned long id,
1114                              const struct ldc_channel_config *cfgp,
1115                              void *event_arg,
1116                              const char *name)
1117{
1118        struct ldc_channel *lp;
1119        const struct ldc_mode_ops *mops;
1120        unsigned long dummy1, dummy2, hv_err;
1121        u8 mss, *mssbuf;
1122        int err;
1123
1124        err = -ENODEV;
1125        if (!ldom_domaining_enabled)
1126                goto out_err;
1127
1128        err = -EINVAL;
1129        if (!cfgp)
1130                goto out_err;
1131        if (!name)
1132                goto out_err;
1133
1134        switch (cfgp->mode) {
1135        case LDC_MODE_RAW:
1136                mops = &raw_ops;
1137                mss = LDC_PACKET_SIZE;
1138                break;
1139
1140        case LDC_MODE_UNRELIABLE:
1141                mops = &nonraw_ops;
1142                mss = LDC_PACKET_SIZE - 8;
1143                break;
1144
1145        case LDC_MODE_STREAM:
1146                mops = &stream_ops;
1147                mss = LDC_PACKET_SIZE - 8 - 8;
1148                break;
1149
1150        default:
1151                goto out_err;
1152        }
1153
1154        if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1155                goto out_err;
1156
1157        hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1158        err = -ENODEV;
1159        if (hv_err == HV_ECHANNEL)
1160                goto out_err;
1161
1162        err = -EEXIST;
1163        if (__ldc_channel_exists(id))
1164                goto out_err;
1165
1166        mssbuf = NULL;
1167
1168        lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1169        err = -ENOMEM;
1170        if (!lp)
1171                goto out_err;
1172
1173        spin_lock_init(&lp->lock);
1174
1175        lp->id = id;
1176
1177        err = ldc_iommu_init(name, lp);
1178        if (err)
1179                goto out_free_ldc;
1180
1181        lp->mops = mops;
1182        lp->mss = mss;
1183
1184        lp->cfg = *cfgp;
1185        if (!lp->cfg.mtu)
1186                lp->cfg.mtu = LDC_DEFAULT_MTU;
1187
1188        if (lp->cfg.mode == LDC_MODE_STREAM) {
1189                mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1190                if (!mssbuf) {
1191                        err = -ENOMEM;
1192                        goto out_free_iommu;
1193                }
1194                lp->mssbuf = mssbuf;
1195        }
1196
1197        lp->event_arg = event_arg;
1198
1199        /* XXX allow setting via ldc_channel_config to override defaults
1200         * XXX or use some formula based upon mtu
1201         */
1202        lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1203        lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1204
1205        err = alloc_queue("TX", lp->tx_num_entries,
1206                          &lp->tx_base, &lp->tx_ra);
1207        if (err)
1208                goto out_free_mssbuf;
1209
1210        err = alloc_queue("RX", lp->rx_num_entries,
1211                          &lp->rx_base, &lp->rx_ra);
1212        if (err)
1213                goto out_free_txq;
1214
1215        lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1216
1217        lp->hs_state = LDC_HS_CLOSED;
1218        ldc_set_state(lp, LDC_STATE_INIT);
1219
1220        INIT_HLIST_NODE(&lp->list);
1221        hlist_add_head(&lp->list, &ldc_channel_list);
1222
1223        INIT_HLIST_HEAD(&lp->mh_list);
1224
1225        snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1226        snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1227
1228        err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1229                          lp->rx_irq_name, lp);
1230        if (err)
1231                goto out_free_txq;
1232
1233        err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1234                          lp->tx_irq_name, lp);
1235        if (err) {
1236                free_irq(lp->cfg.rx_irq, lp);
1237                goto out_free_txq;
1238        }
1239
1240        return lp;
1241
1242out_free_txq:
1243        free_queue(lp->tx_num_entries, lp->tx_base);
1244
1245out_free_mssbuf:
1246        kfree(mssbuf);
1247
1248out_free_iommu:
1249        ldc_iommu_release(lp);
1250
1251out_free_ldc:
1252        kfree(lp);
1253
1254out_err:
1255        return ERR_PTR(err);
1256}
1257EXPORT_SYMBOL(ldc_alloc);
1258
1259void ldc_unbind(struct ldc_channel *lp)
1260{
1261        if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1262                free_irq(lp->cfg.rx_irq, lp);
1263                free_irq(lp->cfg.tx_irq, lp);
1264                lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1265        }
1266
1267        if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1268                sun4v_ldc_tx_qconf(lp->id, 0, 0);
1269                sun4v_ldc_rx_qconf(lp->id, 0, 0);
1270                lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1271        }
1272        if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1273                free_queue(lp->tx_num_entries, lp->tx_base);
1274                free_queue(lp->rx_num_entries, lp->rx_base);
1275                lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1276        }
1277
1278        ldc_set_state(lp, LDC_STATE_INIT);
1279}
1280EXPORT_SYMBOL(ldc_unbind);
1281
1282void ldc_free(struct ldc_channel *lp)
1283{
1284        ldc_unbind(lp);
1285        hlist_del(&lp->list);
1286        kfree(lp->mssbuf);
1287        ldc_iommu_release(lp);
1288
1289        kfree(lp);
1290}
1291EXPORT_SYMBOL(ldc_free);
1292
1293/* Bind the channel.  This registers the LDC queues with
1294 * the hypervisor and puts the channel into a pseudo-listening
1295 * state.  This does not initiate a handshake, ldc_connect() does
1296 * that.
1297 */
1298int ldc_bind(struct ldc_channel *lp)
1299{
1300        unsigned long hv_err, flags;
1301        int err = -EINVAL;
1302
1303        if (lp->state != LDC_STATE_INIT)
1304                return -EINVAL;
1305
1306        spin_lock_irqsave(&lp->lock, flags);
1307
1308        enable_irq(lp->cfg.rx_irq);
1309        enable_irq(lp->cfg.tx_irq);
1310
1311        lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1312
1313        err = -ENODEV;
1314        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1315        if (hv_err)
1316                goto out_free_irqs;
1317
1318        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1319        if (hv_err)
1320                goto out_free_irqs;
1321
1322        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1323        if (hv_err)
1324                goto out_unmap_tx;
1325
1326        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1327        if (hv_err)
1328                goto out_unmap_tx;
1329
1330        lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1331
1332        hv_err = sun4v_ldc_tx_get_state(lp->id,
1333                                        &lp->tx_head,
1334                                        &lp->tx_tail,
1335                                        &lp->chan_state);
1336        err = -EBUSY;
1337        if (hv_err)
1338                goto out_unmap_rx;
1339
1340        lp->tx_acked = lp->tx_head;
1341
1342        lp->hs_state = LDC_HS_OPEN;
1343        ldc_set_state(lp, LDC_STATE_BOUND);
1344
1345        spin_unlock_irqrestore(&lp->lock, flags);
1346
1347        return 0;
1348
1349out_unmap_rx:
1350        lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1351        sun4v_ldc_rx_qconf(lp->id, 0, 0);
1352
1353out_unmap_tx:
1354        sun4v_ldc_tx_qconf(lp->id, 0, 0);
1355
1356out_free_irqs:
1357        lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1358        free_irq(lp->cfg.tx_irq, lp);
1359        free_irq(lp->cfg.rx_irq, lp);
1360
1361        spin_unlock_irqrestore(&lp->lock, flags);
1362
1363        return err;
1364}
1365EXPORT_SYMBOL(ldc_bind);
1366
1367int ldc_connect(struct ldc_channel *lp)
1368{
1369        unsigned long flags;
1370        int err;
1371
1372        if (lp->cfg.mode == LDC_MODE_RAW)
1373                return -EINVAL;
1374
1375        spin_lock_irqsave(&lp->lock, flags);
1376
1377        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1378            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1379            lp->hs_state != LDC_HS_OPEN)
1380                err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1381        else
1382                err = start_handshake(lp);
1383
1384        spin_unlock_irqrestore(&lp->lock, flags);
1385
1386        return err;
1387}
1388EXPORT_SYMBOL(ldc_connect);
1389
1390int ldc_disconnect(struct ldc_channel *lp)
1391{
1392        unsigned long hv_err, flags;
1393        int err;
1394
1395        if (lp->cfg.mode == LDC_MODE_RAW)
1396                return -EINVAL;
1397
1398        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1399            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1400                return -EINVAL;
1401
1402        spin_lock_irqsave(&lp->lock, flags);
1403
1404        err = -ENODEV;
1405        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1406        if (hv_err)
1407                goto out_err;
1408
1409        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1410        if (hv_err)
1411                goto out_err;
1412
1413        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1414        if (hv_err)
1415                goto out_err;
1416
1417        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1418        if (hv_err)
1419                goto out_err;
1420
1421        ldc_set_state(lp, LDC_STATE_BOUND);
1422        lp->hs_state = LDC_HS_OPEN;
1423        lp->flags |= LDC_FLAG_RESET;
1424
1425        spin_unlock_irqrestore(&lp->lock, flags);
1426
1427        return 0;
1428
1429out_err:
1430        sun4v_ldc_tx_qconf(lp->id, 0, 0);
1431        sun4v_ldc_rx_qconf(lp->id, 0, 0);
1432        free_irq(lp->cfg.tx_irq, lp);
1433        free_irq(lp->cfg.rx_irq, lp);
1434        lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1435                       LDC_FLAG_REGISTERED_QUEUES);
1436        ldc_set_state(lp, LDC_STATE_INIT);
1437
1438        spin_unlock_irqrestore(&lp->lock, flags);
1439
1440        return err;
1441}
1442EXPORT_SYMBOL(ldc_disconnect);
1443
1444int ldc_state(struct ldc_channel *lp)
1445{
1446        return lp->state;
1447}
1448EXPORT_SYMBOL(ldc_state);
1449
1450static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1451{
1452        struct ldc_packet *p;
1453        unsigned long new_tail;
1454        int err;
1455
1456        if (size > LDC_PACKET_SIZE)
1457                return -EMSGSIZE;
1458
1459        p = data_get_tx_packet(lp, &new_tail);
1460        if (!p)
1461                return -EAGAIN;
1462
1463        memcpy(p, buf, size);
1464
1465        err = send_tx_packet(lp, p, new_tail);
1466        if (!err)
1467                err = size;
1468
1469        return err;
1470}
1471
1472static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1473{
1474        struct ldc_packet *p;
1475        unsigned long hv_err, new;
1476        int err;
1477
1478        if (size < LDC_PACKET_SIZE)
1479                return -EINVAL;
1480
1481        hv_err = sun4v_ldc_rx_get_state(lp->id,
1482                                        &lp->rx_head,
1483                                        &lp->rx_tail,
1484                                        &lp->chan_state);
1485        if (hv_err)
1486                return ldc_abort(lp);
1487
1488        if (lp->chan_state == LDC_CHANNEL_DOWN ||
1489            lp->chan_state == LDC_CHANNEL_RESETTING)
1490                return -ECONNRESET;
1491
1492        if (lp->rx_head == lp->rx_tail)
1493                return 0;
1494
1495        p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1496        memcpy(buf, p, LDC_PACKET_SIZE);
1497
1498        new = rx_advance(lp, lp->rx_head);
1499        lp->rx_head = new;
1500
1501        err = __set_rx_head(lp, new);
1502        if (err < 0)
1503                err = -ECONNRESET;
1504        else
1505                err = LDC_PACKET_SIZE;
1506
1507        return err;
1508}
1509
1510static const struct ldc_mode_ops raw_ops = {
1511        .write          =       write_raw,
1512        .read           =       read_raw,
1513};
1514
1515static int write_nonraw(struct ldc_channel *lp, const void *buf,
1516                        unsigned int size)
1517{
1518        unsigned long hv_err, tail;
1519        unsigned int copied;
1520        u32 seq;
1521        int err;
1522
1523        hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1524                                        &lp->chan_state);
1525        if (unlikely(hv_err))
1526                return -EBUSY;
1527
1528        if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1529                return ldc_abort(lp);
1530
1531        if (!tx_has_space_for(lp, size))
1532                return -EAGAIN;
1533
1534        seq = lp->snd_nxt;
1535        copied = 0;
1536        tail = lp->tx_tail;
1537        while (copied < size) {
1538                struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1539                u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1540                            p->u.u_data :
1541                            p->u.r.r_data);
1542                int data_len;
1543
1544                p->type = LDC_DATA;
1545                p->stype = LDC_INFO;
1546                p->ctrl = 0;
1547
1548                data_len = size - copied;
1549                if (data_len > lp->mss)
1550                        data_len = lp->mss;
1551
1552                BUG_ON(data_len > LDC_LEN);
1553
1554                p->env = (data_len |
1555                          (copied == 0 ? LDC_START : 0) |
1556                          (data_len == size - copied ? LDC_STOP : 0));
1557
1558                p->seqid = ++seq;
1559
1560                ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1561                       p->type,
1562                       p->stype,
1563                       p->ctrl,
1564                       p->env,
1565                       p->seqid);
1566
1567                memcpy(data, buf, data_len);
1568                buf += data_len;
1569                copied += data_len;
1570
1571                tail = tx_advance(lp, tail);
1572        }
1573
1574        err = set_tx_tail(lp, tail);
1575        if (!err) {
1576                lp->snd_nxt = seq;
1577                err = size;
1578        }
1579
1580        return err;
1581}
1582
1583static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1584                      struct ldc_packet *first_frag)
1585{
1586        int err;
1587
1588        if (first_frag)
1589                lp->rcv_nxt = first_frag->seqid - 1;
1590
1591        err = send_data_nack(lp, p);
1592        if (err)
1593                return err;
1594
1595        err = __set_rx_head(lp, lp->rx_tail);
1596        if (err < 0)
1597                return ldc_abort(lp);
1598
1599        return 0;
1600}
1601
1602static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1603{
1604        if (p->stype & LDC_ACK) {
1605                int err = process_data_ack(lp, p);
1606                if (err)
1607                        return err;
1608        }
1609        if (p->stype & LDC_NACK)
1610                return ldc_abort(lp);
1611
1612        return 0;
1613}
1614
1615static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1616{
1617        unsigned long dummy;
1618        int limit = 1000;
1619
1620        ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1621               cur_head, lp->rx_head, lp->rx_tail);
1622        while (limit-- > 0) {
1623                unsigned long hv_err;
1624
1625                hv_err = sun4v_ldc_rx_get_state(lp->id,
1626                                                &dummy,
1627                                                &lp->rx_tail,
1628                                                &lp->chan_state);
1629                if (hv_err)
1630                        return ldc_abort(lp);
1631
1632                if (lp->chan_state == LDC_CHANNEL_DOWN ||
1633                    lp->chan_state == LDC_CHANNEL_RESETTING)
1634                        return -ECONNRESET;
1635
1636                if (cur_head != lp->rx_tail) {
1637                        ldcdbg(DATA, "DATA WAIT DONE "
1638                               "head[%lx] tail[%lx] chan_state[%lx]\n",
1639                               dummy, lp->rx_tail, lp->chan_state);
1640                        return 0;
1641                }
1642
1643                udelay(1);
1644        }
1645        return -EAGAIN;
1646}
1647
1648static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1649{
1650        int err = __set_rx_head(lp, head);
1651
1652        if (err < 0)
1653                return ldc_abort(lp);
1654
1655        lp->rx_head = head;
1656        return 0;
1657}
1658
1659static void send_data_ack(struct ldc_channel *lp)
1660{
1661        unsigned long new_tail;
1662        struct ldc_packet *p;
1663
1664        p = data_get_tx_packet(lp, &new_tail);
1665        if (likely(p)) {
1666                int err;
1667
1668                memset(p, 0, sizeof(*p));
1669                p->type = LDC_DATA;
1670                p->stype = LDC_ACK;
1671                p->ctrl = 0;
1672                p->seqid = lp->snd_nxt + 1;
1673                p->u.r.ackid = lp->rcv_nxt;
1674
1675                err = send_tx_packet(lp, p, new_tail);
1676                if (!err)
1677                        lp->snd_nxt++;
1678        }
1679}
1680
1681static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1682{
1683        struct ldc_packet *first_frag;
1684        unsigned long hv_err, new;
1685        int err, copied;
1686
1687        hv_err = sun4v_ldc_rx_get_state(lp->id,
1688                                        &lp->rx_head,
1689                                        &lp->rx_tail,
1690                                        &lp->chan_state);
1691        if (hv_err)
1692                return ldc_abort(lp);
1693
1694        if (lp->chan_state == LDC_CHANNEL_DOWN ||
1695            lp->chan_state == LDC_CHANNEL_RESETTING)
1696                return -ECONNRESET;
1697
1698        if (lp->rx_head == lp->rx_tail)
1699                return 0;
1700
1701        first_frag = NULL;
1702        copied = err = 0;
1703        new = lp->rx_head;
1704        while (1) {
1705                struct ldc_packet *p;
1706                int pkt_len;
1707
1708                BUG_ON(new == lp->rx_tail);
1709                p = lp->rx_base + (new / LDC_PACKET_SIZE);
1710
1711                ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1712                       "rcv_nxt[%08x]\n",
1713                       p->type,
1714                       p->stype,
1715                       p->ctrl,
1716                       p->env,
1717                       p->seqid,
1718                       p->u.r.ackid,
1719                       lp->rcv_nxt);
1720
1721                if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1722                        err = rx_bad_seq(lp, p, first_frag);
1723                        copied = 0;
1724                        break;
1725                }
1726
1727                if (p->type & LDC_CTRL) {
1728                        err = process_control_frame(lp, p);
1729                        if (err < 0)
1730                                break;
1731                        err = 0;
1732                }
1733
1734                lp->rcv_nxt = p->seqid;
1735
1736                if (!(p->type & LDC_DATA)) {
1737                        new = rx_advance(lp, new);
1738                        goto no_data;
1739                }
1740                if (p->stype & (LDC_ACK | LDC_NACK)) {
1741                        err = data_ack_nack(lp, p);
1742                        if (err)
1743                                break;
1744                }
1745                if (!(p->stype & LDC_INFO)) {
1746                        new = rx_advance(lp, new);
1747                        err = rx_set_head(lp, new);
1748                        if (err)
1749                                break;
1750                        goto no_data;
1751                }
1752
1753                pkt_len = p->env & LDC_LEN;
1754
1755                /* Every initial packet starts with the START bit set.
1756                 *
1757                 * Singleton packets will have both START+STOP set.
1758                 *
1759                 * Fragments will have START set in the first frame, STOP
1760                 * set in the last frame, and neither bit set in middle
1761                 * frames of the packet.
1762                 *
1763                 * Therefore if we are at the beginning of a packet and
1764                 * we don't see START, or we are in the middle of a fragmented
1765                 * packet and do see START, we are unsynchronized and should
1766                 * flush the RX queue.
1767                 */
1768                if ((first_frag == NULL && !(p->env & LDC_START)) ||
1769                    (first_frag != NULL &&  (p->env & LDC_START))) {
1770                        if (!first_frag)
1771                                new = rx_advance(lp, new);
1772
1773                        err = rx_set_head(lp, new);
1774                        if (err)
1775                                break;
1776
1777                        if (!first_frag)
1778                                goto no_data;
1779                }
1780                if (!first_frag)
1781                        first_frag = p;
1782
1783                if (pkt_len > size - copied) {
1784                        /* User didn't give us a big enough buffer,
1785                         * what to do?  This is a pretty serious error.
1786                         *
1787                         * Since we haven't updated the RX ring head to
1788                         * consume any of the packets, signal the error
1789                         * to the user and just leave the RX ring alone.
1790                         *
1791                         * This seems the best behavior because this allows
1792                         * a user of the LDC layer to start with a small
1793                         * RX buffer for ldc_read() calls and use -EMSGSIZE
1794                         * as a cue to enlarge it's read buffer.
1795                         */
1796                        err = -EMSGSIZE;
1797                        break;
1798                }
1799
1800                /* Ok, we are gonna eat this one.  */
1801                new = rx_advance(lp, new);
1802
1803                memcpy(buf,
1804                       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1805                        p->u.u_data : p->u.r.r_data), pkt_len);
1806                buf += pkt_len;
1807                copied += pkt_len;
1808
1809                if (p->env & LDC_STOP)
1810                        break;
1811
1812no_data:
1813                if (new == lp->rx_tail) {
1814                        err = rx_data_wait(lp, new);
1815                        if (err)
1816                                break;
1817                }
1818        }
1819
1820        if (!err)
1821                err = rx_set_head(lp, new);
1822
1823        if (err && first_frag)
1824                lp->rcv_nxt = first_frag->seqid - 1;
1825
1826        if (!err) {
1827                err = copied;
1828                if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1829                        send_data_ack(lp);
1830        }
1831
1832        return err;
1833}
1834
1835static const struct ldc_mode_ops nonraw_ops = {
1836        .write          =       write_nonraw,
1837        .read           =       read_nonraw,
1838};
1839
1840static int write_stream(struct ldc_channel *lp, const void *buf,
1841                        unsigned int size)
1842{
1843        if (size > lp->cfg.mtu)
1844                size = lp->cfg.mtu;
1845        return write_nonraw(lp, buf, size);
1846}
1847
1848static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1849{
1850        if (!lp->mssbuf_len) {
1851                int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1852                if (err < 0)
1853                        return err;
1854
1855                lp->mssbuf_len = err;
1856                lp->mssbuf_off = 0;
1857        }
1858
1859        if (size > lp->mssbuf_len)
1860                size = lp->mssbuf_len;
1861        memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1862
1863        lp->mssbuf_off += size;
1864        lp->mssbuf_len -= size;
1865
1866        return size;
1867}
1868
1869static const struct ldc_mode_ops stream_ops = {
1870        .write          =       write_stream,
1871        .read           =       read_stream,
1872};
1873
1874int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1875{
1876        unsigned long flags;
1877        int err;
1878
1879        if (!buf)
1880                return -EINVAL;
1881
1882        if (!size)
1883                return 0;
1884
1885        spin_lock_irqsave(&lp->lock, flags);
1886
1887        if (lp->hs_state != LDC_HS_COMPLETE)
1888                err = -ENOTCONN;
1889        else
1890                err = lp->mops->write(lp, buf, size);
1891
1892        spin_unlock_irqrestore(&lp->lock, flags);
1893
1894        return err;
1895}
1896EXPORT_SYMBOL(ldc_write);
1897
1898int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1899{
1900        unsigned long flags;
1901        int err;
1902
1903        if (!buf)
1904                return -EINVAL;
1905
1906        if (!size)
1907                return 0;
1908
1909        spin_lock_irqsave(&lp->lock, flags);
1910
1911        if (lp->hs_state != LDC_HS_COMPLETE)
1912                err = -ENOTCONN;
1913        else
1914                err = lp->mops->read(lp, buf, size);
1915
1916        spin_unlock_irqrestore(&lp->lock, flags);
1917
1918        return err;
1919}
1920EXPORT_SYMBOL(ldc_read);
1921
1922static u64 pagesize_code(void)
1923{
1924        switch (PAGE_SIZE) {
1925        default:
1926        case (8ULL * 1024ULL):
1927                return 0;
1928        case (64ULL * 1024ULL):
1929                return 1;
1930        case (512ULL * 1024ULL):
1931                return 2;
1932        case (4ULL * 1024ULL * 1024ULL):
1933                return 3;
1934        case (32ULL * 1024ULL * 1024ULL):
1935                return 4;
1936        case (256ULL * 1024ULL * 1024ULL):
1937                return 5;
1938        }
1939}
1940
1941static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1942{
1943        return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1944                (index << PAGE_SHIFT) |
1945                page_offset);
1946}
1947
1948
1949static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1950                                             unsigned long npages)
1951{
1952        long entry;
1953
1954        entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
1955                                      npages, NULL, (unsigned long)-1, 0);
1956        if (unlikely(entry == IOMMU_ERROR_CODE))
1957                return NULL;
1958
1959        return iommu->page_table + entry;
1960}
1961
1962static u64 perm_to_mte(unsigned int map_perm)
1963{
1964        u64 mte_base;
1965
1966        mte_base = pagesize_code();
1967
1968        if (map_perm & LDC_MAP_SHADOW) {
1969                if (map_perm & LDC_MAP_R)
1970                        mte_base |= LDC_MTE_COPY_R;
1971                if (map_perm & LDC_MAP_W)
1972                        mte_base |= LDC_MTE_COPY_W;
1973        }
1974        if (map_perm & LDC_MAP_DIRECT) {
1975                if (map_perm & LDC_MAP_R)
1976                        mte_base |= LDC_MTE_READ;
1977                if (map_perm & LDC_MAP_W)
1978                        mte_base |= LDC_MTE_WRITE;
1979                if (map_perm & LDC_MAP_X)
1980                        mte_base |= LDC_MTE_EXEC;
1981        }
1982        if (map_perm & LDC_MAP_IO) {
1983                if (map_perm & LDC_MAP_R)
1984                        mte_base |= LDC_MTE_IOMMU_R;
1985                if (map_perm & LDC_MAP_W)
1986                        mte_base |= LDC_MTE_IOMMU_W;
1987        }
1988
1989        return mte_base;
1990}
1991
1992static int pages_in_region(unsigned long base, long len)
1993{
1994        int count = 0;
1995
1996        do {
1997                unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
1998
1999                len -= (new - base);
2000                base = new;
2001                count++;
2002        } while (len > 0);
2003
2004        return count;
2005}
2006
2007struct cookie_state {
2008        struct ldc_mtable_entry         *page_table;
2009        struct ldc_trans_cookie         *cookies;
2010        u64                             mte_base;
2011        u64                             prev_cookie;
2012        u32                             pte_idx;
2013        u32                             nc;
2014};
2015
2016static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2017                         unsigned long off, unsigned long len)
2018{
2019        do {
2020                unsigned long tlen, new = pa + PAGE_SIZE;
2021                u64 this_cookie;
2022
2023                sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2024
2025                tlen = PAGE_SIZE;
2026                if (off)
2027                        tlen = PAGE_SIZE - off;
2028                if (tlen > len)
2029                        tlen = len;
2030
2031                this_cookie = make_cookie(sp->pte_idx,
2032                                          pagesize_code(), off);
2033
2034                off = 0;
2035
2036                if (this_cookie == sp->prev_cookie) {
2037                        sp->cookies[sp->nc - 1].cookie_size += tlen;
2038                } else {
2039                        sp->cookies[sp->nc].cookie_addr = this_cookie;
2040                        sp->cookies[sp->nc].cookie_size = tlen;
2041                        sp->nc++;
2042                }
2043                sp->prev_cookie = this_cookie + tlen;
2044
2045                sp->pte_idx++;
2046
2047                len -= tlen;
2048                pa = new;
2049        } while (len > 0);
2050}
2051
2052static int sg_count_one(struct scatterlist *sg)
2053{
2054        unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2055        long len = sg->length;
2056
2057        if ((sg->offset | len) & (8UL - 1))
2058                return -EFAULT;
2059
2060        return pages_in_region(base + sg->offset, len);
2061}
2062
2063static int sg_count_pages(struct scatterlist *sg, int num_sg)
2064{
2065        int count;
2066        int i;
2067
2068        count = 0;
2069        for (i = 0; i < num_sg; i++) {
2070                int err = sg_count_one(sg + i);
2071                if (err < 0)
2072                        return err;
2073                count += err;
2074        }
2075
2076        return count;
2077}
2078
2079int ldc_map_sg(struct ldc_channel *lp,
2080               struct scatterlist *sg, int num_sg,
2081               struct ldc_trans_cookie *cookies, int ncookies,
2082               unsigned int map_perm)
2083{
2084        unsigned long i, npages;
2085        struct ldc_mtable_entry *base;
2086        struct cookie_state state;
2087        struct ldc_iommu *iommu;
2088        int err;
2089        struct scatterlist *s;
2090
2091        if (map_perm & ~LDC_MAP_ALL)
2092                return -EINVAL;
2093
2094        err = sg_count_pages(sg, num_sg);
2095        if (err < 0)
2096                return err;
2097
2098        npages = err;
2099        if (err > ncookies)
2100                return -EMSGSIZE;
2101
2102        iommu = &lp->iommu;
2103
2104        base = alloc_npages(iommu, npages);
2105
2106        if (!base)
2107                return -ENOMEM;
2108
2109        state.page_table = iommu->page_table;
2110        state.cookies = cookies;
2111        state.mte_base = perm_to_mte(map_perm);
2112        state.prev_cookie = ~(u64)0;
2113        state.pte_idx = (base - iommu->page_table);
2114        state.nc = 0;
2115
2116        for_each_sg(sg, s, num_sg, i) {
2117                fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
2118                             s->offset, s->length);
2119        }
2120
2121        return state.nc;
2122}
2123EXPORT_SYMBOL(ldc_map_sg);
2124
2125int ldc_map_single(struct ldc_channel *lp,
2126                   void *buf, unsigned int len,
2127                   struct ldc_trans_cookie *cookies, int ncookies,
2128                   unsigned int map_perm)
2129{
2130        unsigned long npages, pa;
2131        struct ldc_mtable_entry *base;
2132        struct cookie_state state;
2133        struct ldc_iommu *iommu;
2134
2135        if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2136                return -EINVAL;
2137
2138        pa = __pa(buf);
2139        if ((pa | len) & (8UL - 1))
2140                return -EFAULT;
2141
2142        npages = pages_in_region(pa, len);
2143
2144        iommu = &lp->iommu;
2145
2146        base = alloc_npages(iommu, npages);
2147
2148        if (!base)
2149                return -ENOMEM;
2150
2151        state.page_table = iommu->page_table;
2152        state.cookies = cookies;
2153        state.mte_base = perm_to_mte(map_perm);
2154        state.prev_cookie = ~(u64)0;
2155        state.pte_idx = (base - iommu->page_table);
2156        state.nc = 0;
2157        fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2158        BUG_ON(state.nc > ncookies);
2159
2160        return state.nc;
2161}
2162EXPORT_SYMBOL(ldc_map_single);
2163
2164
2165static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2166                        u64 cookie, u64 size)
2167{
2168        unsigned long npages, entry;
2169
2170        npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2171
2172        entry = ldc_cookie_to_index(cookie, iommu);
2173        ldc_demap(iommu, id, cookie, entry, npages);
2174        iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
2175}
2176
2177void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2178               int ncookies)
2179{
2180        struct ldc_iommu *iommu = &lp->iommu;
2181        int i;
2182        unsigned long flags;
2183
2184        spin_lock_irqsave(&iommu->lock, flags);
2185        for (i = 0; i < ncookies; i++) {
2186                u64 addr = cookies[i].cookie_addr;
2187                u64 size = cookies[i].cookie_size;
2188
2189                free_npages(lp->id, iommu, addr, size);
2190        }
2191        spin_unlock_irqrestore(&iommu->lock, flags);
2192}
2193EXPORT_SYMBOL(ldc_unmap);
2194
2195int ldc_copy(struct ldc_channel *lp, int copy_dir,
2196             void *buf, unsigned int len, unsigned long offset,
2197             struct ldc_trans_cookie *cookies, int ncookies)
2198{
2199        unsigned int orig_len;
2200        unsigned long ra;
2201        int i;
2202
2203        if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2204                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2205                       lp->id, copy_dir);
2206                return -EINVAL;
2207        }
2208
2209        ra = __pa(buf);
2210        if ((ra | len | offset) & (8UL - 1)) {
2211                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2212                       "ra[%lx] len[%x] offset[%lx]\n",
2213                       lp->id, ra, len, offset);
2214                return -EFAULT;
2215        }
2216
2217        if (lp->hs_state != LDC_HS_COMPLETE ||
2218            (lp->flags & LDC_FLAG_RESET)) {
2219                printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2220                       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2221                return -ECONNRESET;
2222        }
2223
2224        orig_len = len;
2225        for (i = 0; i < ncookies; i++) {
2226                unsigned long cookie_raddr = cookies[i].cookie_addr;
2227                unsigned long this_len = cookies[i].cookie_size;
2228                unsigned long actual_len;
2229
2230                if (unlikely(offset)) {
2231                        unsigned long this_off = offset;
2232
2233                        if (this_off > this_len)
2234                                this_off = this_len;
2235
2236                        offset -= this_off;
2237                        this_len -= this_off;
2238                        if (!this_len)
2239                                continue;
2240                        cookie_raddr += this_off;
2241                }
2242
2243                if (this_len > len)
2244                        this_len = len;
2245
2246                while (1) {
2247                        unsigned long hv_err;
2248
2249                        hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2250                                                cookie_raddr, ra,
2251                                                this_len, &actual_len);
2252                        if (unlikely(hv_err)) {
2253                                printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2254                                       "HV error %lu\n",
2255                                       lp->id, hv_err);
2256                                if (lp->hs_state != LDC_HS_COMPLETE ||
2257                                    (lp->flags & LDC_FLAG_RESET))
2258                                        return -ECONNRESET;
2259                                else
2260                                        return -EFAULT;
2261                        }
2262
2263                        cookie_raddr += actual_len;
2264                        ra += actual_len;
2265                        len -= actual_len;
2266                        if (actual_len == this_len)
2267                                break;
2268
2269                        this_len -= actual_len;
2270                }
2271
2272                if (!len)
2273                        break;
2274        }
2275
2276        /* It is caller policy what to do about short copies.
2277         * For example, a networking driver can declare the
2278         * packet a runt and drop it.
2279         */
2280
2281        return orig_len - len;
2282}
2283EXPORT_SYMBOL(ldc_copy);
2284
2285void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2286                          struct ldc_trans_cookie *cookies, int *ncookies,
2287                          unsigned int map_perm)
2288{
2289        void *buf;
2290        int err;
2291
2292        if (len & (8UL - 1))
2293                return ERR_PTR(-EINVAL);
2294
2295        buf = kzalloc(len, GFP_ATOMIC);
2296        if (!buf)
2297                return ERR_PTR(-ENOMEM);
2298
2299        err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2300        if (err < 0) {
2301                kfree(buf);
2302                return ERR_PTR(err);
2303        }
2304        *ncookies = err;
2305
2306        return buf;
2307}
2308EXPORT_SYMBOL(ldc_alloc_exp_dring);
2309
/* Undo ldc_alloc_exp_dring(): drop the mappings described by @cookies,
 * then free @buf.  @len is accepted but not used here.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	/* Unmap first so the map table never refers to freed memory. */
	ldc_unmap(lp, cookies, ncookies);

	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
2317
2318static int __init ldc_init(void)
2319{
2320        unsigned long major, minor;
2321        struct mdesc_handle *hp;
2322        const u64 *v;
2323        int err;
2324        u64 mp;
2325
2326        hp = mdesc_grab();
2327        if (!hp)
2328                return -ENODEV;
2329
2330        mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2331        err = -ENODEV;
2332        if (mp == MDESC_NODE_NULL)
2333                goto out;
2334
2335        v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2336        if (!v)
2337                goto out;
2338
2339        major = 1;
2340        minor = 0;
2341        if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2342                printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2343                goto out;
2344        }
2345
2346        printk(KERN_INFO "%s", version);
2347
2348        if (!*v) {
2349                printk(KERN_INFO PFX "Domaining disabled.\n");
2350                goto out;
2351        }
2352        ldom_domaining_enabled = 1;
2353        err = 0;
2354
2355out:
2356        mdesc_release(hp);
2357        return err;
2358}
2359
2360core_initcall(ldc_init);
2361