linux/arch/sparc/kernel/ldc.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/* ldc.c: Logical Domain Channel link-layer protocol driver.
   3 *
   4 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
   5 */
   6
   7#include <linux/kernel.h>
   8#include <linux/export.h>
   9#include <linux/slab.h>
  10#include <linux/spinlock.h>
  11#include <linux/delay.h>
  12#include <linux/errno.h>
  13#include <linux/string.h>
  14#include <linux/scatterlist.h>
  15#include <linux/interrupt.h>
  16#include <linux/list.h>
  17#include <linux/init.h>
  18#include <linux/bitmap.h>
  19#include <asm/iommu-common.h>
  20
  21#include <asm/hypervisor.h>
  22#include <asm/iommu.h>
  23#include <asm/page.h>
  24#include <asm/ldc.h>
  25#include <asm/mdesc.h>
  26
  27#define DRV_MODULE_NAME         "ldc"
  28#define PFX DRV_MODULE_NAME     ": "
  29#define DRV_MODULE_VERSION      "1.1"
  30#define DRV_MODULE_RELDATE      "July 22, 2008"
  31
  32#define COOKIE_PGSZ_CODE        0xf000000000000000ULL
  33#define COOKIE_PGSZ_CODE_SHIFT  60ULL
  34
  35
  36static char version[] =
  37        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  38
  39/* Packet header layout for unreliable and reliable mode frames.
  40 * When in RAW mode, packets are simply straight 64-byte payloads
  41 * with no headers.
  42 */
/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 */
struct ldc_packet {
	u8			type;	/* frame class: control, data, or error */
#define LDC_CTRL		0x01
#define LDC_DATA		0x02
#define LDC_ERR			0x10

	u8			stype;	/* sub-type: INFO, ACK, or NACK */
#define LDC_INFO		0x01
#define LDC_ACK			0x02
#define LDC_NACK		0x04

	u8			ctrl;	/* control operation, when type is LDC_CTRL */
#define LDC_VERS		0x01 /* Link Version            */
#define LDC_RTS			0x02 /* Request To Send         */
#define LDC_RTR			0x03 /* Ready To Receive        */
#define LDC_RDX			0x04 /* Ready for Data eXchange */
#define LDC_CTRL_MSK		0x0f

	u8			env;	/* payload length plus fragmentation flags */
#define LDC_LEN			0x3f
#define LDC_FRAG_MASK		0xc0
#define LDC_START		0x40
#define LDC_STOP		0x80

	u32			seqid;	/* sequence number of this frame */

	union {
		u8		u_data[LDC_PACKET_SIZE - 8];	/* unreliable-mode payload */
		struct {
			u32	pad;
			u32	ackid;	/* seqid being acknowledged */
			u8	r_data[LDC_PACKET_SIZE - 8 - 8];	/* reliable-mode payload */
		} r;
	} u;
};
  78
/* A protocol version as exchanged during the VER handshake phase. */
struct ldc_version {
	u16 major;
	u16 minor;
};

/* Table of versions this driver supports, used during version
 * negotiation.  Ordered from largest major to lowest.
 */
static struct ldc_version ver_arr[] = {
	{ .major = 1, .minor = 0 },
};
  88
#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)

struct ldc_channel;

/* Per-mode (raw / non-raw / stream) read and write entry points
 * installed on a channel according to its configured mode.
 */
struct ldc_mode_ops {
	int (*write)(struct ldc_channel *, const void *, unsigned int);
	int (*read)(struct ldc_channel *, void *, unsigned int);
};

static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;

int ldom_domaining_enabled;
 104
/* Per-channel IOMMU state used for mapping buffers into the
 * channel's address space via its mapping table.
 */
struct ldc_iommu {
	/* Protects ldc_unmap.  */
	spinlock_t			lock;
	struct ldc_mtable_entry		*page_table;
	struct iommu_map_table		iommu_map_table;
};
 111
/* Software state for one logical domain channel. */
struct ldc_channel {
	/* Protects all operations that depend upon channel state.  */
	spinlock_t			lock;

	unsigned long			id;	/* hypervisor channel ID */

	/* Reassembly buffer used in stream mode. */
	u8				*mssbuf;
	u32				mssbuf_len;
	u32				mssbuf_off;

	/* TX queue: base virtual address, head/tail byte offsets,
	 * entry count, and real (physical) address handed to the HV.
	 */
	struct ldc_packet		*tx_base;
	unsigned long			tx_head;
	unsigned long			tx_tail;
	unsigned long			tx_num_entries;
	unsigned long			tx_ra;

	/* Offset of the oldest TX packet not yet ACK'd (stream mode). */
	unsigned long			tx_acked;

	/* RX queue, same layout as the TX queue above. */
	struct ldc_packet		*rx_base;
	unsigned long			rx_head;
	unsigned long			rx_tail;
	unsigned long			rx_num_entries;
	unsigned long			rx_ra;

	u32				rcv_nxt;	/* last seqid received */
	u32				snd_nxt;	/* last seqid sent */

	unsigned long			chan_state;	/* HV channel state */

	struct ldc_channel_config	cfg;
	void				*event_arg;	/* cookie for cfg.event() */

	const struct ldc_mode_ops	*mops;

	struct ldc_iommu		iommu;

	struct ldc_version		ver;	/* negotiated protocol version */

	/* Handshake progress. */
	u8				hs_state;
#define LDC_HS_CLOSED			0x00
#define LDC_HS_OPEN			0x01
#define LDC_HS_GOTVERS			0x02
#define LDC_HS_SENTRTR			0x03
#define LDC_HS_GOTRTR			0x04
#define LDC_HS_COMPLETE			0x10

	u8				flags;
#define LDC_FLAG_ALLOCED_QUEUES		0x01
#define LDC_FLAG_REGISTERED_QUEUES	0x02
#define LDC_FLAG_REGISTERED_IRQS	0x04
#define LDC_FLAG_RESET			0x10

	u8				mss;	/* max payload per packet */
	u8				state;

#define LDC_IRQ_NAME_MAX		32
	char				rx_irq_name[LDC_IRQ_NAME_MAX];
	char				tx_irq_name[LDC_IRQ_NAME_MAX];

	struct hlist_head		mh_list;

	struct hlist_node		list;	/* linkage on ldc_channel_list */
};
 175
/* Debug printk gated by the channel's configured debug mask.
 * NOTE: expects a local variable named 'lp' to be in scope.
 */
#define ldcdbg(TYPE, f, a...) \
do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)

/* Reset the channel, tagging the log output with the caller's name. */
#define LDC_ABORT(lp)	ldc_abort((lp), __func__)
 182
 183static const char *state_to_str(u8 state)
 184{
 185        switch (state) {
 186        case LDC_STATE_INVALID:
 187                return "INVALID";
 188        case LDC_STATE_INIT:
 189                return "INIT";
 190        case LDC_STATE_BOUND:
 191                return "BOUND";
 192        case LDC_STATE_READY:
 193                return "READY";
 194        case LDC_STATE_CONNECTED:
 195                return "CONNECTED";
 196        default:
 197                return "<UNKNOWN>";
 198        }
 199}
 200
 201static unsigned long __advance(unsigned long off, unsigned long num_entries)
 202{
 203        off += LDC_PACKET_SIZE;
 204        if (off == (num_entries * LDC_PACKET_SIZE))
 205                off = 0;
 206
 207        return off;
 208}
 209
 210static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
 211{
 212        return __advance(off, lp->rx_num_entries);
 213}
 214
 215static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
 216{
 217        return __advance(off, lp->tx_num_entries);
 218}
 219
 220static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
 221                                                  unsigned long *new_tail)
 222{
 223        struct ldc_packet *p;
 224        unsigned long t;
 225
 226        t = tx_advance(lp, lp->tx_tail);
 227        if (t == lp->tx_head)
 228                return NULL;
 229
 230        *new_tail = t;
 231
 232        p = lp->tx_base;
 233        return p + (lp->tx_tail / LDC_PACKET_SIZE);
 234}
 235
 236/* When we are in reliable or stream mode, have to track the next packet
 237 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
 238 * to be careful not to stomp over the queue past that point.  During
 239 * the handshake, we don't have TX data packets pending in the queue
 240 * and that's why handshake_get_tx_packet() need not be mindful of
 241 * lp->tx_acked.
 242 */
 243static unsigned long head_for_data(struct ldc_channel *lp)
 244{
 245        if (lp->cfg.mode == LDC_MODE_STREAM)
 246                return lp->tx_acked;
 247        return lp->tx_head;
 248}
 249
 250static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
 251{
 252        unsigned long limit, tail, new_tail, diff;
 253        unsigned int mss;
 254
 255        limit = head_for_data(lp);
 256        tail = lp->tx_tail;
 257        new_tail = tx_advance(lp, tail);
 258        if (new_tail == limit)
 259                return 0;
 260
 261        if (limit > new_tail)
 262                diff = limit - new_tail;
 263        else
 264                diff = (limit +
 265                        ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
 266        diff /= LDC_PACKET_SIZE;
 267        mss = lp->mss;
 268
 269        if (diff * mss < size)
 270                return 0;
 271
 272        return 1;
 273}
 274
 275static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
 276                                             unsigned long *new_tail)
 277{
 278        struct ldc_packet *p;
 279        unsigned long h, t;
 280
 281        h = head_for_data(lp);
 282        t = tx_advance(lp, lp->tx_tail);
 283        if (t == h)
 284                return NULL;
 285
 286        *new_tail = t;
 287
 288        p = lp->tx_base;
 289        return p + (lp->tx_tail / LDC_PACKET_SIZE);
 290}
 291
 292static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
 293{
 294        unsigned long orig_tail = lp->tx_tail;
 295        int limit = 1000;
 296
 297        lp->tx_tail = tail;
 298        while (limit-- > 0) {
 299                unsigned long err;
 300
 301                err = sun4v_ldc_tx_set_qtail(lp->id, tail);
 302                if (!err)
 303                        return 0;
 304
 305                if (err != HV_EWOULDBLOCK) {
 306                        lp->tx_tail = orig_tail;
 307                        return -EINVAL;
 308                }
 309                udelay(1);
 310        }
 311
 312        lp->tx_tail = orig_tail;
 313        return -EBUSY;
 314}
 315
 316/* This just updates the head value in the hypervisor using
 317 * a polling loop with a timeout.  The caller takes care of
 318 * upating software state representing the head change, if any.
 319 */
 320static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
 321{
 322        int limit = 1000;
 323
 324        while (limit-- > 0) {
 325                unsigned long err;
 326
 327                err = sun4v_ldc_rx_set_qhead(lp->id, head);
 328                if (!err)
 329                        return 0;
 330
 331                if (err != HV_EWOULDBLOCK)
 332                        return -EINVAL;
 333
 334                udelay(1);
 335        }
 336
 337        return -EBUSY;
 338}
 339
/* Commit a previously reserved TX slot by publishing the advanced
 * tail to the hypervisor.  'p' must be the slot that was handed out
 * for the current tail by handshake_get_tx_packet()/data_get_tx_packet().
 */
static int send_tx_packet(struct ldc_channel *lp,
			  struct ldc_packet *p,
			  unsigned long new_tail)
{
	/* Sanity: the packet must sit exactly at the current tail slot. */
	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));

	return set_tx_tail(lp, new_tail);
}
 348
 349static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
 350                                                 u8 stype, u8 ctrl,
 351                                                 void *data, int dlen,
 352                                                 unsigned long *new_tail)
 353{
 354        struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
 355
 356        if (p) {
 357                memset(p, 0, sizeof(*p));
 358                p->type = LDC_CTRL;
 359                p->stype = stype;
 360                p->ctrl = ctrl;
 361                if (data)
 362                        memcpy(p->u.u_data, data, dlen);
 363        }
 364        return p;
 365}
 366
 367static int start_handshake(struct ldc_channel *lp)
 368{
 369        struct ldc_packet *p;
 370        struct ldc_version *ver;
 371        unsigned long new_tail;
 372
 373        ver = &ver_arr[0];
 374
 375        ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
 376               ver->major, ver->minor);
 377
 378        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 379                                   ver, sizeof(*ver), &new_tail);
 380        if (p) {
 381                int err = send_tx_packet(lp, p, new_tail);
 382                if (!err)
 383                        lp->flags &= ~LDC_FLAG_RESET;
 384                return err;
 385        }
 386        return -EBUSY;
 387}
 388
 389static int send_version_nack(struct ldc_channel *lp,
 390                             u16 major, u16 minor)
 391{
 392        struct ldc_packet *p;
 393        struct ldc_version ver;
 394        unsigned long new_tail;
 395
 396        ver.major = major;
 397        ver.minor = minor;
 398
 399        p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
 400                                   &ver, sizeof(ver), &new_tail);
 401        if (p) {
 402                ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
 403                       ver.major, ver.minor);
 404
 405                return send_tx_packet(lp, p, new_tail);
 406        }
 407        return -EBUSY;
 408}
 409
 410static int send_version_ack(struct ldc_channel *lp,
 411                            struct ldc_version *vp)
 412{
 413        struct ldc_packet *p;
 414        unsigned long new_tail;
 415
 416        p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
 417                                   vp, sizeof(*vp), &new_tail);
 418        if (p) {
 419                ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
 420                       vp->major, vp->minor);
 421
 422                return send_tx_packet(lp, p, new_tail);
 423        }
 424        return -EBUSY;
 425}
 426
 427static int send_rts(struct ldc_channel *lp)
 428{
 429        struct ldc_packet *p;
 430        unsigned long new_tail;
 431
 432        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
 433                                   &new_tail);
 434        if (p) {
 435                p->env = lp->cfg.mode;
 436                p->seqid = 0;
 437                lp->rcv_nxt = 0;
 438
 439                ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
 440                       p->env, p->seqid);
 441
 442                return send_tx_packet(lp, p, new_tail);
 443        }
 444        return -EBUSY;
 445}
 446
 447static int send_rtr(struct ldc_channel *lp)
 448{
 449        struct ldc_packet *p;
 450        unsigned long new_tail;
 451
 452        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
 453                                   &new_tail);
 454        if (p) {
 455                p->env = lp->cfg.mode;
 456                p->seqid = 0;
 457
 458                ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
 459                       p->env, p->seqid);
 460
 461                return send_tx_packet(lp, p, new_tail);
 462        }
 463        return -EBUSY;
 464}
 465
 466static int send_rdx(struct ldc_channel *lp)
 467{
 468        struct ldc_packet *p;
 469        unsigned long new_tail;
 470
 471        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
 472                                   &new_tail);
 473        if (p) {
 474                p->env = 0;
 475                p->seqid = ++lp->snd_nxt;
 476                p->u.r.ackid = lp->rcv_nxt;
 477
 478                ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
 479                       p->env, p->seqid, p->u.r.ackid);
 480
 481                return send_tx_packet(lp, p, new_tail);
 482        }
 483        return -EBUSY;
 484}
 485
 486static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
 487{
 488        struct ldc_packet *p;
 489        unsigned long new_tail;
 490        int err;
 491
 492        p = data_get_tx_packet(lp, &new_tail);
 493        if (!p)
 494                return -EBUSY;
 495        memset(p, 0, sizeof(*p));
 496        p->type = data_pkt->type;
 497        p->stype = LDC_NACK;
 498        p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
 499        p->seqid = lp->snd_nxt + 1;
 500        p->u.r.ackid = lp->rcv_nxt;
 501
 502        ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
 503               p->type, p->ctrl, p->seqid, p->u.r.ackid);
 504
 505        err = send_tx_packet(lp, p, new_tail);
 506        if (!err)
 507                lp->snd_nxt++;
 508
 509        return err;
 510}
 511
/* Reset the channel after a protocol error: reconfigure both queues
 * with the hypervisor (which flushes them) and refetch their state.
 * Always returns -ECONNRESET so callers can propagate the reset.
 */
static int ldc_abort(struct ldc_channel *lp, const char *msg)
{
	unsigned long hv_err;

	ldcdbg(STATE, "ABORT[%s]\n", msg);
	ldc_print(lp);

	/* We report but do not act upon the hypervisor errors because
	 * there really isn't much we can do if they fail at this point.
	 */
	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);

	/* Re-sync the cached TX head/tail after the queue reset. */
	hv_err = sun4v_ldc_tx_get_state(lp->id,
					&lp->tx_head,
					&lp->tx_tail,
					&lp->chan_state);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
		       lp->id, hv_err);

	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);

	/* Refetch the RX queue state as well, because we could be invoked
	 * here in the queue processing context.
	 */
	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
		       lp->id, hv_err);

	return -ECONNRESET;
}
 557
 558static struct ldc_version *find_by_major(u16 major)
 559{
 560        struct ldc_version *ret = NULL;
 561        int i;
 562
 563        for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
 564                struct ldc_version *v = &ver_arr[i];
 565                if (v->major <= major) {
 566                        ret = v;
 567                        break;
 568                }
 569        }
 570        return ret;
 571}
 572
 573static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
 574{
 575        struct ldc_version *vap;
 576        int err;
 577
 578        ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
 579               vp->major, vp->minor);
 580
 581        if (lp->hs_state == LDC_HS_GOTVERS) {
 582                lp->hs_state = LDC_HS_OPEN;
 583                memset(&lp->ver, 0, sizeof(lp->ver));
 584        }
 585
 586        vap = find_by_major(vp->major);
 587        if (!vap) {
 588                err = send_version_nack(lp, 0, 0);
 589        } else if (vap->major != vp->major) {
 590                err = send_version_nack(lp, vap->major, vap->minor);
 591        } else {
 592                struct ldc_version ver = *vp;
 593                if (ver.minor > vap->minor)
 594                        ver.minor = vap->minor;
 595                err = send_version_ack(lp, &ver);
 596                if (!err) {
 597                        lp->ver = ver;
 598                        lp->hs_state = LDC_HS_GOTVERS;
 599                }
 600        }
 601        if (err)
 602                return LDC_ABORT(lp);
 603
 604        return 0;
 605}
 606
 607static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
 608{
 609        ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
 610               vp->major, vp->minor);
 611
 612        if (lp->hs_state == LDC_HS_GOTVERS) {
 613                if (lp->ver.major != vp->major ||
 614                    lp->ver.minor != vp->minor)
 615                        return LDC_ABORT(lp);
 616        } else {
 617                lp->ver = *vp;
 618                lp->hs_state = LDC_HS_GOTVERS;
 619        }
 620        if (send_rts(lp))
 621                return LDC_ABORT(lp);
 622        return 0;
 623}
 624
 625static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
 626{
 627        struct ldc_version *vap;
 628        struct ldc_packet *p;
 629        unsigned long new_tail;
 630
 631        if (vp->major == 0 && vp->minor == 0)
 632                return LDC_ABORT(lp);
 633
 634        vap = find_by_major(vp->major);
 635        if (!vap)
 636                return LDC_ABORT(lp);
 637
 638        p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
 639                                           vap, sizeof(*vap),
 640                                           &new_tail);
 641        if (!p)
 642                return LDC_ABORT(lp);
 643
 644        return send_tx_packet(lp, p, new_tail);
 645}
 646
 647static int process_version(struct ldc_channel *lp,
 648                           struct ldc_packet *p)
 649{
 650        struct ldc_version *vp;
 651
 652        vp = (struct ldc_version *) p->u.u_data;
 653
 654        switch (p->stype) {
 655        case LDC_INFO:
 656                return process_ver_info(lp, vp);
 657
 658        case LDC_ACK:
 659                return process_ver_ack(lp, vp);
 660
 661        case LDC_NACK:
 662                return process_ver_nack(lp, vp);
 663
 664        default:
 665                return LDC_ABORT(lp);
 666        }
 667}
 668
 669static int process_rts(struct ldc_channel *lp,
 670                       struct ldc_packet *p)
 671{
 672        ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
 673               p->stype, p->seqid, p->env);
 674
 675        if (p->stype     != LDC_INFO       ||
 676            lp->hs_state != LDC_HS_GOTVERS ||
 677            p->env       != lp->cfg.mode)
 678                return LDC_ABORT(lp);
 679
 680        lp->snd_nxt = p->seqid;
 681        lp->rcv_nxt = p->seqid;
 682        lp->hs_state = LDC_HS_SENTRTR;
 683        if (send_rtr(lp))
 684                return LDC_ABORT(lp);
 685
 686        return 0;
 687}
 688
 689static int process_rtr(struct ldc_channel *lp,
 690                       struct ldc_packet *p)
 691{
 692        ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
 693               p->stype, p->seqid, p->env);
 694
 695        if (p->stype     != LDC_INFO ||
 696            p->env       != lp->cfg.mode)
 697                return LDC_ABORT(lp);
 698
 699        lp->snd_nxt = p->seqid;
 700        lp->hs_state = LDC_HS_COMPLETE;
 701        ldc_set_state(lp, LDC_STATE_CONNECTED);
 702        send_rdx(lp);
 703
 704        return LDC_EVENT_UP;
 705}
 706
 707static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
 708{
 709        return lp->rcv_nxt + 1 == seqid;
 710}
 711
 712static int process_rdx(struct ldc_channel *lp,
 713                       struct ldc_packet *p)
 714{
 715        ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
 716               p->stype, p->seqid, p->env, p->u.r.ackid);
 717
 718        if (p->stype != LDC_INFO ||
 719            !(rx_seq_ok(lp, p->seqid)))
 720                return LDC_ABORT(lp);
 721
 722        lp->rcv_nxt = p->seqid;
 723
 724        lp->hs_state = LDC_HS_COMPLETE;
 725        ldc_set_state(lp, LDC_STATE_CONNECTED);
 726
 727        return LDC_EVENT_UP;
 728}
 729
 730static int process_control_frame(struct ldc_channel *lp,
 731                                 struct ldc_packet *p)
 732{
 733        switch (p->ctrl) {
 734        case LDC_VERS:
 735                return process_version(lp, p);
 736
 737        case LDC_RTS:
 738                return process_rts(lp, p);
 739
 740        case LDC_RTR:
 741                return process_rtr(lp, p);
 742
 743        case LDC_RDX:
 744                return process_rdx(lp, p);
 745
 746        default:
 747                return LDC_ABORT(lp);
 748        }
 749}
 750
/* An error frame from the peer always resets the channel. */
static int process_error_frame(struct ldc_channel *lp,
			       struct ldc_packet *p)
{
	return LDC_ABORT(lp);
}
 756
 757static int process_data_ack(struct ldc_channel *lp,
 758                            struct ldc_packet *ack)
 759{
 760        unsigned long head = lp->tx_acked;
 761        u32 ackid = ack->u.r.ackid;
 762
 763        while (1) {
 764                struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
 765
 766                head = tx_advance(lp, head);
 767
 768                if (p->seqid == ackid) {
 769                        lp->tx_acked = head;
 770                        return 0;
 771                }
 772                if (head == lp->tx_tail)
 773                        return LDC_ABORT(lp);
 774        }
 775
 776        return 0;
 777}
 778
 779static void send_events(struct ldc_channel *lp, unsigned int event_mask)
 780{
 781        if (event_mask & LDC_EVENT_RESET)
 782                lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
 783        if (event_mask & LDC_EVENT_UP)
 784                lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
 785        if (event_mask & LDC_EVENT_DATA_READY)
 786                lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
 787}
 788
/* RX interrupt handler: refresh queue state from the hypervisor,
 * handle RAW-mode link-up, drain control frames during the handshake,
 * and translate everything into events for the client callback.
 */
static irqreturn_t ldc_rx(int irq, void *dev_id)
{
	struct ldc_channel *lp = dev_id;
	unsigned long orig_state, flags;
	unsigned int event_mask;

	spin_lock_irqsave(&lp->lock, flags);

	orig_state = lp->chan_state;

	/* We should probably check for hypervisor errors here and
	 * reset the LDC channel if we get one.
	 */
	sun4v_ldc_rx_get_state(lp->id,
			       &lp->rx_head,
			       &lp->rx_tail,
			       &lp->chan_state);

	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);

	event_mask = 0;

	/* RAW mode has no handshake: the HV reporting the channel UP is
	 * the entire connection procedure.
	 */
	if (lp->cfg.mode == LDC_MODE_RAW &&
	    lp->chan_state == LDC_CHANNEL_UP) {
		lp->hs_state = LDC_HS_COMPLETE;
		ldc_set_state(lp, LDC_STATE_CONNECTED);

		/*
		 * Generate an LDC_EVENT_UP event if the channel
		 * was not already up.
		 */
		if (orig_state != LDC_CHANNEL_UP) {
			event_mask |= LDC_EVENT_UP;
			orig_state = lp->chan_state;
		}
	}

	/* If we are in reset state, flush the RX queue and ignore
	 * everything.
	 */
	if (lp->flags & LDC_FLAG_RESET) {
		(void) ldc_rx_reset(lp);
		goto out;
	}

	/* Once we finish the handshake, we let the ldc_read()
	 * paths do all of the control frame and state management.
	 * Just trigger the callback.
	 */
	if (lp->hs_state == LDC_HS_COMPLETE) {
handshake_complete:
		/* Report link transitions: any state change that is not a
		 * transition to UP is treated as a reset.
		 */
		if (lp->chan_state != orig_state) {
			unsigned int event = LDC_EVENT_RESET;

			if (lp->chan_state == LDC_CHANNEL_UP)
				event = LDC_EVENT_UP;

			event_mask |= event;
		}
		if (lp->rx_head != lp->rx_tail)
			event_mask |= LDC_EVENT_DATA_READY;

		goto out;
	}

	/* Mid-handshake, a channel state change invalidates whatever is
	 * queued; bail and let the next interrupt sort it out.
	 */
	if (lp->chan_state != orig_state)
		goto out;

	/* Drain and process frames one at a time until the queue is
	 * empty, an error occurs, or the handshake completes.
	 */
	while (lp->rx_head != lp->rx_tail) {
		struct ldc_packet *p;
		unsigned long new;
		int err;

		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);

		switch (p->type) {
		case LDC_CTRL:
			/* Positive returns are event bits to report. */
			err = process_control_frame(lp, p);
			if (err > 0)
				event_mask |= err;
			break;

		case LDC_DATA:
			event_mask |= LDC_EVENT_DATA_READY;
			err = 0;
			break;

		case LDC_ERR:
			err = process_error_frame(lp, p);
			break;

		default:
			err = LDC_ABORT(lp);
			break;
		}

		if (err < 0)
			break;

		/* Consume the frame: advance the head with wrap-around and
		 * publish the new head to the hypervisor.
		 */
		new = lp->rx_head;
		new += LDC_PACKET_SIZE;
		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
			new = 0;
		lp->rx_head = new;

		err = __set_rx_head(lp, new);
		if (err < 0) {
			(void) LDC_ABORT(lp);
			break;
		}
		/* The frame just processed may have finished the handshake;
		 * switch to post-handshake event reporting if so.
		 */
		if (lp->hs_state == LDC_HS_COMPLETE)
			goto handshake_complete;
	}

out:
	spin_unlock_irqrestore(&lp->lock, flags);

	/* Callbacks run outside the lock. */
	send_events(lp, event_mask);

	return IRQ_HANDLED;
}
 911
/* TX interrupt handler.  Refreshes the cached TX queue state from the
 * hypervisor and, for RAW mode channels, promotes the channel to the
 * CONNECTED state as soon as the hypervisor reports the link up (RAW
 * mode has no handshake).  Any resulting LDC_EVENT_UP is delivered to
 * the client's event callback after the channel lock is dropped.
 */
static irqreturn_t ldc_tx(int irq, void *dev_id)
{
        struct ldc_channel *lp = dev_id;
        unsigned long flags, orig_state;
        unsigned int event_mask = 0;

        spin_lock_irqsave(&lp->lock, flags);

        /* Remember the channel state so we can detect a DOWN->UP edge. */
        orig_state = lp->chan_state;

        /* We should probably check for hypervisor errors here and
         * reset the LDC channel if we get one.
         */
        sun4v_ldc_tx_get_state(lp->id,
                               &lp->tx_head,
                               &lp->tx_tail,
                               &lp->chan_state);

        ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
               orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);

        if (lp->cfg.mode == LDC_MODE_RAW &&
            lp->chan_state == LDC_CHANNEL_UP) {
                lp->hs_state = LDC_HS_COMPLETE;
                ldc_set_state(lp, LDC_STATE_CONNECTED);

                /*
                 * Generate an LDC_EVENT_UP event if the channel
                 * was not already up.
                 */
                if (orig_state != LDC_CHANNEL_UP) {
                        event_mask |= LDC_EVENT_UP;
                        orig_state = lp->chan_state;
                }
        }

        spin_unlock_irqrestore(&lp->lock, flags);

        /* Deliver events to the client outside of lp->lock. */
        send_events(lp, event_mask);

        return IRQ_HANDLED;
}
 954
 955/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
 956 * XXX that addition and removal from the ldc_channel_list has
 957 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 958 * XXX totally pointless as another thread can slip into ldc_alloc()
 959 * XXX and add a channel with the same ID.  There also needs to be
 960 * XXX a spinlock for ldc_channel_list.
 961 */
 962static HLIST_HEAD(ldc_channel_list);
 963
 964static int __ldc_channel_exists(unsigned long id)
 965{
 966        struct ldc_channel *lp;
 967
 968        hlist_for_each_entry(lp, &ldc_channel_list, list) {
 969                if (lp->id == id)
 970                        return 1;
 971        }
 972        return 0;
 973}
 974
 975static int alloc_queue(const char *name, unsigned long num_entries,
 976                       struct ldc_packet **base, unsigned long *ra)
 977{
 978        unsigned long size, order;
 979        void *q;
 980
 981        size = num_entries * LDC_PACKET_SIZE;
 982        order = get_order(size);
 983
 984        q = (void *) __get_free_pages(GFP_KERNEL, order);
 985        if (!q) {
 986                printk(KERN_ERR PFX "Alloc of %s queue failed with "
 987                       "size=%lu order=%lu\n", name, size, order);
 988                return -ENOMEM;
 989        }
 990
 991        memset(q, 0, PAGE_SIZE << order);
 992
 993        *base = q;
 994        *ra = __pa(q);
 995
 996        return 0;
 997}
 998
 999static void free_queue(unsigned long num_entries, struct ldc_packet *q)
1000{
1001        unsigned long size, order;
1002
1003        if (!q)
1004                return;
1005
1006        size = num_entries * LDC_PACKET_SIZE;
1007        order = get_order(size);
1008
1009        free_pages((unsigned long)q, order);
1010}
1011
1012static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
1013{
1014        u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1015        /* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
1016
1017        cookie &= ~COOKIE_PGSZ_CODE;
1018
1019        return (cookie >> (13ULL + (szcode * 3ULL)));
1020}
1021
1022static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
1023                      unsigned long entry, unsigned long npages)
1024{
1025        struct ldc_mtable_entry *base;
1026        unsigned long i, shift;
1027
1028        shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1029        base = iommu->page_table + entry;
1030        for (i = 0; i < npages; i++) {
1031                if (base->cookie)
1032                        sun4v_ldc_revoke(id, cookie + (i << shift),
1033                                         base->cookie);
1034                base->mte = 0;
1035        }
1036}
1037
1038/* XXX Make this configurable... XXX */
1039#define LDC_IOTABLE_SIZE        (8 * 1024)
1040
/* Set up the per-channel IOMMU: a bitmap-backed allocation arena
 * (iommu-common) plus the LDC map table, which is registered with the
 * hypervisor via sun4v_ldc_set_map_table().  Returns 0 or a negative
 * errno; on failure every partial allocation is unwound.
 */
static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
{
        unsigned long sz, num_tsb_entries, tsbsize, order;
        struct ldc_iommu *ldc_iommu = &lp->iommu;
        struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
        struct ldc_mtable_entry *table;
        unsigned long hv_err;
        int err;

        num_tsb_entries = LDC_IOTABLE_SIZE;
        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
        spin_lock_init(&ldc_iommu->lock);

        /* One bit per table entry, rounded up to an 8-byte multiple. */
        sz = num_tsb_entries / 8;
        sz = (sz + 7UL) & ~7UL;
        iommu->map = kzalloc(sz, GFP_KERNEL);
        if (!iommu->map) {
                printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
                return -ENOMEM;
        }
        iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
                            NULL, false /* no large pool */,
                            1 /* npools */,
                            true /* skip span boundary check */);

        order = get_order(tsbsize);

        table = (struct ldc_mtable_entry *)
                __get_free_pages(GFP_KERNEL, order);
        err = -ENOMEM;
        if (!table) {
                printk(KERN_ERR PFX "Alloc of MTE table failed, "
                       "size=%lu order=%lu\n", tsbsize, order);
                goto out_free_map;
        }

        memset(table, 0, PAGE_SIZE << order);

        ldc_iommu->page_table = table;

        /* Hand the zeroed map table to the hypervisor. */
        hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
                                         num_tsb_entries);
        err = -EINVAL;
        if (hv_err)
                goto out_free_table;

        return 0;

out_free_table:
        free_pages((unsigned long) table, order);
        ldc_iommu->page_table = NULL;

out_free_map:
        kfree(iommu->map);
        iommu->map = NULL;

        return err;
}
1099
1100static void ldc_iommu_release(struct ldc_channel *lp)
1101{
1102        struct ldc_iommu *ldc_iommu = &lp->iommu;
1103        struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1104        unsigned long num_tsb_entries, tsbsize, order;
1105
1106        (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1107
1108        num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1109        tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1110        order = get_order(tsbsize);
1111
1112        free_pages((unsigned long) ldc_iommu->page_table, order);
1113        ldc_iommu->page_table = NULL;
1114
1115        kfree(iommu->map);
1116        iommu->map = NULL;
1117}
1118
1119struct ldc_channel *ldc_alloc(unsigned long id,
1120                              const struct ldc_channel_config *cfgp,
1121                              void *event_arg,
1122                              const char *name)
1123{
1124        struct ldc_channel *lp;
1125        const struct ldc_mode_ops *mops;
1126        unsigned long dummy1, dummy2, hv_err;
1127        u8 mss, *mssbuf;
1128        int err;
1129
1130        err = -ENODEV;
1131        if (!ldom_domaining_enabled)
1132                goto out_err;
1133
1134        err = -EINVAL;
1135        if (!cfgp)
1136                goto out_err;
1137        if (!name)
1138                goto out_err;
1139
1140        switch (cfgp->mode) {
1141        case LDC_MODE_RAW:
1142                mops = &raw_ops;
1143                mss = LDC_PACKET_SIZE;
1144                break;
1145
1146        case LDC_MODE_UNRELIABLE:
1147                mops = &nonraw_ops;
1148                mss = LDC_PACKET_SIZE - 8;
1149                break;
1150
1151        case LDC_MODE_STREAM:
1152                mops = &stream_ops;
1153                mss = LDC_PACKET_SIZE - 8 - 8;
1154                break;
1155
1156        default:
1157                goto out_err;
1158        }
1159
1160        if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1161                goto out_err;
1162
1163        hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1164        err = -ENODEV;
1165        if (hv_err == HV_ECHANNEL)
1166                goto out_err;
1167
1168        err = -EEXIST;
1169        if (__ldc_channel_exists(id))
1170                goto out_err;
1171
1172        mssbuf = NULL;
1173
1174        lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1175        err = -ENOMEM;
1176        if (!lp)
1177                goto out_err;
1178
1179        spin_lock_init(&lp->lock);
1180
1181        lp->id = id;
1182
1183        err = ldc_iommu_init(name, lp);
1184        if (err)
1185                goto out_free_ldc;
1186
1187        lp->mops = mops;
1188        lp->mss = mss;
1189
1190        lp->cfg = *cfgp;
1191        if (!lp->cfg.mtu)
1192                lp->cfg.mtu = LDC_DEFAULT_MTU;
1193
1194        if (lp->cfg.mode == LDC_MODE_STREAM) {
1195                mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1196                if (!mssbuf) {
1197                        err = -ENOMEM;
1198                        goto out_free_iommu;
1199                }
1200                lp->mssbuf = mssbuf;
1201        }
1202
1203        lp->event_arg = event_arg;
1204
1205        /* XXX allow setting via ldc_channel_config to override defaults
1206         * XXX or use some formula based upon mtu
1207         */
1208        lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1209        lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1210
1211        err = alloc_queue("TX", lp->tx_num_entries,
1212                          &lp->tx_base, &lp->tx_ra);
1213        if (err)
1214                goto out_free_mssbuf;
1215
1216        err = alloc_queue("RX", lp->rx_num_entries,
1217                          &lp->rx_base, &lp->rx_ra);
1218        if (err)
1219                goto out_free_txq;
1220
1221        lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1222
1223        lp->hs_state = LDC_HS_CLOSED;
1224        ldc_set_state(lp, LDC_STATE_INIT);
1225
1226        INIT_HLIST_NODE(&lp->list);
1227        hlist_add_head(&lp->list, &ldc_channel_list);
1228
1229        INIT_HLIST_HEAD(&lp->mh_list);
1230
1231        snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1232        snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1233
1234        err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1235                          lp->rx_irq_name, lp);
1236        if (err)
1237                goto out_free_txq;
1238
1239        err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1240                          lp->tx_irq_name, lp);
1241        if (err) {
1242                free_irq(lp->cfg.rx_irq, lp);
1243                goto out_free_txq;
1244        }
1245
1246        return lp;
1247
1248out_free_txq:
1249        free_queue(lp->tx_num_entries, lp->tx_base);
1250
1251out_free_mssbuf:
1252        kfree(mssbuf);
1253
1254out_free_iommu:
1255        ldc_iommu_release(lp);
1256
1257out_free_ldc:
1258        kfree(lp);
1259
1260out_err:
1261        return ERR_PTR(err);
1262}
1263EXPORT_SYMBOL(ldc_alloc);
1264
/* Undo ldc_bind(): release the IRQs, unconfigure both queues with the
 * hypervisor, free the queue memory, and return the channel to the
 * INIT state.  Each step is guarded by its LDC_FLAG_* bit so this is
 * safe to call regardless of how far binding progressed.
 */
void ldc_unbind(struct ldc_channel *lp)
{
        if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
                free_irq(lp->cfg.rx_irq, lp);
                free_irq(lp->cfg.tx_irq, lp);
                lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
        }

        if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
                /* Size 0 unconfigures the queue in the hypervisor. */
                sun4v_ldc_tx_qconf(lp->id, 0, 0);
                sun4v_ldc_rx_qconf(lp->id, 0, 0);
                lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
        }
        if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
                free_queue(lp->tx_num_entries, lp->tx_base);
                free_queue(lp->rx_num_entries, lp->rx_base);
                lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
        }

        ldc_set_state(lp, LDC_STATE_INIT);
}
EXPORT_SYMBOL(ldc_unbind);
1287
/* Destroy a channel created by ldc_alloc(): unbind it, take it off
 * the global channel list, and free the stream buffer, the IOMMU
 * resources, and the channel object itself.
 */
void ldc_free(struct ldc_channel *lp)
{
        ldc_unbind(lp);
        hlist_del(&lp->list);
        kfree(lp->mssbuf);
        ldc_iommu_release(lp);

        kfree(lp);
}
EXPORT_SYMBOL(ldc_free);
1298
1299/* Bind the channel.  This registers the LDC queues with
1300 * the hypervisor and puts the channel into a pseudo-listening
1301 * state.  This does not initiate a handshake, ldc_connect() does
1302 * that.
1303 */
/* Register both packet queues with the hypervisor, enable the RX/TX
 * interrupts, and move the channel from INIT to BOUND (handshake state
 * OPEN, or COMPLETE for RAW mode which has no handshake).  Returns 0,
 * -EINVAL if the channel is not in INIT state, -ENODEV on a queue
 * configuration failure, or -EBUSY if the initial TX state cannot be
 * read back.  On failure everything registered so far is unwound.
 */
int ldc_bind(struct ldc_channel *lp)
{
        unsigned long hv_err, flags;
        int err = -EINVAL;

        if (lp->state != LDC_STATE_INIT)
                return -EINVAL;

        spin_lock_irqsave(&lp->lock, flags);

        enable_irq(lp->cfg.rx_irq);
        enable_irq(lp->cfg.tx_irq);

        lp->flags |= LDC_FLAG_REGISTERED_IRQS;

        err = -ENODEV;
        /* Unconfigure first so the hypervisor starts from a clean
         * queue state, then configure with the real address and size.
         */
        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_free_irqs;

        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
        if (hv_err)
                goto out_free_irqs;

        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_unmap_tx;

        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
        if (hv_err)
                goto out_unmap_tx;

        lp->flags |= LDC_FLAG_REGISTERED_QUEUES;

        /* Prime the cached TX queue pointers. */
        hv_err = sun4v_ldc_tx_get_state(lp->id,
                                        &lp->tx_head,
                                        &lp->tx_tail,
                                        &lp->chan_state);
        err = -EBUSY;
        if (hv_err)
                goto out_unmap_rx;

        lp->tx_acked = lp->tx_head;

        lp->hs_state = LDC_HS_OPEN;
        ldc_set_state(lp, LDC_STATE_BOUND);

        if (lp->cfg.mode == LDC_MODE_RAW) {
                /*
                 * There is no handshake in RAW mode, so handshake
                 * is completed.
                 */
                lp->hs_state = LDC_HS_COMPLETE;
        }

        spin_unlock_irqrestore(&lp->lock, flags);

        return 0;

out_unmap_rx:
        lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
        sun4v_ldc_rx_qconf(lp->id, 0, 0);

out_unmap_tx:
        sun4v_ldc_tx_qconf(lp->id, 0, 0);

out_free_irqs:
        lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
        free_irq(lp->cfg.tx_irq, lp);
        free_irq(lp->cfg.rx_irq, lp);

        spin_unlock_irqrestore(&lp->lock, flags);

        return err;
}
EXPORT_SYMBOL(ldc_bind);
1380
1381int ldc_connect(struct ldc_channel *lp)
1382{
1383        unsigned long flags;
1384        int err;
1385
1386        if (lp->cfg.mode == LDC_MODE_RAW)
1387                return -EINVAL;
1388
1389        spin_lock_irqsave(&lp->lock, flags);
1390
1391        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1392            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1393            lp->hs_state != LDC_HS_OPEN)
1394                err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1395        else
1396                err = start_handshake(lp);
1397
1398        spin_unlock_irqrestore(&lp->lock, flags);
1399
1400        return err;
1401}
1402EXPORT_SYMBOL(ldc_connect);
1403
/* Reset an established link by unconfiguring and reconfiguring both
 * queues with the hypervisor, leaving the channel BOUND with handshake
 * state OPEN and LDC_FLAG_RESET set.  Not meaningful for RAW mode.
 * On a hypervisor error the channel is fully torn down to INIT state
 * and -ENODEV is returned.
 */
int ldc_disconnect(struct ldc_channel *lp)
{
        unsigned long hv_err, flags;
        int err;

        if (lp->cfg.mode == LDC_MODE_RAW)
                return -EINVAL;

        if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
            !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
                return -EINVAL;

        spin_lock_irqsave(&lp->lock, flags);

        err = -ENODEV;
        /* Unconfigure and reconfigure each queue to reset it. */
        hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_err;

        hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
        if (hv_err)
                goto out_err;

        hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
        if (hv_err)
                goto out_err;

        hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
        if (hv_err)
                goto out_err;

        ldc_set_state(lp, LDC_STATE_BOUND);
        lp->hs_state = LDC_HS_OPEN;
        lp->flags |= LDC_FLAG_RESET;

        spin_unlock_irqrestore(&lp->lock, flags);

        return 0;

out_err:
        sun4v_ldc_tx_qconf(lp->id, 0, 0);
        sun4v_ldc_rx_qconf(lp->id, 0, 0);
        free_irq(lp->cfg.tx_irq, lp);
        free_irq(lp->cfg.rx_irq, lp);
        lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
                       LDC_FLAG_REGISTERED_QUEUES);
        ldc_set_state(lp, LDC_STATE_INIT);

        spin_unlock_irqrestore(&lp->lock, flags);

        return err;
}
EXPORT_SYMBOL(ldc_disconnect);
1457
/* Return the channel's current LDC_STATE_* value. */
int ldc_state(struct ldc_channel *lp)
{
        return lp->state;
}
EXPORT_SYMBOL(ldc_state);
1463
/* Set the channel's LDC_STATE_* value, logging the transition when
 * STATE debugging is enabled.
 */
void ldc_set_state(struct ldc_channel *lp, u8 state)
{
        ldcdbg(STATE, "STATE (%s) --> (%s)\n",
               state_to_str(lp->state),
               state_to_str(state));

        lp->state = state;
}
EXPORT_SYMBOL(ldc_set_state);
1473
/* Return the channel's configured LDC_MODE_* value. */
int ldc_mode(struct ldc_channel *lp)
{
        return lp->cfg.mode;
}
EXPORT_SYMBOL(ldc_mode);
1479
/* Flush the RX queue by advancing the head all the way to the tail. */
int ldc_rx_reset(struct ldc_channel *lp)
{
        return __set_rx_head(lp, lp->rx_tail);
}
EXPORT_SYMBOL(ldc_rx_reset);
1485
/* Dump the channel's cached state (flags, queue pointers, sequence
 * numbers) to the kernel log, tagged with the caller's name.
 */
void __ldc_print(struct ldc_channel *lp, const char *caller)
{
        pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n"
                "\trx_h=0x%lx rx_t=0x%lx rx_n=%ld\n"
                "\ttx_h=0x%lx tx_t=0x%lx tx_n=%ld\n"
                "\trcv_nxt=%u snd_nxt=%u\n",
                caller, lp->id, lp->flags, state_to_str(lp->state),
                lp->chan_state, lp->hs_state,
                lp->rx_head, lp->rx_tail, lp->rx_num_entries,
                lp->tx_head, lp->tx_tail, lp->tx_num_entries,
                lp->rcv_nxt, lp->snd_nxt);
}
EXPORT_SYMBOL(__ldc_print);
1499
/* RAW mode write: send the buffer as a single bare packet with no
 * header.  Returns the number of bytes written, -EBUSY if the TX
 * state cannot be read, -EMSGSIZE if size exceeds one packet,
 * -EAGAIN if the TX queue is full, or the LDC_ABORT() result when
 * the channel is not up.  Called with lp->lock held (see ldc_write).
 */
static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
{
        struct ldc_packet *p;
        unsigned long new_tail, hv_err;
        int err;

        hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
                                        &lp->chan_state);
        if (unlikely(hv_err))
                return -EBUSY;

        if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
                return LDC_ABORT(lp);

        if (size > LDC_PACKET_SIZE)
                return -EMSGSIZE;

        p = data_get_tx_packet(lp, &new_tail);
        if (!p)
                return -EAGAIN;

        memcpy(p, buf, size);

        err = send_tx_packet(lp, p, new_tail);
        if (!err)
                err = size;

        return err;
}
1529
/* RAW mode read: copy exactly one packet out of the RX queue.  The
 * caller's buffer must hold at least LDC_PACKET_SIZE bytes.  Returns
 * LDC_PACKET_SIZE on success, 0 if the queue is empty, -EINVAL for a
 * short buffer, -ECONNRESET if the channel went down, or the
 * LDC_ABORT() result on a hypervisor error.
 */
static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
{
        struct ldc_packet *p;
        unsigned long hv_err, new;
        int err;

        if (size < LDC_PACKET_SIZE)
                return -EINVAL;

        hv_err = sun4v_ldc_rx_get_state(lp->id,
                                        &lp->rx_head,
                                        &lp->rx_tail,
                                        &lp->chan_state);
        if (hv_err)
                return LDC_ABORT(lp);

        if (lp->chan_state == LDC_CHANNEL_DOWN ||
            lp->chan_state == LDC_CHANNEL_RESETTING)
                return -ECONNRESET;

        /* Empty queue: head caught up with tail. */
        if (lp->rx_head == lp->rx_tail)
                return 0;

        p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
        memcpy(buf, p, LDC_PACKET_SIZE);

        /* Consume the packet and publish the new head to the HV. */
        new = rx_advance(lp, lp->rx_head);
        lp->rx_head = new;

        err = __set_rx_head(lp, new);
        if (err < 0)
                err = -ECONNRESET;
        else
                err = LDC_PACKET_SIZE;

        return err;
}
1567
/* Mode operations for RAW channels: bare fixed-size packets. */
static const struct ldc_mode_ops raw_ops = {
        .write          =       write_raw,
        .read           =       read_raw,
};
1572
/* UNRELIABLE/STREAM mode write: fragment the buffer into mss-sized
 * data packets, marking the first with LDC_START and the last with
 * LDC_STOP, each carrying the next sequence number.  snd_nxt is only
 * committed once the new tail has been accepted.  Returns the number
 * of bytes written, -EBUSY/-EAGAIN on transient failures, or the
 * LDC_ABORT() result when the channel is not up.
 */
static int write_nonraw(struct ldc_channel *lp, const void *buf,
                        unsigned int size)
{
        unsigned long hv_err, tail;
        unsigned int copied;
        u32 seq;
        int err;

        hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
                                        &lp->chan_state);
        if (unlikely(hv_err))
                return -EBUSY;

        if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
                return LDC_ABORT(lp);

        /* All-or-nothing: require room for the whole message. */
        if (!tx_has_space_for(lp, size))
                return -EAGAIN;

        seq = lp->snd_nxt;
        copied = 0;
        tail = lp->tx_tail;
        while (copied < size) {
                struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
                /* UNRELIABLE packets carry a larger payload area than
                 * reliable (STREAM) packets, which reserve ackid space.
                 */
                u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
                            p->u.u_data :
                            p->u.r.r_data);
                int data_len;

                p->type = LDC_DATA;
                p->stype = LDC_INFO;
                p->ctrl = 0;

                data_len = size - copied;
                if (data_len > lp->mss)
                        data_len = lp->mss;

                BUG_ON(data_len > LDC_LEN);

                /* Fragment framing: START on first, STOP on last. */
                p->env = (data_len |
                          (copied == 0 ? LDC_START : 0) |
                          (data_len == size - copied ? LDC_STOP : 0));

                p->seqid = ++seq;

                ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
                       p->type,
                       p->stype,
                       p->ctrl,
                       p->env,
                       p->seqid);

                memcpy(data, buf, data_len);
                buf += data_len;
                copied += data_len;

                tail = tx_advance(lp, tail);
        }

        err = set_tx_tail(lp, tail);
        if (!err) {
                lp->snd_nxt = seq;
                err = size;
        }

        return err;
}
1640
/* Handle a packet that failed the receive sequence check: NACK it and
 * flush the RX queue.  If we were mid-way through a fragmented packet,
 * rewind rcv_nxt to just before the first fragment so the peer's
 * retransmission can resynchronize us.
 */
static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
                      struct ldc_packet *first_frag)
{
        int err;

        if (first_frag)
                lp->rcv_nxt = first_frag->seqid - 1;

        err = send_data_nack(lp, p);
        if (err)
                return err;

        err = ldc_rx_reset(lp);
        if (err < 0)
                return LDC_ABORT(lp);

        return 0;
}
1659
1660static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1661{
1662        if (p->stype & LDC_ACK) {
1663                int err = process_data_ack(lp, p);
1664                if (err)
1665                        return err;
1666        }
1667        if (p->stype & LDC_NACK)
1668                return LDC_ABORT(lp);
1669
1670        return 0;
1671}
1672
/* Busy-wait (up to 1000 x 1us) for more RX data to arrive while in the
 * middle of reassembling a fragmented packet.  Returns 0 when the tail
 * has moved past cur_head, -EAGAIN on timeout, -ECONNRESET if the
 * channel went down, or the LDC_ABORT() result on a hypervisor error.
 */
static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
{
        unsigned long dummy;
        int limit = 1000;

        ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
               cur_head, lp->rx_head, lp->rx_tail);
        while (limit-- > 0) {
                unsigned long hv_err;

                hv_err = sun4v_ldc_rx_get_state(lp->id,
                                                &dummy,
                                                &lp->rx_tail,
                                                &lp->chan_state);
                if (hv_err)
                        return LDC_ABORT(lp);

                if (lp->chan_state == LDC_CHANNEL_DOWN ||
                    lp->chan_state == LDC_CHANNEL_RESETTING)
                        return -ECONNRESET;

                if (cur_head != lp->rx_tail) {
                        ldcdbg(DATA, "DATA WAIT DONE "
                               "head[%lx] tail[%lx] chan_state[%lx]\n",
                               dummy, lp->rx_tail, lp->chan_state);
                        return 0;
                }

                udelay(1);
        }
        return -EAGAIN;
}
1705
1706static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1707{
1708        int err = __set_rx_head(lp, head);
1709
1710        if (err < 0)
1711                return LDC_ABORT(lp);
1712
1713        lp->rx_head = head;
1714        return 0;
1715}
1716
/* Best-effort transmission of a DATA/ACK packet acknowledging
 * everything up to rcv_nxt.  Silently does nothing if no TX packet
 * slot is available.
 */
static void send_data_ack(struct ldc_channel *lp)
{
        unsigned long new_tail;
        struct ldc_packet *p;

        p = data_get_tx_packet(lp, &new_tail);
        if (likely(p)) {
                int err;

                memset(p, 0, sizeof(*p));
                p->type = LDC_DATA;
                p->stype = LDC_ACK;
                p->ctrl = 0;
                p->seqid = lp->snd_nxt + 1;
                p->u.r.ackid = lp->rcv_nxt;

                err = send_tx_packet(lp, p, new_tail);
                if (!err)
                        lp->snd_nxt++;
        }
}
1738
/* UNRELIABLE/STREAM mode read: walk the RX queue reassembling one
 * logical message (START fragment through STOP fragment) into the
 * caller's buffer, processing any control frames and ACK/NACKs seen
 * along the way.  Returns the number of bytes copied (0 if the queue
 * was empty), or a negative error; in reliable modes a successful
 * nonzero read is followed by a DATA/ACK to the peer.
 */
static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
{
        struct ldc_packet *first_frag;
        unsigned long hv_err, new;
        int err, copied;

        hv_err = sun4v_ldc_rx_get_state(lp->id,
                                        &lp->rx_head,
                                        &lp->rx_tail,
                                        &lp->chan_state);
        if (hv_err)
                return LDC_ABORT(lp);

        if (lp->chan_state == LDC_CHANNEL_DOWN ||
            lp->chan_state == LDC_CHANNEL_RESETTING)
                return -ECONNRESET;

        if (lp->rx_head == lp->rx_tail)
                return 0;

        /* first_frag remembers where the current message began so we
         * can rewind rcv_nxt if reassembly fails part-way through.
         */
        first_frag = NULL;
        copied = err = 0;
        new = lp->rx_head;
        while (1) {
                struct ldc_packet *p;
                int pkt_len;

                BUG_ON(new == lp->rx_tail);
                p = lp->rx_base + (new / LDC_PACKET_SIZE);

                ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
                       "rcv_nxt[%08x]\n",
                       p->type,
                       p->stype,
                       p->ctrl,
                       p->env,
                       p->seqid,
                       p->u.r.ackid,
                       lp->rcv_nxt);

                if (unlikely(!rx_seq_ok(lp, p->seqid))) {
                        err = rx_bad_seq(lp, p, first_frag);
                        copied = 0;
                        break;
                }

                if (p->type & LDC_CTRL) {
                        err = process_control_frame(lp, p);
                        if (err < 0)
                                break;
                        err = 0;
                }

                lp->rcv_nxt = p->seqid;

                /*
                 * If this is a control-only packet, there is nothing
                 * else to do but advance the rx queue since the packet
                 * was already processed above.
                 */
                if (!(p->type & LDC_DATA)) {
                        new = rx_advance(lp, new);
                        break;
                }
                if (p->stype & (LDC_ACK | LDC_NACK)) {
                        err = data_ack_nack(lp, p);
                        if (err)
                                break;
                }
                /* No INFO bit means the packet carries no payload. */
                if (!(p->stype & LDC_INFO)) {
                        new = rx_advance(lp, new);
                        err = rx_set_head(lp, new);
                        if (err)
                                break;
                        goto no_data;
                }

                pkt_len = p->env & LDC_LEN;

                /* Every initial packet starts with the START bit set.
                 *
                 * Singleton packets will have both START+STOP set.
                 *
                 * Fragments will have START set in the first frame, STOP
                 * set in the last frame, and neither bit set in middle
                 * frames of the packet.
                 *
                 * Therefore if we are at the beginning of a packet and
                 * we don't see START, or we are in the middle of a fragmented
                 * packet and do see START, we are unsynchronized and should
                 * flush the RX queue.
                 */
                if ((first_frag == NULL && !(p->env & LDC_START)) ||
                    (first_frag != NULL &&  (p->env & LDC_START))) {
                        if (!first_frag)
                                new = rx_advance(lp, new);

                        err = rx_set_head(lp, new);
                        if (err)
                                break;

                        if (!first_frag)
                                goto no_data;
                }
                if (!first_frag)
                        first_frag = p;

                if (pkt_len > size - copied) {
                        /* User didn't give us a big enough buffer,
                         * what to do?  This is a pretty serious error.
                         *
                         * Since we haven't updated the RX ring head to
                         * consume any of the packets, signal the error
                         * to the user and just leave the RX ring alone.
                         *
                         * This seems the best behavior because this allows
                         * a user of the LDC layer to start with a small
                         * RX buffer for ldc_read() calls and use -EMSGSIZE
                         * as a cue to enlarge it's read buffer.
                         */
                        err = -EMSGSIZE;
                        break;
                }

                /* Ok, we are gonna eat this one.  */
                new = rx_advance(lp, new);

                memcpy(buf,
                       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
                        p->u.u_data : p->u.r.r_data), pkt_len);
                buf += pkt_len;
                copied += pkt_len;

                if (p->env & LDC_STOP)
                        break;

no_data:
                if (new == lp->rx_tail) {
                        err = rx_data_wait(lp, new);
                        if (err)
                                break;
                }
        }

        if (!err)
                err = rx_set_head(lp, new);

        /* On failure mid-message, rewind so the peer can retransmit. */
        if (err && first_frag)
                lp->rcv_nxt = first_frag->seqid - 1;

        if (!err) {
                err = copied;
                if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
                        send_data_ack(lp);
        }

        return err;
}
1897
/* Mode operations for UNRELIABLE channels: framed, sequenced packets. */
static const struct ldc_mode_ops nonraw_ops = {
        .write          =       write_nonraw,
        .read           =       read_nonraw,
};
1902
1903static int write_stream(struct ldc_channel *lp, const void *buf,
1904                        unsigned int size)
1905{
1906        if (size > lp->cfg.mtu)
1907                size = lp->cfg.mtu;
1908        return write_nonraw(lp, buf, size);
1909}
1910
/* STREAM mode read: refill the per-channel reassembly buffer (mssbuf)
 * with one full message when it is empty, then serve the caller from
 * it in arbitrary-sized chunks.  Returns the number of bytes copied
 * or a negative error from read_nonraw().
 */
static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
{
        if (!lp->mssbuf_len) {
                int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
                if (err < 0)
                        return err;

                lp->mssbuf_len = err;
                lp->mssbuf_off = 0;
        }

        /* Hand out at most what remains buffered. */
        if (size > lp->mssbuf_len)
                size = lp->mssbuf_len;
        memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);

        lp->mssbuf_off += size;
        lp->mssbuf_len -= size;

        return size;
}
1931
/* Mode ops for STREAM channels: layered on the nonraw packet code with
 * MTU clamping on write and mssbuf reassembly on read.
 */
static const struct ldc_mode_ops stream_ops = {
	.write		=	write_stream,
	.read		=	read_stream,
};
1936
1937int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1938{
1939        unsigned long flags;
1940        int err;
1941
1942        if (!buf)
1943                return -EINVAL;
1944
1945        if (!size)
1946                return 0;
1947
1948        spin_lock_irqsave(&lp->lock, flags);
1949
1950        if (lp->hs_state != LDC_HS_COMPLETE)
1951                err = -ENOTCONN;
1952        else
1953                err = lp->mops->write(lp, buf, size);
1954
1955        spin_unlock_irqrestore(&lp->lock, flags);
1956
1957        return err;
1958}
1959EXPORT_SYMBOL(ldc_write);
1960
1961int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1962{
1963        unsigned long flags;
1964        int err;
1965
1966        ldcdbg(RX, "%s: entered size=%d\n", __func__, size);
1967
1968        if (!buf)
1969                return -EINVAL;
1970
1971        if (!size)
1972                return 0;
1973
1974        spin_lock_irqsave(&lp->lock, flags);
1975
1976        if (lp->hs_state != LDC_HS_COMPLETE)
1977                err = -ENOTCONN;
1978        else
1979                err = lp->mops->read(lp, buf, size);
1980
1981        spin_unlock_irqrestore(&lp->lock, flags);
1982
1983        ldcdbg(RX, "%s: mode=%d, head=%lu, tail=%lu rv=%d\n", __func__,
1984               lp->cfg.mode, lp->rx_head, lp->rx_tail, err);
1985
1986        return err;
1987}
1988EXPORT_SYMBOL(ldc_read);
1989
1990static u64 pagesize_code(void)
1991{
1992        switch (PAGE_SIZE) {
1993        default:
1994        case (8ULL * 1024ULL):
1995                return 0;
1996        case (64ULL * 1024ULL):
1997                return 1;
1998        case (512ULL * 1024ULL):
1999                return 2;
2000        case (4ULL * 1024ULL * 1024ULL):
2001                return 3;
2002        case (32ULL * 1024ULL * 1024ULL):
2003                return 4;
2004        case (256ULL * 1024ULL * 1024ULL):
2005                return 5;
2006        }
2007}
2008
2009static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
2010{
2011        return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
2012                (index << PAGE_SHIFT) |
2013                page_offset);
2014}
2015
2016
2017static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
2018                                             unsigned long npages)
2019{
2020        long entry;
2021
2022        entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
2023                                      npages, NULL, (unsigned long)-1, 0);
2024        if (unlikely(entry == IOMMU_ERROR_CODE))
2025                return NULL;
2026
2027        return iommu->page_table + entry;
2028}
2029
2030static u64 perm_to_mte(unsigned int map_perm)
2031{
2032        u64 mte_base;
2033
2034        mte_base = pagesize_code();
2035
2036        if (map_perm & LDC_MAP_SHADOW) {
2037                if (map_perm & LDC_MAP_R)
2038                        mte_base |= LDC_MTE_COPY_R;
2039                if (map_perm & LDC_MAP_W)
2040                        mte_base |= LDC_MTE_COPY_W;
2041        }
2042        if (map_perm & LDC_MAP_DIRECT) {
2043                if (map_perm & LDC_MAP_R)
2044                        mte_base |= LDC_MTE_READ;
2045                if (map_perm & LDC_MAP_W)
2046                        mte_base |= LDC_MTE_WRITE;
2047                if (map_perm & LDC_MAP_X)
2048                        mte_base |= LDC_MTE_EXEC;
2049        }
2050        if (map_perm & LDC_MAP_IO) {
2051                if (map_perm & LDC_MAP_R)
2052                        mte_base |= LDC_MTE_IOMMU_R;
2053                if (map_perm & LDC_MAP_W)
2054                        mte_base |= LDC_MTE_IOMMU_W;
2055        }
2056
2057        return mte_base;
2058}
2059
2060static int pages_in_region(unsigned long base, long len)
2061{
2062        int count = 0;
2063
2064        do {
2065                unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2066
2067                len -= (new - base);
2068                base = new;
2069                count++;
2070        } while (len > 0);
2071
2072        return count;
2073}
2074
/* Scratch state threaded through fill_cookies() while building the
 * IOMMU map-table entries and transfer cookies for one mapping request.
 */
struct cookie_state {
	struct ldc_mtable_entry		*page_table;	/* channel's IOMMU map table */
	struct ldc_trans_cookie		*cookies;	/* caller-supplied output array */
	u64				mte_base;	/* perm+pgsz bits OR'd into each MTE */
	u64				prev_cookie;	/* end of previous cookie, for merging */
	u32				pte_idx;	/* next map-table slot to fill */
	u32				nc;		/* cookies emitted so far */
};
2083
/* Populate map-table entries and transfer cookies for the physical
 * region starting at page-aligned 'pa', beginning 'off' bytes into the
 * first page and covering 'len' bytes.  One MTE is written per page;
 * a cookie that lands exactly where the previous one ended is merged
 * into it instead of starting a new array entry, so sp->nc can be
 * smaller than the page count.
 */
static void fill_cookies(struct cookie_state *sp, unsigned long pa,
			 unsigned long off, unsigned long len)
{
	do {
		unsigned long tlen, new = pa + PAGE_SIZE;
		u64 this_cookie;

		/* One map-table entry per physical page, permissions included. */
		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;

		/* Bytes this page contributes: a partial first page when
		 * off != 0, otherwise a full page, capped by what's left.
		 */
		tlen = PAGE_SIZE;
		if (off)
			tlen = PAGE_SIZE - off;
		if (tlen > len)
			tlen = len;

		this_cookie = make_cookie(sp->pte_idx,
					  pagesize_code(), off);

		/* Only the very first page can start at a nonzero offset. */
		off = 0;

		if (this_cookie == sp->prev_cookie) {
			/* Contiguous with the last cookie: just grow it. */
			sp->cookies[sp->nc - 1].cookie_size += tlen;
		} else {
			sp->cookies[sp->nc].cookie_addr = this_cookie;
			sp->cookies[sp->nc].cookie_size = tlen;
			sp->nc++;
		}
		/* Remember where this cookie ends for the merge test above. */
		sp->prev_cookie = this_cookie + tlen;

		sp->pte_idx++;

		len -= tlen;
		pa = new;
	} while (len > 0);
}
2119
2120static int sg_count_one(struct scatterlist *sg)
2121{
2122        unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2123        long len = sg->length;
2124
2125        if ((sg->offset | len) & (8UL - 1))
2126                return -EFAULT;
2127
2128        return pages_in_region(base + sg->offset, len);
2129}
2130
2131static int sg_count_pages(struct scatterlist *sg, int num_sg)
2132{
2133        int count;
2134        int i;
2135
2136        count = 0;
2137        for (i = 0; i < num_sg; i++) {
2138                int err = sg_count_one(sg + i);
2139                if (err < 0)
2140                        return err;
2141                count += err;
2142        }
2143
2144        return count;
2145}
2146
2147int ldc_map_sg(struct ldc_channel *lp,
2148               struct scatterlist *sg, int num_sg,
2149               struct ldc_trans_cookie *cookies, int ncookies,
2150               unsigned int map_perm)
2151{
2152        unsigned long i, npages;
2153        struct ldc_mtable_entry *base;
2154        struct cookie_state state;
2155        struct ldc_iommu *iommu;
2156        int err;
2157        struct scatterlist *s;
2158
2159        if (map_perm & ~LDC_MAP_ALL)
2160                return -EINVAL;
2161
2162        err = sg_count_pages(sg, num_sg);
2163        if (err < 0)
2164                return err;
2165
2166        npages = err;
2167        if (err > ncookies)
2168                return -EMSGSIZE;
2169
2170        iommu = &lp->iommu;
2171
2172        base = alloc_npages(iommu, npages);
2173
2174        if (!base)
2175                return -ENOMEM;
2176
2177        state.page_table = iommu->page_table;
2178        state.cookies = cookies;
2179        state.mte_base = perm_to_mte(map_perm);
2180        state.prev_cookie = ~(u64)0;
2181        state.pte_idx = (base - iommu->page_table);
2182        state.nc = 0;
2183
2184        for_each_sg(sg, s, num_sg, i) {
2185                fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
2186                             s->offset, s->length);
2187        }
2188
2189        return state.nc;
2190}
2191EXPORT_SYMBOL(ldc_map_sg);
2192
2193int ldc_map_single(struct ldc_channel *lp,
2194                   void *buf, unsigned int len,
2195                   struct ldc_trans_cookie *cookies, int ncookies,
2196                   unsigned int map_perm)
2197{
2198        unsigned long npages, pa;
2199        struct ldc_mtable_entry *base;
2200        struct cookie_state state;
2201        struct ldc_iommu *iommu;
2202
2203        if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2204                return -EINVAL;
2205
2206        pa = __pa(buf);
2207        if ((pa | len) & (8UL - 1))
2208                return -EFAULT;
2209
2210        npages = pages_in_region(pa, len);
2211
2212        iommu = &lp->iommu;
2213
2214        base = alloc_npages(iommu, npages);
2215
2216        if (!base)
2217                return -ENOMEM;
2218
2219        state.page_table = iommu->page_table;
2220        state.cookies = cookies;
2221        state.mte_base = perm_to_mte(map_perm);
2222        state.prev_cookie = ~(u64)0;
2223        state.pte_idx = (base - iommu->page_table);
2224        state.nc = 0;
2225        fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2226        BUG_ON(state.nc > ncookies);
2227
2228        return state.nc;
2229}
2230EXPORT_SYMBOL(ldc_map_single);
2231
2232
2233static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2234                        u64 cookie, u64 size)
2235{
2236        unsigned long npages, entry;
2237
2238        npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2239
2240        entry = ldc_cookie_to_index(cookie, iommu);
2241        ldc_demap(iommu, id, cookie, entry, npages);
2242        iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
2243}
2244
2245void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2246               int ncookies)
2247{
2248        struct ldc_iommu *iommu = &lp->iommu;
2249        int i;
2250        unsigned long flags;
2251
2252        spin_lock_irqsave(&iommu->lock, flags);
2253        for (i = 0; i < ncookies; i++) {
2254                u64 addr = cookies[i].cookie_addr;
2255                u64 size = cookies[i].cookie_size;
2256
2257                free_npages(lp->id, iommu, addr, size);
2258        }
2259        spin_unlock_irqrestore(&iommu->lock, flags);
2260}
2261EXPORT_SYMBOL(ldc_unmap);
2262
/* Copy 'len' bytes between the local buffer 'buf' and the remote
 * memory described by 'cookies', starting 'offset' bytes into the
 * cookie-described region, using the hypervisor LDC copy service.
 * copy_dir selects LDC_COPY_IN or LDC_COPY_OUT.  Returns the number
 * of bytes actually copied (may be short if the cookies run out), or
 * -EINVAL / -EFAULT / -ECONNRESET as below.
 */
int ldc_copy(struct ldc_channel *lp, int copy_dir,
	     void *buf, unsigned int len, unsigned long offset,
	     struct ldc_trans_cookie *cookies, int ncookies)
{
	unsigned int orig_len;
	unsigned long ra;
	int i;

	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
		       lp->id, copy_dir);
		return -EINVAL;
	}

	/* The HV copy interface works on 8-byte aligned real addresses,
	 * lengths and offsets.
	 */
	ra = __pa(buf);
	if ((ra | len | offset) & (8UL - 1)) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
		       "ra[%lx] len[%x] offset[%lx]\n",
		       lp->id, ra, len, offset);
		return -EFAULT;
	}

	if (lp->hs_state != LDC_HS_COMPLETE ||
	    (lp->flags & LDC_FLAG_RESET)) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
		return -ECONNRESET;
	}

	orig_len = len;
	for (i = 0; i < ncookies; i++) {
		unsigned long cookie_raddr = cookies[i].cookie_addr;
		unsigned long this_len = cookies[i].cookie_size;
		unsigned long actual_len;

		/* Consume the starting 'offset' by skipping whole (or
		 * partial) cookies until it is used up.
		 */
		if (unlikely(offset)) {
			unsigned long this_off = offset;

			if (this_off > this_len)
				this_off = this_len;

			offset -= this_off;
			this_len -= this_off;
			if (!this_len)
				continue;
			cookie_raddr += this_off;
		}

		if (this_len > len)
			this_len = len;

		/* The hypervisor may copy fewer bytes than asked for;
		 * keep retrying until this cookie's span is consumed or
		 * an error is returned.
		 */
		while (1) {
			unsigned long hv_err;

			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
						cookie_raddr, ra,
						this_len, &actual_len);
			if (unlikely(hv_err)) {
				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
				       "HV error %lu\n",
				       lp->id, hv_err);
				/* Distinguish a dropped link from a bad
				 * mapping so callers can react properly.
				 */
				if (lp->hs_state != LDC_HS_COMPLETE ||
				    (lp->flags & LDC_FLAG_RESET))
					return -ECONNRESET;
				else
					return -EFAULT;
			}

			cookie_raddr += actual_len;
			ra += actual_len;
			len -= actual_len;
			if (actual_len == this_len)
				break;

			this_len -= actual_len;
		}

		if (!len)
			break;
	}

	/* It is caller policy what to do about short copies.
	 * For example, a networking driver can declare the
	 * packet a runt and drop it.
	 */

	return orig_len - len;
}
EXPORT_SYMBOL(ldc_copy);
2352
2353void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2354                          struct ldc_trans_cookie *cookies, int *ncookies,
2355                          unsigned int map_perm)
2356{
2357        void *buf;
2358        int err;
2359
2360        if (len & (8UL - 1))
2361                return ERR_PTR(-EINVAL);
2362
2363        buf = kzalloc(len, GFP_ATOMIC);
2364        if (!buf)
2365                return ERR_PTR(-ENOMEM);
2366
2367        err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2368        if (err < 0) {
2369                kfree(buf);
2370                return ERR_PTR(err);
2371        }
2372        *ncookies = err;
2373
2374        return buf;
2375}
2376EXPORT_SYMBOL(ldc_alloc_exp_dring);
2377
/* Undo ldc_alloc_exp_dring(): tear down the IOMMU mappings first,
 * then release the ring memory.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	ldc_unmap(lp, cookies, ncookies);
	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
2385
2386static int __init ldc_init(void)
2387{
2388        unsigned long major, minor;
2389        struct mdesc_handle *hp;
2390        const u64 *v;
2391        int err;
2392        u64 mp;
2393
2394        hp = mdesc_grab();
2395        if (!hp)
2396                return -ENODEV;
2397
2398        mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2399        err = -ENODEV;
2400        if (mp == MDESC_NODE_NULL)
2401                goto out;
2402
2403        v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2404        if (!v)
2405                goto out;
2406
2407        major = 1;
2408        minor = 0;
2409        if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2410                printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2411                goto out;
2412        }
2413
2414        printk(KERN_INFO "%s", version);
2415
2416        if (!*v) {
2417                printk(KERN_INFO PFX "Domaining disabled.\n");
2418                goto out;
2419        }
2420        ldom_domaining_enabled = 1;
2421        err = 0;
2422
2423out:
2424        mdesc_release(hp);
2425        return err;
2426}
2427
2428core_initcall(ldc_init);
2429