qemu/hw/misc/milkymist-pfpu.c
<<
>>
Prefs
   1/*
   2 *  QEMU model of the Milkymist programmable FPU.
   3 *
   4 *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 *
  19 *
  20 * Specification available at:
  21 *   http://milkymist.walle.cc/socdoc/pfpu.pdf
  22 *
  23 */
  24
  25#include "qemu/osdep.h"
  26#include "hw/hw.h"
  27#include "hw/sysbus.h"
  28#include "trace.h"
  29#include "qemu/log.h"
  30#include "qemu/module.h"
  31#include "qemu/error-report.h"
  32#include <math.h>
  33
  34/* #define TRACE_EXEC */
  35
  36#ifdef TRACE_EXEC
  37#    define D_EXEC(x) x
  38#else
  39#    define D_EXEC(x)
  40#endif
  41
  42enum {
  43    R_CTL = 0,
  44    R_MESHBASE,
  45    R_HMESHLAST,
  46    R_VMESHLAST,
  47    R_CODEPAGE,
  48    R_VERTICES,
  49    R_COLLISIONS,
  50    R_STRAYWRITES,
  51    R_LASTDMA,
  52    R_PC,
  53    R_DREGBASE,
  54    R_CODEBASE,
  55    R_MAX
  56};
  57
  58enum {
  59    CTL_START_BUSY = (1<<0),
  60};
  61
  62enum {
  63    OP_NOP = 0,
  64    OP_FADD,
  65    OP_FSUB,
  66    OP_FMUL,
  67    OP_FABS,
  68    OP_F2I,
  69    OP_I2F,
  70    OP_VECTOUT,
  71    OP_SIN,
  72    OP_COS,
  73    OP_ABOVE,
  74    OP_EQUAL,
  75    OP_COPY,
  76    OP_IF,
  77    OP_TSIGN,
  78    OP_QUAKE,
  79};
  80
  81enum {
  82    GPR_X = 0,
  83    GPR_Y = 1,
  84    GPR_FLAGS = 2,
  85};
  86
  87enum {
  88    LATENCY_FADD = 5,
  89    LATENCY_FSUB = 5,
  90    LATENCY_FMUL = 7,
  91    LATENCY_FABS = 2,
  92    LATENCY_F2I = 2,
  93    LATENCY_I2F = 3,
  94    LATENCY_VECTOUT = 0,
  95    LATENCY_SIN = 4,
  96    LATENCY_COS = 4,
  97    LATENCY_ABOVE = 2,
  98    LATENCY_EQUAL = 2,
  99    LATENCY_COPY = 2,
 100    LATENCY_IF = 2,
 101    LATENCY_TSIGN = 2,
 102    LATENCY_QUAKE = 2,
 103    MAX_LATENCY = 7
 104};
 105
 106#define GPR_BEGIN       0x100
 107#define GPR_END         0x17f
 108#define MICROCODE_BEGIN 0x200
 109#define MICROCODE_END   0x3ff
 110#define MICROCODE_WORDS 2048
 111
 112#define REINTERPRET_CAST(type, val) (*((type *)&(val)))
 113
 114#ifdef TRACE_EXEC
 115static const char *opcode_to_str[] = {
 116    "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
 117    "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
 118};
 119#endif
 120
 121#define TYPE_MILKYMIST_PFPU "milkymist-pfpu"
 122#define MILKYMIST_PFPU(obj) \
 123    OBJECT_CHECK(MilkymistPFPUState, (obj), TYPE_MILKYMIST_PFPU)
 124
 125struct MilkymistPFPUState {
 126    SysBusDevice parent_obj;
 127
 128    MemoryRegion regs_region;
 129    Chardev *chr;
 130    qemu_irq irq;
 131
 132    uint32_t regs[R_MAX];
 133    uint32_t gp_regs[128];
 134    uint32_t microcode[MICROCODE_WORDS];
 135
 136    int output_queue_pos;
 137    uint32_t output_queue[MAX_LATENCY];
 138};
 139typedef struct MilkymistPFPUState MilkymistPFPUState;
 140
 141static inline uint32_t
 142get_dma_address(uint32_t base, uint32_t x, uint32_t y)
 143{
 144    return base + 8 * (128 * y + x);
 145}
 146
 147static inline void
 148output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
 149{
 150    s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
 151}
 152
 153static inline uint32_t
 154output_queue_remove(MilkymistPFPUState *s)
 155{
 156    return s->output_queue[s->output_queue_pos];
 157}
 158
 159static inline void
 160output_queue_advance(MilkymistPFPUState *s)
 161{
 162    s->output_queue[s->output_queue_pos] = 0;
 163    s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
 164}
 165
 166static int pfpu_decode_insn(MilkymistPFPUState *s)
 167{
 168    uint32_t pc = s->regs[R_PC];
 169    uint32_t insn = s->microcode[pc];
 170    uint32_t reg_a = (insn >> 18) & 0x7f;
 171    uint32_t reg_b = (insn >> 11) & 0x7f;
 172    uint32_t op = (insn >> 7) & 0xf;
 173    uint32_t reg_d = insn & 0x7f;
 174    uint32_t r = 0;
 175    int latency = 0;
 176
 177    switch (op) {
 178    case OP_NOP:
 179        break;
 180    case OP_FADD:
 181    {
 182        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 183        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 184        float t = a + b;
 185        r = REINTERPRET_CAST(uint32_t, t);
 186        latency = LATENCY_FADD;
 187        D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 188    } break;
 189    case OP_FSUB:
 190    {
 191        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 192        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 193        float t = a - b;
 194        r = REINTERPRET_CAST(uint32_t, t);
 195        latency = LATENCY_FSUB;
 196        D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 197    } break;
 198    case OP_FMUL:
 199    {
 200        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 201        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 202        float t = a * b;
 203        r = REINTERPRET_CAST(uint32_t, t);
 204        latency = LATENCY_FMUL;
 205        D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 206    } break;
 207    case OP_FABS:
 208    {
 209        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 210        float t = fabsf(a);
 211        r = REINTERPRET_CAST(uint32_t, t);
 212        latency = LATENCY_FABS;
 213        D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
 214    } break;
 215    case OP_F2I:
 216    {
 217        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 218        int32_t t = a;
 219        r = REINTERPRET_CAST(uint32_t, t);
 220        latency = LATENCY_F2I;
 221        D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
 222    } break;
 223    case OP_I2F:
 224    {
 225        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 226        float t = a;
 227        r = REINTERPRET_CAST(uint32_t, t);
 228        latency = LATENCY_I2F;
 229        D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
 230    } break;
 231    case OP_VECTOUT:
 232    {
 233        uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
 234        uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
 235        hwaddr dma_ptr =
 236            get_dma_address(s->regs[R_MESHBASE],
 237                    s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
 238        cpu_physical_memory_write(dma_ptr, &a, 4);
 239        cpu_physical_memory_write(dma_ptr + 4, &b, 4);
 240        s->regs[R_LASTDMA] = dma_ptr + 4;
 241        D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
 242        trace_milkymist_pfpu_vectout(a, b, dma_ptr);
 243    } break;
 244    case OP_SIN:
 245    {
 246        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 247        float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
 248        r = REINTERPRET_CAST(uint32_t, t);
 249        latency = LATENCY_SIN;
 250        D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
 251    } break;
 252    case OP_COS:
 253    {
 254        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 255        float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
 256        r = REINTERPRET_CAST(uint32_t, t);
 257        latency = LATENCY_COS;
 258        D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
 259    } break;
 260    case OP_ABOVE:
 261    {
 262        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 263        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 264        float t = (a > b) ? 1.0f : 0.0f;
 265        r = REINTERPRET_CAST(uint32_t, t);
 266        latency = LATENCY_ABOVE;
 267        D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 268    } break;
 269    case OP_EQUAL:
 270    {
 271        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 272        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 273        float t = (a == b) ? 1.0f : 0.0f;
 274        r = REINTERPRET_CAST(uint32_t, t);
 275        latency = LATENCY_EQUAL;
 276        D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 277    } break;
 278    case OP_COPY:
 279    {
 280        r = s->gp_regs[reg_a];
 281        latency = LATENCY_COPY;
 282        D_EXEC(qemu_log("COPY"));
 283    } break;
 284    case OP_IF:
 285    {
 286        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 287        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 288        uint32_t f = s->gp_regs[GPR_FLAGS];
 289        float t = (f != 0) ? a : b;
 290        r = REINTERPRET_CAST(uint32_t, t);
 291        latency = LATENCY_IF;
 292        D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
 293    } break;
 294    case OP_TSIGN:
 295    {
 296        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 297        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 298        float t = (b < 0) ? -a : a;
 299        r = REINTERPRET_CAST(uint32_t, t);
 300        latency = LATENCY_TSIGN;
 301        D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 302    } break;
 303    case OP_QUAKE:
 304    {
 305        uint32_t a = s->gp_regs[reg_a];
 306        r = 0x5f3759df - (a >> 1);
 307        latency = LATENCY_QUAKE;
 308        D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
 309    } break;
 310
 311    default:
 312        error_report("milkymist_pfpu: unknown opcode %d", op);
 313        break;
 314    }
 315
 316    if (!reg_d) {
 317        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
 318                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 319                    s->regs[R_PC] + latency));
 320    } else {
 321        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
 322                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 323                    s->regs[R_PC] + latency, reg_d));
 324    }
 325
 326    if (op == OP_VECTOUT) {
 327        return 0;
 328    }
 329
 330    /* store output for this cycle */
 331    if (reg_d) {
 332        uint32_t val = output_queue_remove(s);
 333        D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
 334        s->gp_regs[reg_d] = val;
 335    }
 336
 337    output_queue_advance(s);
 338
 339    /* store op output */
 340    if (op != OP_NOP) {
 341        output_queue_insert(s, r, latency-1);
 342    }
 343
 344    /* advance PC */
 345    s->regs[R_PC]++;
 346
 347    return 1;
 348};
 349
 350static void pfpu_start(MilkymistPFPUState *s)
 351{
 352    int x, y;
 353    int i;
 354
 355    for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
 356        for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
 357            D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
 358
 359            /* set current position */
 360            s->gp_regs[GPR_X] = x;
 361            s->gp_regs[GPR_Y] = y;
 362
 363            /* run microcode on this position */
 364            i = 0;
 365            while (pfpu_decode_insn(s)) {
 366                /* decode at most MICROCODE_WORDS instructions */
 367                if (++i >= MICROCODE_WORDS) {
 368                    error_report("milkymist_pfpu: too many instructions "
 369                            "executed in microcode. No VECTOUT?");
 370                    break;
 371                }
 372            }
 373
 374            /* reset pc for next run */
 375            s->regs[R_PC] = 0;
 376        }
 377    }
 378
 379    s->regs[R_VERTICES] = x * y;
 380
 381    trace_milkymist_pfpu_pulse_irq();
 382    qemu_irq_pulse(s->irq);
 383}
 384
 385static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
 386{
 387    return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
 388}
 389
 390static uint64_t pfpu_read(void *opaque, hwaddr addr,
 391                          unsigned size)
 392{
 393    MilkymistPFPUState *s = opaque;
 394    uint32_t r = 0;
 395
 396    addr >>= 2;
 397    switch (addr) {
 398    case R_CTL:
 399    case R_MESHBASE:
 400    case R_HMESHLAST:
 401    case R_VMESHLAST:
 402    case R_CODEPAGE:
 403    case R_VERTICES:
 404    case R_COLLISIONS:
 405    case R_STRAYWRITES:
 406    case R_LASTDMA:
 407    case R_PC:
 408    case R_DREGBASE:
 409    case R_CODEBASE:
 410        r = s->regs[addr];
 411        break;
 412    case GPR_BEGIN ... GPR_END:
 413        r = s->gp_regs[addr - GPR_BEGIN];
 414        break;
 415    case MICROCODE_BEGIN ...  MICROCODE_END:
 416        r = s->microcode[get_microcode_address(s, addr)];
 417        break;
 418
 419    default:
 420        error_report("milkymist_pfpu: read access to unknown register 0x"
 421                TARGET_FMT_plx, addr << 2);
 422        break;
 423    }
 424
 425    trace_milkymist_pfpu_memory_read(addr << 2, r);
 426
 427    return r;
 428}
 429
 430static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
 431                       unsigned size)
 432{
 433    MilkymistPFPUState *s = opaque;
 434
 435    trace_milkymist_pfpu_memory_write(addr, value);
 436
 437    addr >>= 2;
 438    switch (addr) {
 439    case R_CTL:
 440        if (value & CTL_START_BUSY) {
 441            pfpu_start(s);
 442        }
 443        break;
 444    case R_MESHBASE:
 445    case R_HMESHLAST:
 446    case R_VMESHLAST:
 447    case R_CODEPAGE:
 448    case R_VERTICES:
 449    case R_COLLISIONS:
 450    case R_STRAYWRITES:
 451    case R_LASTDMA:
 452    case R_PC:
 453    case R_DREGBASE:
 454    case R_CODEBASE:
 455        s->regs[addr] = value;
 456        break;
 457    case GPR_BEGIN ...  GPR_END:
 458        s->gp_regs[addr - GPR_BEGIN] = value;
 459        break;
 460    case MICROCODE_BEGIN ...  MICROCODE_END:
 461        s->microcode[get_microcode_address(s, addr)] = value;
 462        break;
 463
 464    default:
 465        error_report("milkymist_pfpu: write access to unknown register 0x"
 466                TARGET_FMT_plx, addr << 2);
 467        break;
 468    }
 469}
 470
 471static const MemoryRegionOps pfpu_mmio_ops = {
 472    .read = pfpu_read,
 473    .write = pfpu_write,
 474    .valid = {
 475        .min_access_size = 4,
 476        .max_access_size = 4,
 477    },
 478    .endianness = DEVICE_NATIVE_ENDIAN,
 479};
 480
 481static void milkymist_pfpu_reset(DeviceState *d)
 482{
 483    MilkymistPFPUState *s = MILKYMIST_PFPU(d);
 484    int i;
 485
 486    for (i = 0; i < R_MAX; i++) {
 487        s->regs[i] = 0;
 488    }
 489    for (i = 0; i < 128; i++) {
 490        s->gp_regs[i] = 0;
 491    }
 492    for (i = 0; i < MICROCODE_WORDS; i++) {
 493        s->microcode[i] = 0;
 494    }
 495    s->output_queue_pos = 0;
 496    for (i = 0; i < MAX_LATENCY; i++) {
 497        s->output_queue[i] = 0;
 498    }
 499}
 500
 501static void milkymist_pfpu_realize(DeviceState *dev, Error **errp)
 502{
 503    MilkymistPFPUState *s = MILKYMIST_PFPU(dev);
 504    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 505
 506    sysbus_init_irq(sbd, &s->irq);
 507
 508    memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,
 509            "milkymist-pfpu", MICROCODE_END * 4);
 510    sysbus_init_mmio(sbd, &s->regs_region);
 511}
 512
 513static const VMStateDescription vmstate_milkymist_pfpu = {
 514    .name = "milkymist-pfpu",
 515    .version_id = 1,
 516    .minimum_version_id = 1,
 517    .fields = (VMStateField[]) {
 518        VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
 519        VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
 520        VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
 521        VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
 522        VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
 523        VMSTATE_END_OF_LIST()
 524    }
 525};
 526
 527static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
 528{
 529    DeviceClass *dc = DEVICE_CLASS(klass);
 530
 531    dc->realize = milkymist_pfpu_realize;
 532    dc->reset = milkymist_pfpu_reset;
 533    dc->vmsd = &vmstate_milkymist_pfpu;
 534}
 535
 536static const TypeInfo milkymist_pfpu_info = {
 537    .name          = TYPE_MILKYMIST_PFPU,
 538    .parent        = TYPE_SYS_BUS_DEVICE,
 539    .instance_size = sizeof(MilkymistPFPUState),
 540    .class_init    = milkymist_pfpu_class_init,
 541};
 542
 543static void milkymist_pfpu_register_types(void)
 544{
 545    type_register_static(&milkymist_pfpu_info);
 546}
 547
 548type_init(milkymist_pfpu_register_types)
 549