qemu/hw/misc/milkymist-pfpu.c
<<
>>
Prefs
   1/*
   2 *  QEMU model of the Milkymist programmable FPU.
   3 *
   4 *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 *
  19 *
  20 * Specification available at:
  21 *   http://milkymist.walle.cc/socdoc/pfpu.pdf
  22 *
  23 */
  24
  25#include "qemu/osdep.h"
  26#include "hw/irq.h"
  27#include "hw/sysbus.h"
  28#include "migration/vmstate.h"
  29#include "trace.h"
  30#include "qemu/log.h"
  31#include "qemu/module.h"
  32#include "qemu/error-report.h"
  33#include <math.h>
  34
  35/* #define TRACE_EXEC */
  36
  37#ifdef TRACE_EXEC
  38#    define D_EXEC(x) x
  39#else
  40#    define D_EXEC(x)
  41#endif
  42
  43enum {
  44    R_CTL = 0,
  45    R_MESHBASE,
  46    R_HMESHLAST,
  47    R_VMESHLAST,
  48    R_CODEPAGE,
  49    R_VERTICES,
  50    R_COLLISIONS,
  51    R_STRAYWRITES,
  52    R_LASTDMA,
  53    R_PC,
  54    R_DREGBASE,
  55    R_CODEBASE,
  56    R_MAX
  57};
  58
  59enum {
  60    CTL_START_BUSY = (1<<0),
  61};
  62
  63enum {
  64    OP_NOP = 0,
  65    OP_FADD,
  66    OP_FSUB,
  67    OP_FMUL,
  68    OP_FABS,
  69    OP_F2I,
  70    OP_I2F,
  71    OP_VECTOUT,
  72    OP_SIN,
  73    OP_COS,
  74    OP_ABOVE,
  75    OP_EQUAL,
  76    OP_COPY,
  77    OP_IF,
  78    OP_TSIGN,
  79    OP_QUAKE,
  80};
  81
  82enum {
  83    GPR_X = 0,
  84    GPR_Y = 1,
  85    GPR_FLAGS = 2,
  86};
  87
  88enum {
  89    LATENCY_FADD = 5,
  90    LATENCY_FSUB = 5,
  91    LATENCY_FMUL = 7,
  92    LATENCY_FABS = 2,
  93    LATENCY_F2I = 2,
  94    LATENCY_I2F = 3,
  95    LATENCY_VECTOUT = 0,
  96    LATENCY_SIN = 4,
  97    LATENCY_COS = 4,
  98    LATENCY_ABOVE = 2,
  99    LATENCY_EQUAL = 2,
 100    LATENCY_COPY = 2,
 101    LATENCY_IF = 2,
 102    LATENCY_TSIGN = 2,
 103    LATENCY_QUAKE = 2,
 104    MAX_LATENCY = 7
 105};
 106
 107#define GPR_BEGIN       0x100
 108#define GPR_END         0x17f
 109#define MICROCODE_BEGIN 0x200
 110#define MICROCODE_END   0x3ff
 111#define MICROCODE_WORDS 2048
 112
 113#define REINTERPRET_CAST(type, val) (*((type *)&(val)))
 114
 115#ifdef TRACE_EXEC
 116static const char *opcode_to_str[] = {
 117    "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
 118    "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
 119};
 120#endif
 121
 122#define TYPE_MILKYMIST_PFPU "milkymist-pfpu"
 123#define MILKYMIST_PFPU(obj) \
 124    OBJECT_CHECK(MilkymistPFPUState, (obj), TYPE_MILKYMIST_PFPU)
 125
 126struct MilkymistPFPUState {
 127    SysBusDevice parent_obj;
 128
 129    MemoryRegion regs_region;
 130    Chardev *chr;
 131    qemu_irq irq;
 132
 133    uint32_t regs[R_MAX];
 134    uint32_t gp_regs[128];
 135    uint32_t microcode[MICROCODE_WORDS];
 136
 137    int output_queue_pos;
 138    uint32_t output_queue[MAX_LATENCY];
 139};
 140typedef struct MilkymistPFPUState MilkymistPFPUState;
 141
 142static inline uint32_t
 143get_dma_address(uint32_t base, uint32_t x, uint32_t y)
 144{
 145    return base + 8 * (128 * y + x);
 146}
 147
 148static inline void
 149output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
 150{
 151    s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
 152}
 153
 154static inline uint32_t
 155output_queue_remove(MilkymistPFPUState *s)
 156{
 157    return s->output_queue[s->output_queue_pos];
 158}
 159
 160static inline void
 161output_queue_advance(MilkymistPFPUState *s)
 162{
 163    s->output_queue[s->output_queue_pos] = 0;
 164    s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
 165}
 166
 167static int pfpu_decode_insn(MilkymistPFPUState *s)
 168{
 169    uint32_t pc = s->regs[R_PC];
 170    uint32_t insn = s->microcode[pc];
 171    uint32_t reg_a = (insn >> 18) & 0x7f;
 172    uint32_t reg_b = (insn >> 11) & 0x7f;
 173    uint32_t op = (insn >> 7) & 0xf;
 174    uint32_t reg_d = insn & 0x7f;
 175    uint32_t r = 0;
 176    int latency = 0;
 177
 178    switch (op) {
 179    case OP_NOP:
 180        break;
 181    case OP_FADD:
 182    {
 183        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 184        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 185        float t = a + b;
 186        r = REINTERPRET_CAST(uint32_t, t);
 187        latency = LATENCY_FADD;
 188        D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 189    } break;
 190    case OP_FSUB:
 191    {
 192        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 193        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 194        float t = a - b;
 195        r = REINTERPRET_CAST(uint32_t, t);
 196        latency = LATENCY_FSUB;
 197        D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 198    } break;
 199    case OP_FMUL:
 200    {
 201        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 202        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 203        float t = a * b;
 204        r = REINTERPRET_CAST(uint32_t, t);
 205        latency = LATENCY_FMUL;
 206        D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 207    } break;
 208    case OP_FABS:
 209    {
 210        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 211        float t = fabsf(a);
 212        r = REINTERPRET_CAST(uint32_t, t);
 213        latency = LATENCY_FABS;
 214        D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
 215    } break;
 216    case OP_F2I:
 217    {
 218        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 219        int32_t t = a;
 220        r = REINTERPRET_CAST(uint32_t, t);
 221        latency = LATENCY_F2I;
 222        D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
 223    } break;
 224    case OP_I2F:
 225    {
 226        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 227        float t = a;
 228        r = REINTERPRET_CAST(uint32_t, t);
 229        latency = LATENCY_I2F;
 230        D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
 231    } break;
 232    case OP_VECTOUT:
 233    {
 234        uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
 235        uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
 236        hwaddr dma_ptr =
 237            get_dma_address(s->regs[R_MESHBASE],
 238                    s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
 239        cpu_physical_memory_write(dma_ptr, &a, 4);
 240        cpu_physical_memory_write(dma_ptr + 4, &b, 4);
 241        s->regs[R_LASTDMA] = dma_ptr + 4;
 242        D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
 243        trace_milkymist_pfpu_vectout(a, b, dma_ptr);
 244    } break;
 245    case OP_SIN:
 246    {
 247        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 248        float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
 249        r = REINTERPRET_CAST(uint32_t, t);
 250        latency = LATENCY_SIN;
 251        D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
 252    } break;
 253    case OP_COS:
 254    {
 255        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 256        float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
 257        r = REINTERPRET_CAST(uint32_t, t);
 258        latency = LATENCY_COS;
 259        D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
 260    } break;
 261    case OP_ABOVE:
 262    {
 263        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 264        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 265        float t = (a > b) ? 1.0f : 0.0f;
 266        r = REINTERPRET_CAST(uint32_t, t);
 267        latency = LATENCY_ABOVE;
 268        D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 269    } break;
 270    case OP_EQUAL:
 271    {
 272        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 273        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 274        float t = (a == b) ? 1.0f : 0.0f;
 275        r = REINTERPRET_CAST(uint32_t, t);
 276        latency = LATENCY_EQUAL;
 277        D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 278    } break;
 279    case OP_COPY:
 280    {
 281        r = s->gp_regs[reg_a];
 282        latency = LATENCY_COPY;
 283        D_EXEC(qemu_log("COPY"));
 284    } break;
 285    case OP_IF:
 286    {
 287        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 288        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 289        uint32_t f = s->gp_regs[GPR_FLAGS];
 290        float t = (f != 0) ? a : b;
 291        r = REINTERPRET_CAST(uint32_t, t);
 292        latency = LATENCY_IF;
 293        D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
 294    } break;
 295    case OP_TSIGN:
 296    {
 297        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 298        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 299        float t = (b < 0) ? -a : a;
 300        r = REINTERPRET_CAST(uint32_t, t);
 301        latency = LATENCY_TSIGN;
 302        D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 303    } break;
 304    case OP_QUAKE:
 305    {
 306        uint32_t a = s->gp_regs[reg_a];
 307        r = 0x5f3759df - (a >> 1);
 308        latency = LATENCY_QUAKE;
 309        D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
 310    } break;
 311
 312    default:
 313        error_report("milkymist_pfpu: unknown opcode %d", op);
 314        break;
 315    }
 316
 317    if (!reg_d) {
 318        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
 319                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 320                    s->regs[R_PC] + latency));
 321    } else {
 322        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
 323                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 324                    s->regs[R_PC] + latency, reg_d));
 325    }
 326
 327    if (op == OP_VECTOUT) {
 328        return 0;
 329    }
 330
 331    /* store output for this cycle */
 332    if (reg_d) {
 333        uint32_t val = output_queue_remove(s);
 334        D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
 335        s->gp_regs[reg_d] = val;
 336    }
 337
 338    output_queue_advance(s);
 339
 340    /* store op output */
 341    if (op != OP_NOP) {
 342        output_queue_insert(s, r, latency-1);
 343    }
 344
 345    /* advance PC */
 346    s->regs[R_PC]++;
 347
 348    return 1;
 349};
 350
 351static void pfpu_start(MilkymistPFPUState *s)
 352{
 353    int x, y;
 354    int i;
 355
 356    for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
 357        for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
 358            D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
 359
 360            /* set current position */
 361            s->gp_regs[GPR_X] = x;
 362            s->gp_regs[GPR_Y] = y;
 363
 364            /* run microcode on this position */
 365            i = 0;
 366            while (pfpu_decode_insn(s)) {
 367                /* decode at most MICROCODE_WORDS instructions */
 368                if (++i >= MICROCODE_WORDS) {
 369                    error_report("milkymist_pfpu: too many instructions "
 370                            "executed in microcode. No VECTOUT?");
 371                    break;
 372                }
 373            }
 374
 375            /* reset pc for next run */
 376            s->regs[R_PC] = 0;
 377        }
 378    }
 379
 380    s->regs[R_VERTICES] = x * y;
 381
 382    trace_milkymist_pfpu_pulse_irq();
 383    qemu_irq_pulse(s->irq);
 384}
 385
 386static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
 387{
 388    return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
 389}
 390
 391static uint64_t pfpu_read(void *opaque, hwaddr addr,
 392                          unsigned size)
 393{
 394    MilkymistPFPUState *s = opaque;
 395    uint32_t r = 0;
 396
 397    addr >>= 2;
 398    switch (addr) {
 399    case R_CTL:
 400    case R_MESHBASE:
 401    case R_HMESHLAST:
 402    case R_VMESHLAST:
 403    case R_CODEPAGE:
 404    case R_VERTICES:
 405    case R_COLLISIONS:
 406    case R_STRAYWRITES:
 407    case R_LASTDMA:
 408    case R_PC:
 409    case R_DREGBASE:
 410    case R_CODEBASE:
 411        r = s->regs[addr];
 412        break;
 413    case GPR_BEGIN ... GPR_END:
 414        r = s->gp_regs[addr - GPR_BEGIN];
 415        break;
 416    case MICROCODE_BEGIN ...  MICROCODE_END:
 417        r = s->microcode[get_microcode_address(s, addr)];
 418        break;
 419
 420    default:
 421        error_report("milkymist_pfpu: read access to unknown register 0x"
 422                TARGET_FMT_plx, addr << 2);
 423        break;
 424    }
 425
 426    trace_milkymist_pfpu_memory_read(addr << 2, r);
 427
 428    return r;
 429}
 430
 431static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
 432                       unsigned size)
 433{
 434    MilkymistPFPUState *s = opaque;
 435
 436    trace_milkymist_pfpu_memory_write(addr, value);
 437
 438    addr >>= 2;
 439    switch (addr) {
 440    case R_CTL:
 441        if (value & CTL_START_BUSY) {
 442            pfpu_start(s);
 443        }
 444        break;
 445    case R_MESHBASE:
 446    case R_HMESHLAST:
 447    case R_VMESHLAST:
 448    case R_CODEPAGE:
 449    case R_VERTICES:
 450    case R_COLLISIONS:
 451    case R_STRAYWRITES:
 452    case R_LASTDMA:
 453    case R_PC:
 454    case R_DREGBASE:
 455    case R_CODEBASE:
 456        s->regs[addr] = value;
 457        break;
 458    case GPR_BEGIN ...  GPR_END:
 459        s->gp_regs[addr - GPR_BEGIN] = value;
 460        break;
 461    case MICROCODE_BEGIN ...  MICROCODE_END:
 462        s->microcode[get_microcode_address(s, addr)] = value;
 463        break;
 464
 465    default:
 466        error_report("milkymist_pfpu: write access to unknown register 0x"
 467                TARGET_FMT_plx, addr << 2);
 468        break;
 469    }
 470}
 471
 472static const MemoryRegionOps pfpu_mmio_ops = {
 473    .read = pfpu_read,
 474    .write = pfpu_write,
 475    .valid = {
 476        .min_access_size = 4,
 477        .max_access_size = 4,
 478    },
 479    .endianness = DEVICE_NATIVE_ENDIAN,
 480};
 481
 482static void milkymist_pfpu_reset(DeviceState *d)
 483{
 484    MilkymistPFPUState *s = MILKYMIST_PFPU(d);
 485    int i;
 486
 487    for (i = 0; i < R_MAX; i++) {
 488        s->regs[i] = 0;
 489    }
 490    for (i = 0; i < 128; i++) {
 491        s->gp_regs[i] = 0;
 492    }
 493    for (i = 0; i < MICROCODE_WORDS; i++) {
 494        s->microcode[i] = 0;
 495    }
 496    s->output_queue_pos = 0;
 497    for (i = 0; i < MAX_LATENCY; i++) {
 498        s->output_queue[i] = 0;
 499    }
 500}
 501
 502static void milkymist_pfpu_realize(DeviceState *dev, Error **errp)
 503{
 504    MilkymistPFPUState *s = MILKYMIST_PFPU(dev);
 505    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 506
 507    sysbus_init_irq(sbd, &s->irq);
 508
 509    memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,
 510            "milkymist-pfpu", MICROCODE_END * 4);
 511    sysbus_init_mmio(sbd, &s->regs_region);
 512}
 513
 514static const VMStateDescription vmstate_milkymist_pfpu = {
 515    .name = "milkymist-pfpu",
 516    .version_id = 1,
 517    .minimum_version_id = 1,
 518    .fields = (VMStateField[]) {
 519        VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
 520        VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
 521        VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
 522        VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
 523        VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
 524        VMSTATE_END_OF_LIST()
 525    }
 526};
 527
 528static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
 529{
 530    DeviceClass *dc = DEVICE_CLASS(klass);
 531
 532    dc->realize = milkymist_pfpu_realize;
 533    dc->reset = milkymist_pfpu_reset;
 534    dc->vmsd = &vmstate_milkymist_pfpu;
 535}
 536
 537static const TypeInfo milkymist_pfpu_info = {
 538    .name          = TYPE_MILKYMIST_PFPU,
 539    .parent        = TYPE_SYS_BUS_DEVICE,
 540    .instance_size = sizeof(MilkymistPFPUState),
 541    .class_init    = milkymist_pfpu_class_init,
 542};
 543
 544static void milkymist_pfpu_register_types(void)
 545{
 546    type_register_static(&milkymist_pfpu_info);
 547}
 548
 549type_init(milkymist_pfpu_register_types)
 550