qemu/hw/misc/milkymist-pfpu.c
<<
>>
Prefs
   1/*
   2 *  QEMU model of the Milkymist programmable FPU.
   3 *
   4 *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 *
  19 *
  20 * Specification available at:
  21 *   http://www.milkymist.org/socdoc/pfpu.pdf
  22 *
  23 */
  24
  25#include "qemu/osdep.h"
  26#include "hw/hw.h"
  27#include "hw/sysbus.h"
  28#include "trace.h"
  29#include "qemu/log.h"
  30#include "qemu/error-report.h"
  31#include <math.h>
  32
  33/* #define TRACE_EXEC */
  34
  35#ifdef TRACE_EXEC
  36#    define D_EXEC(x) x
  37#else
  38#    define D_EXEC(x)
  39#endif
  40
  41enum {
  42    R_CTL = 0,
  43    R_MESHBASE,
  44    R_HMESHLAST,
  45    R_VMESHLAST,
  46    R_CODEPAGE,
  47    R_VERTICES,
  48    R_COLLISIONS,
  49    R_STRAYWRITES,
  50    R_LASTDMA,
  51    R_PC,
  52    R_DREGBASE,
  53    R_CODEBASE,
  54    R_MAX
  55};
  56
  57enum {
  58    CTL_START_BUSY = (1<<0),
  59};
  60
  61enum {
  62    OP_NOP = 0,
  63    OP_FADD,
  64    OP_FSUB,
  65    OP_FMUL,
  66    OP_FABS,
  67    OP_F2I,
  68    OP_I2F,
  69    OP_VECTOUT,
  70    OP_SIN,
  71    OP_COS,
  72    OP_ABOVE,
  73    OP_EQUAL,
  74    OP_COPY,
  75    OP_IF,
  76    OP_TSIGN,
  77    OP_QUAKE,
  78};
  79
  80enum {
  81    GPR_X = 0,
  82    GPR_Y = 1,
  83    GPR_FLAGS = 2,
  84};
  85
  86enum {
  87    LATENCY_FADD = 5,
  88    LATENCY_FSUB = 5,
  89    LATENCY_FMUL = 7,
  90    LATENCY_FABS = 2,
  91    LATENCY_F2I = 2,
  92    LATENCY_I2F = 3,
  93    LATENCY_VECTOUT = 0,
  94    LATENCY_SIN = 4,
  95    LATENCY_COS = 4,
  96    LATENCY_ABOVE = 2,
  97    LATENCY_EQUAL = 2,
  98    LATENCY_COPY = 2,
  99    LATENCY_IF = 2,
 100    LATENCY_TSIGN = 2,
 101    LATENCY_QUAKE = 2,
 102    MAX_LATENCY = 7
 103};
 104
 105#define GPR_BEGIN       0x100
 106#define GPR_END         0x17f
 107#define MICROCODE_BEGIN 0x200
 108#define MICROCODE_END   0x3ff
 109#define MICROCODE_WORDS 2048
 110
 111#define REINTERPRET_CAST(type, val) (*((type *)&(val)))
 112
 113#ifdef TRACE_EXEC
 114static const char *opcode_to_str[] = {
 115    "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
 116    "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
 117};
 118#endif
 119
 120#define TYPE_MILKYMIST_PFPU "milkymist-pfpu"
 121#define MILKYMIST_PFPU(obj) \
 122    OBJECT_CHECK(MilkymistPFPUState, (obj), TYPE_MILKYMIST_PFPU)
 123
 124struct MilkymistPFPUState {
 125    SysBusDevice parent_obj;
 126
 127    MemoryRegion regs_region;
 128    CharDriverState *chr;
 129    qemu_irq irq;
 130
 131    uint32_t regs[R_MAX];
 132    uint32_t gp_regs[128];
 133    uint32_t microcode[MICROCODE_WORDS];
 134
 135    int output_queue_pos;
 136    uint32_t output_queue[MAX_LATENCY];
 137};
 138typedef struct MilkymistPFPUState MilkymistPFPUState;
 139
 140static inline hwaddr
 141get_dma_address(uint32_t base, uint32_t x, uint32_t y)
 142{
 143    return base + 8 * (128 * y + x);
 144}
 145
 146static inline void
 147output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
 148{
 149    s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
 150}
 151
 152static inline uint32_t
 153output_queue_remove(MilkymistPFPUState *s)
 154{
 155    return s->output_queue[s->output_queue_pos];
 156}
 157
 158static inline void
 159output_queue_advance(MilkymistPFPUState *s)
 160{
 161    s->output_queue[s->output_queue_pos] = 0;
 162    s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
 163}
 164
 165static int pfpu_decode_insn(MilkymistPFPUState *s)
 166{
 167    uint32_t pc = s->regs[R_PC];
 168    uint32_t insn = s->microcode[pc];
 169    uint32_t reg_a = (insn >> 18) & 0x7f;
 170    uint32_t reg_b = (insn >> 11) & 0x7f;
 171    uint32_t op = (insn >> 7) & 0xf;
 172    uint32_t reg_d = insn & 0x7f;
 173    uint32_t r = 0;
 174    int latency = 0;
 175
 176    switch (op) {
 177    case OP_NOP:
 178        break;
 179    case OP_FADD:
 180    {
 181        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 182        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 183        float t = a + b;
 184        r = REINTERPRET_CAST(uint32_t, t);
 185        latency = LATENCY_FADD;
 186        D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 187    } break;
 188    case OP_FSUB:
 189    {
 190        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 191        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 192        float t = a - b;
 193        r = REINTERPRET_CAST(uint32_t, t);
 194        latency = LATENCY_FSUB;
 195        D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 196    } break;
 197    case OP_FMUL:
 198    {
 199        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 200        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 201        float t = a * b;
 202        r = REINTERPRET_CAST(uint32_t, t);
 203        latency = LATENCY_FMUL;
 204        D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 205    } break;
 206    case OP_FABS:
 207    {
 208        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 209        float t = fabsf(a);
 210        r = REINTERPRET_CAST(uint32_t, t);
 211        latency = LATENCY_FABS;
 212        D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
 213    } break;
 214    case OP_F2I:
 215    {
 216        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 217        int32_t t = a;
 218        r = REINTERPRET_CAST(uint32_t, t);
 219        latency = LATENCY_F2I;
 220        D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
 221    } break;
 222    case OP_I2F:
 223    {
 224        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 225        float t = a;
 226        r = REINTERPRET_CAST(uint32_t, t);
 227        latency = LATENCY_I2F;
 228        D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
 229    } break;
 230    case OP_VECTOUT:
 231    {
 232        uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
 233        uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
 234        hwaddr dma_ptr =
 235            get_dma_address(s->regs[R_MESHBASE],
 236                    s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
 237        cpu_physical_memory_write(dma_ptr, &a, 4);
 238        cpu_physical_memory_write(dma_ptr + 4, &b, 4);
 239        s->regs[R_LASTDMA] = dma_ptr + 4;
 240        D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
 241        trace_milkymist_pfpu_vectout(a, b, dma_ptr);
 242    } break;
 243    case OP_SIN:
 244    {
 245        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 246        float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
 247        r = REINTERPRET_CAST(uint32_t, t);
 248        latency = LATENCY_SIN;
 249        D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
 250    } break;
 251    case OP_COS:
 252    {
 253        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 254        float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
 255        r = REINTERPRET_CAST(uint32_t, t);
 256        latency = LATENCY_COS;
 257        D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
 258    } break;
 259    case OP_ABOVE:
 260    {
 261        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 262        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 263        float t = (a > b) ? 1.0f : 0.0f;
 264        r = REINTERPRET_CAST(uint32_t, t);
 265        latency = LATENCY_ABOVE;
 266        D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 267    } break;
 268    case OP_EQUAL:
 269    {
 270        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 271        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 272        float t = (a == b) ? 1.0f : 0.0f;
 273        r = REINTERPRET_CAST(uint32_t, t);
 274        latency = LATENCY_EQUAL;
 275        D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 276    } break;
 277    case OP_COPY:
 278    {
 279        r = s->gp_regs[reg_a];
 280        latency = LATENCY_COPY;
 281        D_EXEC(qemu_log("COPY"));
 282    } break;
 283    case OP_IF:
 284    {
 285        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 286        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 287        uint32_t f = s->gp_regs[GPR_FLAGS];
 288        float t = (f != 0) ? a : b;
 289        r = REINTERPRET_CAST(uint32_t, t);
 290        latency = LATENCY_IF;
 291        D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
 292    } break;
 293    case OP_TSIGN:
 294    {
 295        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 296        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 297        float t = (b < 0) ? -a : a;
 298        r = REINTERPRET_CAST(uint32_t, t);
 299        latency = LATENCY_TSIGN;
 300        D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 301    } break;
 302    case OP_QUAKE:
 303    {
 304        uint32_t a = s->gp_regs[reg_a];
 305        r = 0x5f3759df - (a >> 1);
 306        latency = LATENCY_QUAKE;
 307        D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
 308    } break;
 309
 310    default:
 311        error_report("milkymist_pfpu: unknown opcode %d", op);
 312        break;
 313    }
 314
 315    if (!reg_d) {
 316        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
 317                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 318                    s->regs[R_PC] + latency));
 319    } else {
 320        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
 321                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 322                    s->regs[R_PC] + latency, reg_d));
 323    }
 324
 325    if (op == OP_VECTOUT) {
 326        return 0;
 327    }
 328
 329    /* store output for this cycle */
 330    if (reg_d) {
 331        uint32_t val = output_queue_remove(s);
 332        D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
 333        s->gp_regs[reg_d] = val;
 334    }
 335
 336    output_queue_advance(s);
 337
 338    /* store op output */
 339    if (op != OP_NOP) {
 340        output_queue_insert(s, r, latency-1);
 341    }
 342
 343    /* advance PC */
 344    s->regs[R_PC]++;
 345
 346    return 1;
 347};
 348
 349static void pfpu_start(MilkymistPFPUState *s)
 350{
 351    int x, y;
 352    int i;
 353
 354    for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
 355        for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
 356            D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
 357
 358            /* set current position */
 359            s->gp_regs[GPR_X] = x;
 360            s->gp_regs[GPR_Y] = y;
 361
 362            /* run microcode on this position */
 363            i = 0;
 364            while (pfpu_decode_insn(s)) {
 365                /* decode at most MICROCODE_WORDS instructions */
 366                if (++i >= MICROCODE_WORDS) {
 367                    error_report("milkymist_pfpu: too many instructions "
 368                            "executed in microcode. No VECTOUT?");
 369                    break;
 370                }
 371            }
 372
 373            /* reset pc for next run */
 374            s->regs[R_PC] = 0;
 375        }
 376    }
 377
 378    s->regs[R_VERTICES] = x * y;
 379
 380    trace_milkymist_pfpu_pulse_irq();
 381    qemu_irq_pulse(s->irq);
 382}
 383
 384static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
 385{
 386    return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
 387}
 388
 389static uint64_t pfpu_read(void *opaque, hwaddr addr,
 390                          unsigned size)
 391{
 392    MilkymistPFPUState *s = opaque;
 393    uint32_t r = 0;
 394
 395    addr >>= 2;
 396    switch (addr) {
 397    case R_CTL:
 398    case R_MESHBASE:
 399    case R_HMESHLAST:
 400    case R_VMESHLAST:
 401    case R_CODEPAGE:
 402    case R_VERTICES:
 403    case R_COLLISIONS:
 404    case R_STRAYWRITES:
 405    case R_LASTDMA:
 406    case R_PC:
 407    case R_DREGBASE:
 408    case R_CODEBASE:
 409        r = s->regs[addr];
 410        break;
 411    case GPR_BEGIN ... GPR_END:
 412        r = s->gp_regs[addr - GPR_BEGIN];
 413        break;
 414    case MICROCODE_BEGIN ...  MICROCODE_END:
 415        r = s->microcode[get_microcode_address(s, addr)];
 416        break;
 417
 418    default:
 419        error_report("milkymist_pfpu: read access to unknown register 0x"
 420                TARGET_FMT_plx, addr << 2);
 421        break;
 422    }
 423
 424    trace_milkymist_pfpu_memory_read(addr << 2, r);
 425
 426    return r;
 427}
 428
 429static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
 430                       unsigned size)
 431{
 432    MilkymistPFPUState *s = opaque;
 433
 434    trace_milkymist_pfpu_memory_write(addr, value);
 435
 436    addr >>= 2;
 437    switch (addr) {
 438    case R_CTL:
 439        if (value & CTL_START_BUSY) {
 440            pfpu_start(s);
 441        }
 442        break;
 443    case R_MESHBASE:
 444    case R_HMESHLAST:
 445    case R_VMESHLAST:
 446    case R_CODEPAGE:
 447    case R_VERTICES:
 448    case R_COLLISIONS:
 449    case R_STRAYWRITES:
 450    case R_LASTDMA:
 451    case R_PC:
 452    case R_DREGBASE:
 453    case R_CODEBASE:
 454        s->regs[addr] = value;
 455        break;
 456    case GPR_BEGIN ...  GPR_END:
 457        s->gp_regs[addr - GPR_BEGIN] = value;
 458        break;
 459    case MICROCODE_BEGIN ...  MICROCODE_END:
 460        s->microcode[get_microcode_address(s, addr)] = value;
 461        break;
 462
 463    default:
 464        error_report("milkymist_pfpu: write access to unknown register 0x"
 465                TARGET_FMT_plx, addr << 2);
 466        break;
 467    }
 468}
 469
 470static const MemoryRegionOps pfpu_mmio_ops = {
 471    .read = pfpu_read,
 472    .write = pfpu_write,
 473    .valid = {
 474        .min_access_size = 4,
 475        .max_access_size = 4,
 476    },
 477    .endianness = DEVICE_NATIVE_ENDIAN,
 478};
 479
 480static void milkymist_pfpu_reset(DeviceState *d)
 481{
 482    MilkymistPFPUState *s = MILKYMIST_PFPU(d);
 483    int i;
 484
 485    for (i = 0; i < R_MAX; i++) {
 486        s->regs[i] = 0;
 487    }
 488    for (i = 0; i < 128; i++) {
 489        s->gp_regs[i] = 0;
 490    }
 491    for (i = 0; i < MICROCODE_WORDS; i++) {
 492        s->microcode[i] = 0;
 493    }
 494    s->output_queue_pos = 0;
 495    for (i = 0; i < MAX_LATENCY; i++) {
 496        s->output_queue[i] = 0;
 497    }
 498}
 499
 500static int milkymist_pfpu_init(SysBusDevice *dev)
 501{
 502    MilkymistPFPUState *s = MILKYMIST_PFPU(dev);
 503
 504    sysbus_init_irq(dev, &s->irq);
 505
 506    memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,
 507            "milkymist-pfpu", MICROCODE_END * 4);
 508    sysbus_init_mmio(dev, &s->regs_region);
 509
 510    return 0;
 511}
 512
 513static const VMStateDescription vmstate_milkymist_pfpu = {
 514    .name = "milkymist-pfpu",
 515    .version_id = 1,
 516    .minimum_version_id = 1,
 517    .fields = (VMStateField[]) {
 518        VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
 519        VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
 520        VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
 521        VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
 522        VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
 523        VMSTATE_END_OF_LIST()
 524    }
 525};
 526
 527static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
 528{
 529    DeviceClass *dc = DEVICE_CLASS(klass);
 530    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 531
 532    k->init = milkymist_pfpu_init;
 533    dc->reset = milkymist_pfpu_reset;
 534    dc->vmsd = &vmstate_milkymist_pfpu;
 535}
 536
 537static const TypeInfo milkymist_pfpu_info = {
 538    .name          = TYPE_MILKYMIST_PFPU,
 539    .parent        = TYPE_SYS_BUS_DEVICE,
 540    .instance_size = sizeof(MilkymistPFPUState),
 541    .class_init    = milkymist_pfpu_class_init,
 542};
 543
 544static void milkymist_pfpu_register_types(void)
 545{
 546    type_register_static(&milkymist_pfpu_info);
 547}
 548
 549type_init(milkymist_pfpu_register_types)
 550