qemu/hw/milkymist-pfpu.c
<<
>>
Prefs
   1/*
   2 *  QEMU model of the Milkymist programmable FPU.
   3 *
   4 *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 *
  19 *
  20 * Specification available at:
  21 *   http://www.milkymist.org/socdoc/pfpu.pdf
  22 *
  23 */
  24
  25#include "hw.h"
  26#include "sysbus.h"
  27#include "trace.h"
  28#include "qemu-log.h"
  29#include "qemu-error.h"
  30#include <math.h>
  31
  32/* #define TRACE_EXEC */
  33
  34#ifdef TRACE_EXEC
  35#    define D_EXEC(x) x
  36#else
  37#    define D_EXEC(x)
  38#endif
  39
  40enum {
  41    R_CTL = 0,
  42    R_MESHBASE,
  43    R_HMESHLAST,
  44    R_VMESHLAST,
  45    R_CODEPAGE,
  46    R_VERTICES,
  47    R_COLLISIONS,
  48    R_STRAYWRITES,
  49    R_LASTDMA,
  50    R_PC,
  51    R_DREGBASE,
  52    R_CODEBASE,
  53    R_MAX
  54};
  55
  56enum {
  57    CTL_START_BUSY = (1<<0),
  58};
  59
  60enum {
  61    OP_NOP = 0,
  62    OP_FADD,
  63    OP_FSUB,
  64    OP_FMUL,
  65    OP_FABS,
  66    OP_F2I,
  67    OP_I2F,
  68    OP_VECTOUT,
  69    OP_SIN,
  70    OP_COS,
  71    OP_ABOVE,
  72    OP_EQUAL,
  73    OP_COPY,
  74    OP_IF,
  75    OP_TSIGN,
  76    OP_QUAKE,
  77};
  78
  79enum {
  80    GPR_X = 0,
  81    GPR_Y = 1,
  82    GPR_FLAGS = 2,
  83};
  84
  85enum {
  86    LATENCY_FADD = 5,
  87    LATENCY_FSUB = 5,
  88    LATENCY_FMUL = 7,
  89    LATENCY_FABS = 2,
  90    LATENCY_F2I = 2,
  91    LATENCY_I2F = 3,
  92    LATENCY_VECTOUT = 0,
  93    LATENCY_SIN = 4,
  94    LATENCY_COS = 4,
  95    LATENCY_ABOVE = 2,
  96    LATENCY_EQUAL = 2,
  97    LATENCY_COPY = 2,
  98    LATENCY_IF = 2,
  99    LATENCY_TSIGN = 2,
 100    LATENCY_QUAKE = 2,
 101    MAX_LATENCY = 7
 102};
 103
 104#define GPR_BEGIN       0x100
 105#define GPR_END         0x17f
 106#define MICROCODE_BEGIN 0x200
 107#define MICROCODE_END   0x3ff
 108#define MICROCODE_WORDS 2048
 109
 110#define REINTERPRET_CAST(type, val) (*((type *)&(val)))
 111
 112#ifdef TRACE_EXEC
 113static const char *opcode_to_str[] = {
 114    "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
 115    "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
 116};
 117#endif
 118
 119struct MilkymistPFPUState {
 120    SysBusDevice busdev;
 121    MemoryRegion regs_region;
 122    CharDriverState *chr;
 123    qemu_irq irq;
 124
 125    uint32_t regs[R_MAX];
 126    uint32_t gp_regs[128];
 127    uint32_t microcode[MICROCODE_WORDS];
 128
 129    int output_queue_pos;
 130    uint32_t output_queue[MAX_LATENCY];
 131};
 132typedef struct MilkymistPFPUState MilkymistPFPUState;
 133
 134static inline target_phys_addr_t
 135get_dma_address(uint32_t base, uint32_t x, uint32_t y)
 136{
 137    return base + 8 * (128 * y + x);
 138}
 139
 140static inline void
 141output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
 142{
 143    s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
 144}
 145
 146static inline uint32_t
 147output_queue_remove(MilkymistPFPUState *s)
 148{
 149    return s->output_queue[s->output_queue_pos];
 150}
 151
 152static inline void
 153output_queue_advance(MilkymistPFPUState *s)
 154{
 155    s->output_queue[s->output_queue_pos] = 0;
 156    s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
 157}
 158
 159static int pfpu_decode_insn(MilkymistPFPUState *s)
 160{
 161    uint32_t pc = s->regs[R_PC];
 162    uint32_t insn = s->microcode[pc];
 163    uint32_t reg_a = (insn >> 18) & 0x7f;
 164    uint32_t reg_b = (insn >> 11) & 0x7f;
 165    uint32_t op = (insn >> 7) & 0xf;
 166    uint32_t reg_d = insn & 0x7f;
 167    uint32_t r = 0;
 168    int latency = 0;
 169
 170    switch (op) {
 171    case OP_NOP:
 172        break;
 173    case OP_FADD:
 174    {
 175        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 176        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 177        float t = a + b;
 178        r = REINTERPRET_CAST(uint32_t, t);
 179        latency = LATENCY_FADD;
 180        D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 181    } break;
 182    case OP_FSUB:
 183    {
 184        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 185        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 186        float t = a - b;
 187        r = REINTERPRET_CAST(uint32_t, t);
 188        latency = LATENCY_FSUB;
 189        D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 190    } break;
 191    case OP_FMUL:
 192    {
 193        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 194        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 195        float t = a * b;
 196        r = REINTERPRET_CAST(uint32_t, t);
 197        latency = LATENCY_FMUL;
 198        D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 199    } break;
 200    case OP_FABS:
 201    {
 202        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 203        float t = fabsf(a);
 204        r = REINTERPRET_CAST(uint32_t, t);
 205        latency = LATENCY_FABS;
 206        D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
 207    } break;
 208    case OP_F2I:
 209    {
 210        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 211        int32_t t = a;
 212        r = REINTERPRET_CAST(uint32_t, t);
 213        latency = LATENCY_F2I;
 214        D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
 215    } break;
 216    case OP_I2F:
 217    {
 218        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 219        float t = a;
 220        r = REINTERPRET_CAST(uint32_t, t);
 221        latency = LATENCY_I2F;
 222        D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
 223    } break;
 224    case OP_VECTOUT:
 225    {
 226        uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
 227        uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
 228        target_phys_addr_t dma_ptr =
 229            get_dma_address(s->regs[R_MESHBASE],
 230                    s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
 231        cpu_physical_memory_write(dma_ptr, (uint8_t *)&a, 4);
 232        cpu_physical_memory_write(dma_ptr + 4, (uint8_t *)&b, 4);
 233        s->regs[R_LASTDMA] = dma_ptr + 4;
 234        D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
 235        trace_milkymist_pfpu_vectout(a, b, dma_ptr);
 236    } break;
 237    case OP_SIN:
 238    {
 239        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 240        float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
 241        r = REINTERPRET_CAST(uint32_t, t);
 242        latency = LATENCY_SIN;
 243        D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
 244    } break;
 245    case OP_COS:
 246    {
 247        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 248        float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
 249        r = REINTERPRET_CAST(uint32_t, t);
 250        latency = LATENCY_COS;
 251        D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
 252    } break;
 253    case OP_ABOVE:
 254    {
 255        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 256        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 257        float t = (a > b) ? 1.0f : 0.0f;
 258        r = REINTERPRET_CAST(uint32_t, t);
 259        latency = LATENCY_ABOVE;
 260        D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 261    } break;
 262    case OP_EQUAL:
 263    {
 264        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 265        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 266        float t = (a == b) ? 1.0f : 0.0f;
 267        r = REINTERPRET_CAST(uint32_t, t);
 268        latency = LATENCY_EQUAL;
 269        D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 270    } break;
 271    case OP_COPY:
 272    {
 273        r = s->gp_regs[reg_a];
 274        latency = LATENCY_COPY;
 275        D_EXEC(qemu_log("COPY"));
 276    } break;
 277    case OP_IF:
 278    {
 279        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 280        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 281        uint32_t f = s->gp_regs[GPR_FLAGS];
 282        float t = (f != 0) ? a : b;
 283        r = REINTERPRET_CAST(uint32_t, t);
 284        latency = LATENCY_IF;
 285        D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
 286    } break;
 287    case OP_TSIGN:
 288    {
 289        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 290        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 291        float t = (b < 0) ? -a : a;
 292        r = REINTERPRET_CAST(uint32_t, t);
 293        latency = LATENCY_TSIGN;
 294        D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 295    } break;
 296    case OP_QUAKE:
 297    {
 298        uint32_t a = s->gp_regs[reg_a];
 299        r = 0x5f3759df - (a >> 1);
 300        latency = LATENCY_QUAKE;
 301        D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
 302    } break;
 303
 304    default:
 305        error_report("milkymist_pfpu: unknown opcode %d", op);
 306        break;
 307    }
 308
 309    if (!reg_d) {
 310        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
 311                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 312                    s->regs[R_PC] + latency));
 313    } else {
 314        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
 315                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 316                    s->regs[R_PC] + latency, reg_d));
 317    }
 318
 319    if (op == OP_VECTOUT) {
 320        return 0;
 321    }
 322
 323    /* store output for this cycle */
 324    if (reg_d) {
 325        uint32_t val = output_queue_remove(s);
 326        D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
 327        s->gp_regs[reg_d] = val;
 328    }
 329
 330    output_queue_advance(s);
 331
 332    /* store op output */
 333    if (op != OP_NOP) {
 334        output_queue_insert(s, r, latency-1);
 335    }
 336
 337    /* advance PC */
 338    s->regs[R_PC]++;
 339
 340    return 1;
 341};
 342
 343static void pfpu_start(MilkymistPFPUState *s)
 344{
 345    int x, y;
 346    int i;
 347
 348    for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
 349        for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
 350            D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
 351
 352            /* set current position */
 353            s->gp_regs[GPR_X] = x;
 354            s->gp_regs[GPR_Y] = y;
 355
 356            /* run microcode on this position */
 357            i = 0;
 358            while (pfpu_decode_insn(s)) {
 359                /* decode at most MICROCODE_WORDS instructions */
 360                if (i++ >= MICROCODE_WORDS) {
 361                    error_report("milkymist_pfpu: too many instructions "
 362                            "executed in microcode. No VECTOUT?");
 363                    break;
 364                }
 365            }
 366
 367            /* reset pc for next run */
 368            s->regs[R_PC] = 0;
 369        }
 370    }
 371
 372    s->regs[R_VERTICES] = x * y;
 373
 374    trace_milkymist_pfpu_pulse_irq();
 375    qemu_irq_pulse(s->irq);
 376}
 377
 378static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
 379{
 380    return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
 381}
 382
 383static uint64_t pfpu_read(void *opaque, target_phys_addr_t addr,
 384                          unsigned size)
 385{
 386    MilkymistPFPUState *s = opaque;
 387    uint32_t r = 0;
 388
 389    addr >>= 2;
 390    switch (addr) {
 391    case R_CTL:
 392    case R_MESHBASE:
 393    case R_HMESHLAST:
 394    case R_VMESHLAST:
 395    case R_CODEPAGE:
 396    case R_VERTICES:
 397    case R_COLLISIONS:
 398    case R_STRAYWRITES:
 399    case R_LASTDMA:
 400    case R_PC:
 401    case R_DREGBASE:
 402    case R_CODEBASE:
 403        r = s->regs[addr];
 404        break;
 405    case GPR_BEGIN ... GPR_END:
 406        r = s->gp_regs[addr - GPR_BEGIN];
 407        break;
 408    case MICROCODE_BEGIN ...  MICROCODE_END:
 409        r = s->microcode[get_microcode_address(s, addr)];
 410        break;
 411
 412    default:
 413        error_report("milkymist_pfpu: read access to unknown register 0x"
 414                TARGET_FMT_plx, addr << 2);
 415        break;
 416    }
 417
 418    trace_milkymist_pfpu_memory_read(addr << 2, r);
 419
 420    return r;
 421}
 422
 423static void pfpu_write(void *opaque, target_phys_addr_t addr, uint64_t value,
 424                       unsigned size)
 425{
 426    MilkymistPFPUState *s = opaque;
 427
 428    trace_milkymist_pfpu_memory_write(addr, value);
 429
 430    addr >>= 2;
 431    switch (addr) {
 432    case R_CTL:
 433        if (value & CTL_START_BUSY) {
 434            pfpu_start(s);
 435        }
 436        break;
 437    case R_MESHBASE:
 438    case R_HMESHLAST:
 439    case R_VMESHLAST:
 440    case R_CODEPAGE:
 441    case R_VERTICES:
 442    case R_COLLISIONS:
 443    case R_STRAYWRITES:
 444    case R_LASTDMA:
 445    case R_PC:
 446    case R_DREGBASE:
 447    case R_CODEBASE:
 448        s->regs[addr] = value;
 449        break;
 450    case GPR_BEGIN ...  GPR_END:
 451        s->gp_regs[addr - GPR_BEGIN] = value;
 452        break;
 453    case MICROCODE_BEGIN ...  MICROCODE_END:
 454        s->microcode[get_microcode_address(s, addr)] = value;
 455        break;
 456
 457    default:
 458        error_report("milkymist_pfpu: write access to unknown register 0x"
 459                TARGET_FMT_plx, addr << 2);
 460        break;
 461    }
 462}
 463
 464static const MemoryRegionOps pfpu_mmio_ops = {
 465    .read = pfpu_read,
 466    .write = pfpu_write,
 467    .valid = {
 468        .min_access_size = 4,
 469        .max_access_size = 4,
 470    },
 471    .endianness = DEVICE_NATIVE_ENDIAN,
 472};
 473
 474static void milkymist_pfpu_reset(DeviceState *d)
 475{
 476    MilkymistPFPUState *s = container_of(d, MilkymistPFPUState, busdev.qdev);
 477    int i;
 478
 479    for (i = 0; i < R_MAX; i++) {
 480        s->regs[i] = 0;
 481    }
 482    for (i = 0; i < 128; i++) {
 483        s->gp_regs[i] = 0;
 484    }
 485    for (i = 0; i < MICROCODE_WORDS; i++) {
 486        s->microcode[i] = 0;
 487    }
 488    s->output_queue_pos = 0;
 489    for (i = 0; i < MAX_LATENCY; i++) {
 490        s->output_queue[i] = 0;
 491    }
 492}
 493
 494static int milkymist_pfpu_init(SysBusDevice *dev)
 495{
 496    MilkymistPFPUState *s = FROM_SYSBUS(typeof(*s), dev);
 497
 498    sysbus_init_irq(dev, &s->irq);
 499
 500    memory_region_init_io(&s->regs_region, &pfpu_mmio_ops, s,
 501            "milkymist-pfpu", MICROCODE_END * 4);
 502    sysbus_init_mmio_region(dev, &s->regs_region);
 503
 504    return 0;
 505}
 506
 507static const VMStateDescription vmstate_milkymist_pfpu = {
 508    .name = "milkymist-pfpu",
 509    .version_id = 1,
 510    .minimum_version_id = 1,
 511    .minimum_version_id_old = 1,
 512    .fields      = (VMStateField[]) {
 513        VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
 514        VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
 515        VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
 516        VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
 517        VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
 518        VMSTATE_END_OF_LIST()
 519    }
 520};
 521
 522static SysBusDeviceInfo milkymist_pfpu_info = {
 523    .init = milkymist_pfpu_init,
 524    .qdev.name  = "milkymist-pfpu",
 525    .qdev.size  = sizeof(MilkymistPFPUState),
 526    .qdev.vmsd  = &vmstate_milkymist_pfpu,
 527    .qdev.reset = milkymist_pfpu_reset,
 528};
 529
 530static void milkymist_pfpu_register(void)
 531{
 532    sysbus_register_withprop(&milkymist_pfpu_info);
 533}
 534
 535device_init(milkymist_pfpu_register)
 536