1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26#include "hw/hw.h"
27#include "hw/sysbus.h"
28#include "trace.h"
29#include "qemu/log.h"
30#include "qemu/error-report.h"
31#include <math.h>
32
33
34
/*
 * D_EXEC(x) expands to x only when TRACE_EXEC is defined at build time;
 * otherwise the wrapped statement is compiled out entirely.
 */
#ifndef TRACE_EXEC
# define D_EXEC(x)
#else
# define D_EXEC(x) x
#endif
40
/*
 * Word indices of the control/status registers. The MMIO handlers shift the
 * byte offset right by two before comparing against these values.
 */
enum {
    R_CTL         = 0,   /* writing CTL_START_BUSY here starts a run */
    R_MESHBASE    = 1,   /* base address used for VECTOUT DMA writes */
    R_HMESHLAST   = 2,   /* last x index iterated by pfpu_start() */
    R_VMESHLAST   = 3,   /* last y index iterated by pfpu_start() */
    R_CODEPAGE    = 4,   /* selects the 512-word microcode page seen via MMIO */
    R_VERTICES    = 5,   /* written with the vertex count after a run */
    R_COLLISIONS  = 6,   /* plain storage in this model */
    R_STRAYWRITES = 7,   /* plain storage in this model */
    R_LASTDMA     = 8,   /* address of the last word written by VECTOUT */
    R_PC          = 9,   /* microcode program counter */
    R_DREGBASE    = 10,  /* plain storage in this model */
    R_CODEBASE    = 11,  /* plain storage in this model */
    R_MAX         = 12   /* number of registers; sizes regs[] */
};
56
/* Bit masks for values written to R_CTL. */
enum {
    CTL_START_BUSY = 0x1,   /* bit 0: a write with this bit set runs the microcode */
};
60
/*
 * Microcode opcodes. The decoder extracts a 4-bit opcode field, so all
 * sixteen encodings are assigned.
 */
enum {
    OP_NOP     = 0,
    OP_FADD    = 1,
    OP_FSUB    = 2,
    OP_FMUL    = 3,
    OP_FABS    = 4,
    OP_F2I     = 5,
    OP_I2F     = 6,
    OP_VECTOUT = 7,
    OP_SIN     = 8,
    OP_COS     = 9,
    OP_ABOVE   = 10,
    OP_EQUAL   = 11,
    OP_COPY    = 12,
    OP_IF      = 13,
    OP_TSIGN   = 14,
    OP_QUAKE   = 15,
};
79
/* General-purpose registers that the model itself reads or writes. */
enum {
    GPR_X,       /* 0: loaded with the current x index before each vertex */
    GPR_Y,       /* 1: loaded with the current y index before each vertex */
    GPR_FLAGS,   /* 2: condition tested by OP_IF */
};
85
/*
 * Result latency of each opcode, in instruction slots. The decoder queues a
 * result (latency - 1) slots ahead in the output queue; MAX_LATENCY is the
 * queue depth and equals the largest latency listed here.
 */
enum {
    LATENCY_VECTOUT = 0,

    LATENCY_FABS    = 2,
    LATENCY_F2I     = 2,
    LATENCY_ABOVE   = 2,
    LATENCY_EQUAL   = 2,
    LATENCY_COPY    = 2,
    LATENCY_IF      = 2,
    LATENCY_TSIGN   = 2,
    LATENCY_QUAKE   = 2,

    LATENCY_I2F     = 3,

    LATENCY_SIN     = 4,
    LATENCY_COS     = 4,

    LATENCY_FADD    = 5,
    LATENCY_FSUB    = 5,

    LATENCY_FMUL    = 7,

    MAX_LATENCY     = 7
};
104
/*
 * Word-index ranges inside the MMIO window (byte offset >> 2); both ends of
 * each range are inclusive.
 */
#define GPR_BEGIN       0x100
#define GPR_END         0x17f
#define MICROCODE_BEGIN 0x200
#define MICROCODE_END   0x3ff
/* Total number of 32-bit words of microcode storage. */
#define MICROCODE_WORDS 2048

/*
 * Return the bits of 'val' reinterpreted as 'type' (used to move between the
 * uint32_t register file and float/int32_t views of the same 32 bits).
 *
 * The pun goes through a union instead of dereferencing a casted pointer, so
 * it does not depend on the compiler being run with strict aliasing
 * disabled. 'type' and 'val' must have the same size. The expansion is an
 * rvalue; every use in this file only reads the result.
 */
#define REINTERPRET_CAST(type, val)                         \
    ({                                                      \
        union {                                             \
            __typeof__(val) from;                           \
            type to;                                        \
        } reinterpret_cast_u_ = { .from = (val) };          \
        reinterpret_cast_u_.to;                             \
    })
112
#ifdef TRACE_EXEC
/* Mnemonic for each opcode, indexed by opcode value; used only in debug logs. */
static const char *opcode_to_str[] = {
    [OP_NOP]     = "NOP",
    [OP_FADD]    = "FADD",
    [OP_FSUB]    = "FSUB",
    [OP_FMUL]    = "FMUL",
    [OP_FABS]    = "FABS",
    [OP_F2I]     = "F2I",
    [OP_I2F]     = "I2F",
    [OP_VECTOUT] = "VECTOUT",
    [OP_SIN]     = "SIN",
    [OP_COS]     = "COS",
    [OP_ABOVE]   = "ABOVE",
    [OP_EQUAL]   = "EQUAL",
    [OP_COPY]    = "COPY",
    [OP_IF]      = "IF",
    [OP_TSIGN]   = "TSIGN",
    [OP_QUAKE]   = "QUAKE",
};
#endif
119
/* QOM type name under which this device is registered. */
#define TYPE_MILKYMIST_PFPU "milkymist-pfpu"

/* Checked cast from a generic object pointer to MilkymistPFPUState. */
#define MILKYMIST_PFPU(obj)                                                 \
    OBJECT_CHECK(MilkymistPFPUState, (obj), TYPE_MILKYMIST_PFPU)
123
124struct MilkymistPFPUState {
125 SysBusDevice parent_obj;
126
127 MemoryRegion regs_region;
128 CharDriverState *chr;
129 qemu_irq irq;
130
131 uint32_t regs[R_MAX];
132 uint32_t gp_regs[128];
133 uint32_t microcode[MICROCODE_WORDS];
134
135 int output_queue_pos;
136 uint32_t output_queue[MAX_LATENCY];
137};
138typedef struct MilkymistPFPUState MilkymistPFPUState;
139
140static inline hwaddr
141get_dma_address(uint32_t base, uint32_t x, uint32_t y)
142{
143 return base + 8 * (128 * y + x);
144}
145
146static inline void
147output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
148{
149 s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
150}
151
152static inline uint32_t
153output_queue_remove(MilkymistPFPUState *s)
154{
155 return s->output_queue[s->output_queue_pos];
156}
157
158static inline void
159output_queue_advance(MilkymistPFPUState *s)
160{
161 s->output_queue[s->output_queue_pos] = 0;
162 s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
163}
164
165static int pfpu_decode_insn(MilkymistPFPUState *s)
166{
167 uint32_t pc = s->regs[R_PC];
168 uint32_t insn = s->microcode[pc];
169 uint32_t reg_a = (insn >> 18) & 0x7f;
170 uint32_t reg_b = (insn >> 11) & 0x7f;
171 uint32_t op = (insn >> 7) & 0xf;
172 uint32_t reg_d = insn & 0x7f;
173 uint32_t r = 0;
174 int latency = 0;
175
176 switch (op) {
177 case OP_NOP:
178 break;
179 case OP_FADD:
180 {
181 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
182 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
183 float t = a + b;
184 r = REINTERPRET_CAST(uint32_t, t);
185 latency = LATENCY_FADD;
186 D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
187 } break;
188 case OP_FSUB:
189 {
190 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
191 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
192 float t = a - b;
193 r = REINTERPRET_CAST(uint32_t, t);
194 latency = LATENCY_FSUB;
195 D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
196 } break;
197 case OP_FMUL:
198 {
199 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
200 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
201 float t = a * b;
202 r = REINTERPRET_CAST(uint32_t, t);
203 latency = LATENCY_FMUL;
204 D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
205 } break;
206 case OP_FABS:
207 {
208 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
209 float t = fabsf(a);
210 r = REINTERPRET_CAST(uint32_t, t);
211 latency = LATENCY_FABS;
212 D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
213 } break;
214 case OP_F2I:
215 {
216 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
217 int32_t t = a;
218 r = REINTERPRET_CAST(uint32_t, t);
219 latency = LATENCY_F2I;
220 D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
221 } break;
222 case OP_I2F:
223 {
224 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
225 float t = a;
226 r = REINTERPRET_CAST(uint32_t, t);
227 latency = LATENCY_I2F;
228 D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
229 } break;
230 case OP_VECTOUT:
231 {
232 uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
233 uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
234 hwaddr dma_ptr =
235 get_dma_address(s->regs[R_MESHBASE],
236 s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
237 cpu_physical_memory_write(dma_ptr, &a, 4);
238 cpu_physical_memory_write(dma_ptr + 4, &b, 4);
239 s->regs[R_LASTDMA] = dma_ptr + 4;
240 D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
241 trace_milkymist_pfpu_vectout(a, b, dma_ptr);
242 } break;
243 case OP_SIN:
244 {
245 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
246 float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
247 r = REINTERPRET_CAST(uint32_t, t);
248 latency = LATENCY_SIN;
249 D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
250 } break;
251 case OP_COS:
252 {
253 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
254 float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
255 r = REINTERPRET_CAST(uint32_t, t);
256 latency = LATENCY_COS;
257 D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
258 } break;
259 case OP_ABOVE:
260 {
261 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
262 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
263 float t = (a > b) ? 1.0f : 0.0f;
264 r = REINTERPRET_CAST(uint32_t, t);
265 latency = LATENCY_ABOVE;
266 D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
267 } break;
268 case OP_EQUAL:
269 {
270 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
271 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
272 float t = (a == b) ? 1.0f : 0.0f;
273 r = REINTERPRET_CAST(uint32_t, t);
274 latency = LATENCY_EQUAL;
275 D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
276 } break;
277 case OP_COPY:
278 {
279 r = s->gp_regs[reg_a];
280 latency = LATENCY_COPY;
281 D_EXEC(qemu_log("COPY"));
282 } break;
283 case OP_IF:
284 {
285 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
286 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
287 uint32_t f = s->gp_regs[GPR_FLAGS];
288 float t = (f != 0) ? a : b;
289 r = REINTERPRET_CAST(uint32_t, t);
290 latency = LATENCY_IF;
291 D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
292 } break;
293 case OP_TSIGN:
294 {
295 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
296 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
297 float t = (b < 0) ? -a : a;
298 r = REINTERPRET_CAST(uint32_t, t);
299 latency = LATENCY_TSIGN;
300 D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
301 } break;
302 case OP_QUAKE:
303 {
304 uint32_t a = s->gp_regs[reg_a];
305 r = 0x5f3759df - (a >> 1);
306 latency = LATENCY_QUAKE;
307 D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
308 } break;
309
310 default:
311 error_report("milkymist_pfpu: unknown opcode %d", op);
312 break;
313 }
314
315 if (!reg_d) {
316 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
317 s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
318 s->regs[R_PC] + latency));
319 } else {
320 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
321 s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
322 s->regs[R_PC] + latency, reg_d));
323 }
324
325 if (op == OP_VECTOUT) {
326 return 0;
327 }
328
329
330 if (reg_d) {
331 uint32_t val = output_queue_remove(s);
332 D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
333 s->gp_regs[reg_d] = val;
334 }
335
336 output_queue_advance(s);
337
338
339 if (op != OP_NOP) {
340 output_queue_insert(s, r, latency-1);
341 }
342
343
344 s->regs[R_PC]++;
345
346 return 1;
347};
348
349static void pfpu_start(MilkymistPFPUState *s)
350{
351 int x, y;
352 int i;
353
354 for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
355 for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
356 D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
357
358
359 s->gp_regs[GPR_X] = x;
360 s->gp_regs[GPR_Y] = y;
361
362
363 i = 0;
364 while (pfpu_decode_insn(s)) {
365
366 if (++i >= MICROCODE_WORDS) {
367 error_report("milkymist_pfpu: too many instructions "
368 "executed in microcode. No VECTOUT?");
369 break;
370 }
371 }
372
373
374 s->regs[R_PC] = 0;
375 }
376 }
377
378 s->regs[R_VERTICES] = x * y;
379
380 trace_milkymist_pfpu_pulse_irq();
381 qemu_irq_pulse(s->irq);
382}
383
384static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
385{
386 return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
387}
388
389static uint64_t pfpu_read(void *opaque, hwaddr addr,
390 unsigned size)
391{
392 MilkymistPFPUState *s = opaque;
393 uint32_t r = 0;
394
395 addr >>= 2;
396 switch (addr) {
397 case R_CTL:
398 case R_MESHBASE:
399 case R_HMESHLAST:
400 case R_VMESHLAST:
401 case R_CODEPAGE:
402 case R_VERTICES:
403 case R_COLLISIONS:
404 case R_STRAYWRITES:
405 case R_LASTDMA:
406 case R_PC:
407 case R_DREGBASE:
408 case R_CODEBASE:
409 r = s->regs[addr];
410 break;
411 case GPR_BEGIN ... GPR_END:
412 r = s->gp_regs[addr - GPR_BEGIN];
413 break;
414 case MICROCODE_BEGIN ... MICROCODE_END:
415 r = s->microcode[get_microcode_address(s, addr)];
416 break;
417
418 default:
419 error_report("milkymist_pfpu: read access to unknown register 0x"
420 TARGET_FMT_plx, addr << 2);
421 break;
422 }
423
424 trace_milkymist_pfpu_memory_read(addr << 2, r);
425
426 return r;
427}
428
429static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
430 unsigned size)
431{
432 MilkymistPFPUState *s = opaque;
433
434 trace_milkymist_pfpu_memory_write(addr, value);
435
436 addr >>= 2;
437 switch (addr) {
438 case R_CTL:
439 if (value & CTL_START_BUSY) {
440 pfpu_start(s);
441 }
442 break;
443 case R_MESHBASE:
444 case R_HMESHLAST:
445 case R_VMESHLAST:
446 case R_CODEPAGE:
447 case R_VERTICES:
448 case R_COLLISIONS:
449 case R_STRAYWRITES:
450 case R_LASTDMA:
451 case R_PC:
452 case R_DREGBASE:
453 case R_CODEBASE:
454 s->regs[addr] = value;
455 break;
456 case GPR_BEGIN ... GPR_END:
457 s->gp_regs[addr - GPR_BEGIN] = value;
458 break;
459 case MICROCODE_BEGIN ... MICROCODE_END:
460 s->microcode[get_microcode_address(s, addr)] = value;
461 break;
462
463 default:
464 error_report("milkymist_pfpu: write access to unknown register 0x"
465 TARGET_FMT_plx, addr << 2);
466 break;
467 }
468}
469
470static const MemoryRegionOps pfpu_mmio_ops = {
471 .read = pfpu_read,
472 .write = pfpu_write,
473 .valid = {
474 .min_access_size = 4,
475 .max_access_size = 4,
476 },
477 .endianness = DEVICE_NATIVE_ENDIAN,
478};
479
480static void milkymist_pfpu_reset(DeviceState *d)
481{
482 MilkymistPFPUState *s = MILKYMIST_PFPU(d);
483 int i;
484
485 for (i = 0; i < R_MAX; i++) {
486 s->regs[i] = 0;
487 }
488 for (i = 0; i < 128; i++) {
489 s->gp_regs[i] = 0;
490 }
491 for (i = 0; i < MICROCODE_WORDS; i++) {
492 s->microcode[i] = 0;
493 }
494 s->output_queue_pos = 0;
495 for (i = 0; i < MAX_LATENCY; i++) {
496 s->output_queue[i] = 0;
497 }
498}
499
500static int milkymist_pfpu_init(SysBusDevice *dev)
501{
502 MilkymistPFPUState *s = MILKYMIST_PFPU(dev);
503
504 sysbus_init_irq(dev, &s->irq);
505
506 memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,
507 "milkymist-pfpu", MICROCODE_END * 4);
508 sysbus_init_mmio(dev, &s->regs_region);
509
510 return 0;
511}
512
513static const VMStateDescription vmstate_milkymist_pfpu = {
514 .name = "milkymist-pfpu",
515 .version_id = 1,
516 .minimum_version_id = 1,
517 .fields = (VMStateField[]) {
518 VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
519 VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
520 VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
521 VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
522 VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
523 VMSTATE_END_OF_LIST()
524 }
525};
526
527static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
528{
529 DeviceClass *dc = DEVICE_CLASS(klass);
530 SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
531
532 k->init = milkymist_pfpu_init;
533 dc->reset = milkymist_pfpu_reset;
534 dc->vmsd = &vmstate_milkymist_pfpu;
535}
536
537static const TypeInfo milkymist_pfpu_info = {
538 .name = TYPE_MILKYMIST_PFPU,
539 .parent = TYPE_SYS_BUS_DEVICE,
540 .instance_size = sizeof(MilkymistPFPUState),
541 .class_init = milkymist_pfpu_class_init,
542};
543
544static void milkymist_pfpu_register_types(void)
545{
546 type_register_static(&milkymist_pfpu_info);
547}
548
549type_init(milkymist_pfpu_register_types)
550