1
2
3
4
5
6
7
8
9
10
11
12
13
14
15#include "isp.h"
16#include "vmem.h"
17#include "vmem_local.h"
18
19#if !defined(HRT_MEMORY_ACCESS)
20#include "ia_css_device_access.h"
21#endif
22#include "assert_support.h"
23#include "platform_support.h"
24
/* Widest host unit used for bit manipulation (64 bits). */
typedef unsigned long long hive_uedge;
/* Pointer to an array of such words, treated as a packed bit vector. */
typedef hive_uedge *hive_wide;



/*
 * Extract bits [start, end) of w, shifted down to bit 0.
 * The mask for the low 'end' bits is built as (((1 << (end-1)) - 1) << 1 | 1)
 * rather than ((1 << end) - 1) so that end == 64 does not shift a 64-bit
 * value by its full width (undefined behavior). Requires end >= 1.
 */
#define SUBWORD(w, start, end) (((w) & (((1ULL << ((end)-1))-1) << 1 | 1)) >> (start))

/* Clear bits [start, end) of w, preserving all other bits (same end==64-safe
 * mask construction as SUBWORD). */
#define INV_SUBWORD(w, start, end) ((w) & (~(((1ULL << ((end)-1))-1) << 1 | 1) | ((1ULL << (start))-1)) )

/* Number of bits in one hive_uedge word. */
#define uedge_bits (8*sizeof(hive_uedge))
/* Copy the low src_bit bits of src to target at bit offset target_bit. */
#define move_lower_bits(target, target_bit, src, src_bit) move_subword(target, target_bit, src, 0, src_bit)
/* Copy bits [src_bit, uedge_bits) of src to target at bit offset target_bit. */
#define move_upper_bits(target, target_bit, src, src_bit) move_subword(target, target_bit, src, src_bit, uedge_bits)
/* Copy an entire word to target at bit offset target_bit. */
#define move_word(target, target_bit, src) move_subword(target, target_bit, src, 0, uedge_bits)
40
41static void
42move_subword (
43 hive_uedge *target,
44 unsigned target_bit,
45 hive_uedge src,
46 unsigned src_start,
47 unsigned src_end)
48{
49 unsigned int start_elem = target_bit / uedge_bits;
50 unsigned int start_bit = target_bit % uedge_bits;
51 unsigned subword_width = src_end - src_start;
52
53 hive_uedge src_subword = SUBWORD(src, src_start, src_end);
54
55 if (subword_width + start_bit > uedge_bits) {
56 hive_uedge old_val1;
57 hive_uedge old_val0 = INV_SUBWORD(target[start_elem], start_bit, uedge_bits);
58 target[start_elem] = old_val0 | (src_subword << start_bit);
59 old_val1 = INV_SUBWORD(target[start_elem+1], 0, subword_width + start_bit - uedge_bits);
60 target[start_elem+1] = old_val1 | (src_subword >> ( uedge_bits - start_bit));
61 } else {
62 hive_uedge old_val = INV_SUBWORD(target[start_elem], start_bit, start_bit + subword_width);
63 target[start_elem] = old_val | (src_subword << start_bit);
64 }
65}
66
67static void
68hive_sim_wide_unpack(
69 hive_wide vector,
70 hive_wide elem,
71 hive_uint elem_bits,
72 hive_uint index)
73{
74
75 unsigned int start_elem = (elem_bits * index) / uedge_bits;
76 unsigned int start_bit = (elem_bits * index) % uedge_bits;
77 unsigned int end_elem = (elem_bits * (index + 1) - 1) / uedge_bits;
78 unsigned int end_bit = ((elem_bits * (index + 1) - 1) % uedge_bits) + 1;
79
80 if (elem_bits == uedge_bits) {
81
82 elem[0] = vector[index];
83 } else if (start_elem == end_elem) {
84
85 move_subword(elem, 0, vector[start_elem], start_bit, end_bit);
86 } else {
87
88 unsigned int bits_written = 0;
89 unsigned int i;
90 move_upper_bits(elem, bits_written, vector[start_elem], start_bit);
91 bits_written += (64 - start_bit);
92 for(i = start_elem+1; i < end_elem; i++) {
93 move_word(elem, bits_written, vector[i]);
94 bits_written += uedge_bits;
95 }
96 move_lower_bits(elem, bits_written , vector[end_elem], end_bit);
97 }
98}
99
100static void
101hive_sim_wide_pack(
102 hive_wide vector,
103 hive_wide elem,
104 hive_uint elem_bits,
105 hive_uint index)
106{
107
108 unsigned int start_elem = (elem_bits * index) / uedge_bits;
109
110
111 if (elem_bits == uedge_bits) {
112 vector[start_elem] = elem[0];
113 } else if (elem_bits > uedge_bits) {
114 unsigned bits_to_write = elem_bits;
115 unsigned start_bit = elem_bits * index;
116 unsigned i = 0;
117 for(; bits_to_write > uedge_bits; bits_to_write -= uedge_bits, i++, start_bit += uedge_bits) {
118 move_word(vector, start_bit, elem[i]);
119 }
120 move_lower_bits(vector, start_bit, elem[i], bits_to_write);
121 } else {
122
123 move_lower_bits(vector, elem_bits * index, elem[0], elem_bits);
124 }
125}
126
127static void load_vector (
128 const isp_ID_t ID,
129 t_vmem_elem *to,
130 const t_vmem_elem *from)
131{
132 unsigned i;
133 hive_uedge *data;
134 unsigned size = sizeof(short)*ISP_NWAY;
135 VMEM_ARRAY(v, 2*ISP_NWAY);
136 assert(ISP_BAMEM_BASE[ID] != (hrt_address)-1);
137#if !defined(HRT_MEMORY_ACCESS)
138 ia_css_device_load(ISP_BAMEM_BASE[ID] + (unsigned long)from, &v[0][0], size);
139#else
140 hrt_master_port_load(ISP_BAMEM_BASE[ID] + (unsigned long)from, &v[0][0], size);
141#endif
142 data = (hive_uedge *)v;
143 for (i = 0; i < ISP_NWAY; i++) {
144 hive_uedge elem = 0;
145 hive_sim_wide_unpack(data, &elem, ISP_VEC_ELEMBITS, i);
146 to[i] = elem;
147 }
148 hrt_sleep();
149}
150
151static void store_vector (
152 const isp_ID_t ID,
153 t_vmem_elem *to,
154 const t_vmem_elem *from)
155{
156 unsigned i;
157 unsigned size = sizeof(short)*ISP_NWAY;
158 VMEM_ARRAY(v, 2*ISP_NWAY);
159
160 hive_uedge *data = (hive_uedge *)v;
161 for (i = 0; i < ISP_NWAY; i++) {
162 hive_sim_wide_pack(data, (hive_wide)&from[i], ISP_VEC_ELEMBITS, i);
163 }
164 assert(ISP_BAMEM_BASE[ID] != (hrt_address)-1);
165#if !defined(HRT_MEMORY_ACCESS)
166 ia_css_device_store(ISP_BAMEM_BASE[ID] + (unsigned long)to, &v, size);
167#else
168
169 hrt_master_port_store(ISP_BAMEM_BASE[ID] + (unsigned long)to, &v, size);
170#endif
171 hrt_sleep();
172}
173
174void isp_vmem_load(
175 const isp_ID_t ID,
176 const t_vmem_elem *from,
177 t_vmem_elem *to,
178 unsigned elems)
179{
180 unsigned c;
181 const t_vmem_elem *vp = from;
182 assert(ID < N_ISP_ID);
183 assert((unsigned long)from % ISP_VEC_ALIGN == 0);
184 assert(elems % ISP_NWAY == 0);
185 for (c = 0; c < elems; c += ISP_NWAY) {
186 load_vector(ID, &to[c], vp);
187 vp = (t_vmem_elem *)((char*)vp + ISP_VEC_ALIGN);
188 }
189}
190
191void isp_vmem_store(
192 const isp_ID_t ID,
193 t_vmem_elem *to,
194 const t_vmem_elem *from,
195 unsigned elems)
196{
197 unsigned c;
198 t_vmem_elem *vp = to;
199 assert(ID < N_ISP_ID);
200 assert((unsigned long)to % ISP_VEC_ALIGN == 0);
201 assert(elems % ISP_NWAY == 0);
202 for (c = 0; c < elems; c += ISP_NWAY) {
203 store_vector (ID, vp, &from[c]);
204 vp = (t_vmem_elem *)((char*)vp + ISP_VEC_ALIGN);
205 }
206}
207
208void isp_vmem_2d_load (
209 const isp_ID_t ID,
210 const t_vmem_elem *from,
211 t_vmem_elem *to,
212 unsigned height,
213 unsigned width,
214 unsigned stride_to,
215 unsigned stride_from )
216{
217 unsigned h;
218
219 assert(ID < N_ISP_ID);
220 assert((unsigned long)from % ISP_VEC_ALIGN == 0);
221 assert(width % ISP_NWAY == 0);
222 assert(stride_from % ISP_NWAY == 0);
223 for (h = 0; h < height; h++) {
224 unsigned c;
225 const t_vmem_elem *vp = from;
226 for (c = 0; c < width; c += ISP_NWAY) {
227 load_vector(ID, &to[stride_to*h + c], vp);
228 vp = (t_vmem_elem *)((char*)vp + ISP_VEC_ALIGN);
229 }
230 from = (const t_vmem_elem *)((const char *)from + stride_from/ISP_NWAY*ISP_VEC_ALIGN);
231 }
232}
233
234void isp_vmem_2d_store (
235 const isp_ID_t ID,
236 t_vmem_elem *to,
237 const t_vmem_elem *from,
238 unsigned height,
239 unsigned width,
240 unsigned stride_to,
241 unsigned stride_from )
242{
243 unsigned h;
244
245 assert(ID < N_ISP_ID);
246 assert((unsigned long)to % ISP_VEC_ALIGN == 0);
247 assert(width % ISP_NWAY == 0);
248 assert(stride_to % ISP_NWAY == 0);
249 for (h = 0; h < height; h++) {
250 unsigned c;
251 t_vmem_elem *vp = to;
252 for (c = 0; c < width; c += ISP_NWAY) {
253 store_vector (ID, vp, &from[stride_from*h + c]);
254 vp = (t_vmem_elem *)((char*)vp + ISP_VEC_ALIGN);
255 }
256 to = (t_vmem_elem *)((char *)to + stride_to/ISP_NWAY*ISP_VEC_ALIGN);
257 }
258}
259