linux/drivers/gpu/drm/vc4/vc4_validate.c
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Command list validator for VC4.
 *
 * Since the VC4 has no IOMMU between it and system memory, a user
 * with access to execute command lists could escalate privilege by
 * overwriting system memory (drawing to it as a framebuffer) or
 * reading system memory it shouldn't (reading it as a vertex buffer
 * or index buffer).
 *
 * We validate binner command lists to ensure that all accesses are
 * within the bounds of the GEM objects referenced by the submitted
 * job.  The validator explicitly whitelists packets, and checks the
 * offsets in any address fields to make sure they're contained within
 * the BOs they reference.
 *
 * Note that because CL validation is already reading the
 * user-submitted CL and writing the validated copy out to the memory
 * that the GPU will actually read, this is also where GEM relocation
 * processing (turning BO references into actual addresses for the GPU
 * to use) happens.
 */
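
/*
 * As a concrete example of both halves: validate_indexed_prim_list()
 * below bounds-checks the index buffer access implied by
 * VC4_PACKET_GL_INDEXED_PRIMITIVE, then rewrites the packet's address
 * field in the validated copy from a BO-relative offset to the BO's
 * actual bus address (ib->paddr + offset).
 */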

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_packet.h"

#define VALIDATE_ARGS \
        struct vc4_exec_info *exec,                     \
        void *validated,                                \
        void *untrusted

/** Return the width in pixels of a 64-byte microtile. */
static uint32_t
utile_width(int cpp)
{
        switch (cpp) {
        case 1:
        case 2:
                return 8;
        case 4:
                return 4;
        case 8:
                return 2;
        default:
                DRM_ERROR("unknown cpp: %d\n", cpp);
                return 1;
        }
}

/** Return the height in pixels of a 64-byte microtile. */
static uint32_t
utile_height(int cpp)
{
        switch (cpp) {
        case 1:
                return 8;
        case 2:
        case 4:
        case 8:
                return 4;
        default:
                DRM_ERROR("unknown cpp: %d\n", cpp);
                return 1;
        }
}
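
/*
 * Both tables derive from the fixed 64-byte microtile size:
 * cpp 1 gives 8x8 pixels, cpp 2 gives 8x4, cpp 4 gives 4x4, and
 * cpp 8 gives 2x4 -- in each case width * height * cpp == 64 bytes.
 */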

/**
 * size_is_lt() - Returns whether a miplevel of the given size will
 * use the lineartile (LT) tiling layout rather than the normal T
 * tiling layout.
 * @width: Width in pixels of the miplevel
 * @height: Height in pixels of the miplevel
 * @cpp: Bytes per pixel of the pixel format
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
        return (width <= 4 * utile_width(cpp) ||
                height <= 4 * utile_height(cpp));
}
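
/*
 * E.g. for a 32bpp format (cpp == 4, so 4x4-pixel utiles), miplevels
 * with width <= 16 or height <= 16 pixels fall back to the LT layout.
 */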

struct drm_gem_cma_object *
vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
{
        struct vc4_dev *vc4 = exec->dev;
        struct drm_gem_cma_object *obj;
        struct vc4_bo *bo;

        if (WARN_ON_ONCE(vc4->is_vc5))
                return NULL;

        if (hindex >= exec->bo_count) {
                DRM_DEBUG("BO index %d greater than BO count %d\n",
                          hindex, exec->bo_count);
                return NULL;
        }
        obj = exec->bo[hindex];
        bo = to_vc4_bo(&obj->base);

        if (bo->validated_shader) {
                DRM_DEBUG("Trying to use shader BO as something other than "
                          "a shader\n");
                return NULL;
        }

        return obj;
}

static struct drm_gem_cma_object *
vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
{
        return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
}

static bool
validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
{
        /* Note that the untrusted pointer passed to these functions is
         * incremented past the packet byte.
         */
        return (untrusted - 1 == exec->bin_u + pos);
}
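
/*
 * E.g. validate_flush() below uses this to require that the one-byte
 * VC4_PACKET_FLUSH is the very last byte of the CL (bin_cl_size - 1),
 * and validate_increment_semaphore() requires its packet to be the
 * byte just before that.
 */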

static uint32_t
gl_shader_rec_size(uint32_t pointer_bits)
{
        uint32_t attribute_count = pointer_bits & 7;
        bool extended = pointer_bits & 8;

        if (attribute_count == 0)
                attribute_count = 8;

        if (extended)
                return 100 + attribute_count * 4;
        else
                return 36 + attribute_count * 8;
}
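
/*
 * Worked example: pointer_bits == 0x3 encodes 3 attributes with no
 * extended strides, so the rec is 36 + 3 * 8 == 60 bytes, while 0x8
 * encodes 8 attributes (0 wraps to 8) with extended strides, so the
 * rec is 100 + 8 * 4 == 132 bytes.
 */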

bool
vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
                   uint32_t offset, uint8_t tiling_format,
                   uint32_t width, uint32_t height, uint8_t cpp)
{
        struct vc4_dev *vc4 = exec->dev;
        uint32_t aligned_width, aligned_height, stride, size;
        uint32_t utile_w = utile_width(cpp);
        uint32_t utile_h = utile_height(cpp);

        if (WARN_ON_ONCE(vc4->is_vc5))
                return false;

        /* The shaded vertex format stores signed 12.4 fixed point
         * (-2048,2047) offsets from the viewport center, so we should
         * never have a render target larger than 4096.  The texture
         * unit can only sample from 2048x2048, so it's even more
         * restricted.  This lets us avoid worrying about overflow in
         * our math.
         */
        if (width > 4096 || height > 4096) {
                DRM_DEBUG("Surface dimensions (%d,%d) too large",
                          width, height);
                return false;
        }

        switch (tiling_format) {
        case VC4_TILING_FORMAT_LINEAR:
                aligned_width = round_up(width, utile_w);
                aligned_height = height;
                break;
        case VC4_TILING_FORMAT_T:
                aligned_width = round_up(width, utile_w * 8);
                aligned_height = round_up(height, utile_h * 8);
                break;
        case VC4_TILING_FORMAT_LT:
                aligned_width = round_up(width, utile_w);
                aligned_height = round_up(height, utile_h);
                break;
        default:
                DRM_DEBUG("buffer tiling %d unsupported\n", tiling_format);
                return false;
        }

        stride = aligned_width * cpp;
        size = stride * aligned_height;

        if (size + offset < size ||
            size + offset > fbo->base.size) {
                DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
                          width, height,
                          aligned_width, aligned_height,
                          size, offset, fbo->base.size);
                return false;
        }

        return true;
}
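
/*
 * Example of the math above: a 100x100 linear surface at cpp == 4
 * rounds its width up to the 4-pixel utile width (100 already is a
 * multiple of 4), giving stride == 400 bytes and size == 40000 bytes,
 * all of which has to fit in the BO past the given offset.
 */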

static int
validate_flush(VALIDATE_ARGS)
{
        if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
                DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n");
                return -EINVAL;
        }
        exec->found_flush = true;

        return 0;
}

static int
validate_start_tile_binning(VALIDATE_ARGS)
{
        if (exec->found_start_tile_binning_packet) {
                DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n");
                return -EINVAL;
        }
        exec->found_start_tile_binning_packet = true;

        if (!exec->found_tile_binning_mode_config_packet) {
                DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
                return -EINVAL;
        }

        return 0;
}

static int
validate_increment_semaphore(VALIDATE_ARGS)
{
        if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
                DRM_DEBUG("Bin CL must end with "
                          "VC4_PACKET_INCREMENT_SEMAPHORE\n");
                return -EINVAL;
        }
        exec->found_increment_semaphore_packet = true;

        return 0;
}

static int
validate_indexed_prim_list(VALIDATE_ARGS)
{
        struct drm_gem_cma_object *ib;
        uint32_t length = *(uint32_t *)(untrusted + 1);
        uint32_t offset = *(uint32_t *)(untrusted + 5);
        uint32_t max_index = *(uint32_t *)(untrusted + 9);
        uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
        struct vc4_shader_state *shader_state;

        /* Check overflow condition */
        if (exec->shader_state_count == 0) {
                DRM_DEBUG("shader state must precede primitives\n");
                return -EINVAL;
        }
        shader_state = &exec->shader_state[exec->shader_state_count - 1];

        if (max_index > shader_state->max_index)
                shader_state->max_index = max_index;

        ib = vc4_use_handle(exec, 0);
        if (!ib)
                return -EINVAL;

        exec->bin_dep_seqno = max(exec->bin_dep_seqno,
                                  to_vc4_bo(&ib->base)->write_seqno);

        if (offset > ib->base.size ||
            (ib->base.size - offset) / index_size < length) {
                DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
                          offset, length, index_size, ib->base.size);
                return -EINVAL;
        }

        *(uint32_t *)(validated + 5) = ib->paddr + offset;

        return 0;
}

static int
validate_gl_array_primitive(VALIDATE_ARGS)
{
        uint32_t length = *(uint32_t *)(untrusted + 1);
        uint32_t base_index = *(uint32_t *)(untrusted + 5);
        uint32_t max_index;
        struct vc4_shader_state *shader_state;

        /* Check overflow condition */
        if (exec->shader_state_count == 0) {
                DRM_DEBUG("shader state must precede primitives\n");
                return -EINVAL;
        }
        shader_state = &exec->shader_state[exec->shader_state_count - 1];

        if (length + base_index < length) {
                DRM_DEBUG("primitive vertex count overflow\n");
                return -EINVAL;
        }
        max_index = length + base_index - 1;

        if (max_index > shader_state->max_index)
                shader_state->max_index = max_index;

        return 0;
}

static int
validate_gl_shader_state(VALIDATE_ARGS)
{
        uint32_t i = exec->shader_state_count++;

        if (i >= exec->shader_state_size) {
                DRM_DEBUG("More requests for shader states than declared\n");
                return -EINVAL;
        }

        exec->shader_state[i].addr = *(uint32_t *)untrusted;
        exec->shader_state[i].max_index = 0;

        if (exec->shader_state[i].addr & ~0xf) {
                DRM_DEBUG("high bits set in GL shader rec reference\n");
                return -EINVAL;
        }
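
        /* Only the low 4 bits of addr may be set (checked above); they
         * encode the attribute count and the extended-stride flag (see
         * gl_shader_rec_size()).  shader_rec_p is kept 16-byte aligned
         * (note the roundup() below), so adding addr to it leaves those
         * flag bits intact in the pointer handed to the hardware.
         */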
        *(uint32_t *)validated = (exec->shader_rec_p +
                                  exec->shader_state[i].addr);

        exec->shader_rec_p +=
                roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);

        return 0;
}

static int
validate_tile_binning_config(VALIDATE_ARGS)
{
        struct drm_device *dev = exec->exec_bo->base.dev;
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        uint8_t flags;
        uint32_t tile_state_size;
        uint32_t tile_count, bin_addr;
        int bin_slot;

        if (exec->found_tile_binning_mode_config_packet) {
                DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
                return -EINVAL;
        }
        exec->found_tile_binning_mode_config_packet = true;

        exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
        exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
        tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
        flags = *(uint8_t *)(untrusted + 14);

        if (exec->bin_tiles_x == 0 ||
            exec->bin_tiles_y == 0) {
                DRM_DEBUG("Tile binning config of %dx%d too small\n",
                          exec->bin_tiles_x, exec->bin_tiles_y);
                return -EINVAL;
        }

        if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
                     VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
                DRM_DEBUG("unsupported binning config flags 0x%02x\n", flags);
                return -EINVAL;
        }

        bin_slot = vc4_v3d_get_bin_slot(vc4);
        if (bin_slot < 0) {
                if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) {
                        DRM_ERROR("Failed to allocate binner memory: %d\n",
                                  bin_slot);
                }
                return bin_slot;
        }

        /* The slot we allocated will only be used by this job, and is
         * free when the job completes rendering.
         */
        exec->bin_slots |= BIT(bin_slot);
        bin_addr = vc4->bin_bo->base.paddr + bin_slot * vc4->bin_alloc_size;

        /* The tile state data array is 48 bytes per tile, and we put it at
         * the start of a BO containing both it and the tile alloc.
         */
        tile_state_size = 48 * tile_count;

        /* Since the tile alloc array will follow us, align. */
        exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096);

        *(uint8_t *)(validated + 14) =
                ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
                            VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
                 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
                 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
                               VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
                 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
                               VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));

        /* tile alloc address. */
        *(uint32_t *)(validated + 0) = exec->tile_alloc_offset;
        /* tile alloc size. */
        *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size -
                                        exec->tile_alloc_offset);
        /* tile state address. */
        *(uint32_t *)(validated + 8) = bin_addr;

        return 0;
}
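
/*
 * Sample numbers for the layout above: a 10x8-tile bin config gives
 * tile_count == 80, so tile_state_size == 3840 bytes, and the tile
 * alloc stream then starts at a 4096-byte-aligned offset into the
 * binner slot, with the remainder of the slot as its size.
 */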
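
/*
 * VC4_PACKET_GEM_HANDLES never reaches the hardware: it's consumed
 * here to refresh the handle slots that vc4_use_handle() looks up,
 * and vc4_validate_bin_cl() skips copying it into the validated CL.
 */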
static int
validate_gem_handles(VALIDATE_ARGS)
{
        memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
        return 0;
}

#define VC4_DEFINE_PACKET(packet, func) \
        [packet] = { packet ## _SIZE, #packet, func }

static const struct cmd_info {
        uint16_t len;
        const char *name;
        int (*func)(struct vc4_exec_info *exec, void *validated,
                    void *untrusted);
} cmd_info[] = {
        VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
        VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
                          validate_start_tile_binning),
        VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
                          validate_increment_semaphore),

        VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
                          validate_indexed_prim_list),
        VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
                          validate_gl_array_primitive),

        VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),

        VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),

        VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
        VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
        /* Note: The docs say this was also 105, but it was 106 in the
         * initial userland code drop.
         */
        VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),

        VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
                          validate_tile_binning_config),

        VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
};

int
vc4_validate_bin_cl(struct drm_device *dev,
                    void *validated,
                    void *unvalidated,
                    struct vc4_exec_info *exec)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        uint32_t len = exec->args->bin_cl_size;
        uint32_t dst_offset = 0;
        uint32_t src_offset = 0;

        if (WARN_ON_ONCE(vc4->is_vc5))
                return -ENODEV;

        while (src_offset < len) {
                void *dst_pkt = validated + dst_offset;
                void *src_pkt = unvalidated + src_offset;
                u8 cmd = *(uint8_t *)src_pkt;
                const struct cmd_info *info;

                if (cmd >= ARRAY_SIZE(cmd_info)) {
                        DRM_DEBUG("0x%08x: packet %d out of bounds\n",
                                  src_offset, cmd);
                        return -EINVAL;
                }

                info = &cmd_info[cmd];
                if (!info->name) {
                        DRM_DEBUG("0x%08x: packet %d invalid\n",
                                  src_offset, cmd);
                        return -EINVAL;
                }

                if (src_offset + info->len > len) {
                        DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x "
                                  "exceeds bounds (0x%08x)\n",
                                  src_offset, cmd, info->name, info->len,
                                  src_offset + len);
                        return -EINVAL;
                }

                if (cmd != VC4_PACKET_GEM_HANDLES)
                        memcpy(dst_pkt, src_pkt, info->len);

                if (info->func && info->func(exec,
                                             dst_pkt + 1,
                                             src_pkt + 1)) {
                        DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n",
                                  src_offset, cmd, info->name);
                        return -EINVAL;
                }

                src_offset += info->len;
                /* GEM handle loading doesn't produce HW packets. */
                if (cmd != VC4_PACKET_GEM_HANDLES)
                        dst_offset += info->len;

                /* When the CL hits halt, it'll stop reading anything else. */
                if (cmd == VC4_PACKET_HALT)
                        break;
        }

        exec->ct0ea = exec->ct0ca + dst_offset;

        if (!exec->found_start_tile_binning_packet) {
                DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
                return -EINVAL;
        }

        /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
         * semaphore is used to trigger the render CL to start up, and the
         * FLUSH is what caps the bin lists with
         * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
         * render CL when they get called to) and actually triggers the queued
         * semaphore increment.
         */
        if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
                DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
                          "VC4_PACKET_FLUSH\n");
                return -EINVAL;
        }

        return 0;
}

static bool
reloc_tex(struct vc4_exec_info *exec,
          void *uniform_data_u,
          struct vc4_texture_sample_info *sample,
          uint32_t texture_handle_index, bool is_cs)
{
        struct drm_gem_cma_object *tex;
        uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
        uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
        uint32_t p2 = (sample->p_offset[2] != ~0 ?
                       *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
        uint32_t p3 = (sample->p_offset[3] != ~0 ?
                       *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
        uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
        uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
        uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
        uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
        uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
        uint32_t cpp, tiling_format, utile_w, utile_h;
        uint32_t i;
        uint32_t cube_map_stride = 0;
        enum vc4_texture_data_type type;

        tex = vc4_use_bo(exec, texture_handle_index);
        if (!tex)
                return false;

        if (sample->is_direct) {
                uint32_t remaining_size = tex->base.size - p0;

                if (p0 > tex->base.size - 4) {
                        DRM_DEBUG("UBO offset greater than UBO size\n");
                        goto fail;
                }
                if (p1 > remaining_size - 4) {
                        DRM_DEBUG("UBO clamp would allow reads "
                                  "outside of UBO\n");
                        goto fail;
                }
                *validated_p0 = tex->paddr + p0;
                return true;
        }

        if (width == 0)
                width = 2048;
        if (height == 0)
                height = 2048;

        if (p0 & VC4_TEX_P0_CMMODE_MASK) {
                if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
                    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
                        cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
                if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
                    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
                        if (cube_map_stride) {
                                DRM_DEBUG("Cube map stride set twice\n");
                                goto fail;
                        }

                        cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
                }
                if (!cube_map_stride) {
                        DRM_DEBUG("Cube map stride not set\n");
                        goto fail;
                }
        }

        type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
                (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));

        switch (type) {
        case VC4_TEXTURE_TYPE_RGBA8888:
        case VC4_TEXTURE_TYPE_RGBX8888:
        case VC4_TEXTURE_TYPE_RGBA32R:
                cpp = 4;
                break;
        case VC4_TEXTURE_TYPE_RGBA4444:
        case VC4_TEXTURE_TYPE_RGBA5551:
        case VC4_TEXTURE_TYPE_RGB565:
        case VC4_TEXTURE_TYPE_LUMALPHA:
        case VC4_TEXTURE_TYPE_S16F:
        case VC4_TEXTURE_TYPE_S16:
                cpp = 2;
                break;
        case VC4_TEXTURE_TYPE_LUMINANCE:
        case VC4_TEXTURE_TYPE_ALPHA:
        case VC4_TEXTURE_TYPE_S8:
                cpp = 1;
                break;
        case VC4_TEXTURE_TYPE_ETC1:
                /* ETC1 is arranged as 64-bit blocks, where each block is 4x4
                 * pixels.
                 */
                cpp = 8;
                width = (width + 3) >> 2;
                height = (height + 3) >> 2;
                break;
        case VC4_TEXTURE_TYPE_BW1:
        case VC4_TEXTURE_TYPE_A4:
        case VC4_TEXTURE_TYPE_A1:
        case VC4_TEXTURE_TYPE_RGBA64:
        case VC4_TEXTURE_TYPE_YUV422R:
        default:
                DRM_DEBUG("Texture format %d unsupported\n", type);
                goto fail;
        }
        utile_w = utile_width(cpp);
        utile_h = utile_height(cpp);

        if (type == VC4_TEXTURE_TYPE_RGBA32R) {
                tiling_format = VC4_TILING_FORMAT_LINEAR;
        } else {
                if (size_is_lt(width, height, cpp))
                        tiling_format = VC4_TILING_FORMAT_LT;
                else
                        tiling_format = VC4_TILING_FORMAT_T;
        }

        if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
                                tiling_format, width, height, cpp)) {
                goto fail;
        }

        /* The mipmap levels are stored before the base of the texture.  Make
         * sure there is actually space in the BO.
         */
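        /*
         * Example walk: a 256x256 RGBA8888 texture (cpp == 4, T format)
         * with MIPLVLS == 2 needs 128x128 (65536 bytes) for level 1 and
         * 64x64 (16384 bytes) for level 2, so its level 0 offset must be
         * at least 81920 bytes into the BO.
         */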
        for (i = 1; i <= miplevels; i++) {
                uint32_t level_width = max(width >> i, 1u);
                uint32_t level_height = max(height >> i, 1u);
                uint32_t aligned_width, aligned_height;
                uint32_t level_size;

                /* Once the levels get small enough, they drop from T to LT. */
                if (tiling_format == VC4_TILING_FORMAT_T &&
                    size_is_lt(level_width, level_height, cpp)) {
                        tiling_format = VC4_TILING_FORMAT_LT;
                }

                switch (tiling_format) {
                case VC4_TILING_FORMAT_T:
                        aligned_width = round_up(level_width, utile_w * 8);
                        aligned_height = round_up(level_height, utile_h * 8);
                        break;
                case VC4_TILING_FORMAT_LT:
                        aligned_width = round_up(level_width, utile_w);
                        aligned_height = round_up(level_height, utile_h);
                        break;
                default:
                        aligned_width = round_up(level_width, utile_w);
                        aligned_height = level_height;
                        break;
                }

                level_size = aligned_width * cpp * aligned_height;

                if (offset < level_size) {
                        DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db "
                                  "overflowed buffer bounds (offset %d)\n",
                                  i, level_width, level_height,
                                  aligned_width, aligned_height,
                                  level_size, offset);
                        goto fail;
                }

                offset -= level_size;
        }

        *validated_p0 = tex->paddr + p0;

        if (is_cs) {
                exec->bin_dep_seqno = max(exec->bin_dep_seqno,
                                          to_vc4_bo(&tex->base)->write_seqno);
        }

        return true;
 fail:
        DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
        DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
        DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
        DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
        return false;
}

static int
validate_gl_shader_rec(struct drm_device *dev,
                       struct vc4_exec_info *exec,
                       struct vc4_shader_state *state)
{
        uint32_t *src_handles;
        void *pkt_u, *pkt_v;
        static const uint32_t shader_reloc_offsets[] = {
                4, /* fs */
                16, /* vs */
                28, /* cs */
        };
        uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
        struct drm_gem_cma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8];
        uint32_t nr_attributes, nr_relocs, packet_size;
        int i;

        nr_attributes = state->addr & 0x7;
        if (nr_attributes == 0)
                nr_attributes = 8;
        packet_size = gl_shader_rec_size(state->addr);

        nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
        if (nr_relocs * 4 > exec->shader_rec_size) {
                DRM_DEBUG("overflowed shader recs reading %d handles "
                          "from %d bytes left\n",
                          nr_relocs, exec->shader_rec_size);
                return -EINVAL;
        }
        src_handles = exec->shader_rec_u;
        exec->shader_rec_u += nr_relocs * 4;
        exec->shader_rec_size -= nr_relocs * 4;

        if (packet_size > exec->shader_rec_size) {
                DRM_DEBUG("overflowed shader recs copying %db packet "
                          "from %d bytes left\n",
                          packet_size, exec->shader_rec_size);
                return -EINVAL;
        }
        pkt_u = exec->shader_rec_u;
        pkt_v = exec->shader_rec_v;
        memcpy(pkt_v, pkt_u, packet_size);
        exec->shader_rec_u += packet_size;
        /* Shader recs have to be aligned to 16 bytes (due to the attribute
         * flags being in the low bytes), so round the next validated shader
         * rec address up.  This should be safe, since we've got so many
         * relocations in a shader rec packet.
         */
        BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
        exec->shader_rec_v += roundup(packet_size, 16);
        exec->shader_rec_size -= packet_size;

        for (i = 0; i < shader_reloc_count; i++) {
                if (src_handles[i] > exec->bo_count) {
                        DRM_DEBUG("Shader handle %d too big\n", src_handles[i]);
                        return -EINVAL;
                }

                bo[i] = exec->bo[src_handles[i]];
                if (!bo[i])
                        return -EINVAL;
        }
        for (i = shader_reloc_count; i < nr_relocs; i++) {
                bo[i] = vc4_use_bo(exec, src_handles[i]);
                if (!bo[i])
                        return -EINVAL;
        }
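
        /* The FS may run in the QPU's two-thread mode; the CL's
         * single-thread flag has to agree with how the FS was validated,
         * and the VS and CS must not be threaded at all.
         */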
        if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
            to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
                DRM_DEBUG("Thread mode of CL and FS do not match\n");
                return -EINVAL;
        }

        if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
            to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
                DRM_DEBUG("cs and vs cannot be threaded\n");
                return -EINVAL;
        }

        for (i = 0; i < shader_reloc_count; i++) {
                struct vc4_validated_shader_info *validated_shader;
                uint32_t o = shader_reloc_offsets[i];
                uint32_t src_offset = *(uint32_t *)(pkt_u + o);
                uint32_t *texture_handles_u;
                void *uniform_data_u;
                uint32_t tex, uni;

                *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;

                if (src_offset != 0) {
                        DRM_DEBUG("Shaders must be at offset 0 of "
                                  "the BO.\n");
                        return -EINVAL;
                }

                validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
                if (!validated_shader)
                        return -EINVAL;

                if (validated_shader->uniforms_src_size >
                    exec->uniforms_size) {
                        DRM_DEBUG("Uniforms src buffer overflow\n");
                        return -EINVAL;
                }

                texture_handles_u = exec->uniforms_u;
                uniform_data_u = (texture_handles_u +
                                  validated_shader->num_texture_samples);

                memcpy(exec->uniforms_v, uniform_data_u,
                       validated_shader->uniforms_size);

                for (tex = 0;
                     tex < validated_shader->num_texture_samples;
                     tex++) {
                        if (!reloc_tex(exec,
                                       uniform_data_u,
                                       &validated_shader->texture_samples[tex],
                                       texture_handles_u[tex],
                                       i == 2)) {
                                return -EINVAL;
                        }
                }

                /* Fill in the uniform slots that need this shader's
                 * start-of-uniforms address (used for resetting the uniform
                 * stream in the presence of control flow).
                 */
                for (uni = 0;
                     uni < validated_shader->num_uniform_addr_offsets;
                     uni++) {
                        uint32_t o = validated_shader->uniform_addr_offsets[uni];
                        ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
                }

                *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;

                exec->uniforms_u += validated_shader->uniforms_src_size;
                exec->uniforms_v += validated_shader->uniforms_size;
                exec->uniforms_p += validated_shader->uniforms_size;
        }

        for (i = 0; i < nr_attributes; i++) {
                struct drm_gem_cma_object *vbo =
                        bo[ARRAY_SIZE(shader_reloc_offsets) + i];
                uint32_t o = 36 + i * 8;
                uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
                uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
                uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
                uint32_t max_index;

                exec->bin_dep_seqno = max(exec->bin_dep_seqno,
                                          to_vc4_bo(&vbo->base)->write_seqno);
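
                /* In extended shader recs (addr bit 3), the low byte of
                 * each attribute's stride lives in the attribute record
                 * itself and the upper 24 bits come from the extended
                 * stride words starting at offset 100.
                 */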
                if (state->addr & 0x8)
                        stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;

                if (vbo->base.size < offset ||
                    vbo->base.size - offset < attr_size) {
                        DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n",
                                  offset, attr_size, vbo->base.size);
                        return -EINVAL;
                }

                if (stride != 0) {
                        max_index = ((vbo->base.size - offset - attr_size) /
                                     stride);
                        if (state->max_index > max_index) {
                                DRM_DEBUG("primitives use index %d out of "
                                          "supplied %d\n",
                                          state->max_index, max_index);
                                return -EINVAL;
                        }
                }

                *(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
        }

        return 0;
}

int
vc4_validate_shader_recs(struct drm_device *dev,
                         struct vc4_exec_info *exec)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        uint32_t i;
        int ret = 0;

        if (WARN_ON_ONCE(vc4->is_vc5))
                return -ENODEV;

        for (i = 0; i < exec->shader_state_count; i++) {
                ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
                if (ret)
                        return ret;
        }

        return ret;
}