linux/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
<<
>>
Prefs
   1
   2/*
   3 * Copyright 2017 Advanced Micro Devices, Inc.
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice shall be included in
  13 * all copies or substantial portions of the Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21 * OTHER DEALINGS IN THE SOFTWARE.
  22 *
  23 * Authors: AMD
  24 *
  25 */
  26#include <drm/drm_dsc.h>
  27
  28#include "os_types.h"
  29#include "rc_calc.h"
  30#include "qp_tables.h"
  31
  32#define table_hash(mode, bpc, max_min) ((mode << 16) | (bpc << 8) | max_min)
  33
  34#define MODE_SELECT(val444, val422, val420) \
  35        (cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420))
  36
  37
  38#define TABLE_CASE(mode, bpc, max)   case (table_hash(mode, BPC_##bpc, max)): \
  39        table = qp_table_##mode##_##bpc##bpc_##max; \
  40        table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
  41        break
  42
  43
  44static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
  45                       enum max_min max_min, float bpp)
  46{
  47        int mode = MODE_SELECT(444, 422, 420);
  48        int sel = table_hash(mode, bpc, max_min);
  49        int table_size = 0;
  50        int index;
  51        const struct qp_entry *table = 0L;
  52
  53        // alias enum
  54        enum { min = DAL_MM_MIN, max = DAL_MM_MAX };
  55        switch (sel) {
  56                TABLE_CASE(444,  8, max);
  57                TABLE_CASE(444,  8, min);
  58                TABLE_CASE(444, 10, max);
  59                TABLE_CASE(444, 10, min);
  60                TABLE_CASE(444, 12, max);
  61                TABLE_CASE(444, 12, min);
  62                TABLE_CASE(422,  8, max);
  63                TABLE_CASE(422,  8, min);
  64                TABLE_CASE(422, 10, max);
  65                TABLE_CASE(422, 10, min);
  66                TABLE_CASE(422, 12, max);
  67                TABLE_CASE(422, 12, min);
  68                TABLE_CASE(420,  8, max);
  69                TABLE_CASE(420,  8, min);
  70                TABLE_CASE(420, 10, max);
  71                TABLE_CASE(420, 10, min);
  72                TABLE_CASE(420, 12, max);
  73                TABLE_CASE(420, 12, min);
  74        }
  75
  76        if (table == 0)
  77                return;
  78
  79        index = (bpp - table[0].bpp) * 2;
  80
  81        /* requested size is bigger than the table */
  82        if (index >= table_size) {
  83                dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
  84                return;
  85        }
  86
  87        memcpy(qps, table[index].qps, sizeof(qp_set));
  88}
  89
  90static double dsc_roundf(double num)
  91{
  92        if (num < 0.0)
  93                num = num - 0.5;
  94        else
  95                num = num + 0.5;
  96
  97        return (int)(num);
  98}
  99
 100static double dsc_ceil(double num)
 101{
 102        double retval = (int)num;
 103
 104        if (retval != num && num > 0)
 105                retval = num + 1;
 106
 107        return (int)retval;
 108}
 109
 110static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
 111{
 112        int   *p = ofs;
 113
 114        if (mode == CM_444 || mode == CM_RGB) {
 115                *p++ = (bpp <=  6) ? (0) : ((((bpp >=  8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
 116                *p++ = (bpp <=  6) ? (-2) : ((((bpp >=  8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
 117                *p++ = (bpp <=  6) ? (-2) : ((((bpp >=  8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
 118                *p++ = (bpp <=  6) ? (-4) : ((((bpp >=  8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
 119                *p++ = (bpp <=  6) ? (-6) : ((((bpp >=  8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
 120                *p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0))));
 121                *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0))));
 122                *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0))));
 123                *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0))));
 124                *p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0))));
 125                *p++ = -10;
 126                *p++ = (bpp <=  6) ? (-12) : ((bpp >=  8) ? (-10) : (-12 + dsc_roundf((bpp -  6) * (2 / 2.0))));
 127                *p++ = -12;
 128                *p++ = -12;
 129                *p++ = -12;
 130        } else if (mode == CM_422) {
 131                *p++ = (bpp <=  8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp -  8) * (8 / 2.0))));
 132                *p++ = (bpp <=  8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp -  8) * (8 / 2.0))));
 133                *p++ = (bpp <=  8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp -  8) * (6 / 2.0))));
 134                *p++ = (bpp <=  8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp -  8) * (6 / 2.0))));
 135                *p++ = (bpp <=  8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp -  8) * (6 / 2.0))));
 136                *p++ = (bpp <=  8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp -  8) * (6 / 2.0))));
 137                *p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp -  8) * (6 / 2.0))));
 138                *p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp -  8) * (4 / 2.0))));
 139                *p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp -  8) * (2 / 2.0))));
 140                *p++ = (bpp <=  8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp -  8) * (2 / 2.0))));
 141                *p++ = -10;
 142                *p++ = (bpp <=  6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp -  6) * (2.0 / 1))));
 143                *p++ = -12;
 144                *p++ = -12;
 145                *p++ = -12;
 146        } else {
 147                *p++ = (bpp <=  6) ? (2) : ((bpp >=  8) ? (10) : (2 + dsc_roundf((bpp -  6) * (8 / 2.0))));
 148                *p++ = (bpp <=  6) ? (0) : ((bpp >=  8) ? (8) : (0 + dsc_roundf((bpp -  6) * (8 / 2.0))));
 149                *p++ = (bpp <=  6) ? (0) : ((bpp >=  8) ? (6) : (0 + dsc_roundf((bpp -  6) * (6 / 2.0))));
 150                *p++ = (bpp <=  6) ? (-2) : ((bpp >=  8) ? (4) : (-2 + dsc_roundf((bpp -  6) * (6 / 2.0))));
 151                *p++ = (bpp <=  6) ? (-4) : ((bpp >=  8) ? (2) : (-4 + dsc_roundf((bpp -  6) * (6 / 2.0))));
 152                *p++ = (bpp <=  6) ? (-6) : ((bpp >=  8) ? (0) : (-6 + dsc_roundf((bpp -  6) * (6 / 2.0))));
 153                *p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-2) : (-8 + dsc_roundf((bpp -  6) * (6 / 2.0))));
 154                *p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-4) : (-8 + dsc_roundf((bpp -  6) * (4 / 2.0))));
 155                *p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-6) : (-8 + dsc_roundf((bpp -  6) * (2 / 2.0))));
 156                *p++ = (bpp <=  6) ? (-10) : ((bpp >=  8) ? (-8) : (-10 + dsc_roundf((bpp -  6) * (2 / 2.0))));
 157                *p++ = -10;
 158                *p++ = (bpp <=  4) ? (-12) : ((bpp >=  5) ? (-10) : (-12 + dsc_roundf((bpp -  4) * (2 / 1.0))));
 159                *p++ = -12;
 160                *p++ = -12;
 161                *p++ = -12;
 162        }
 163}
 164
 165static int median3(int a, int b, int c)
 166{
 167        if (a > b)
 168                swap(a, b);
 169        if (b > c)
 170                swap(b, c);
 171        if (a > b)
 172                swap(b, c);
 173
 174        return b;
 175}
 176
 177static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm,
 178                               enum bits_per_comp bpc, u16 drm_bpp,
 179                               bool is_navite_422_or_420,
 180                               int slice_width, int slice_height,
 181                               int minor_version)
 182{
 183        float bpp;
 184        float bpp_group;
 185        float initial_xmit_delay_factor;
 186        int padding_pixels;
 187        int i;
 188
 189        bpp = ((float)drm_bpp / 16.0);
 190        /* in native_422 or native_420 modes, the bits_per_pixel is double the
 191         * target bpp (the latter is what calc_rc_params expects)
 192         */
 193        if (is_navite_422_or_420)
 194                bpp /= 2.0;
 195
 196        rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
 197        rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
 198
 199        bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0);
 200
 201        switch (cm) {
 202        case CM_420:
 203                rc->initial_fullness_offset = (bpp >=  6) ? (2048) : ((bpp <=  4) ? (6144) : ((((bpp >  4) && (bpp <=  5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp -  5) * (3584)))));
 204                rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group)));
 205                rc->second_line_bpg_offset  = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group)));
 206                break;
 207        case CM_422:
 208                rc->initial_fullness_offset = (bpp >=  8) ? (2048) : ((bpp <=  7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584))));
 209                rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group)));
 210                rc->second_line_bpg_offset  = 0;
 211                break;
 212        case CM_444:
 213        case CM_RGB:
 214                rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <=  8) ? (6144) : ((((bpp >  8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2)))));
 215                rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
 216                rc->second_line_bpg_offset  = 0;
 217                break;
 218        }
 219
 220        initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0;
 221        rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor);
 222
 223        if (cm == CM_422 || cm == CM_420)
 224                slice_width /= 2;
 225
 226        padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0;
 227        if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) {
 228                if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1)
 229                        rc->initial_xmit_delay++;
 230        }
 231
 232        rc->flatness_min_qp     = ((bpc == BPC_8) ?  (3) : ((bpc == BPC_10) ? (7)  : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
 233        rc->flatness_max_qp     = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
 234        rc->flatness_det_thresh = 2 << (bpc - 8);
 235
 236        get_qp_set(rc->qp_min, cm, bpc, DAL_MM_MIN, bpp);
 237        get_qp_set(rc->qp_max, cm, bpc, DAL_MM_MAX, bpp);
 238        if (cm == CM_444 && minor_version == 1) {
 239                for (i = 0; i < QP_SET_SIZE; ++i) {
 240                        rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0;
 241                        rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0;
 242                }
 243        }
 244        get_ofs_set(rc->ofs, cm, bpp);
 245
 246        /* fixed parameters */
 247        rc->rc_model_size    = 8192;
 248        rc->rc_edge_factor   = 6;
 249        rc->rc_tgt_offset_hi = 3;
 250        rc->rc_tgt_offset_lo = 3;
 251
 252        rc->rc_buf_thresh[0] = 896;
 253        rc->rc_buf_thresh[1] = 1792;
 254        rc->rc_buf_thresh[2] = 2688;
 255        rc->rc_buf_thresh[3] = 3584;
 256        rc->rc_buf_thresh[4] = 4480;
 257        rc->rc_buf_thresh[5] = 5376;
 258        rc->rc_buf_thresh[6] = 6272;
 259        rc->rc_buf_thresh[7] = 6720;
 260        rc->rc_buf_thresh[8] = 7168;
 261        rc->rc_buf_thresh[9] = 7616;
 262        rc->rc_buf_thresh[10] = 7744;
 263        rc->rc_buf_thresh[11] = 7872;
 264        rc->rc_buf_thresh[12] = 8000;
 265        rc->rc_buf_thresh[13] = 8064;
 266}
 267
 268static u32 _do_bytes_per_pixel_calc(int slice_width, u16 drm_bpp,
 269                                    bool is_navite_422_or_420)
 270{
 271        float bpp;
 272        u32 bytes_per_pixel;
 273        double d_bytes_per_pixel;
 274
 275        bpp = ((float)drm_bpp / 16.0);
 276        d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
 277        // TODO: Make sure the formula for calculating this is precise (ceiling
 278        // vs. floor, and at what point they should be applied)
 279        if (is_navite_422_or_420)
 280                d_bytes_per_pixel /= 2;
 281
 282        bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
 283
 284        return bytes_per_pixel;
 285}
 286
 287/**
 288 * calc_rc_params - reads the user's cmdline mode
 289 * @rc: DC internal DSC parameters
 290 * @pps: DRM struct with all required DSC values
 291 *
 292 * This function expects a drm_dsc_config data struct with all the required DSC
 293 * values previously filled out by our driver and based on this information it
 294 * computes some of the DSC values.
 295 *
 296 * @note This calculation requires float point operation, most of it executes
 297 * under kernel_fpu_{begin,end}.
 298 */
 299void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps)
 300{
 301        enum colour_mode mode;
 302        enum bits_per_comp bpc;
 303        bool is_navite_422_or_420;
 304        u16 drm_bpp = pps->bits_per_pixel;
 305        int slice_width  = pps->slice_width;
 306        int slice_height = pps->slice_height;
 307
 308        mode = pps->convert_rgb ? CM_RGB : (pps->simple_422  ? CM_444 :
 309                                           (pps->native_422  ? CM_422 :
 310                                            pps->native_420  ? CM_420 : CM_444));
 311        bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10)
 312                                             ? BPC_10 : BPC_12;
 313
 314        is_navite_422_or_420 = pps->native_422 || pps->native_420;
 315
 316        DC_FP_START();
 317        _do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420,
 318                           slice_width, slice_height,
 319                           pps->dsc_version_minor);
 320        DC_FP_END();
 321}
 322
 323/**
 324 * calc_dsc_bytes_per_pixel - calculate bytes per pixel
 325 * @pps: DRM struct with all required DSC values
 326 *
 327 * Based on the information inside drm_dsc_config, this function calculates the
 328 * total of bytes per pixel.
 329 *
 330 * @note This calculation requires float point operation, most of it executes
 331 * under kernel_fpu_{begin,end}.
 332 *
 333 * Return:
 334 * Return the number of bytes per pixel
 335 */
 336u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps)
 337
 338{
 339        u32 ret;
 340        u16 drm_bpp = pps->bits_per_pixel;
 341        int slice_width  = pps->slice_width;
 342        bool is_navite_422_or_420 = pps->native_422 || pps->native_420;
 343
 344        DC_FP_START();
 345        ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp,
 346                                       is_navite_422_or_420);
 347        DC_FP_END();
 348        return ret;
 349}
 350