linux/drivers/staging/media/hantro/hantro_h264.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Rockchip RK3288 VPU codec driver
   4 *
   5 * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
   6 *      Hertz Wong <hertz.wong@rock-chips.com>
   7 *      Herman Chen <herman.chen@rock-chips.com>
   8 *
   9 * Copyright (C) 2014 Google, Inc.
  10 *      Tomasz Figa <tfiga@chromium.org>
  11 */
  12
  13#include <linux/types.h>
  14#include <linux/sort.h>
  15#include <media/v4l2-mem2mem.h>
  16
  17#include "hantro.h"
  18#include "hantro_hw.h"
  19
  20/* Size with u32 units. */
  21#define CABAC_INIT_BUFFER_SIZE          (460 * 2)
  22#define POC_BUFFER_SIZE                 34
  23#define SCALING_LIST_SIZE               (6 * 16 + 6 * 64)
  24
  25#define POC_CMP(p0, p1) ((p0) < (p1) ? -1 : 1)
  26
  27/* Data structure describing auxiliary buffer format. */
  28struct hantro_h264_dec_priv_tbl {
  29        u32 cabac_table[CABAC_INIT_BUFFER_SIZE];
  30        u32 poc[POC_BUFFER_SIZE];
  31        u8 scaling_list[SCALING_LIST_SIZE];
  32};
  33
  34/*
  35 * Constant CABAC table.
  36 * From drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c
  37 * in https://chromium.googlesource.com/chromiumos/third_party/kernel,
  38 * chromeos-3.14 branch.
  39 */
  40static const u32 h264_cabac_table[] = {
  41        0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07330000,
  42        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  43        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  44        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  45        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  46        0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x000b0137,
  47        0x0045ef7f, 0xf3660052, 0xf94aeb6b, 0xe57fe17f, 0xe87fee5f, 0xe57feb72,
  48        0xe27fef7b, 0xf473f07a, 0xf573f43f, 0xfe44f154, 0xf368fd46, 0xf85df65a,
  49        0xe27fff4a, 0xfa61f95b, 0xec7ffc38, 0xfb52f94c, 0xea7df95d, 0xf557fd4d,
  50        0xfb47fc3f, 0xfc44f454, 0xf93ef941, 0x083d0538, 0xfe420140, 0x003dfe4e,
  51        0x01320734, 0x0a23002c, 0x0b26012d, 0x002e052c, 0x1f110133, 0x07321c13,
  52        0x10210e3e, 0xf36cf164, 0xf365f35b, 0xf45ef658, 0xf054f656, 0xf953f357,
  53        0xed5e0146, 0x0048fb4a, 0x123bf866, 0xf164005f, 0xfc4b0248, 0xf54bfd47,
  54        0x0f2ef345, 0x003e0041, 0x1525f148, 0x09391036, 0x003e0c48, 0x18000f09,
  55        0x08190d12, 0x0f090d13, 0x0a250c12, 0x061d1421, 0x0f1e042d, 0x013a003e,
  56        0x073d0c26, 0x0b2d0f27, 0x0b2a0d2c, 0x102d0c29, 0x0a311e22, 0x122a0a37,
  57        0x1133112e, 0x00591aed, 0x16ef1aef, 0x1ee71cec, 0x21e925e5, 0x21e928e4,
  58        0x26ef21f5, 0x28f129fa, 0x26012911, 0x1efa1b03, 0x1a1625f0, 0x23fc26f8,
  59        0x26fd2503, 0x26052a00, 0x23102716, 0x0e301b25, 0x153c0c44, 0x0261fd47,
  60        0xfa2afb32, 0xfd36fe3e, 0x003a013f, 0xfe48ff4a, 0xf75bfb43, 0xfb1bfd27,
  61        0xfe2c002e, 0xf040f844, 0xf64efa4d, 0xf656f45c, 0xf137f63c, 0xfa3efc41,
  62        0xf449f84c, 0xf950f758, 0xef6ef561, 0xec54f54f, 0xfa49fc4a, 0xf356f360,
  63        0xf561ed75, 0xf84efb21, 0xfc30fe35, 0xfd3ef347, 0xf64ff456, 0xf35af261,
  64        0x0000fa5d, 0xfa54f84f, 0x0042ff47, 0x003efe3c, 0xfe3bfb4b, 0xfd3efc3a,
  65        0xf742ff4f, 0x00470344, 0x0a2cf93e, 0x0f240e28, 0x101b0c1d, 0x012c1424,
  66        0x1220052a, 0x01300a3e, 0x112e0940, 0xf468f561, 0xf060f958, 0xf855f955,
  67        0xf755f358, 0x0442fd4d, 0xfd4cfa4c, 0x0a3aff4c, 0xff53f963, 0xf25f025f,
  68        0x004cfb4a, 0x0046f54b, 0x01440041, 0xf249033e, 0x043eff44, 0xf34b0b37,
  69        0x05400c46, 0x0f060613, 0x07100c0e, 0x120d0d0b, 0x0d0f0f10, 0x0c170d17,
  70        0x0f140e1a, 0x0e2c1128, 0x112f1811, 0x15151916, 0x1f1b161d, 0x13230e32,
  71        0x0a39073f, 0xfe4dfc52, 0xfd5e0945, 0xf46d24dd, 0x24de20e6, 0x25e22ce0,
  72        0x22ee22f1, 0x28f121f9, 0x23fb2100, 0x2602210d, 0x17230d3a, 0x1dfd1a00,
  73        0x161e1ff9, 0x23f122fd, 0x220324ff, 0x2205200b, 0x2305220c, 0x270b1e1d,
  74        0x221a1d27, 0x13421f15, 0x1f1f1932, 0xef78ec70, 0xee72f555, 0xf15cf259,
  75        0xe647f151, 0xf2500044, 0xf246e838, 0xe944e832, 0xf54a17f3, 0x1af328f1,
  76        0x31f22c03, 0x2d062c22, 0x21361352, 0xfd4bff17, 0x0122012b, 0x0036fe37,
  77        0x003d0140, 0x0044f75c, 0xf26af361, 0xf15af45a, 0xee58f649, 0xf74ff256,
  78        0xf649f646, 0xf645fb42, 0xf740fb3a, 0x023b15f6, 0x18f51cf8, 0x1cff1d03,
  79        0x1d092314, 0x1d240e43, 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968,
  80        0xfa35ff36, 0x07331721, 0x17021500, 0x01090031, 0xdb760539, 0xf34ef541,
  81        0x013e0c31, 0xfc491132, 0x1240092b, 0x1d001a43, 0x105a0968, 0xd27fec68,
  82        0x0143f34e, 0xf541013e, 0xfa56ef5f, 0xfa3d092d, 0xfd45fa51, 0xf5600637,
  83        0x0743fb56, 0x0258003a, 0xfd4cf65e, 0x05360445, 0xfd510058, 0xf943fb4a,
  84        0xfc4afb50, 0xf948013a, 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948,
  85        0x0d29033e, 0x002dfc4e, 0xfd60e57e, 0xe462e765, 0xe943e452, 0xec5ef053,
  86        0xea6eeb5b, 0xee66f35d, 0xe37ff95c, 0xfb59f960, 0xf36cfd2e, 0xff41ff39,
  87        0xf75dfd4a, 0xf75cf857, 0xe97e0536, 0x063c063b, 0x0645ff30, 0x0044fc45,
  88        0xf858fe55, 0xfa4eff4b, 0xf94d0236, 0x0532fd44, 0x0132062a, 0xfc51013f,
  89        0xfc460043, 0x0239fe4c, 0x0b230440, 0x013d0b23, 0x12190c18, 0x0d1d0d24,
  90        0xf65df949, 0xfe490d2e, 0x0931f964, 0x09350235, 0x0535fe3d, 0x00380038,
  91        0xf33ffb3c, 0xff3e0439, 0xfa450439, 0x0e270433, 0x0d440340, 0x013d093f,
  92        0x07321027, 0x052c0434, 0x0b30fb3c, 0xff3b003b, 0x1621052c, 0x0e2bff4e,
  93        0x003c0945, 0x0b1c0228, 0x032c0031, 0x002e022c, 0x0233002f, 0x0427023e,
  94        0x062e0036, 0x0336023a, 0x043f0633, 0x06390735, 0x06340637, 0x0b2d0e24,
  95        0x0835ff52, 0x0737fd4e, 0x0f2e161f, 0xff541907, 0x1ef91c03, 0x1c042000,
  96        0x22ff1e06, 0x1e062009, 0x1f131a1b, 0x1a1e2514, 0x1c221146, 0x0143053b,
  97        0x0943101e, 0x12201223, 0x161d181f, 0x1726122b, 0x14290b3f, 0x093b0940,
  98        0xff5efe59, 0xf76cfa4c, 0xfe2c002d, 0x0034fd40, 0xfe3bfc46, 0xfc4bf852,
  99        0xef66f74d, 0x0318002a, 0x00300037, 0xfa3bf947, 0xf453f557, 0xe277013a,
 100        0xfd1dff24, 0x0126022b, 0xfa37003a, 0x0040fd4a, 0xf65a0046, 0xfc1d051f,
 101        0x072a013b, 0xfe3afd48, 0xfd51f561, 0x003a0805, 0x0a0e0e12, 0x0d1b0228,
 102        0x003afd46, 0xfa4ff855, 0x0000f36a, 0xf06af657, 0xeb72ee6e, 0xf262ea6e,
 103        0xeb6aee67, 0xeb6be96c, 0xe670f660, 0xf45ffb5b, 0xf75dea5e, 0xfb560943,
 104        0xfc50f655, 0xff46073c, 0x093a053d, 0x0c320f32, 0x12311136, 0x0a29072e,
 105        0xff330731, 0x08340929, 0x062f0237, 0x0d290a2c, 0x06320535, 0x0d31043f,
 106        0x0640fe45, 0xfe3b0646, 0x0a2c091f, 0x0c2b0335, 0x0e220a26, 0xfd340d28,
 107        0x1120072c, 0x07260d32, 0x0a391a2b, 0x0e0b0b0e, 0x090b120b, 0x150917fe,
 108        0x20f120f1, 0x22eb27e9, 0x2adf29e1, 0x2ee426f4, 0x151d2de8, 0x35d330e6,
 109        0x41d52bed, 0x27f61e09, 0x121a141b, 0x0039f252, 0xfb4bed61, 0xdd7d1b00,
 110        0x1c001ffc, 0x1b062208, 0x1e0a1816, 0x21131620, 0x1a1f1529, 0x1a2c172f,
 111        0x10410e47, 0x083c063f, 0x11411518, 0x17141a17, 0x1b201c17, 0x1c181728,
 112        0x18201c1d, 0x172a1339, 0x1635163d, 0x0b560c28, 0x0b330e3b, 0xfc4ff947,
 113        0xfb45f746, 0xf842f644, 0xed49f445, 0xf046f143, 0xec3eed46, 0xf042ea41,
 114        0xec3f09fe, 0x1af721f7, 0x27f929fe, 0x2d033109, 0x2d1b243b, 0xfa42f923,
 115        0xf92af82d, 0xfb30f438, 0xfa3cfb3e, 0xf842f84c, 0xfb55fa51, 0xf64df951,
 116        0xef50ee49, 0xfc4af653, 0xf747f743, 0xff3df842, 0xf242003b, 0x023b15f3,
 117        0x21f227f9, 0x2efe3302, 0x3c063d11, 0x37222a3e, 0x14f10236, 0x034a14f1,
 118        0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331619, 0x22001000, 0xfe090429,
 119        0xe3760241, 0xfa47f34f, 0x05340932, 0xfd460a36, 0x1a221316, 0x28003902,
 120        0x29241a45, 0xd37ff165, 0xfc4cfa47, 0xf34f0534, 0x0645f35a, 0x0034082b,
 121        0xfe45fb52, 0xf660023b, 0x024bfd57, 0xfd640138, 0xfd4afa55, 0x003bfd51,
 122        0xf956fb5f, 0xff42ff4d, 0x0146fe56, 0xfb48003d, 0x0029003f, 0x003f003f,
 123        0xf7530456, 0x0061f948, 0x0d29033e, 0x0d0f0733, 0x0250d97f, 0xee5bef60,
 124        0xe651dd62, 0xe866e961, 0xe577e863, 0xeb6eee66, 0xdc7f0050, 0xfb59f95e,
 125        0xfc5c0027, 0x0041f154, 0xdd7ffe49, 0xf468f75b, 0xe17f0337, 0x07380737,
 126        0x083dfd35, 0x0044f94a, 0xf758f367, 0xf35bf759, 0xf25cf84c, 0xf457e96e,
 127        0xe869f64e, 0xec70ef63, 0xb27fba7f, 0xce7fd27f, 0xfc42fb4e, 0xfc47f848,
 128        0x023bff37, 0xf946fa4b, 0xf859de77, 0xfd4b2014, 0x1e16d47f, 0x0036fb3d,
 129        0x003aff3c, 0xfd3df843, 0xe754f24a, 0xfb410534, 0x0239003d, 0xf745f546,
 130        0x1237fc47, 0x003a073d, 0x09291219, 0x0920052b, 0x092f002c, 0x0033022e,
 131        0x1326fc42, 0x0f260c2a, 0x09220059, 0x042d0a1c, 0x0a1f21f5, 0x34d5120f,
 132        0x1c0023ea, 0x26e72200, 0x27ee20f4, 0x66a20000, 0x38f121fc, 0x1d0a25fb,
 133        0x33e327f7, 0x34de45c6, 0x43c12cfb, 0x200737e3, 0x20010000, 0x1b2421e7,
 134        0x22e224e4, 0x26e426e5, 0x22ee23f0, 0x22f220f8, 0x25fa2300, 0x1e0a1c12,
 135        0x1a191d29, 0x004b0248, 0x084d0e23, 0x121f1123, 0x151e112d, 0x142a122d,
 136        0x1b1a1036, 0x07421038, 0x0b490a43, 0xf674e970, 0xf147f93d, 0x0035fb42,
 137        0xf54df750, 0xf754f657, 0xde7feb65, 0xfd27fb35, 0xf93df54b, 0xf14def5b,
 138        0xe76be76f, 0xe47af54c, 0xf62cf634, 0xf639f73a, 0xf048f945, 0xfc45fb4a,
 139        0xf7560242, 0xf7220120, 0x0b1f0534, 0xfe37fe43, 0x0049f859, 0x03340704,
 140        0x0a081108, 0x10130325, 0xff3dfb49, 0xff46fc4e, 0x0000eb7e, 0xe97cec6e,
 141        0xe67ee77c, 0xef69e579, 0xe575ef66, 0xe675e574, 0xdf7af65f, 0xf264f85f,
 142        0xef6fe472, 0xfa59fe50, 0xfc52f755, 0xf851ff48, 0x05400143, 0x09380045,
 143        0x01450745, 0xf945fa43, 0xf04dfe40, 0x023dfa43, 0xfd400239, 0xfd41fd42,
 144        0x003e0933, 0xff42fe47, 0xfe4bff46, 0xf7480e3c, 0x1025002f, 0x12230b25,
 145        0x0c290a29, 0x02300c29, 0x0d29003b, 0x03321328, 0x03421232, 0x13fa12fa,
 146        0x0e001af4, 0x1ff021e7, 0x21ea25e4, 0x27e22ae2, 0x2fd62ddc, 0x31de29ef,
 147        0x200945b9, 0x3fc142c0, 0x4db636d9, 0x34dd29f6, 0x240028ff, 0x1e0e1c1a,
 148        0x17250c37, 0x0b4125df, 0x27dc28db, 0x26e22edf, 0x2ae228e8, 0x31e326f4,
 149        0x28f626fd, 0x2efb1f14, 0x1d1e192c, 0x0c300b31, 0x1a2d1616, 0x17161b15,
 150        0x21141a1c, 0x1e181b22, 0x122a1927, 0x12320c46, 0x15360e47, 0x0b531920,
 151        0x15311536, 0xfb55fa51, 0xf64df951, 0xef50ee49, 0xfc4af653, 0xf747f743,
 152        0xff3df842, 0xf242003b, 0x023b11f6, 0x20f32af7, 0x31fb3500, 0x4003440a,
 153        0x421b2f39, 0xfb470018, 0xff24fe2a, 0xfe34f739, 0xfa3ffc41, 0xfc43f952,
 154        0xfd51fd4c, 0xf948fa4e, 0xf448f244, 0xfd46fa4c, 0xfb42fb3e, 0x0039fc3d,
 155        0xf73c0136, 0x023a11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, 0x421b2f39,
 156        0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331d10,
 157        0x19000e00, 0xf633fd3e, 0xe5631a10, 0xfc55e866, 0x05390639, 0xef490e39,
 158        0x1428140a, 0x1d003600, 0x252a0c61, 0xe07fea75, 0xfe4afc55, 0xe8660539,
 159        0xfa5df258, 0xfa2c0437, 0xf559f167, 0xeb741339, 0x143a0454, 0x0660013f,
 160        0xfb55f36a, 0x053f064b, 0xfd5aff65, 0x0337fc4f, 0xfe4bf461, 0xf932013c,
 161        0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x0722f758,
 162        0xec7fdc7f, 0xef5bf25f, 0xe754e756, 0xf459ef5b, 0xe17ff24c, 0xee67f35a,
 163        0xdb7f0b50, 0x054c0254, 0x054efa37, 0x043df253, 0xdb7ffb4f, 0xf568f55b,
 164        0xe27f0041, 0xfe4f0048, 0xfc5cfa38, 0x0344f847, 0xf362fc56, 0xf458fb52,
 165        0xfd48fc43, 0xf848f059, 0xf745ff3b, 0x05420439, 0xfc47fe47, 0x023aff4a,
 166        0xfc2cff45, 0x003ef933, 0xfc2ffa2a, 0xfd29fa35, 0x084cf74e, 0xf5530934,
 167        0x0043fb5a, 0x0143f148, 0xfb4bf850, 0xeb53eb40, 0xf31fe740, 0xe35e094b,
 168        0x113ff84a, 0xfb23fe1b, 0x0d5b0341, 0xf945084d, 0xf642033e, 0xfd44ec51,
 169        0x001e0107, 0xfd17eb4a, 0x1042e97c, 0x11252cee, 0x32deea7f, 0x0427002a,
 170        0x07220b1d, 0x081f0625, 0x072a0328, 0x08210d2b, 0x0d24042f, 0x0337023a,
 171        0x063c082c, 0x0b2c0e2a, 0x07300438, 0x04340d25, 0x0931133a, 0x0a300c2d,
 172        0x00451421, 0x083f23ee, 0x21e71cfd, 0x180a1b00, 0x22f234d4, 0x27e81311,
 173        0x1f19241d, 0x1821220f, 0x1e141649, 0x1422131f, 0x1b2c1310, 0x0f240f24,
 174        0x151c1915, 0x1e141f0c, 0x1b10182a, 0x005d0e38, 0x0f391a26, 0xe87fe873,
 175        0xea52f73e, 0x0035003b, 0xf255f359, 0xf35ef55c, 0xe37feb64, 0xf239f443,
 176        0xf547f64d, 0xeb55f058, 0xe968f162, 0xdb7ff652, 0xf830f83d, 0xf842f946,
 177        0xf24bf64f, 0xf753f45c, 0xee6cfc4f, 0xea45f04b, 0xfe3a013a, 0xf34ef753,
 178        0xfc51f363, 0xf351fa26, 0xf33efa3a, 0xfe3bf049, 0xf64cf356, 0xf753f657,
 179        0x0000ea7f, 0xe77fe778, 0xe57fed72, 0xe975e776, 0xe675e871, 0xe476e178,
 180        0xdb7cf65e, 0xf166f663, 0xf36ace7f, 0xfb5c1139, 0xfb56f35e, 0xf45bfe4d,
 181        0x0047ff49, 0x0440f951, 0x05400f39, 0x01430044, 0xf6430144, 0x004d0240,
 182        0x0044fb4e, 0x0737053b, 0x02410e36, 0x0f2c053c, 0x0246fe4c, 0xee560c46,
 183        0x0540f446, 0x0b370538, 0x00450241, 0xfa4a0536, 0x0736fa4c, 0xf552fe4d,
 184        0xfe4d192a, 0x11f310f7, 0x11f41beb, 0x25e229d8, 0x2ad730d1, 0x27e02ed8,
 185        0x34cd2ed7, 0x34d92bed, 0x200b3dc9, 0x38d23ece, 0x51bd2dec, 0x23fe1c0f,
 186        0x22012701, 0x1e111426, 0x122d0f36, 0x004f24f0, 0x25f225ef, 0x2001220f,
 187        0x1d0f1819, 0x22161f10, 0x23121f1c, 0x2129241c, 0x1b2f153e, 0x121f131a,
 188        0x24181817, 0x1b10181e, 0x1f1d1629, 0x162a103c, 0x0f340e3c, 0x034ef07b,
 189        0x15351638, 0x193d1521, 0x1332113d, 0xfd4ef84a, 0xf748f648, 0xee4bf447,
 190        0xf53ffb46, 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc,
 191        0x21ff2107, 0x1f0c2517, 0x1f261440, 0xf747f925, 0xf82cf531, 0xf638f43b,
 192        0xf83ff743, 0xfa44f64f, 0xfd4ef84a, 0xf748f648, 0xee4bf447, 0xf53ffb46,
 193        0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, 0x21ff2107,
 194        0x1f0c2517, 0x1f261440
 195};
 196
 197/*
 198 * NOTE: The scaling lists are in zig-zag order, apply inverse scanning process
 199 * to get the values in matrix order. In addition, the hardware requires bytes
 200 * swapped within each subsequent 4 bytes. Both arrays below include both
 201 * transformations.
 202 */
 203static const u32 zig_zag_4x4[] = {
 204        3, 2, 7, 11, 6, 1, 0, 5, 10, 15, 14, 9, 4, 8, 13, 12
 205};
 206
 207static const u32 zig_zag_8x8[] = {
 208        3, 2, 11, 19, 10, 1, 0, 9, 18, 27, 35, 26, 17, 8, 7, 6,
 209        15, 16, 25, 34, 43, 51, 42, 33, 24, 23, 14, 5, 4, 13, 22, 31,
 210        32, 41, 50, 59, 58, 49, 40, 39, 30, 21, 12, 20, 29, 38, 47, 48,
 211        57, 56, 55, 46, 37, 28, 36, 45, 54, 63, 62, 53, 44, 52, 61, 60
 212};
 213
 214static void
 215reorder_scaling_list(struct hantro_ctx *ctx)
 216{
 217        const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
 218        const struct v4l2_ctrl_h264_scaling_matrix *scaling = ctrls->scaling;
 219        const size_t num_list_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4);
 220        const size_t list_len_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4[0]);
 221        const size_t num_list_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8);
 222        const size_t list_len_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8[0]);
 223        struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
 224        u8 *dst = tbl->scaling_list;
 225        const u8 *src;
 226        int i, j;
 227
 228        BUILD_BUG_ON(ARRAY_SIZE(zig_zag_4x4) != list_len_4x4);
 229        BUILD_BUG_ON(ARRAY_SIZE(zig_zag_8x8) != list_len_8x8);
 230        BUILD_BUG_ON(ARRAY_SIZE(tbl->scaling_list) !=
 231                     num_list_4x4 * list_len_4x4 +
 232                     num_list_8x8 * list_len_8x8);
 233
 234        src = &scaling->scaling_list_4x4[0][0];
 235        for (i = 0; i < num_list_4x4; ++i) {
 236                for (j = 0; j < list_len_4x4; ++j)
 237                        dst[zig_zag_4x4[j]] = src[j];
 238                src += list_len_4x4;
 239                dst += list_len_4x4;
 240        }
 241
 242        src = &scaling->scaling_list_8x8[0][0];
 243        for (i = 0; i < num_list_8x8; ++i) {
 244                for (j = 0; j < list_len_8x8; ++j)
 245                        dst[zig_zag_8x8[j]] = src[j];
 246                src += list_len_8x8;
 247                dst += list_len_8x8;
 248        }
 249}
 250
 251static void prepare_table(struct hantro_ctx *ctx)
 252{
 253        const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
 254        const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
 255        struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
 256        const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
 257        int i;
 258
 259        for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) {
 260                tbl->poc[i * 2] = dpb[i].top_field_order_cnt;
 261                tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt;
 262        }
 263
 264        tbl->poc[32] = dec_param->top_field_order_cnt;
 265        tbl->poc[33] = dec_param->bottom_field_order_cnt;
 266
 267        reorder_scaling_list(ctx);
 268}
 269
 270struct hantro_h264_reflist_builder {
 271        const struct v4l2_h264_dpb_entry *dpb;
 272        s32 pocs[HANTRO_H264_DPB_SIZE];
 273        u8 unordered_reflist[HANTRO_H264_DPB_SIZE];
 274        s32 curpoc;
 275        u8 num_valid;
 276};
 277
 278static s32 get_poc(enum v4l2_field field, s32 top_field_order_cnt,
 279                   s32 bottom_field_order_cnt)
 280{
 281        switch (field) {
 282        case V4L2_FIELD_TOP:
 283                return top_field_order_cnt;
 284        case V4L2_FIELD_BOTTOM:
 285                return bottom_field_order_cnt;
 286        default:
 287                break;
 288        }
 289
 290        return min(top_field_order_cnt, bottom_field_order_cnt);
 291}
 292
 293static void
 294init_reflist_builder(struct hantro_ctx *ctx,
 295                     struct hantro_h264_reflist_builder *b)
 296{
 297        const struct v4l2_ctrl_h264_decode_params *dec_param;
 298        struct vb2_v4l2_buffer *buf = hantro_get_dst_buf(ctx);
 299        const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
 300        struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
 301        unsigned int i;
 302
 303        dec_param = ctx->h264_dec.ctrls.decode;
 304
 305        memset(b, 0, sizeof(*b));
 306        b->dpb = dpb;
 307        b->curpoc = get_poc(buf->field, dec_param->top_field_order_cnt,
 308                            dec_param->bottom_field_order_cnt);
 309
 310        for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++) {
 311                int buf_idx;
 312
 313                if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
 314                        continue;
 315
 316                buf_idx = vb2_find_timestamp(cap_q, dpb[i].reference_ts, 0);
 317                if (buf_idx < 0)
 318                        continue;
 319
 320                buf = to_vb2_v4l2_buffer(vb2_get_buffer(cap_q, buf_idx));
 321                b->pocs[i] = get_poc(buf->field, dpb[i].top_field_order_cnt,
 322                                     dpb[i].bottom_field_order_cnt);
 323                b->unordered_reflist[b->num_valid] = i;
 324                b->num_valid++;
 325        }
 326
 327        for (i = b->num_valid; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
 328                b->unordered_reflist[i] = i;
 329}
 330
 331static int p_ref_list_cmp(const void *ptra, const void *ptrb, const void *data)
 332{
 333        const struct hantro_h264_reflist_builder *builder = data;
 334        const struct v4l2_h264_dpb_entry *a, *b;
 335        u8 idxa, idxb;
 336
 337        idxa = *((u8 *)ptra);
 338        idxb = *((u8 *)ptrb);
 339        a = &builder->dpb[idxa];
 340        b = &builder->dpb[idxb];
 341
 342        if ((a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) !=
 343            (b->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) {
 344                /* Short term pics firt. */
 345                if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
 346                        return -1;
 347                else
 348                        return 1;
 349        }
 350
 351        /*
 352         * Short term pics in descending pic num order, long term ones in
 353         * ascending order.
 354         */
 355        if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
 356                return b->frame_num - a->frame_num;
 357
 358        return a->pic_num - b->pic_num;
 359}
 360
 361static int b0_ref_list_cmp(const void *ptra, const void *ptrb, const void *data)
 362{
 363        const struct hantro_h264_reflist_builder *builder = data;
 364        const struct v4l2_h264_dpb_entry *a, *b;
 365        s32 poca, pocb;
 366        u8 idxa, idxb;
 367
 368        idxa = *((u8 *)ptra);
 369        idxb = *((u8 *)ptrb);
 370        a = &builder->dpb[idxa];
 371        b = &builder->dpb[idxb];
 372
 373        if ((a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) !=
 374            (b->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) {
 375                /* Short term pics firt. */
 376                if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
 377                        return -1;
 378                else
 379                        return 1;
 380        }
 381
 382        /* Long term pics in ascending pic num order. */
 383        if (a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
 384                return a->pic_num - b->pic_num;
 385
 386        poca = builder->pocs[idxa];
 387        pocb = builder->pocs[idxb];
 388
 389        /*
 390         * Short term pics with POC < cur POC first in POC descending order
 391         * followed by short term pics with POC > cur POC in POC ascending
 392         * order.
 393         */
 394        if ((poca < builder->curpoc) != (pocb < builder->curpoc))
 395                return POC_CMP(poca, pocb);
 396        else if (poca < builder->curpoc)
 397                return POC_CMP(pocb, poca);
 398
 399        return POC_CMP(poca, pocb);
 400}
 401
 402static int b1_ref_list_cmp(const void *ptra, const void *ptrb, const void *data)
 403{
 404        const struct hantro_h264_reflist_builder *builder = data;
 405        const struct v4l2_h264_dpb_entry *a, *b;
 406        s32 poca, pocb;
 407        u8 idxa, idxb;
 408
 409        idxa = *((u8 *)ptra);
 410        idxb = *((u8 *)ptrb);
 411        a = &builder->dpb[idxa];
 412        b = &builder->dpb[idxb];
 413
 414        if ((a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) !=
 415            (b->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) {
 416                /* Short term pics firt. */
 417                if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
 418                        return -1;
 419                else
 420                        return 1;
 421        }
 422
 423        /* Long term pics in ascending pic num order. */
 424        if (a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
 425                return a->pic_num - b->pic_num;
 426
 427        poca = builder->pocs[idxa];
 428        pocb = builder->pocs[idxb];
 429
 430        /*
 431         * Short term pics with POC > cur POC first in POC ascending order
 432         * followed by short term pics with POC > cur POC in POC descending
 433         * order.
 434         */
 435        if ((poca < builder->curpoc) != (pocb < builder->curpoc))
 436                return POC_CMP(pocb, poca);
 437        else if (poca < builder->curpoc)
 438                return POC_CMP(pocb, poca);
 439
 440        return POC_CMP(poca, pocb);
 441}
 442
 443static void
 444build_p_ref_list(const struct hantro_h264_reflist_builder *builder,
 445                 u8 *reflist)
 446{
 447        memcpy(reflist, builder->unordered_reflist,
 448               sizeof(builder->unordered_reflist));
 449        sort_r(reflist, builder->num_valid, sizeof(*reflist),
 450               p_ref_list_cmp, NULL, builder);
 451}
 452
 453static void
 454build_b_ref_lists(const struct hantro_h264_reflist_builder *builder,
 455                  u8 *b0_reflist, u8 *b1_reflist)
 456{
 457        memcpy(b0_reflist, builder->unordered_reflist,
 458               sizeof(builder->unordered_reflist));
 459        sort_r(b0_reflist, builder->num_valid, sizeof(*b0_reflist),
 460               b0_ref_list_cmp, NULL, builder);
 461
 462        memcpy(b1_reflist, builder->unordered_reflist,
 463               sizeof(builder->unordered_reflist));
 464        sort_r(b1_reflist, builder->num_valid, sizeof(*b1_reflist),
 465               b1_ref_list_cmp, NULL, builder);
 466
 467        if (builder->num_valid > 1 &&
 468            !memcmp(b1_reflist, b0_reflist, builder->num_valid))
 469                swap(b1_reflist[0], b1_reflist[1]);
 470}
 471
 472static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a,
 473                            const struct v4l2_h264_dpb_entry *b)
 474{
 475        return a->top_field_order_cnt == b->top_field_order_cnt &&
 476               a->bottom_field_order_cnt == b->bottom_field_order_cnt;
 477}
 478
 479static void update_dpb(struct hantro_ctx *ctx)
 480{
 481        const struct v4l2_ctrl_h264_decode_params *dec_param;
 482        DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, };
 483        DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, };
 484        unsigned int i, j;
 485
 486        dec_param = ctx->h264_dec.ctrls.decode;
 487
 488        /* Disable all entries by default. */
 489        for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
 490                ctx->h264_dec.dpb[i].flags &= ~V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
 491
 492        /* Try to match new DPB entries with existing ones by their POCs. */
 493        for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
 494                const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
 495
 496                if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
 497                        continue;
 498
 499                /*
 500                 * To cut off some comparisons, iterate only on target DPB
 501                 * entries which are not used yet.
 502                 */
 503                for_each_clear_bit(j, used, ARRAY_SIZE(ctx->h264_dec.dpb)) {
 504                        struct v4l2_h264_dpb_entry *cdpb;
 505
 506                        cdpb = &ctx->h264_dec.dpb[j];
 507                        if (cdpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE ||
 508                            !dpb_entry_match(cdpb, ndpb))
 509                                continue;
 510
 511                        *cdpb = *ndpb;
 512                        set_bit(j, used);
 513                        break;
 514                }
 515
 516                if (j == ARRAY_SIZE(ctx->h264_dec.dpb))
 517                        set_bit(i, new);
 518        }
 519
 520        /* For entries that could not be matched, use remaining free slots. */
 521        for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) {
 522                const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
 523                struct v4l2_h264_dpb_entry *cdpb;
 524
 525                /*
 526                 * Both arrays are of the same sizes, so there is no way
 527                 * we can end up with no space in target array, unless
 528                 * something is buggy.
 529                 */
 530                j = find_first_zero_bit(used, ARRAY_SIZE(ctx->h264_dec.dpb));
 531                if (WARN_ON(j >= ARRAY_SIZE(ctx->h264_dec.dpb)))
 532                        return;
 533
 534                cdpb = &ctx->h264_dec.dpb[j];
 535                *cdpb = *ndpb;
 536                set_bit(j, used);
 537        }
 538}
 539
 540struct vb2_buffer *hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
 541                                           unsigned int dpb_idx)
 542{
 543        struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
 544        struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
 545        struct vb2_buffer *buf;
 546        int buf_idx = -1;
 547
 548        if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
 549                buf_idx = vb2_find_timestamp(cap_q,
 550                                             dpb[dpb_idx].reference_ts, 0);
 551
 552        if (buf_idx >= 0) {
 553                buf = vb2_get_buffer(cap_q, buf_idx);
 554        } else {
 555                struct vb2_v4l2_buffer *dst_buf;
 556
 557                /*
 558                 * If a DPB entry is unused or invalid, address of current
 559                 * destination buffer is returned.
 560                 */
 561                dst_buf = hantro_get_dst_buf(ctx);
 562                buf = &dst_buf->vb2_buf;
 563        }
 564
 565        return buf;
 566}
 567
 568int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx)
 569{
 570        struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec;
 571        struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls;
 572        struct hantro_h264_reflist_builder reflist_builder;
 573
 574        hantro_prepare_run(ctx);
 575
 576        ctrls->scaling =
 577                hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX);
 578        if (WARN_ON(!ctrls->scaling))
 579                return -EINVAL;
 580
 581        ctrls->decode =
 582                hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS);
 583        if (WARN_ON(!ctrls->decode))
 584                return -EINVAL;
 585
 586        ctrls->slices =
 587                hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS);
 588        if (WARN_ON(!ctrls->slices))
 589                return -EINVAL;
 590
 591        ctrls->sps =
 592                hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SPS);
 593        if (WARN_ON(!ctrls->sps))
 594                return -EINVAL;
 595
 596        ctrls->pps =
 597                hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_PPS);
 598        if (WARN_ON(!ctrls->pps))
 599                return -EINVAL;
 600
 601        /* Update the DPB with new refs. */
 602        update_dpb(ctx);
 603
 604        /* Prepare data in memory. */
 605        prepare_table(ctx);
 606
 607        /* Build the P/B{0,1} ref lists. */
 608        init_reflist_builder(ctx, &reflist_builder);
 609        build_p_ref_list(&reflist_builder, h264_ctx->reflists.p);
 610        build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0,
 611                          h264_ctx->reflists.b1);
 612        return 0;
 613}
 614
 615void hantro_h264_dec_exit(struct hantro_ctx *ctx)
 616{
 617        struct hantro_dev *vpu = ctx->dev;
 618        struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
 619        struct hantro_aux_buf *priv = &h264_dec->priv;
 620
 621        dma_free_coherent(vpu->dev, priv->size, priv->cpu, priv->dma);
 622}
 623
 624int hantro_h264_dec_init(struct hantro_ctx *ctx)
 625{
 626        struct hantro_dev *vpu = ctx->dev;
 627        struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
 628        struct hantro_aux_buf *priv = &h264_dec->priv;
 629        struct hantro_h264_dec_priv_tbl *tbl;
 630        struct v4l2_pix_format_mplane pix_mp;
 631
 632        priv->cpu = dma_alloc_coherent(vpu->dev, sizeof(*tbl), &priv->dma,
 633                                       GFP_KERNEL);
 634        if (!priv->cpu)
 635                return -ENOMEM;
 636
 637        priv->size = sizeof(*tbl);
 638        tbl = priv->cpu;
 639        memcpy(tbl->cabac_table, h264_cabac_table, sizeof(tbl->cabac_table));
 640
 641        v4l2_fill_pixfmt_mp(&pix_mp, ctx->dst_fmt.pixelformat,
 642                            ctx->dst_fmt.width, ctx->dst_fmt.height);
 643        h264_dec->pic_size = pix_mp.plane_fmt[0].sizeimage;
 644
 645        return 0;
 646}
 647