linux/drivers/media/platform/ti-vpe/vpe.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * TI VPE mem2mem driver, based on the virtual v4l2-mem2mem example driver
   4 *
   5 * Copyright (c) 2013 Texas Instruments Inc.
   6 * David Griego, <dagriego@biglakesoftware.com>
   7 * Dale Farnsworth, <dale@farnsworth.org>
   8 * Archit Taneja, <archit@ti.com>
   9 *
  10 * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd.
  11 * Pawel Osciak, <pawel@osciak.com>
  12 * Marek Szyprowski, <m.szyprowski@samsung.com>
  13 *
  14 * Based on the virtual v4l2-mem2mem example device
  15 */
  16
  17#include <linux/delay.h>
  18#include <linux/dma-mapping.h>
  19#include <linux/err.h>
  20#include <linux/fs.h>
  21#include <linux/interrupt.h>
  22#include <linux/io.h>
  23#include <linux/ioctl.h>
  24#include <linux/module.h>
  25#include <linux/of.h>
  26#include <linux/platform_device.h>
  27#include <linux/pm_runtime.h>
  28#include <linux/sched.h>
  29#include <linux/slab.h>
  30#include <linux/videodev2.h>
  31#include <linux/log2.h>
  32#include <linux/sizes.h>
  33
  34#include <media/v4l2-common.h>
  35#include <media/v4l2-ctrls.h>
  36#include <media/v4l2-device.h>
  37#include <media/v4l2-event.h>
  38#include <media/v4l2-ioctl.h>
  39#include <media/v4l2-mem2mem.h>
  40#include <media/videobuf2-v4l2.h>
  41#include <media/videobuf2-dma-contig.h>
  42
  43#include "vpdma.h"
  44#include "vpdma_priv.h"
  45#include "vpe_regs.h"
  46#include "sc.h"
  47#include "csc.h"
  48
  49#define VPE_MODULE_NAME "vpe"
  50
  51/* minimum and maximum frame sizes */
  52#define MIN_W           32
  53#define MIN_H           32
  54#define MAX_W           2048
  55#define MAX_H           1184
  56
  57/* required alignments */
  58#define S_ALIGN         0       /* multiple of 1 */
  59#define H_ALIGN         1       /* multiple of 2 */
  60
  61/* flags that indicate a format can be used for capture/output */
  62#define VPE_FMT_TYPE_CAPTURE    (1 << 0)
  63#define VPE_FMT_TYPE_OUTPUT     (1 << 1)
  64
  65/* used as plane indices */
  66#define VPE_MAX_PLANES  2
  67#define VPE_LUMA        0
  68#define VPE_CHROMA      1
  69
  70/* per m2m context info */
  71#define VPE_MAX_SRC_BUFS        3       /* need 3 src fields to de-interlace */
  72
  73#define VPE_DEF_BUFS_PER_JOB    1       /* default one buffer per batch job */
  74
  75/*
  76 * each VPE context can need up to 3 config descriptors, 7 input descriptors,
  77 * 3 output descriptors, and 10 control descriptors
  78 */
  79#define VPE_DESC_LIST_SIZE      (10 * VPDMA_DTD_DESC_SIZE +     \
  80                                        13 * VPDMA_CFD_CTD_DESC_SIZE)
  81
  82#define vpe_dbg(vpedev, fmt, arg...)    \
  83                dev_dbg((vpedev)->v4l2_dev.dev, fmt, ##arg)
  84#define vpe_err(vpedev, fmt, arg...)    \
  85                dev_err((vpedev)->v4l2_dev.dev, fmt, ##arg)
  86
  87struct vpe_us_coeffs {
  88        unsigned short  anchor_fid0_c0;
  89        unsigned short  anchor_fid0_c1;
  90        unsigned short  anchor_fid0_c2;
  91        unsigned short  anchor_fid0_c3;
  92        unsigned short  interp_fid0_c0;
  93        unsigned short  interp_fid0_c1;
  94        unsigned short  interp_fid0_c2;
  95        unsigned short  interp_fid0_c3;
  96        unsigned short  anchor_fid1_c0;
  97        unsigned short  anchor_fid1_c1;
  98        unsigned short  anchor_fid1_c2;
  99        unsigned short  anchor_fid1_c3;
 100        unsigned short  interp_fid1_c0;
 101        unsigned short  interp_fid1_c1;
 102        unsigned short  interp_fid1_c2;
 103        unsigned short  interp_fid1_c3;
 104};
 105
 106/*
 107 * Default upsampler coefficients
 108 */
 109static const struct vpe_us_coeffs us_coeffs[] = {
 110        {
 111                /* Coefficients for progressive input */
 112                0x00C8, 0x0348, 0x0018, 0x3FD8, 0x3FB8, 0x0378, 0x00E8, 0x3FE8,
 113                0x00C8, 0x0348, 0x0018, 0x3FD8, 0x3FB8, 0x0378, 0x00E8, 0x3FE8,
 114        },
 115        {
 116                /* Coefficients for Top Field Interlaced input */
 117                0x0051, 0x03D5, 0x3FE3, 0x3FF7, 0x3FB5, 0x02E9, 0x018F, 0x3FD3,
 118                /* Coefficients for Bottom Field Interlaced input */
 119                0x016B, 0x0247, 0x00B1, 0x3F9D, 0x3FCF, 0x03DB, 0x005D, 0x3FF9,
 120        },
 121};
 122
 123/*
 124 * the following registers are for configuring some of the parameters of the
 125 * motion and edge detection blocks inside DEI, these generally remain the same,
 126 * these could be passed later via userspace if some one needs to tweak these.
 127 */
 128struct vpe_dei_regs {
 129        unsigned long mdt_spacial_freq_thr_reg;         /* VPE_DEI_REG2 */
 130        unsigned long edi_config_reg;                   /* VPE_DEI_REG3 */
 131        unsigned long edi_lut_reg0;                     /* VPE_DEI_REG4 */
 132        unsigned long edi_lut_reg1;                     /* VPE_DEI_REG5 */
 133        unsigned long edi_lut_reg2;                     /* VPE_DEI_REG6 */
 134        unsigned long edi_lut_reg3;                     /* VPE_DEI_REG7 */
 135};
 136
 137/*
 138 * default expert DEI register values, unlikely to be modified.
 139 */
 140static const struct vpe_dei_regs dei_regs = {
 141        .mdt_spacial_freq_thr_reg = 0x020C0804u,
 142        .edi_config_reg = 0x0118100Cu,
 143        .edi_lut_reg0 = 0x08040200u,
 144        .edi_lut_reg1 = 0x1010100Cu,
 145        .edi_lut_reg2 = 0x10101010u,
 146        .edi_lut_reg3 = 0x10101010u,
 147};
 148
 149/*
 150 * The port_data structure contains per-port data.
 151 */
 152struct vpe_port_data {
 153        enum vpdma_channel channel;     /* VPDMA channel */
 154        u8      vb_index;               /* input frame f, f-1, f-2 index */
 155        u8      vb_part;                /* plane index for co-panar formats */
 156};
 157
 158/*
 159 * Define indices into the port_data tables
 160 */
 161#define VPE_PORT_LUMA1_IN       0
 162#define VPE_PORT_CHROMA1_IN     1
 163#define VPE_PORT_LUMA2_IN       2
 164#define VPE_PORT_CHROMA2_IN     3
 165#define VPE_PORT_LUMA3_IN       4
 166#define VPE_PORT_CHROMA3_IN     5
 167#define VPE_PORT_MV_IN          6
 168#define VPE_PORT_MV_OUT         7
 169#define VPE_PORT_LUMA_OUT       8
 170#define VPE_PORT_CHROMA_OUT     9
 171#define VPE_PORT_RGB_OUT        10
 172
 173static const struct vpe_port_data port_data[11] = {
 174        [VPE_PORT_LUMA1_IN] = {
 175                .channel        = VPE_CHAN_LUMA1_IN,
 176                .vb_index       = 0,
 177                .vb_part        = VPE_LUMA,
 178        },
 179        [VPE_PORT_CHROMA1_IN] = {
 180                .channel        = VPE_CHAN_CHROMA1_IN,
 181                .vb_index       = 0,
 182                .vb_part        = VPE_CHROMA,
 183        },
 184        [VPE_PORT_LUMA2_IN] = {
 185                .channel        = VPE_CHAN_LUMA2_IN,
 186                .vb_index       = 1,
 187                .vb_part        = VPE_LUMA,
 188        },
 189        [VPE_PORT_CHROMA2_IN] = {
 190                .channel        = VPE_CHAN_CHROMA2_IN,
 191                .vb_index       = 1,
 192                .vb_part        = VPE_CHROMA,
 193        },
 194        [VPE_PORT_LUMA3_IN] = {
 195                .channel        = VPE_CHAN_LUMA3_IN,
 196                .vb_index       = 2,
 197                .vb_part        = VPE_LUMA,
 198        },
 199        [VPE_PORT_CHROMA3_IN] = {
 200                .channel        = VPE_CHAN_CHROMA3_IN,
 201                .vb_index       = 2,
 202                .vb_part        = VPE_CHROMA,
 203        },
 204        [VPE_PORT_MV_IN] = {
 205                .channel        = VPE_CHAN_MV_IN,
 206        },
 207        [VPE_PORT_MV_OUT] = {
 208                .channel        = VPE_CHAN_MV_OUT,
 209        },
 210        [VPE_PORT_LUMA_OUT] = {
 211                .channel        = VPE_CHAN_LUMA_OUT,
 212                .vb_part        = VPE_LUMA,
 213        },
 214        [VPE_PORT_CHROMA_OUT] = {
 215                .channel        = VPE_CHAN_CHROMA_OUT,
 216                .vb_part        = VPE_CHROMA,
 217        },
 218        [VPE_PORT_RGB_OUT] = {
 219                .channel        = VPE_CHAN_RGB_OUT,
 220                .vb_part        = VPE_LUMA,
 221        },
 222};
 223
 224
 225/* driver info for each of the supported video formats */
 226struct vpe_fmt {
 227        u32     fourcc;                 /* standard format identifier */
 228        u8      types;                  /* CAPTURE and/or OUTPUT */
 229        u8      coplanar;               /* set for unpacked Luma and Chroma */
 230        /* vpdma format info for each plane */
 231        struct vpdma_data_format const *vpdma_fmt[VPE_MAX_PLANES];
 232};
 233
 234static struct vpe_fmt vpe_formats[] = {
 235        {
 236                .fourcc         = V4L2_PIX_FMT_NV16,
 237                .types          = VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
 238                .coplanar       = 1,
 239                .vpdma_fmt      = { &vpdma_yuv_fmts[VPDMA_DATA_FMT_Y444],
 240                                    &vpdma_yuv_fmts[VPDMA_DATA_FMT_C444],
 241                                  },
 242        },
 243        {
 244                .fourcc         = V4L2_PIX_FMT_NV12,
 245                .types          = VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
 246                .coplanar       = 1,
 247                .vpdma_fmt      = { &vpdma_yuv_fmts[VPDMA_DATA_FMT_Y420],
 248                                    &vpdma_yuv_fmts[VPDMA_DATA_FMT_C420],
 249                                  },
 250        },
 251        {
 252                .fourcc         = V4L2_PIX_FMT_YUYV,
 253                .types          = VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
 254                .coplanar       = 0,
 255                .vpdma_fmt      = { &vpdma_yuv_fmts[VPDMA_DATA_FMT_YCB422],
 256                                  },
 257        },
 258        {
 259                .fourcc         = V4L2_PIX_FMT_UYVY,
 260                .types          = VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
 261                .coplanar       = 0,
 262                .vpdma_fmt      = { &vpdma_yuv_fmts[VPDMA_DATA_FMT_CBY422],
 263                                  },
 264        },
 265        {
 266                .fourcc         = V4L2_PIX_FMT_RGB24,
 267                .types          = VPE_FMT_TYPE_CAPTURE,
 268                .coplanar       = 0,
 269                .vpdma_fmt      = { &vpdma_rgb_fmts[VPDMA_DATA_FMT_RGB24],
 270                                  },
 271        },
 272        {
 273                .fourcc         = V4L2_PIX_FMT_RGB32,
 274                .types          = VPE_FMT_TYPE_CAPTURE,
 275                .coplanar       = 0,
 276                .vpdma_fmt      = { &vpdma_rgb_fmts[VPDMA_DATA_FMT_ARGB32],
 277                                  },
 278        },
 279        {
 280                .fourcc         = V4L2_PIX_FMT_BGR24,
 281                .types          = VPE_FMT_TYPE_CAPTURE,
 282                .coplanar       = 0,
 283                .vpdma_fmt      = { &vpdma_rgb_fmts[VPDMA_DATA_FMT_BGR24],
 284                                  },
 285        },
 286        {
 287                .fourcc         = V4L2_PIX_FMT_BGR32,
 288                .types          = VPE_FMT_TYPE_CAPTURE,
 289                .coplanar       = 0,
 290                .vpdma_fmt      = { &vpdma_rgb_fmts[VPDMA_DATA_FMT_ABGR32],
 291                                  },
 292        },
 293        {
 294                .fourcc         = V4L2_PIX_FMT_RGB565,
 295                .types          = VPE_FMT_TYPE_CAPTURE,
 296                .coplanar       = 0,
 297                .vpdma_fmt      = { &vpdma_rgb_fmts[VPDMA_DATA_FMT_RGB565],
 298                                  },
 299        },
 300        {
 301                .fourcc         = V4L2_PIX_FMT_RGB555,
 302                .types          = VPE_FMT_TYPE_CAPTURE,
 303                .coplanar       = 0,
 304                .vpdma_fmt      = { &vpdma_rgb_fmts[VPDMA_DATA_FMT_RGBA16_5551],
 305                                  },
 306        },
 307};
 308
 309/*
 310 * per-queue, driver-specific private data.
 311 * there is one source queue and one destination queue for each m2m context.
 312 */
 313struct vpe_q_data {
 314        unsigned int            width;                          /* frame width */
 315        unsigned int            height;                         /* frame height */
 316        unsigned int            nplanes;                        /* Current number of planes */
 317        unsigned int            bytesperline[VPE_MAX_PLANES];   /* bytes per line in memory */
 318        enum v4l2_colorspace    colorspace;
 319        enum v4l2_field         field;                          /* supported field value */
 320        unsigned int            flags;
 321        unsigned int            sizeimage[VPE_MAX_PLANES];      /* image size in memory */
 322        struct v4l2_rect        c_rect;                         /* crop/compose rectangle */
 323        struct vpe_fmt          *fmt;                           /* format info */
 324};
 325
 326/* vpe_q_data flag bits */
 327#define Q_DATA_FRAME_1D                 BIT(0)
 328#define Q_DATA_MODE_TILED               BIT(1)
 329#define Q_DATA_INTERLACED_ALTERNATE     BIT(2)
 330#define Q_DATA_INTERLACED_SEQ_TB        BIT(3)
 331
 332#define Q_IS_INTERLACED         (Q_DATA_INTERLACED_ALTERNATE | \
 333                                Q_DATA_INTERLACED_SEQ_TB)
 334
 335enum {
 336        Q_DATA_SRC = 0,
 337        Q_DATA_DST = 1,
 338};
 339
 340/* find our format description corresponding to the passed v4l2_format */
 341static struct vpe_fmt *find_format(struct v4l2_format *f)
 342{
 343        struct vpe_fmt *fmt;
 344        unsigned int k;
 345
 346        for (k = 0; k < ARRAY_SIZE(vpe_formats); k++) {
 347                fmt = &vpe_formats[k];
 348                if (fmt->fourcc == f->fmt.pix.pixelformat)
 349                        return fmt;
 350        }
 351
 352        return NULL;
 353}
 354
 355/*
 356 * there is one vpe_dev structure in the driver, it is shared by
 357 * all instances.
 358 */
 359struct vpe_dev {
 360        struct v4l2_device      v4l2_dev;
 361        struct video_device     vfd;
 362        struct v4l2_m2m_dev     *m2m_dev;
 363
 364        atomic_t                num_instances;  /* count of driver instances */
 365        dma_addr_t              loaded_mmrs;    /* shadow mmrs in device */
 366        struct mutex            dev_mutex;
 367        spinlock_t              lock;
 368
 369        int                     irq;
 370        void __iomem            *base;
 371        struct resource         *res;
 372
 373        struct vpdma_data       vpdma_data;
 374        struct vpdma_data       *vpdma;         /* vpdma data handle */
 375        struct sc_data          *sc;            /* scaler data handle */
 376        struct csc_data         *csc;           /* csc data handle */
 377};
 378
 379/*
 380 * There is one vpe_ctx structure for each m2m context.
 381 */
 382struct vpe_ctx {
 383        struct v4l2_fh          fh;
 384        struct vpe_dev          *dev;
 385        struct v4l2_ctrl_handler hdl;
 386
 387        unsigned int            field;                  /* current field */
 388        unsigned int            sequence;               /* current frame/field seq */
 389        unsigned int            aborting;               /* abort after next irq */
 390
 391        unsigned int            bufs_per_job;           /* input buffers per batch */
 392        unsigned int            bufs_completed;         /* bufs done in this batch */
 393
 394        struct vpe_q_data       q_data[2];              /* src & dst queue data */
 395        struct vb2_v4l2_buffer  *src_vbs[VPE_MAX_SRC_BUFS];
 396        struct vb2_v4l2_buffer  *dst_vb;
 397
 398        dma_addr_t              mv_buf_dma[2];          /* dma addrs of motion vector in/out bufs */
 399        void                    *mv_buf[2];             /* virtual addrs of motion vector bufs */
 400        size_t                  mv_buf_size;            /* current motion vector buffer size */
 401        struct vpdma_buf        mmr_adb;                /* shadow reg addr/data block */
 402        struct vpdma_buf        sc_coeff_h;             /* h coeff buffer */
 403        struct vpdma_buf        sc_coeff_v;             /* v coeff buffer */
 404        struct vpdma_desc_list  desc_list;              /* DMA descriptor list */
 405
 406        bool                    deinterlacing;          /* using de-interlacer */
 407        bool                    load_mmrs;              /* have new shadow reg values */
 408
 409        unsigned int            src_mv_buf_selector;
 410};
 411
 412
 413/*
 414 * M2M devices get 2 queues.
 415 * Return the queue given the type.
 416 */
 417static struct vpe_q_data *get_q_data(struct vpe_ctx *ctx,
 418                                     enum v4l2_buf_type type)
 419{
 420        switch (type) {
 421        case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
 422        case V4L2_BUF_TYPE_VIDEO_OUTPUT:
 423                return &ctx->q_data[Q_DATA_SRC];
 424        case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
 425        case V4L2_BUF_TYPE_VIDEO_CAPTURE:
 426                return &ctx->q_data[Q_DATA_DST];
 427        default:
 428                return NULL;
 429        }
 430        return NULL;
 431}
 432
 433static u32 read_reg(struct vpe_dev *dev, int offset)
 434{
 435        return ioread32(dev->base + offset);
 436}
 437
 438static void write_reg(struct vpe_dev *dev, int offset, u32 value)
 439{
 440        iowrite32(value, dev->base + offset);
 441}
 442
 443/* register field read/write helpers */
 444static int get_field(u32 value, u32 mask, int shift)
 445{
 446        return (value & (mask << shift)) >> shift;
 447}
 448
 449static int read_field_reg(struct vpe_dev *dev, int offset, u32 mask, int shift)
 450{
 451        return get_field(read_reg(dev, offset), mask, shift);
 452}
 453
 454static void write_field(u32 *valp, u32 field, u32 mask, int shift)
 455{
 456        u32 val = *valp;
 457
 458        val &= ~(mask << shift);
 459        val |= (field & mask) << shift;
 460        *valp = val;
 461}
 462
 463static void write_field_reg(struct vpe_dev *dev, int offset, u32 field,
 464                u32 mask, int shift)
 465{
 466        u32 val = read_reg(dev, offset);
 467
 468        write_field(&val, field, mask, shift);
 469
 470        write_reg(dev, offset, val);
 471}
 472
 473/*
 474 * DMA address/data block for the shadow registers
 475 */
 476struct vpe_mmr_adb {
 477        struct vpdma_adb_hdr    out_fmt_hdr;
 478        u32                     out_fmt_reg[1];
 479        u32                     out_fmt_pad[3];
 480        struct vpdma_adb_hdr    us1_hdr;
 481        u32                     us1_regs[8];
 482        struct vpdma_adb_hdr    us2_hdr;
 483        u32                     us2_regs[8];
 484        struct vpdma_adb_hdr    us3_hdr;
 485        u32                     us3_regs[8];
 486        struct vpdma_adb_hdr    dei_hdr;
 487        u32                     dei_regs[8];
 488        struct vpdma_adb_hdr    sc_hdr0;
 489        u32                     sc_regs0[7];
 490        u32                     sc_pad0[1];
 491        struct vpdma_adb_hdr    sc_hdr8;
 492        u32                     sc_regs8[6];
 493        u32                     sc_pad8[2];
 494        struct vpdma_adb_hdr    sc_hdr17;
 495        u32                     sc_regs17[9];
 496        u32                     sc_pad17[3];
 497        struct vpdma_adb_hdr    csc_hdr;
 498        u32                     csc_regs[6];
 499        u32                     csc_pad[2];
 500};
 501
 502#define GET_OFFSET_TOP(ctx, obj, reg)   \
 503        ((obj)->res->start - ctx->dev->res->start + reg)
 504
 505#define VPE_SET_MMR_ADB_HDR(ctx, hdr, regs, offset_a)   \
 506        VPDMA_SET_MMR_ADB_HDR(ctx->mmr_adb, vpe_mmr_adb, hdr, regs, offset_a)
 507/*
 508 * Set the headers for all of the address/data block structures.
 509 */
 510static void init_adb_hdrs(struct vpe_ctx *ctx)
 511{
 512        VPE_SET_MMR_ADB_HDR(ctx, out_fmt_hdr, out_fmt_reg, VPE_CLK_FORMAT_SELECT);
 513        VPE_SET_MMR_ADB_HDR(ctx, us1_hdr, us1_regs, VPE_US1_R0);
 514        VPE_SET_MMR_ADB_HDR(ctx, us2_hdr, us2_regs, VPE_US2_R0);
 515        VPE_SET_MMR_ADB_HDR(ctx, us3_hdr, us3_regs, VPE_US3_R0);
 516        VPE_SET_MMR_ADB_HDR(ctx, dei_hdr, dei_regs, VPE_DEI_FRAME_SIZE);
 517        VPE_SET_MMR_ADB_HDR(ctx, sc_hdr0, sc_regs0,
 518                GET_OFFSET_TOP(ctx, ctx->dev->sc, CFG_SC0));
 519        VPE_SET_MMR_ADB_HDR(ctx, sc_hdr8, sc_regs8,
 520                GET_OFFSET_TOP(ctx, ctx->dev->sc, CFG_SC8));
 521        VPE_SET_MMR_ADB_HDR(ctx, sc_hdr17, sc_regs17,
 522                GET_OFFSET_TOP(ctx, ctx->dev->sc, CFG_SC17));
 523        VPE_SET_MMR_ADB_HDR(ctx, csc_hdr, csc_regs,
 524                GET_OFFSET_TOP(ctx, ctx->dev->csc, CSC_CSC00));
 525};
 526
 527/*
 528 * Allocate or re-allocate the motion vector DMA buffers
 529 * There are two buffers, one for input and one for output.
 530 * However, the roles are reversed after each field is processed.
 531 * In other words, after each field is processed, the previous
 532 * output (dst) MV buffer becomes the new input (src) MV buffer.
 533 */
 534static int realloc_mv_buffers(struct vpe_ctx *ctx, size_t size)
 535{
 536        struct device *dev = ctx->dev->v4l2_dev.dev;
 537
 538        if (ctx->mv_buf_size == size)
 539                return 0;
 540
 541        if (ctx->mv_buf[0])
 542                dma_free_coherent(dev, ctx->mv_buf_size, ctx->mv_buf[0],
 543                        ctx->mv_buf_dma[0]);
 544
 545        if (ctx->mv_buf[1])
 546                dma_free_coherent(dev, ctx->mv_buf_size, ctx->mv_buf[1],
 547                        ctx->mv_buf_dma[1]);
 548
 549        if (size == 0)
 550                return 0;
 551
 552        ctx->mv_buf[0] = dma_alloc_coherent(dev, size, &ctx->mv_buf_dma[0],
 553                                GFP_KERNEL);
 554        if (!ctx->mv_buf[0]) {
 555                vpe_err(ctx->dev, "failed to allocate motion vector buffer\n");
 556                return -ENOMEM;
 557        }
 558
 559        ctx->mv_buf[1] = dma_alloc_coherent(dev, size, &ctx->mv_buf_dma[1],
 560                                GFP_KERNEL);
 561        if (!ctx->mv_buf[1]) {
 562                vpe_err(ctx->dev, "failed to allocate motion vector buffer\n");
 563                dma_free_coherent(dev, size, ctx->mv_buf[0],
 564                        ctx->mv_buf_dma[0]);
 565
 566                return -ENOMEM;
 567        }
 568
 569        ctx->mv_buf_size = size;
 570        ctx->src_mv_buf_selector = 0;
 571
 572        return 0;
 573}
 574
 575static void free_mv_buffers(struct vpe_ctx *ctx)
 576{
 577        realloc_mv_buffers(ctx, 0);
 578}
 579
 580/*
 581 * While de-interlacing, we keep the two most recent input buffers
 582 * around.  This function frees those two buffers when we have
 583 * finished processing the current stream.
 584 */
 585static void free_vbs(struct vpe_ctx *ctx)
 586{
 587        struct vpe_dev *dev = ctx->dev;
 588        unsigned long flags;
 589
 590        if (ctx->src_vbs[2] == NULL)
 591                return;
 592
 593        spin_lock_irqsave(&dev->lock, flags);
 594        if (ctx->src_vbs[2]) {
 595                v4l2_m2m_buf_done(ctx->src_vbs[2], VB2_BUF_STATE_DONE);
 596                if (ctx->src_vbs[1] && (ctx->src_vbs[1] != ctx->src_vbs[2]))
 597                        v4l2_m2m_buf_done(ctx->src_vbs[1], VB2_BUF_STATE_DONE);
 598                ctx->src_vbs[2] = NULL;
 599                ctx->src_vbs[1] = NULL;
 600        }
 601        spin_unlock_irqrestore(&dev->lock, flags);
 602}
 603
 604/*
 605 * Enable or disable the VPE clocks
 606 */
 607static void vpe_set_clock_enable(struct vpe_dev *dev, bool on)
 608{
 609        u32 val = 0;
 610
 611        if (on)
 612                val = VPE_DATA_PATH_CLK_ENABLE | VPE_VPEDMA_CLK_ENABLE;
 613        write_reg(dev, VPE_CLK_ENABLE, val);
 614}
 615
 616static void vpe_top_reset(struct vpe_dev *dev)
 617{
 618
 619        write_field_reg(dev, VPE_CLK_RESET, 1, VPE_DATA_PATH_CLK_RESET_MASK,
 620                VPE_DATA_PATH_CLK_RESET_SHIFT);
 621
 622        usleep_range(100, 150);
 623
 624        write_field_reg(dev, VPE_CLK_RESET, 0, VPE_DATA_PATH_CLK_RESET_MASK,
 625                VPE_DATA_PATH_CLK_RESET_SHIFT);
 626}
 627
 628static void vpe_top_vpdma_reset(struct vpe_dev *dev)
 629{
 630        write_field_reg(dev, VPE_CLK_RESET, 1, VPE_VPDMA_CLK_RESET_MASK,
 631                VPE_VPDMA_CLK_RESET_SHIFT);
 632
 633        usleep_range(100, 150);
 634
 635        write_field_reg(dev, VPE_CLK_RESET, 0, VPE_VPDMA_CLK_RESET_MASK,
 636                VPE_VPDMA_CLK_RESET_SHIFT);
 637}
 638
 639/*
 640 * Load the correct of upsampler coefficients into the shadow MMRs
 641 */
 642static void set_us_coefficients(struct vpe_ctx *ctx)
 643{
 644        struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
 645        struct vpe_q_data *s_q_data = &ctx->q_data[Q_DATA_SRC];
 646        u32 *us1_reg = &mmr_adb->us1_regs[0];
 647        u32 *us2_reg = &mmr_adb->us2_regs[0];
 648        u32 *us3_reg = &mmr_adb->us3_regs[0];
 649        const unsigned short *cp, *end_cp;
 650
 651        cp = &us_coeffs[0].anchor_fid0_c0;
 652
 653        if (s_q_data->flags & Q_IS_INTERLACED)          /* interlaced */
 654                cp += sizeof(us_coeffs[0]) / sizeof(*cp);
 655
 656        end_cp = cp + sizeof(us_coeffs[0]) / sizeof(*cp);
 657
 658        while (cp < end_cp) {
 659                write_field(us1_reg, *cp++, VPE_US_C0_MASK, VPE_US_C0_SHIFT);
 660                write_field(us1_reg, *cp++, VPE_US_C1_MASK, VPE_US_C1_SHIFT);
 661                *us2_reg++ = *us1_reg;
 662                *us3_reg++ = *us1_reg++;
 663        }
 664        ctx->load_mmrs = true;
 665}
 666
 667/*
 668 * Set the upsampler config mode and the VPDMA line mode in the shadow MMRs.
 669 */
 670static void set_cfg_modes(struct vpe_ctx *ctx)
 671{
 672        struct vpe_fmt *fmt = ctx->q_data[Q_DATA_SRC].fmt;
 673        struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
 674        u32 *us1_reg0 = &mmr_adb->us1_regs[0];
 675        u32 *us2_reg0 = &mmr_adb->us2_regs[0];
 676        u32 *us3_reg0 = &mmr_adb->us3_regs[0];
 677        int cfg_mode = 1;
 678
 679        /*
 680         * Cfg Mode 0: YUV420 source, enable upsampler, DEI is de-interlacing.
 681         * Cfg Mode 1: YUV422 source, disable upsampler, DEI is de-interlacing.
 682         */
 683
 684        if (fmt->fourcc == V4L2_PIX_FMT_NV12)
 685                cfg_mode = 0;
 686
 687        write_field(us1_reg0, cfg_mode, VPE_US_MODE_MASK, VPE_US_MODE_SHIFT);
 688        write_field(us2_reg0, cfg_mode, VPE_US_MODE_MASK, VPE_US_MODE_SHIFT);
 689        write_field(us3_reg0, cfg_mode, VPE_US_MODE_MASK, VPE_US_MODE_SHIFT);
 690
 691        ctx->load_mmrs = true;
 692}
 693
 694static void set_line_modes(struct vpe_ctx *ctx)
 695{
 696        struct vpe_fmt *fmt = ctx->q_data[Q_DATA_SRC].fmt;
 697        int line_mode = 1;
 698
 699        if (fmt->fourcc == V4L2_PIX_FMT_NV12)
 700                line_mode = 0;          /* double lines to line buffer */
 701
 702        /* regs for now */
 703        vpdma_set_line_mode(ctx->dev->vpdma, line_mode, VPE_CHAN_CHROMA1_IN);
 704        vpdma_set_line_mode(ctx->dev->vpdma, line_mode, VPE_CHAN_CHROMA2_IN);
 705        vpdma_set_line_mode(ctx->dev->vpdma, line_mode, VPE_CHAN_CHROMA3_IN);
 706
 707        /* frame start for input luma */
 708        vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
 709                VPE_CHAN_LUMA1_IN);
 710        vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
 711                VPE_CHAN_LUMA2_IN);
 712        vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
 713                VPE_CHAN_LUMA3_IN);
 714
 715        /* frame start for input chroma */
 716        vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
 717                VPE_CHAN_CHROMA1_IN);
 718        vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
 719                VPE_CHAN_CHROMA2_IN);
 720        vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
 721                VPE_CHAN_CHROMA3_IN);
 722
 723        /* frame start for MV in client */
 724        vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
 725                VPE_CHAN_MV_IN);
 726}
 727
 728/*
 729 * Set the shadow registers that are modified when the source
 730 * format changes.
 731 */
 732static void set_src_registers(struct vpe_ctx *ctx)
 733{
 734        set_us_coefficients(ctx);
 735}
 736
 737/*
 738 * Set the shadow registers that are modified when the destination
 739 * format changes.
 740 */
 741static void set_dst_registers(struct vpe_ctx *ctx)
 742{
 743        struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
 744        enum v4l2_colorspace clrspc = ctx->q_data[Q_DATA_DST].colorspace;
 745        struct vpe_fmt *fmt = ctx->q_data[Q_DATA_DST].fmt;
 746        u32 val = 0;
 747
 748        if (clrspc == V4L2_COLORSPACE_SRGB) {
 749                val |= VPE_RGB_OUT_SELECT;
 750                vpdma_set_bg_color(ctx->dev->vpdma,
 751                        (struct vpdma_data_format *)fmt->vpdma_fmt[0], 0xff);
 752        } else if (fmt->fourcc == V4L2_PIX_FMT_NV16)
 753                val |= VPE_COLOR_SEPARATE_422;
 754
 755        /*
 756         * the source of CHR_DS and CSC is always the scaler, irrespective of
 757         * whether it's used or not
 758         */
 759        val |= VPE_DS_SRC_DEI_SCALER | VPE_CSC_SRC_DEI_SCALER;
 760
 761        if (fmt->fourcc != V4L2_PIX_FMT_NV12)
 762                val |= VPE_DS_BYPASS;
 763
 764        mmr_adb->out_fmt_reg[0] = val;
 765
 766        ctx->load_mmrs = true;
 767}
 768
 769/*
 770 * Set the de-interlacer shadow register values
 771 */
 772static void set_dei_regs(struct vpe_ctx *ctx)
 773{
 774        struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
 775        struct vpe_q_data *s_q_data = &ctx->q_data[Q_DATA_SRC];
 776        unsigned int src_h = s_q_data->c_rect.height;
 777        unsigned int src_w = s_q_data->c_rect.width;
 778        u32 *dei_mmr0 = &mmr_adb->dei_regs[0];
 779        bool deinterlace = true;
 780        u32 val = 0;
 781
 782        /*
 783         * according to TRM, we should set DEI in progressive bypass mode when
 784         * the input content is progressive, however, DEI is bypassed correctly
 785         * for both progressive and interlace content in interlace bypass mode.
 786         * It has been recommended not to use progressive bypass mode.
 787         */
 788        if (!(s_q_data->flags & Q_IS_INTERLACED) || !ctx->deinterlacing) {
 789                deinterlace = false;
 790                val = VPE_DEI_INTERLACE_BYPASS;
 791        }
 792
 793        src_h = deinterlace ? src_h * 2 : src_h;
 794
 795        val |= (src_h << VPE_DEI_HEIGHT_SHIFT) |
 796                (src_w << VPE_DEI_WIDTH_SHIFT) |
 797                VPE_DEI_FIELD_FLUSH;
 798
 799        *dei_mmr0 = val;
 800
 801        ctx->load_mmrs = true;
 802}
 803
 804static void set_dei_shadow_registers(struct vpe_ctx *ctx)
 805{
 806        struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
 807        u32 *dei_mmr = &mmr_adb->dei_regs[0];
 808        const struct vpe_dei_regs *cur = &dei_regs;
 809
 810        dei_mmr[2]  = cur->mdt_spacial_freq_thr_reg;
 811        dei_mmr[3]  = cur->edi_config_reg;
 812        dei_mmr[4]  = cur->edi_lut_reg0;
 813        dei_mmr[5]  = cur->edi_lut_reg1;
 814        dei_mmr[6]  = cur->edi_lut_reg2;
 815        dei_mmr[7]  = cur->edi_lut_reg3;
 816
 817        ctx->load_mmrs = true;
 818}
 819
 820static void config_edi_input_mode(struct vpe_ctx *ctx, int mode)
 821{
 822        struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
 823        u32 *edi_config_reg = &mmr_adb->dei_regs[3];
 824
 825        if (mode & 0x2)
 826                write_field(edi_config_reg, 1, 1, 2);   /* EDI_ENABLE_3D */
 827
 828        if (mode & 0x3)
 829                write_field(edi_config_reg, 1, 1, 3);   /* EDI_CHROMA_3D  */
 830
 831        write_field(edi_config_reg, mode, VPE_EDI_INP_MODE_MASK,
 832                VPE_EDI_INP_MODE_SHIFT);
 833
 834        ctx->load_mmrs = true;
 835}
 836
 837/*
 838 * Set the shadow registers whose values are modified when either the
 839 * source or destination format is changed.
 840 */
 841static int set_srcdst_params(struct vpe_ctx *ctx)
 842{
 843        struct vpe_q_data *s_q_data =  &ctx->q_data[Q_DATA_SRC];
 844        struct vpe_q_data *d_q_data =  &ctx->q_data[Q_DATA_DST];
 845        struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
 846        unsigned int src_w = s_q_data->c_rect.width;
 847        unsigned int src_h = s_q_data->c_rect.height;
 848        unsigned int dst_w = d_q_data->c_rect.width;
 849        unsigned int dst_h = d_q_data->c_rect.height;
 850        size_t mv_buf_size;
 851        int ret;
 852
 853        ctx->sequence = 0;
 854        ctx->field = V4L2_FIELD_TOP;
 855
 856        if ((s_q_data->flags & Q_IS_INTERLACED) &&
 857                        !(d_q_data->flags & Q_IS_INTERLACED)) {
 858                int bytes_per_line;
 859                const struct vpdma_data_format *mv =
 860                        &vpdma_misc_fmts[VPDMA_DATA_FMT_MV];
 861
 862                /*
 863                 * we make sure that the source image has a 16 byte aligned
 864                 * stride, we need to do the same for the motion vector buffer
 865                 * by aligning it's stride to the next 16 byte boundary. this
 866                 * extra space will not be used by the de-interlacer, but will
 867                 * ensure that vpdma operates correctly
 868                 */
 869                bytes_per_line = ALIGN((s_q_data->width * mv->depth) >> 3,
 870                                        VPDMA_STRIDE_ALIGN);
 871                mv_buf_size = bytes_per_line * s_q_data->height;
 872
 873                ctx->deinterlacing = true;
 874                src_h <<= 1;
 875        } else {
 876                ctx->deinterlacing = false;
 877                mv_buf_size = 0;
 878        }
 879
 880        free_vbs(ctx);
 881        ctx->src_vbs[2] = ctx->src_vbs[1] = ctx->src_vbs[0] = NULL;
 882
 883        ret = realloc_mv_buffers(ctx, mv_buf_size);
 884        if (ret)
 885                return ret;
 886
 887        set_cfg_modes(ctx);
 888        set_dei_regs(ctx);
 889
 890        csc_set_coeff(ctx->dev->csc, &mmr_adb->csc_regs[0],
 891                s_q_data->colorspace, d_q_data->colorspace);
 892
 893        sc_set_hs_coeffs(ctx->dev->sc, ctx->sc_coeff_h.addr, src_w, dst_w);
 894        sc_set_vs_coeffs(ctx->dev->sc, ctx->sc_coeff_v.addr, src_h, dst_h);
 895
 896        sc_config_scaler(ctx->dev->sc, &mmr_adb->sc_regs0[0],
 897                &mmr_adb->sc_regs8[0], &mmr_adb->sc_regs17[0],
 898                src_w, src_h, dst_w, dst_h);
 899
 900        return 0;
 901}
 902
 903/*
 904 * Return the vpe_ctx structure for a given struct file
 905 */
 906static struct vpe_ctx *file2ctx(struct file *file)
 907{
 908        return container_of(file->private_data, struct vpe_ctx, fh);
 909}
 910
 911/*
 912 * mem2mem callbacks
 913 */
 914
 915/*
 916 * job_ready() - check whether an instance is ready to be scheduled to run
 917 */
 918static int job_ready(void *priv)
 919{
 920        struct vpe_ctx *ctx = priv;
 921
 922        /*
 923         * This check is needed as this might be called directly from driver
 924         * When called by m2m framework, this will always satisfy, but when
 925         * called from vpe_irq, this might fail. (src stream with zero buffers)
 926         */
 927        if (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) <= 0 ||
 928                v4l2_m2m_num_dst_bufs_ready(ctx->fh.m2m_ctx) <= 0)
 929                return 0;
 930
 931        return 1;
 932}
 933
 934static void job_abort(void *priv)
 935{
 936        struct vpe_ctx *ctx = priv;
 937
 938        /* Will cancel the transaction in the next interrupt handler */
 939        ctx->aborting = 1;
 940}
 941
 942static void vpe_dump_regs(struct vpe_dev *dev)
 943{
 944#define DUMPREG(r) vpe_dbg(dev, "%-35s %08x\n", #r, read_reg(dev, VPE_##r))
 945
 946        vpe_dbg(dev, "VPE Registers:\n");
 947
 948        DUMPREG(PID);
 949        DUMPREG(SYSCONFIG);
 950        DUMPREG(INT0_STATUS0_RAW);
 951        DUMPREG(INT0_STATUS0);
 952        DUMPREG(INT0_ENABLE0);
 953        DUMPREG(INT0_STATUS1_RAW);
 954        DUMPREG(INT0_STATUS1);
 955        DUMPREG(INT0_ENABLE1);
 956        DUMPREG(CLK_ENABLE);
 957        DUMPREG(CLK_RESET);
 958        DUMPREG(CLK_FORMAT_SELECT);
 959        DUMPREG(CLK_RANGE_MAP);
 960        DUMPREG(US1_R0);
 961        DUMPREG(US1_R1);
 962        DUMPREG(US1_R2);
 963        DUMPREG(US1_R3);
 964        DUMPREG(US1_R4);
 965        DUMPREG(US1_R5);
 966        DUMPREG(US1_R6);
 967        DUMPREG(US1_R7);
 968        DUMPREG(US2_R0);
 969        DUMPREG(US2_R1);
 970        DUMPREG(US2_R2);
 971        DUMPREG(US2_R3);
 972        DUMPREG(US2_R4);
 973        DUMPREG(US2_R5);
 974        DUMPREG(US2_R6);
 975        DUMPREG(US2_R7);
 976        DUMPREG(US3_R0);
 977        DUMPREG(US3_R1);
 978        DUMPREG(US3_R2);
 979        DUMPREG(US3_R3);
 980        DUMPREG(US3_R4);
 981        DUMPREG(US3_R5);
 982        DUMPREG(US3_R6);
 983        DUMPREG(US3_R7);
 984        DUMPREG(DEI_FRAME_SIZE);
 985        DUMPREG(MDT_BYPASS);
 986        DUMPREG(MDT_SF_THRESHOLD);
 987        DUMPREG(EDI_CONFIG);
 988        DUMPREG(DEI_EDI_LUT_R0);
 989        DUMPREG(DEI_EDI_LUT_R1);
 990        DUMPREG(DEI_EDI_LUT_R2);
 991        DUMPREG(DEI_EDI_LUT_R3);
 992        DUMPREG(DEI_FMD_WINDOW_R0);
 993        DUMPREG(DEI_FMD_WINDOW_R1);
 994        DUMPREG(DEI_FMD_CONTROL_R0);
 995        DUMPREG(DEI_FMD_CONTROL_R1);
 996        DUMPREG(DEI_FMD_STATUS_R0);
 997        DUMPREG(DEI_FMD_STATUS_R1);
 998        DUMPREG(DEI_FMD_STATUS_R2);
 999#undef DUMPREG
1000
1001        sc_dump_regs(dev->sc);
1002        csc_dump_regs(dev->csc);
1003}
1004
1005static void add_out_dtd(struct vpe_ctx *ctx, int port)
1006{
1007        struct vpe_q_data *q_data = &ctx->q_data[Q_DATA_DST];
1008        const struct vpe_port_data *p_data = &port_data[port];
1009        struct vb2_buffer *vb = &ctx->dst_vb->vb2_buf;
1010        struct vpe_fmt *fmt = q_data->fmt;
1011        const struct vpdma_data_format *vpdma_fmt;
1012        int mv_buf_selector = !ctx->src_mv_buf_selector;
1013        dma_addr_t dma_addr;
1014        u32 flags = 0;
1015        u32 offset = 0;
1016
1017        if (port == VPE_PORT_MV_OUT) {
1018                vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV];
1019                dma_addr = ctx->mv_buf_dma[mv_buf_selector];
1020                q_data = &ctx->q_data[Q_DATA_SRC];
1021        } else {
1022                /* to incorporate interleaved formats */
1023                int plane = fmt->coplanar ? p_data->vb_part : 0;
1024
1025                vpdma_fmt = fmt->vpdma_fmt[plane];
1026                /*
1027                 * If we are using a single plane buffer and
1028                 * we need to set a separate vpdma chroma channel.
1029                 */
1030                if (q_data->nplanes == 1 && plane) {
1031                        dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
1032                        /* Compute required offset */
1033                        offset = q_data->bytesperline[0] * q_data->height;
1034                } else {
1035                        dma_addr = vb2_dma_contig_plane_dma_addr(vb, plane);
1036                        /* Use address as is, no offset */
1037                        offset = 0;
1038                }
1039                if (!dma_addr) {
1040                        vpe_err(ctx->dev,
1041                                "acquiring output buffer(%d) dma_addr failed\n",
1042                                port);
1043                        return;
1044                }
1045                /* Apply the offset */
1046                dma_addr += offset;
1047        }
1048
1049        if (q_data->flags & Q_DATA_FRAME_1D)
1050                flags |= VPDMA_DATA_FRAME_1D;
1051        if (q_data->flags & Q_DATA_MODE_TILED)
1052                flags |= VPDMA_DATA_MODE_TILED;
1053
1054        vpdma_set_max_size(ctx->dev->vpdma, VPDMA_MAX_SIZE1,
1055                           MAX_W, MAX_H);
1056
1057        vpdma_add_out_dtd(&ctx->desc_list, q_data->width,
1058                          q_data->bytesperline[VPE_LUMA], &q_data->c_rect,
1059                          vpdma_fmt, dma_addr, MAX_OUT_WIDTH_REG1,
1060                          MAX_OUT_HEIGHT_REG1, p_data->channel, flags);
1061}
1062
1063static void add_in_dtd(struct vpe_ctx *ctx, int port)
1064{
1065        struct vpe_q_data *q_data = &ctx->q_data[Q_DATA_SRC];
1066        const struct vpe_port_data *p_data = &port_data[port];
1067        struct vb2_buffer *vb = &ctx->src_vbs[p_data->vb_index]->vb2_buf;
1068        struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
1069        struct vpe_fmt *fmt = q_data->fmt;
1070        const struct vpdma_data_format *vpdma_fmt;
1071        int mv_buf_selector = ctx->src_mv_buf_selector;
1072        int field = vbuf->field == V4L2_FIELD_BOTTOM;
1073        int frame_width, frame_height;
1074        dma_addr_t dma_addr;
1075        u32 flags = 0;
1076        u32 offset = 0;
1077
1078        if (port == VPE_PORT_MV_IN) {
1079                vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV];
1080                dma_addr = ctx->mv_buf_dma[mv_buf_selector];
1081        } else {
1082                /* to incorporate interleaved formats */
1083                int plane = fmt->coplanar ? p_data->vb_part : 0;
1084
1085                vpdma_fmt = fmt->vpdma_fmt[plane];
1086                /*
1087                 * If we are using a single plane buffer and
1088                 * we need to set a separate vpdma chroma channel.
1089                 */
1090                if (q_data->nplanes == 1 && plane) {
1091                        dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
1092                        /* Compute required offset */
1093                        offset = q_data->bytesperline[0] * q_data->height;
1094                } else {
1095                        dma_addr = vb2_dma_contig_plane_dma_addr(vb, plane);
1096                        /* Use address as is, no offset */
1097                        offset = 0;
1098                }
1099                if (!dma_addr) {
1100                        vpe_err(ctx->dev,
1101                                "acquiring output buffer(%d) dma_addr failed\n",
1102                                port);
1103                        return;
1104                }
1105                /* Apply the offset */
1106                dma_addr += offset;
1107
1108                if (q_data->flags & Q_DATA_INTERLACED_SEQ_TB) {
1109                        /*
1110                         * Use top or bottom field from same vb alternately
1111                         * f,f-1,f-2 = TBT when seq is even
1112                         * f,f-1,f-2 = BTB when seq is odd
1113                         */
1114                        field = (p_data->vb_index + (ctx->sequence % 2)) % 2;
1115
1116                        if (field) {
1117                                /*
1118                                 * bottom field of a SEQ_TB buffer
1119                                 * Skip the top field data by
1120                                 */
1121                                int height = q_data->height / 2;
1122                                int bpp = fmt->fourcc == V4L2_PIX_FMT_NV12 ?
1123                                                1 : (vpdma_fmt->depth >> 3);
1124                                if (plane)
1125                                        height /= 2;
1126                                dma_addr += q_data->width * height * bpp;
1127                        }
1128                }
1129        }
1130
1131        if (q_data->flags & Q_DATA_FRAME_1D)
1132                flags |= VPDMA_DATA_FRAME_1D;
1133        if (q_data->flags & Q_DATA_MODE_TILED)
1134                flags |= VPDMA_DATA_MODE_TILED;
1135
1136        frame_width = q_data->c_rect.width;
1137        frame_height = q_data->c_rect.height;
1138
1139        if (p_data->vb_part && fmt->fourcc == V4L2_PIX_FMT_NV12)
1140                frame_height /= 2;
1141
1142        vpdma_add_in_dtd(&ctx->desc_list, q_data->width,
1143                         q_data->bytesperline[VPE_LUMA], &q_data->c_rect,
1144                vpdma_fmt, dma_addr, p_data->channel, field, flags, frame_width,
1145                frame_height, 0, 0);
1146}
1147
1148/*
1149 * Enable the expected IRQ sources
1150 */
1151static void enable_irqs(struct vpe_ctx *ctx)
1152{
1153        write_reg(ctx->dev, VPE_INT0_ENABLE0_SET, VPE_INT0_LIST0_COMPLETE);
1154        write_reg(ctx->dev, VPE_INT0_ENABLE1_SET, VPE_DEI_ERROR_INT |
1155                                VPE_DS1_UV_ERROR_INT);
1156
1157        vpdma_enable_list_complete_irq(ctx->dev->vpdma, 0, 0, true);
1158}
1159
1160static void disable_irqs(struct vpe_ctx *ctx)
1161{
1162        write_reg(ctx->dev, VPE_INT0_ENABLE0_CLR, 0xffffffff);
1163        write_reg(ctx->dev, VPE_INT0_ENABLE1_CLR, 0xffffffff);
1164
1165        vpdma_enable_list_complete_irq(ctx->dev->vpdma, 0, 0, false);
1166}
1167
1168/* device_run() - prepares and starts the device
1169 *
1170 * This function is only called when both the source and destination
1171 * buffers are in place.
1172 */
1173static void device_run(void *priv)
1174{
1175        struct vpe_ctx *ctx = priv;
1176        struct sc_data *sc = ctx->dev->sc;
1177        struct vpe_q_data *d_q_data = &ctx->q_data[Q_DATA_DST];
1178        struct vpe_q_data *s_q_data = &ctx->q_data[Q_DATA_SRC];
1179
1180        if (ctx->deinterlacing && s_q_data->flags & Q_DATA_INTERLACED_SEQ_TB &&
1181                ctx->sequence % 2 == 0) {
1182                /* When using SEQ_TB buffers, When using it first time,
1183                 * No need to remove the buffer as the next field is present
1184                 * in the same buffer. (so that job_ready won't fail)
1185                 * It will be removed when using bottom field
1186                 */
1187                ctx->src_vbs[0] = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1188                WARN_ON(ctx->src_vbs[0] == NULL);
1189        } else {
1190                ctx->src_vbs[0] = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1191                WARN_ON(ctx->src_vbs[0] == NULL);
1192        }
1193
1194        ctx->dst_vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1195        WARN_ON(ctx->dst_vb == NULL);
1196
1197        if (ctx->deinterlacing) {
1198
1199                if (ctx->src_vbs[2] == NULL) {
1200                        ctx->src_vbs[2] = ctx->src_vbs[0];
1201                        WARN_ON(ctx->src_vbs[2] == NULL);
1202                        ctx->src_vbs[1] = ctx->src_vbs[0];
1203                        WARN_ON(ctx->src_vbs[1] == NULL);
1204                }
1205
1206                /*
1207                 * we have output the first 2 frames through line average, we
1208                 * now switch to EDI de-interlacer
1209                 */
1210                if (ctx->sequence == 2)
1211                        config_edi_input_mode(ctx, 0x3); /* EDI (Y + UV) */
1212        }
1213
1214        /* config descriptors */
1215        if (ctx->dev->loaded_mmrs != ctx->mmr_adb.dma_addr || ctx->load_mmrs) {
1216                vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->mmr_adb);
1217                vpdma_add_cfd_adb(&ctx->desc_list, CFD_MMR_CLIENT, &ctx->mmr_adb);
1218
1219                set_line_modes(ctx);
1220
1221                ctx->dev->loaded_mmrs = ctx->mmr_adb.dma_addr;
1222                ctx->load_mmrs = false;
1223        }
1224
1225        if (sc->loaded_coeff_h != ctx->sc_coeff_h.dma_addr ||
1226                        sc->load_coeff_h) {
1227                vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->sc_coeff_h);
1228                vpdma_add_cfd_block(&ctx->desc_list, CFD_SC_CLIENT,
1229                        &ctx->sc_coeff_h, 0);
1230
1231                sc->loaded_coeff_h = ctx->sc_coeff_h.dma_addr;
1232                sc->load_coeff_h = false;
1233        }
1234
1235        if (sc->loaded_coeff_v != ctx->sc_coeff_v.dma_addr ||
1236                        sc->load_coeff_v) {
1237                vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->sc_coeff_v);
1238                vpdma_add_cfd_block(&ctx->desc_list, CFD_SC_CLIENT,
1239                        &ctx->sc_coeff_v, SC_COEF_SRAM_SIZE >> 4);
1240
1241                sc->loaded_coeff_v = ctx->sc_coeff_v.dma_addr;
1242                sc->load_coeff_v = false;
1243        }
1244
1245        /* output data descriptors */
1246        if (ctx->deinterlacing)
1247                add_out_dtd(ctx, VPE_PORT_MV_OUT);
1248
1249        if (d_q_data->colorspace == V4L2_COLORSPACE_SRGB) {
1250                add_out_dtd(ctx, VPE_PORT_RGB_OUT);
1251        } else {
1252                add_out_dtd(ctx, VPE_PORT_LUMA_OUT);
1253                if (d_q_data->fmt->coplanar)
1254                        add_out_dtd(ctx, VPE_PORT_CHROMA_OUT);
1255        }
1256
1257        /* input data descriptors */
1258        if (ctx->deinterlacing) {
1259                add_in_dtd(ctx, VPE_PORT_LUMA3_IN);
1260                add_in_dtd(ctx, VPE_PORT_CHROMA3_IN);
1261
1262                add_in_dtd(ctx, VPE_PORT_LUMA2_IN);
1263                add_in_dtd(ctx, VPE_PORT_CHROMA2_IN);
1264        }
1265
1266        add_in_dtd(ctx, VPE_PORT_LUMA1_IN);
1267        add_in_dtd(ctx, VPE_PORT_CHROMA1_IN);
1268
1269        if (ctx->deinterlacing)
1270                add_in_dtd(ctx, VPE_PORT_MV_IN);
1271
1272        /* sync on channel control descriptors for input ports */
1273        vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_LUMA1_IN);
1274        vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_CHROMA1_IN);
1275
1276        if (ctx->deinterlacing) {
1277                vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1278                        VPE_CHAN_LUMA2_IN);
1279                vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1280                        VPE_CHAN_CHROMA2_IN);
1281
1282                vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1283                        VPE_CHAN_LUMA3_IN);
1284                vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1285                        VPE_CHAN_CHROMA3_IN);
1286
1287                vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_MV_IN);
1288        }
1289
1290        /* sync on channel control descriptors for output ports */
1291        if (d_q_data->colorspace == V4L2_COLORSPACE_SRGB) {
1292                vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1293                        VPE_CHAN_RGB_OUT);
1294        } else {
1295                vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1296                        VPE_CHAN_LUMA_OUT);
1297                if (d_q_data->fmt->coplanar)
1298                        vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1299                                VPE_CHAN_CHROMA_OUT);
1300        }
1301
1302        if (ctx->deinterlacing)
1303                vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_MV_OUT);
1304
1305        enable_irqs(ctx);
1306
1307        vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->desc_list.buf);
1308        vpdma_submit_descs(ctx->dev->vpdma, &ctx->desc_list, 0);
1309}
1310
1311static void dei_error(struct vpe_ctx *ctx)
1312{
1313        dev_warn(ctx->dev->v4l2_dev.dev,
1314                "received DEI error interrupt\n");
1315}
1316
1317static void ds1_uv_error(struct vpe_ctx *ctx)
1318{
1319        dev_warn(ctx->dev->v4l2_dev.dev,
1320                "received downsampler error interrupt\n");
1321}
1322
1323static irqreturn_t vpe_irq(int irq_vpe, void *data)
1324{
1325        struct vpe_dev *dev = (struct vpe_dev *)data;
1326        struct vpe_ctx *ctx;
1327        struct vpe_q_data *d_q_data;
1328        struct vb2_v4l2_buffer *s_vb, *d_vb;
1329        unsigned long flags;
1330        u32 irqst0, irqst1;
1331        bool list_complete = false;
1332
1333        irqst0 = read_reg(dev, VPE_INT0_STATUS0);
1334        if (irqst0) {
1335                write_reg(dev, VPE_INT0_STATUS0_CLR, irqst0);
1336                vpe_dbg(dev, "INT0_STATUS0 = 0x%08x\n", irqst0);
1337        }
1338
1339        irqst1 = read_reg(dev, VPE_INT0_STATUS1);
1340        if (irqst1) {
1341                write_reg(dev, VPE_INT0_STATUS1_CLR, irqst1);
1342                vpe_dbg(dev, "INT0_STATUS1 = 0x%08x\n", irqst1);
1343        }
1344
1345        ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
1346        if (!ctx) {
1347                vpe_err(dev, "instance released before end of transaction\n");
1348                goto handled;
1349        }
1350
1351        if (irqst1) {
1352                if (irqst1 & VPE_DEI_ERROR_INT) {
1353                        irqst1 &= ~VPE_DEI_ERROR_INT;
1354                        dei_error(ctx);
1355                }
1356                if (irqst1 & VPE_DS1_UV_ERROR_INT) {
1357                        irqst1 &= ~VPE_DS1_UV_ERROR_INT;
1358                        ds1_uv_error(ctx);
1359                }
1360        }
1361
1362        if (irqst0) {
1363                if (irqst0 & VPE_INT0_LIST0_COMPLETE)
1364                        vpdma_clear_list_stat(ctx->dev->vpdma, 0, 0);
1365
1366                irqst0 &= ~(VPE_INT0_LIST0_COMPLETE);
1367                list_complete = true;
1368        }
1369
1370        if (irqst0 | irqst1) {
1371                dev_warn(dev->v4l2_dev.dev, "Unexpected interrupt: INT0_STATUS0 = 0x%08x, INT0_STATUS1 = 0x%08x\n",
1372                        irqst0, irqst1);
1373        }
1374
1375        /*
1376         * Setup next operation only when list complete IRQ occurs
1377         * otherwise, skip the following code
1378         */
1379        if (!list_complete)
1380                goto handled;
1381
1382        disable_irqs(ctx);
1383
1384        vpdma_unmap_desc_buf(dev->vpdma, &ctx->desc_list.buf);
1385        vpdma_unmap_desc_buf(dev->vpdma, &ctx->mmr_adb);
1386        vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_h);
1387        vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_v);
1388
1389        vpdma_reset_desc_list(&ctx->desc_list);
1390
1391         /* the previous dst mv buffer becomes the next src mv buffer */
1392        ctx->src_mv_buf_selector = !ctx->src_mv_buf_selector;
1393
1394        if (ctx->aborting)
1395                goto finished;
1396
1397        s_vb = ctx->src_vbs[0];
1398        d_vb = ctx->dst_vb;
1399
1400        d_vb->flags = s_vb->flags;
1401        d_vb->vb2_buf.timestamp = s_vb->vb2_buf.timestamp;
1402
1403        if (s_vb->flags & V4L2_BUF_FLAG_TIMECODE)
1404                d_vb->timecode = s_vb->timecode;
1405
1406        d_vb->sequence = ctx->sequence;
1407
1408        d_q_data = &ctx->q_data[Q_DATA_DST];
1409        if (d_q_data->flags & Q_IS_INTERLACED) {
1410                d_vb->field = ctx->field;
1411                if (ctx->field == V4L2_FIELD_BOTTOM) {
1412                        ctx->sequence++;
1413                        ctx->field = V4L2_FIELD_TOP;
1414                } else {
1415                        WARN_ON(ctx->field != V4L2_FIELD_TOP);
1416                        ctx->field = V4L2_FIELD_BOTTOM;
1417                }
1418        } else {
1419                d_vb->field = V4L2_FIELD_NONE;
1420                ctx->sequence++;
1421        }
1422
1423        if (ctx->deinterlacing) {
1424                /*
1425                 * Allow source buffer to be dequeued only if it won't be used
1426                 * in the next iteration. All vbs are initialized to first
1427                 * buffer and we are shifting buffers every iteration, for the
1428                 * first two iterations, no buffer will be dequeued.
1429                 * This ensures that driver will keep (n-2)th (n-1)th and (n)th
1430                 * field when deinterlacing is enabled
1431                 */
1432                if (ctx->src_vbs[2] != ctx->src_vbs[1])
1433                        s_vb = ctx->src_vbs[2];
1434                else
1435                        s_vb = NULL;
1436        }
1437
1438        spin_lock_irqsave(&dev->lock, flags);
1439
1440        if (s_vb)
1441                v4l2_m2m_buf_done(s_vb, VB2_BUF_STATE_DONE);
1442
1443        v4l2_m2m_buf_done(d_vb, VB2_BUF_STATE_DONE);
1444
1445        spin_unlock_irqrestore(&dev->lock, flags);
1446
1447        if (ctx->deinterlacing) {
1448                ctx->src_vbs[2] = ctx->src_vbs[1];
1449                ctx->src_vbs[1] = ctx->src_vbs[0];
1450        }
1451
1452        /*
1453         * Since the vb2_buf_done has already been called fir therse
1454         * buffer we can now NULL them out so that we won't try
1455         * to clean out stray pointer later on.
1456        */
1457        ctx->src_vbs[0] = NULL;
1458        ctx->dst_vb = NULL;
1459
1460        ctx->bufs_completed++;
1461        if (ctx->bufs_completed < ctx->bufs_per_job && job_ready(ctx)) {
1462                device_run(ctx);
1463                goto handled;
1464        }
1465
1466finished:
1467        vpe_dbg(ctx->dev, "finishing transaction\n");
1468        ctx->bufs_completed = 0;
1469        v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx);
1470handled:
1471        return IRQ_HANDLED;
1472}
1473
1474/*
1475 * video ioctls
1476 */
1477static int vpe_querycap(struct file *file, void *priv,
1478                        struct v4l2_capability *cap)
1479{
1480        strscpy(cap->driver, VPE_MODULE_NAME, sizeof(cap->driver));
1481        strscpy(cap->card, VPE_MODULE_NAME, sizeof(cap->card));
1482        snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s",
1483                VPE_MODULE_NAME);
1484        return 0;
1485}
1486
1487static int __enum_fmt(struct v4l2_fmtdesc *f, u32 type)
1488{
1489        int i, index;
1490        struct vpe_fmt *fmt = NULL;
1491
1492        index = 0;
1493        for (i = 0; i < ARRAY_SIZE(vpe_formats); ++i) {
1494                if (vpe_formats[i].types & type) {
1495                        if (index == f->index) {
1496                                fmt = &vpe_formats[i];
1497                                break;
1498                        }
1499                        index++;
1500                }
1501        }
1502
1503        if (!fmt)
1504                return -EINVAL;
1505
1506        f->pixelformat = fmt->fourcc;
1507        return 0;
1508}
1509
1510static int vpe_enum_fmt(struct file *file, void *priv,
1511                                struct v4l2_fmtdesc *f)
1512{
1513        if (V4L2_TYPE_IS_OUTPUT(f->type))
1514                return __enum_fmt(f, VPE_FMT_TYPE_OUTPUT);
1515
1516        return __enum_fmt(f, VPE_FMT_TYPE_CAPTURE);
1517}
1518
1519static int vpe_g_fmt(struct file *file, void *priv, struct v4l2_format *f)
1520{
1521        struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
1522        struct vpe_ctx *ctx = file2ctx(file);
1523        struct vb2_queue *vq;
1524        struct vpe_q_data *q_data;
1525        int i;
1526
1527        vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
1528        if (!vq)
1529                return -EINVAL;
1530
1531        q_data = get_q_data(ctx, f->type);
1532
1533        pix->width = q_data->width;
1534        pix->height = q_data->height;
1535        pix->pixelformat = q_data->fmt->fourcc;
1536        pix->field = q_data->field;
1537
1538        if (V4L2_TYPE_IS_OUTPUT(f->type)) {
1539                pix->colorspace = q_data->colorspace;
1540        } else {
1541                struct vpe_q_data *s_q_data;
1542
1543                /* get colorspace from the source queue */
1544                s_q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
1545
1546                pix->colorspace = s_q_data->colorspace;
1547        }
1548
1549        pix->num_planes = q_data->nplanes;
1550
1551        for (i = 0; i < pix->num_planes; i++) {
1552                pix->plane_fmt[i].bytesperline = q_data->bytesperline[i];
1553                pix->plane_fmt[i].sizeimage = q_data->sizeimage[i];
1554        }
1555
1556        return 0;
1557}
1558
1559static int __vpe_try_fmt(struct vpe_ctx *ctx, struct v4l2_format *f,
1560                       struct vpe_fmt *fmt, int type)
1561{
1562        struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
1563        struct v4l2_plane_pix_format *plane_fmt;
1564        unsigned int w_align;
1565        int i, depth, depth_bytes, height;
1566        unsigned int stride = 0;
1567
1568        if (!fmt || !(fmt->types & type)) {
1569                vpe_err(ctx->dev, "Fourcc format (0x%08x) invalid.\n",
1570                        pix->pixelformat);
1571                return -EINVAL;
1572        }
1573
1574        if (pix->field != V4L2_FIELD_NONE && pix->field != V4L2_FIELD_ALTERNATE
1575                        && pix->field != V4L2_FIELD_SEQ_TB)
1576                pix->field = V4L2_FIELD_NONE;
1577
1578        depth = fmt->vpdma_fmt[VPE_LUMA]->depth;
1579
1580        /*
1581         * the line stride should 16 byte aligned for VPDMA to work, based on
1582         * the bytes per pixel, figure out how much the width should be aligned
1583         * to make sure line stride is 16 byte aligned
1584         */
1585        depth_bytes = depth >> 3;
1586
1587        if (depth_bytes == 3) {
1588                /*
1589                 * if bpp is 3(as in some RGB formats), the pixel width doesn't
1590                 * really help in ensuring line stride is 16 byte aligned
1591                 */
1592                w_align = 4;
1593        } else {
1594                /*
1595                 * for the remainder bpp(4, 2 and 1), the pixel width alignment
1596                 * can ensure a line stride alignment of 16 bytes. For example,
1597                 * if bpp is 2, then the line stride can be 16 byte aligned if
1598                 * the width is 8 byte aligned
1599                 */
1600
1601                /*
1602                 * HACK: using order_base_2() here causes lots of asm output
1603                 * errors with smatch, on i386:
1604                 * ./arch/x86/include/asm/bitops.h:457:22:
1605                 *               warning: asm output is not an lvalue
1606                 * Perhaps some gcc optimization is doing the wrong thing
1607                 * there.
1608                 * Let's get rid of them by doing the calculus on two steps
1609                 */
1610                w_align = roundup_pow_of_two(VPDMA_DESC_ALIGN / depth_bytes);
1611                w_align = ilog2(w_align);
1612        }
1613
1614        v4l_bound_align_image(&pix->width, MIN_W, MAX_W, w_align,
1615                              &pix->height, MIN_H, MAX_H, H_ALIGN,
1616                              S_ALIGN);
1617
1618        if (!pix->num_planes)
1619                pix->num_planes = fmt->coplanar ? 2 : 1;
1620        else if (pix->num_planes > 1 && !fmt->coplanar)
1621                pix->num_planes = 1;
1622
1623        pix->pixelformat = fmt->fourcc;
1624
1625        /*
1626         * For the actual image parameters, we need to consider the field
1627         * height of the image for SEQ_TB buffers.
1628         */
1629        if (pix->field == V4L2_FIELD_SEQ_TB)
1630                height = pix->height / 2;
1631        else
1632                height = pix->height;
1633
1634        if (!pix->colorspace) {
1635                if (fmt->fourcc == V4L2_PIX_FMT_RGB24 ||
1636                                fmt->fourcc == V4L2_PIX_FMT_BGR24 ||
1637                                fmt->fourcc == V4L2_PIX_FMT_RGB32 ||
1638                                fmt->fourcc == V4L2_PIX_FMT_BGR32) {
1639                        pix->colorspace = V4L2_COLORSPACE_SRGB;
1640                } else {
1641                        if (height > 1280)      /* HD */
1642                                pix->colorspace = V4L2_COLORSPACE_REC709;
1643                        else                    /* SD */
1644                                pix->colorspace = V4L2_COLORSPACE_SMPTE170M;
1645                }
1646        }
1647
1648        memset(pix->reserved, 0, sizeof(pix->reserved));
1649        for (i = 0; i < pix->num_planes; i++) {
1650                plane_fmt = &pix->plane_fmt[i];
1651                depth = fmt->vpdma_fmt[i]->depth;
1652
1653                stride = (pix->width * fmt->vpdma_fmt[VPE_LUMA]->depth) >> 3;
1654                if (stride > plane_fmt->bytesperline)
1655                        plane_fmt->bytesperline = stride;
1656
1657                plane_fmt->bytesperline = ALIGN(plane_fmt->bytesperline,
1658                                                VPDMA_STRIDE_ALIGN);
1659
1660                if (i == VPE_LUMA) {
1661                        plane_fmt->sizeimage = pix->height *
1662                                               plane_fmt->bytesperline;
1663
1664                        if (pix->num_planes == 1 && fmt->coplanar)
1665                                plane_fmt->sizeimage += pix->height *
1666                                        plane_fmt->bytesperline *
1667                                        fmt->vpdma_fmt[VPE_CHROMA]->depth >> 3;
1668
1669                } else { /* i == VIP_CHROMA */
1670                        plane_fmt->sizeimage = (pix->height *
1671                                               plane_fmt->bytesperline *
1672                                               depth) >> 3;
1673                }
1674                memset(plane_fmt->reserved, 0, sizeof(plane_fmt->reserved));
1675        }
1676
1677        return 0;
1678}
1679
1680static int vpe_try_fmt(struct file *file, void *priv, struct v4l2_format *f)
1681{
1682        struct vpe_ctx *ctx = file2ctx(file);
1683        struct vpe_fmt *fmt = find_format(f);
1684
1685        if (V4L2_TYPE_IS_OUTPUT(f->type))
1686                return __vpe_try_fmt(ctx, f, fmt, VPE_FMT_TYPE_OUTPUT);
1687        else
1688                return __vpe_try_fmt(ctx, f, fmt, VPE_FMT_TYPE_CAPTURE);
1689}
1690
1691static int __vpe_s_fmt(struct vpe_ctx *ctx, struct v4l2_format *f)
1692{
1693        struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
1694        struct v4l2_plane_pix_format *plane_fmt;
1695        struct vpe_q_data *q_data;
1696        struct vb2_queue *vq;
1697        int i;
1698
1699        vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
1700        if (!vq)
1701                return -EINVAL;
1702
1703        if (vb2_is_busy(vq)) {
1704                vpe_err(ctx->dev, "queue busy\n");
1705                return -EBUSY;
1706        }
1707
1708        q_data = get_q_data(ctx, f->type);
1709        if (!q_data)
1710                return -EINVAL;
1711
1712        q_data->fmt             = find_format(f);
1713        q_data->width           = pix->width;
1714        q_data->height          = pix->height;
1715        q_data->colorspace      = pix->colorspace;
1716        q_data->field           = pix->field;
1717        q_data->nplanes         = pix->num_planes;
1718
1719        for (i = 0; i < pix->num_planes; i++) {
1720                plane_fmt = &pix->plane_fmt[i];
1721
1722                q_data->bytesperline[i] = plane_fmt->bytesperline;
1723                q_data->sizeimage[i]    = plane_fmt->sizeimage;
1724        }
1725
1726        q_data->c_rect.left     = 0;
1727        q_data->c_rect.top      = 0;
1728        q_data->c_rect.width    = q_data->width;
1729        q_data->c_rect.height   = q_data->height;
1730
1731        if (q_data->field == V4L2_FIELD_ALTERNATE)
1732                q_data->flags |= Q_DATA_INTERLACED_ALTERNATE;
1733        else if (q_data->field == V4L2_FIELD_SEQ_TB)
1734                q_data->flags |= Q_DATA_INTERLACED_SEQ_TB;
1735        else
1736                q_data->flags &= ~Q_IS_INTERLACED;
1737
1738        /* the crop height is halved for the case of SEQ_TB buffers */
1739        if (q_data->flags & Q_DATA_INTERLACED_SEQ_TB)
1740                q_data->c_rect.height /= 2;
1741
1742        vpe_dbg(ctx->dev, "Setting format for type %d, wxh: %dx%d, fmt: %d bpl_y %d",
1743                f->type, q_data->width, q_data->height, q_data->fmt->fourcc,
1744                q_data->bytesperline[VPE_LUMA]);
1745        if (q_data->nplanes == 2)
1746                vpe_dbg(ctx->dev, " bpl_uv %d\n",
1747                        q_data->bytesperline[VPE_CHROMA]);
1748
1749        return 0;
1750}
1751
1752static int vpe_s_fmt(struct file *file, void *priv, struct v4l2_format *f)
1753{
1754        int ret;
1755        struct vpe_ctx *ctx = file2ctx(file);
1756
1757        ret = vpe_try_fmt(file, priv, f);
1758        if (ret)
1759                return ret;
1760
1761        ret = __vpe_s_fmt(ctx, f);
1762        if (ret)
1763                return ret;
1764
1765        if (V4L2_TYPE_IS_OUTPUT(f->type))
1766                set_src_registers(ctx);
1767        else
1768                set_dst_registers(ctx);
1769
1770        return set_srcdst_params(ctx);
1771}
1772
1773static int __vpe_try_selection(struct vpe_ctx *ctx, struct v4l2_selection *s)
1774{
1775        struct vpe_q_data *q_data;
1776        int height;
1777
1778        if ((s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) &&
1779            (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT))
1780                return -EINVAL;
1781
1782        q_data = get_q_data(ctx, s->type);
1783        if (!q_data)
1784                return -EINVAL;
1785
1786        switch (s->target) {
1787        case V4L2_SEL_TGT_COMPOSE:
1788                /*
1789                 * COMPOSE target is only valid for capture buffer type, return
1790                 * error for output buffer type
1791                 */
1792                if (s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
1793                        return -EINVAL;
1794                break;
1795        case V4L2_SEL_TGT_CROP:
1796                /*
1797                 * CROP target is only valid for output buffer type, return
1798                 * error for capture buffer type
1799                 */
1800                if (s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1801                        return -EINVAL;
1802                break;
1803        /*
1804         * bound and default crop/compose targets are invalid targets to
1805         * try/set
1806         */
1807        default:
1808                return -EINVAL;
1809        }
1810
1811        /*
1812         * For SEQ_TB buffers, crop height should be less than the height of
1813         * the field height, not the buffer height
1814         */
1815        if (q_data->flags & Q_DATA_INTERLACED_SEQ_TB)
1816                height = q_data->height / 2;
1817        else
1818                height = q_data->height;
1819
1820        if (s->r.top < 0 || s->r.left < 0) {
1821                vpe_err(ctx->dev, "negative values for top and left\n");
1822                s->r.top = s->r.left = 0;
1823        }
1824
1825        v4l_bound_align_image(&s->r.width, MIN_W, q_data->width, 1,
1826                &s->r.height, MIN_H, height, H_ALIGN, S_ALIGN);
1827
1828        /* adjust left/top if cropping rectangle is out of bounds */
1829        if (s->r.left + s->r.width > q_data->width)
1830                s->r.left = q_data->width - s->r.width;
1831        if (s->r.top + s->r.height > q_data->height)
1832                s->r.top = q_data->height - s->r.height;
1833
1834        return 0;
1835}
1836
1837static int vpe_g_selection(struct file *file, void *fh,
1838                struct v4l2_selection *s)
1839{
1840        struct vpe_ctx *ctx = file2ctx(file);
1841        struct vpe_q_data *q_data;
1842        bool use_c_rect = false;
1843
1844        if ((s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) &&
1845            (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT))
1846                return -EINVAL;
1847
1848        q_data = get_q_data(ctx, s->type);
1849        if (!q_data)
1850                return -EINVAL;
1851
1852        switch (s->target) {
1853        case V4L2_SEL_TGT_COMPOSE_DEFAULT:
1854        case V4L2_SEL_TGT_COMPOSE_BOUNDS:
1855                if (s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
1856                        return -EINVAL;
1857                break;
1858        case V4L2_SEL_TGT_CROP_BOUNDS:
1859        case V4L2_SEL_TGT_CROP_DEFAULT:
1860                if (s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1861                        return -EINVAL;
1862                break;
1863        case V4L2_SEL_TGT_COMPOSE:
1864                if (s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
1865                        return -EINVAL;
1866                use_c_rect = true;
1867                break;
1868        case V4L2_SEL_TGT_CROP:
1869                if (s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1870                        return -EINVAL;
1871                use_c_rect = true;
1872                break;
1873        default:
1874                return -EINVAL;
1875        }
1876
1877        if (use_c_rect) {
1878                /*
1879                 * for CROP/COMPOSE target type, return c_rect params from the
1880                 * respective buffer type
1881                 */
1882                s->r = q_data->c_rect;
1883        } else {
1884                /*
1885                 * for DEFAULT/BOUNDS target type, return width and height from
1886                 * S_FMT of the respective buffer type
1887                 */
1888                s->r.left = 0;
1889                s->r.top = 0;
1890                s->r.width = q_data->width;
1891                s->r.height = q_data->height;
1892        }
1893
1894        return 0;
1895}
1896
1897
1898static int vpe_s_selection(struct file *file, void *fh,
1899                struct v4l2_selection *s)
1900{
1901        struct vpe_ctx *ctx = file2ctx(file);
1902        struct vpe_q_data *q_data;
1903        struct v4l2_selection sel = *s;
1904        int ret;
1905
1906        ret = __vpe_try_selection(ctx, &sel);
1907        if (ret)
1908                return ret;
1909
1910        q_data = get_q_data(ctx, sel.type);
1911        if (!q_data)
1912                return -EINVAL;
1913
1914        if ((q_data->c_rect.left == sel.r.left) &&
1915                        (q_data->c_rect.top == sel.r.top) &&
1916                        (q_data->c_rect.width == sel.r.width) &&
1917                        (q_data->c_rect.height == sel.r.height)) {
1918                vpe_dbg(ctx->dev,
1919                        "requested crop/compose values are already set\n");
1920                return 0;
1921        }
1922
1923        q_data->c_rect = sel.r;
1924
1925        return set_srcdst_params(ctx);
1926}
1927
/*
 * Driver-specific control: the number of buffers/frames a context may process
 * with the VPE before the hardware switches to a different context. The
 * default is one buffer per context.
 */
#define V4L2_CID_VPE_BUFS_PER_JOB		(V4L2_CID_USER_TI_VPE_BASE + 0)
1933
1934static int vpe_s_ctrl(struct v4l2_ctrl *ctrl)
1935{
1936        struct vpe_ctx *ctx =
1937                container_of(ctrl->handler, struct vpe_ctx, hdl);
1938
1939        switch (ctrl->id) {
1940        case V4L2_CID_VPE_BUFS_PER_JOB:
1941                ctx->bufs_per_job = ctrl->val;
1942                break;
1943
1944        default:
1945                vpe_err(ctx->dev, "Invalid control\n");
1946                return -EINVAL;
1947        }
1948
1949        return 0;
1950}
1951
1952static const struct v4l2_ctrl_ops vpe_ctrl_ops = {
1953        .s_ctrl = vpe_s_ctrl,
1954};
1955
1956static const struct v4l2_ioctl_ops vpe_ioctl_ops = {
1957        .vidioc_querycap                = vpe_querycap,
1958
1959        .vidioc_enum_fmt_vid_cap        = vpe_enum_fmt,
1960        .vidioc_g_fmt_vid_cap_mplane    = vpe_g_fmt,
1961        .vidioc_try_fmt_vid_cap_mplane  = vpe_try_fmt,
1962        .vidioc_s_fmt_vid_cap_mplane    = vpe_s_fmt,
1963
1964        .vidioc_enum_fmt_vid_out        = vpe_enum_fmt,
1965        .vidioc_g_fmt_vid_out_mplane    = vpe_g_fmt,
1966        .vidioc_try_fmt_vid_out_mplane  = vpe_try_fmt,
1967        .vidioc_s_fmt_vid_out_mplane    = vpe_s_fmt,
1968
1969        .vidioc_g_selection             = vpe_g_selection,
1970        .vidioc_s_selection             = vpe_s_selection,
1971
1972        .vidioc_reqbufs                 = v4l2_m2m_ioctl_reqbufs,
1973        .vidioc_querybuf                = v4l2_m2m_ioctl_querybuf,
1974        .vidioc_qbuf                    = v4l2_m2m_ioctl_qbuf,
1975        .vidioc_dqbuf                   = v4l2_m2m_ioctl_dqbuf,
1976        .vidioc_expbuf                  = v4l2_m2m_ioctl_expbuf,
1977        .vidioc_streamon                = v4l2_m2m_ioctl_streamon,
1978        .vidioc_streamoff               = v4l2_m2m_ioctl_streamoff,
1979
1980        .vidioc_subscribe_event         = v4l2_ctrl_subscribe_event,
1981        .vidioc_unsubscribe_event       = v4l2_event_unsubscribe,
1982};
1983
1984/*
1985 * Queue operations
1986 */
1987static int vpe_queue_setup(struct vb2_queue *vq,
1988                           unsigned int *nbuffers, unsigned int *nplanes,
1989                           unsigned int sizes[], struct device *alloc_devs[])
1990{
1991        int i;
1992        struct vpe_ctx *ctx = vb2_get_drv_priv(vq);
1993        struct vpe_q_data *q_data;
1994
1995        q_data = get_q_data(ctx, vq->type);
1996
1997        *nplanes = q_data->nplanes;
1998
1999        for (i = 0; i < *nplanes; i++)
2000                sizes[i] = q_data->sizeimage[i];
2001
2002        vpe_dbg(ctx->dev, "get %d buffer(s) of size %d", *nbuffers,
2003                sizes[VPE_LUMA]);
2004        if (q_data->nplanes == 2)
2005                vpe_dbg(ctx->dev, " and %d\n", sizes[VPE_CHROMA]);
2006
2007        return 0;
2008}
2009
2010static int vpe_buf_prepare(struct vb2_buffer *vb)
2011{
2012        struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
2013        struct vpe_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
2014        struct vpe_q_data *q_data;
2015        int i, num_planes;
2016
2017        vpe_dbg(ctx->dev, "type: %d\n", vb->vb2_queue->type);
2018
2019        q_data = get_q_data(ctx, vb->vb2_queue->type);
2020        num_planes = q_data->nplanes;
2021
2022        if (vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
2023                if (!(q_data->flags & Q_IS_INTERLACED)) {
2024                        vbuf->field = V4L2_FIELD_NONE;
2025                } else {
2026                        if (vbuf->field != V4L2_FIELD_TOP &&
2027                            vbuf->field != V4L2_FIELD_BOTTOM &&
2028                            vbuf->field != V4L2_FIELD_SEQ_TB)
2029                                return -EINVAL;
2030                }
2031        }
2032
2033        for (i = 0; i < num_planes; i++) {
2034                if (vb2_plane_size(vb, i) < q_data->sizeimage[i]) {
2035                        vpe_err(ctx->dev,
2036                                "data will not fit into plane (%lu < %lu)\n",
2037                                vb2_plane_size(vb, i),
2038                                (long) q_data->sizeimage[i]);
2039                        return -EINVAL;
2040                }
2041        }
2042
2043        for (i = 0; i < num_planes; i++)
2044                vb2_set_plane_payload(vb, i, q_data->sizeimage[i]);
2045
2046        return 0;
2047}
2048
2049static void vpe_buf_queue(struct vb2_buffer *vb)
2050{
2051        struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
2052        struct vpe_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
2053
2054        v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
2055}
2056
2057static int check_srcdst_sizes(struct vpe_ctx *ctx)
2058{
2059        struct vpe_q_data *s_q_data =  &ctx->q_data[Q_DATA_SRC];
2060        struct vpe_q_data *d_q_data =  &ctx->q_data[Q_DATA_DST];
2061        unsigned int src_w = s_q_data->c_rect.width;
2062        unsigned int src_h = s_q_data->c_rect.height;
2063        unsigned int dst_w = d_q_data->c_rect.width;
2064        unsigned int dst_h = d_q_data->c_rect.height;
2065
2066        if (src_w == dst_w && src_h == dst_h)
2067                return 0;
2068
2069        if (src_h <= SC_MAX_PIXEL_HEIGHT &&
2070            src_w <= SC_MAX_PIXEL_WIDTH &&
2071            dst_h <= SC_MAX_PIXEL_HEIGHT &&
2072            dst_w <= SC_MAX_PIXEL_WIDTH)
2073                return 0;
2074
2075        return -1;
2076}
2077
2078static void vpe_return_all_buffers(struct vpe_ctx *ctx,  struct vb2_queue *q,
2079                                   enum vb2_buffer_state state)
2080{
2081        struct vb2_v4l2_buffer *vb;
2082        unsigned long flags;
2083
2084        for (;;) {
2085                if (V4L2_TYPE_IS_OUTPUT(q->type))
2086                        vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
2087                else
2088                        vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2089                if (!vb)
2090                        break;
2091                spin_lock_irqsave(&ctx->dev->lock, flags);
2092                v4l2_m2m_buf_done(vb, state);
2093                spin_unlock_irqrestore(&ctx->dev->lock, flags);
2094        }
2095
2096        /*
2097         * Cleanup the in-transit vb2 buffers that have been
2098         * removed from their respective queue already but for
2099         * which procecessing has not been completed yet.
2100         */
2101        if (V4L2_TYPE_IS_OUTPUT(q->type)) {
2102                spin_lock_irqsave(&ctx->dev->lock, flags);
2103
2104                if (ctx->src_vbs[2])
2105                        v4l2_m2m_buf_done(ctx->src_vbs[2], state);
2106
2107                if (ctx->src_vbs[1] && (ctx->src_vbs[1] != ctx->src_vbs[2]))
2108                        v4l2_m2m_buf_done(ctx->src_vbs[1], state);
2109
2110                if (ctx->src_vbs[0] &&
2111                    (ctx->src_vbs[0] != ctx->src_vbs[1]) &&
2112                    (ctx->src_vbs[0] != ctx->src_vbs[2]))
2113                        v4l2_m2m_buf_done(ctx->src_vbs[0], state);
2114
2115                ctx->src_vbs[2] = NULL;
2116                ctx->src_vbs[1] = NULL;
2117                ctx->src_vbs[0] = NULL;
2118
2119                spin_unlock_irqrestore(&ctx->dev->lock, flags);
2120        } else {
2121                if (ctx->dst_vb) {
2122                        spin_lock_irqsave(&ctx->dev->lock, flags);
2123
2124                        v4l2_m2m_buf_done(ctx->dst_vb, state);
2125                        ctx->dst_vb = NULL;
2126                        spin_unlock_irqrestore(&ctx->dev->lock, flags);
2127                }
2128        }
2129}
2130
2131static int vpe_start_streaming(struct vb2_queue *q, unsigned int count)
2132{
2133        struct vpe_ctx *ctx = vb2_get_drv_priv(q);
2134
2135        /* Check any of the size exceed maximum scaling sizes */
2136        if (check_srcdst_sizes(ctx)) {
2137                vpe_err(ctx->dev,
2138                        "Conversion setup failed, check source and destination parameters\n"
2139                        );
2140                vpe_return_all_buffers(ctx, q, VB2_BUF_STATE_QUEUED);
2141                return -EINVAL;
2142        }
2143
2144        if (ctx->deinterlacing)
2145                config_edi_input_mode(ctx, 0x0);
2146
2147        if (ctx->sequence != 0)
2148                set_srcdst_params(ctx);
2149
2150        return 0;
2151}
2152
2153static void vpe_stop_streaming(struct vb2_queue *q)
2154{
2155        struct vpe_ctx *ctx = vb2_get_drv_priv(q);
2156
2157        vpe_dump_regs(ctx->dev);
2158        vpdma_dump_regs(ctx->dev->vpdma);
2159
2160        vpe_return_all_buffers(ctx, q, VB2_BUF_STATE_ERROR);
2161}
2162
2163static const struct vb2_ops vpe_qops = {
2164        .queue_setup     = vpe_queue_setup,
2165        .buf_prepare     = vpe_buf_prepare,
2166        .buf_queue       = vpe_buf_queue,
2167        .wait_prepare    = vb2_ops_wait_prepare,
2168        .wait_finish     = vb2_ops_wait_finish,
2169        .start_streaming = vpe_start_streaming,
2170        .stop_streaming  = vpe_stop_streaming,
2171};
2172
2173static int queue_init(void *priv, struct vb2_queue *src_vq,
2174                      struct vb2_queue *dst_vq)
2175{
2176        struct vpe_ctx *ctx = priv;
2177        struct vpe_dev *dev = ctx->dev;
2178        int ret;
2179
2180        memset(src_vq, 0, sizeof(*src_vq));
2181        src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
2182        src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
2183        src_vq->drv_priv = ctx;
2184        src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
2185        src_vq->ops = &vpe_qops;
2186        src_vq->mem_ops = &vb2_dma_contig_memops;
2187        src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
2188        src_vq->lock = &dev->dev_mutex;
2189        src_vq->dev = dev->v4l2_dev.dev;
2190
2191        ret = vb2_queue_init(src_vq);
2192        if (ret)
2193                return ret;
2194
2195        memset(dst_vq, 0, sizeof(*dst_vq));
2196        dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
2197        dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
2198        dst_vq->drv_priv = ctx;
2199        dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
2200        dst_vq->ops = &vpe_qops;
2201        dst_vq->mem_ops = &vb2_dma_contig_memops;
2202        dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
2203        dst_vq->lock = &dev->dev_mutex;
2204        dst_vq->dev = dev->v4l2_dev.dev;
2205
2206        return vb2_queue_init(dst_vq);
2207}
2208
2209static const struct v4l2_ctrl_config vpe_bufs_per_job = {
2210        .ops = &vpe_ctrl_ops,
2211        .id = V4L2_CID_VPE_BUFS_PER_JOB,
2212        .name = "Buffers Per Transaction",
2213        .type = V4L2_CTRL_TYPE_INTEGER,
2214        .def = VPE_DEF_BUFS_PER_JOB,
2215        .min = 1,
2216        .max = VIDEO_MAX_FRAME,
2217        .step = 1,
2218};
2219
2220/*
2221 * File operations
2222 */
2223static int vpe_open(struct file *file)
2224{
2225        struct vpe_dev *dev = video_drvdata(file);
2226        struct vpe_q_data *s_q_data;
2227        struct v4l2_ctrl_handler *hdl;
2228        struct vpe_ctx *ctx;
2229        int ret;
2230
2231        vpe_dbg(dev, "vpe_open\n");
2232
2233        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
2234        if (!ctx)
2235                return -ENOMEM;
2236
2237        ctx->dev = dev;
2238
2239        if (mutex_lock_interruptible(&dev->dev_mutex)) {
2240                ret = -ERESTARTSYS;
2241                goto free_ctx;
2242        }
2243
2244        ret = vpdma_create_desc_list(&ctx->desc_list, VPE_DESC_LIST_SIZE,
2245                        VPDMA_LIST_TYPE_NORMAL);
2246        if (ret != 0)
2247                goto unlock;
2248
2249        ret = vpdma_alloc_desc_buf(&ctx->mmr_adb, sizeof(struct vpe_mmr_adb));
2250        if (ret != 0)
2251                goto free_desc_list;
2252
2253        ret = vpdma_alloc_desc_buf(&ctx->sc_coeff_h, SC_COEF_SRAM_SIZE);
2254        if (ret != 0)
2255                goto free_mmr_adb;
2256
2257        ret = vpdma_alloc_desc_buf(&ctx->sc_coeff_v, SC_COEF_SRAM_SIZE);
2258        if (ret != 0)
2259                goto free_sc_h;
2260
2261        init_adb_hdrs(ctx);
2262
2263        v4l2_fh_init(&ctx->fh, video_devdata(file));
2264        file->private_data = &ctx->fh;
2265
2266        hdl = &ctx->hdl;
2267        v4l2_ctrl_handler_init(hdl, 1);
2268        v4l2_ctrl_new_custom(hdl, &vpe_bufs_per_job, NULL);
2269        if (hdl->error) {
2270                ret = hdl->error;
2271                goto exit_fh;
2272        }
2273        ctx->fh.ctrl_handler = hdl;
2274        v4l2_ctrl_handler_setup(hdl);
2275
2276        s_q_data = &ctx->q_data[Q_DATA_SRC];
2277        s_q_data->fmt = &vpe_formats[2];
2278        s_q_data->width = 1920;
2279        s_q_data->height = 1080;
2280        s_q_data->nplanes = 1;
2281        s_q_data->bytesperline[VPE_LUMA] = (s_q_data->width *
2282                        s_q_data->fmt->vpdma_fmt[VPE_LUMA]->depth) >> 3;
2283        s_q_data->sizeimage[VPE_LUMA] = (s_q_data->bytesperline[VPE_LUMA] *
2284                        s_q_data->height);
2285        s_q_data->colorspace = V4L2_COLORSPACE_REC709;
2286        s_q_data->field = V4L2_FIELD_NONE;
2287        s_q_data->c_rect.left = 0;
2288        s_q_data->c_rect.top = 0;
2289        s_q_data->c_rect.width = s_q_data->width;
2290        s_q_data->c_rect.height = s_q_data->height;
2291        s_q_data->flags = 0;
2292
2293        ctx->q_data[Q_DATA_DST] = *s_q_data;
2294
2295        set_dei_shadow_registers(ctx);
2296        set_src_registers(ctx);
2297        set_dst_registers(ctx);
2298        ret = set_srcdst_params(ctx);
2299        if (ret)
2300                goto exit_fh;
2301
2302        ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, &queue_init);
2303
2304        if (IS_ERR(ctx->fh.m2m_ctx)) {
2305                ret = PTR_ERR(ctx->fh.m2m_ctx);
2306                goto exit_fh;
2307        }
2308
2309        v4l2_fh_add(&ctx->fh);
2310
2311        /*
2312         * for now, just report the creation of the first instance, we can later
2313         * optimize the driver to enable or disable clocks when the first
2314         * instance is created or the last instance released
2315         */
2316        if (atomic_inc_return(&dev->num_instances) == 1)
2317                vpe_dbg(dev, "first instance created\n");
2318
2319        ctx->bufs_per_job = VPE_DEF_BUFS_PER_JOB;
2320
2321        ctx->load_mmrs = true;
2322
2323        vpe_dbg(dev, "created instance %p, m2m_ctx: %p\n",
2324                ctx, ctx->fh.m2m_ctx);
2325
2326        mutex_unlock(&dev->dev_mutex);
2327
2328        return 0;
2329exit_fh:
2330        v4l2_ctrl_handler_free(hdl);
2331        v4l2_fh_exit(&ctx->fh);
2332        vpdma_free_desc_buf(&ctx->sc_coeff_v);
2333free_sc_h:
2334        vpdma_free_desc_buf(&ctx->sc_coeff_h);
2335free_mmr_adb:
2336        vpdma_free_desc_buf(&ctx->mmr_adb);
2337free_desc_list:
2338        vpdma_free_desc_list(&ctx->desc_list);
2339unlock:
2340        mutex_unlock(&dev->dev_mutex);
2341free_ctx:
2342        kfree(ctx);
2343        return ret;
2344}
2345
2346static int vpe_release(struct file *file)
2347{
2348        struct vpe_dev *dev = video_drvdata(file);
2349        struct vpe_ctx *ctx = file2ctx(file);
2350
2351        vpe_dbg(dev, "releasing instance %p\n", ctx);
2352
2353        mutex_lock(&dev->dev_mutex);
2354        free_mv_buffers(ctx);
2355        vpdma_free_desc_list(&ctx->desc_list);
2356        vpdma_free_desc_buf(&ctx->mmr_adb);
2357
2358        vpdma_free_desc_buf(&ctx->sc_coeff_v);
2359        vpdma_free_desc_buf(&ctx->sc_coeff_h);
2360
2361        v4l2_fh_del(&ctx->fh);
2362        v4l2_fh_exit(&ctx->fh);
2363        v4l2_ctrl_handler_free(&ctx->hdl);
2364        v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
2365
2366        kfree(ctx);
2367
2368        /*
2369         * for now, just report the release of the last instance, we can later
2370         * optimize the driver to enable or disable clocks when the first
2371         * instance is created or the last instance released
2372         */
2373        if (atomic_dec_return(&dev->num_instances) == 0)
2374                vpe_dbg(dev, "last instance released\n");
2375
2376        mutex_unlock(&dev->dev_mutex);
2377
2378        return 0;
2379}
2380
2381static const struct v4l2_file_operations vpe_fops = {
2382        .owner          = THIS_MODULE,
2383        .open           = vpe_open,
2384        .release        = vpe_release,
2385        .poll           = v4l2_m2m_fop_poll,
2386        .unlocked_ioctl = video_ioctl2,
2387        .mmap           = v4l2_m2m_fop_mmap,
2388};
2389
2390static const struct video_device vpe_videodev = {
2391        .name           = VPE_MODULE_NAME,
2392        .fops           = &vpe_fops,
2393        .ioctl_ops      = &vpe_ioctl_ops,
2394        .minor          = -1,
2395        .release        = video_device_release_empty,
2396        .vfl_dir        = VFL_DIR_M2M,
2397        .device_caps    = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING,
2398};
2399
2400static const struct v4l2_m2m_ops m2m_ops = {
2401        .device_run     = device_run,
2402        .job_ready      = job_ready,
2403        .job_abort      = job_abort,
2404};
2405
2406static int vpe_runtime_get(struct platform_device *pdev)
2407{
2408        int r;
2409
2410        dev_dbg(&pdev->dev, "vpe_runtime_get\n");
2411
2412        r = pm_runtime_get_sync(&pdev->dev);
2413        WARN_ON(r < 0);
2414        return r < 0 ? r : 0;
2415}
2416
2417static void vpe_runtime_put(struct platform_device *pdev)
2418{
2419
2420        int r;
2421
2422        dev_dbg(&pdev->dev, "vpe_runtime_put\n");
2423
2424        r = pm_runtime_put_sync(&pdev->dev);
2425        WARN_ON(r < 0 && r != -ENOSYS);
2426}
2427
2428static void vpe_fw_cb(struct platform_device *pdev)
2429{
2430        struct vpe_dev *dev = platform_get_drvdata(pdev);
2431        struct video_device *vfd;
2432        int ret;
2433
2434        vfd = &dev->vfd;
2435        *vfd = vpe_videodev;
2436        vfd->lock = &dev->dev_mutex;
2437        vfd->v4l2_dev = &dev->v4l2_dev;
2438
2439        ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
2440        if (ret) {
2441                vpe_err(dev, "Failed to register video device\n");
2442
2443                vpe_set_clock_enable(dev, 0);
2444                vpe_runtime_put(pdev);
2445                pm_runtime_disable(&pdev->dev);
2446                v4l2_m2m_release(dev->m2m_dev);
2447                v4l2_device_unregister(&dev->v4l2_dev);
2448
2449                return;
2450        }
2451
2452        video_set_drvdata(vfd, dev);
2453        dev_info(dev->v4l2_dev.dev, "Device registered as /dev/video%d\n",
2454                vfd->num);
2455}
2456
2457static int vpe_probe(struct platform_device *pdev)
2458{
2459        struct vpe_dev *dev;
2460        int ret, irq, func;
2461
2462        dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
2463        if (!dev)
2464                return -ENOMEM;
2465
2466        spin_lock_init(&dev->lock);
2467
2468        ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
2469        if (ret)
2470                return ret;
2471
2472        atomic_set(&dev->num_instances, 0);
2473        mutex_init(&dev->dev_mutex);
2474
2475        dev->res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
2476                        "vpe_top");
2477        /*
2478         * HACK: we get resource info from device tree in the form of a list of
2479         * VPE sub blocks, the driver currently uses only the base of vpe_top
2480         * for register access, the driver should be changed later to access
2481         * registers based on the sub block base addresses
2482         */
2483        dev->base = devm_ioremap(&pdev->dev, dev->res->start, SZ_32K);
2484        if (!dev->base) {
2485                ret = -ENOMEM;
2486                goto v4l2_dev_unreg;
2487        }
2488
2489        irq = platform_get_irq(pdev, 0);
2490        ret = devm_request_irq(&pdev->dev, irq, vpe_irq, 0, VPE_MODULE_NAME,
2491                        dev);
2492        if (ret)
2493                goto v4l2_dev_unreg;
2494
2495        platform_set_drvdata(pdev, dev);
2496
2497        dev->m2m_dev = v4l2_m2m_init(&m2m_ops);
2498        if (IS_ERR(dev->m2m_dev)) {
2499                vpe_err(dev, "Failed to init mem2mem device\n");
2500                ret = PTR_ERR(dev->m2m_dev);
2501                goto v4l2_dev_unreg;
2502        }
2503
2504        pm_runtime_enable(&pdev->dev);
2505
2506        ret = vpe_runtime_get(pdev);
2507        if (ret)
2508                goto rel_m2m;
2509
2510        /* Perform clk enable followed by reset */
2511        vpe_set_clock_enable(dev, 1);
2512
2513        vpe_top_reset(dev);
2514
2515        func = read_field_reg(dev, VPE_PID, VPE_PID_FUNC_MASK,
2516                VPE_PID_FUNC_SHIFT);
2517        vpe_dbg(dev, "VPE PID function %x\n", func);
2518
2519        vpe_top_vpdma_reset(dev);
2520
2521        dev->sc = sc_create(pdev, "sc");
2522        if (IS_ERR(dev->sc)) {
2523                ret = PTR_ERR(dev->sc);
2524                goto runtime_put;
2525        }
2526
2527        dev->csc = csc_create(pdev, "csc");
2528        if (IS_ERR(dev->csc)) {
2529                ret = PTR_ERR(dev->csc);
2530                goto runtime_put;
2531        }
2532
2533        dev->vpdma = &dev->vpdma_data;
2534        ret = vpdma_create(pdev, dev->vpdma, vpe_fw_cb);
2535        if (ret)
2536                goto runtime_put;
2537
2538        return 0;
2539
2540runtime_put:
2541        vpe_runtime_put(pdev);
2542rel_m2m:
2543        pm_runtime_disable(&pdev->dev);
2544        v4l2_m2m_release(dev->m2m_dev);
2545v4l2_dev_unreg:
2546        v4l2_device_unregister(&dev->v4l2_dev);
2547
2548        return ret;
2549}
2550
2551static int vpe_remove(struct platform_device *pdev)
2552{
2553        struct vpe_dev *dev = platform_get_drvdata(pdev);
2554
2555        v4l2_info(&dev->v4l2_dev, "Removing " VPE_MODULE_NAME);
2556
2557        v4l2_m2m_release(dev->m2m_dev);
2558        video_unregister_device(&dev->vfd);
2559        v4l2_device_unregister(&dev->v4l2_dev);
2560
2561        vpe_set_clock_enable(dev, 0);
2562        vpe_runtime_put(pdev);
2563        pm_runtime_disable(&pdev->dev);
2564
2565        return 0;
2566}
2567
#ifdef CONFIG_OF
/* Device tree match table: a single "ti,vpe" entry plus the terminator. */
static const struct of_device_id vpe_of_match[] = {
	{ .compatible = "ti,vpe" },
	{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, vpe_of_match);
#endif /* CONFIG_OF */
2577
2578static struct platform_driver vpe_pdrv = {
2579        .probe          = vpe_probe,
2580        .remove         = vpe_remove,
2581        .driver         = {
2582                .name   = VPE_MODULE_NAME,
2583                .of_match_table = of_match_ptr(vpe_of_match),
2584        },
2585};
2586
2587module_platform_driver(vpe_pdrv);
2588
2589MODULE_DESCRIPTION("TI VPE driver");
2590MODULE_AUTHOR("Dale Farnsworth, <dale@farnsworth.org>");
2591MODULE_LICENSE("GPL");
2592