media: coda: align internal mpeg4 framebuffers to 16x16 macroblocks
[platform/kernel/linux-rpi.git] / drivers / media / platform / coda / coda-bit.c
1 /*
2  * Coda multi-standard codec IP - BIT processor functions
3  *
4  * Copyright (C) 2012 Vista Silicon S.L.
5  *    Javier Martin, <javier.martin@vista-silicon.com>
6  *    Xavier Duret
7  * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  */
14
15 #include <linux/clk.h>
16 #include <linux/irqreturn.h>
17 #include <linux/kernel.h>
18 #include <linux/log2.h>
19 #include <linux/platform_device.h>
20 #include <linux/reset.h>
21 #include <linux/slab.h>
22 #include <linux/videodev2.h>
23
24 #include <media/v4l2-common.h>
25 #include <media/v4l2-ctrls.h>
26 #include <media/v4l2-fh.h>
27 #include <media/v4l2-mem2mem.h>
28 #include <media/videobuf2-v4l2.h>
29 #include <media/videobuf2-dma-contig.h>
30 #include <media/videobuf2-vmalloc.h>
31
32 #include "coda.h"
33 #include "imx-vdoa.h"
34 #define CREATE_TRACE_POINTS
35 #include "trace.h"
36
37 #define CODA_PARA_BUF_SIZE      (10 * 1024)
38 #define CODA7_PS_BUF_SIZE       0x28000
39 #define CODA9_PS_SAVE_SIZE      (512 * 1024)
40
41 #define CODA_DEFAULT_GAMMA      4096
42 #define CODA9_DEFAULT_GAMMA     24576   /* 0.75 * 32768 */
43
44 static void coda_free_bitstream_buffer(struct coda_ctx *ctx);
45
46 static inline int coda_is_initialized(struct coda_dev *dev)
47 {
48         return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
49 }
50
51 static inline unsigned long coda_isbusy(struct coda_dev *dev)
52 {
53         return coda_read(dev, CODA_REG_BIT_BUSY);
54 }
55
56 static int coda_wait_timeout(struct coda_dev *dev)
57 {
58         unsigned long timeout = jiffies + msecs_to_jiffies(1000);
59
60         while (coda_isbusy(dev)) {
61                 if (time_after(jiffies, timeout))
62                         return -ETIMEDOUT;
63         }
64         return 0;
65 }
66
67 static void coda_command_async(struct coda_ctx *ctx, int cmd)
68 {
69         struct coda_dev *dev = ctx->dev;
70
71         if (dev->devtype->product == CODA_960 ||
72             dev->devtype->product == CODA_7541) {
73                 /* Restore context related registers to CODA */
74                 coda_write(dev, ctx->bit_stream_param,
75                                 CODA_REG_BIT_BIT_STREAM_PARAM);
76                 coda_write(dev, ctx->frm_dis_flg,
77                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
78                 coda_write(dev, ctx->frame_mem_ctrl,
79                                 CODA_REG_BIT_FRAME_MEM_CTRL);
80                 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
81         }
82
83         if (dev->devtype->product == CODA_960) {
84                 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
85                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
86         }
87
88         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
89
90         coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
91         coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
92         coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
93
94         trace_coda_bit_run(ctx, cmd);
95
96         coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
97 }
98
99 static int coda_command_sync(struct coda_ctx *ctx, int cmd)
100 {
101         struct coda_dev *dev = ctx->dev;
102         int ret;
103
104         coda_command_async(ctx, cmd);
105         ret = coda_wait_timeout(dev);
106         trace_coda_bit_done(ctx);
107
108         return ret;
109 }
110
111 int coda_hw_reset(struct coda_ctx *ctx)
112 {
113         struct coda_dev *dev = ctx->dev;
114         unsigned long timeout;
115         unsigned int idx;
116         int ret;
117
118         if (!dev->rstc)
119                 return -ENOENT;
120
121         idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);
122
123         if (dev->devtype->product == CODA_960) {
124                 timeout = jiffies + msecs_to_jiffies(100);
125                 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
126                 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
127                         if (time_after(jiffies, timeout))
128                                 return -ETIME;
129                         cpu_relax();
130                 }
131         }
132
133         ret = reset_control_reset(dev->rstc);
134         if (ret < 0)
135                 return ret;
136
137         if (dev->devtype->product == CODA_960)
138                 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
139         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
140         coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
141         ret = coda_wait_timeout(dev);
142         coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);
143
144         return ret;
145 }
146
147 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
148 {
149         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
150         struct coda_dev *dev = ctx->dev;
151         u32 rd_ptr;
152
153         rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
154         kfifo->out = (kfifo->in & ~kfifo->mask) |
155                       (rd_ptr - ctx->bitstream.paddr);
156         if (kfifo->out > kfifo->in)
157                 kfifo->out -= kfifo->mask + 1;
158 }
159
160 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
161 {
162         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
163         struct coda_dev *dev = ctx->dev;
164         u32 rd_ptr, wr_ptr;
165
166         rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
167         coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
168         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
169         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
170 }
171
172 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
173 {
174         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
175         struct coda_dev *dev = ctx->dev;
176         u32 wr_ptr;
177
178         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
179         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
180 }
181
182 static int coda_bitstream_pad(struct coda_ctx *ctx, u32 size)
183 {
184         unsigned char *buf;
185         u32 n;
186
187         if (size < 6)
188                 size = 6;
189
190         buf = kmalloc(size, GFP_KERNEL);
191         if (!buf)
192                 return -ENOMEM;
193
194         coda_h264_filler_nal(size, buf);
195         n = kfifo_in(&ctx->bitstream_fifo, buf, size);
196         kfree(buf);
197
198         return (n < size) ? -ENOSPC : 0;
199 }
200
201 static int coda_bitstream_queue(struct coda_ctx *ctx,
202                                 struct vb2_v4l2_buffer *src_buf)
203 {
204         u32 src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
205         u32 n;
206
207         n = kfifo_in(&ctx->bitstream_fifo,
208                         vb2_plane_vaddr(&src_buf->vb2_buf, 0), src_size);
209         if (n < src_size)
210                 return -ENOSPC;
211
212         src_buf->sequence = ctx->qsequence++;
213
214         return 0;
215 }
216
217 static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
218                                      struct vb2_v4l2_buffer *src_buf)
219 {
220         unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
221         int ret;
222
223         if (coda_get_bitstream_payload(ctx) + payload + 512 >=
224             ctx->bitstream.size)
225                 return false;
226
227         if (vb2_plane_vaddr(&src_buf->vb2_buf, 0) == NULL) {
228                 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
229                 return true;
230         }
231
232         /* Add zero padding before the first H.264 buffer, if it is too small */
233         if (ctx->qsequence == 0 && payload < 512 &&
234             ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
235                 coda_bitstream_pad(ctx, 512 - payload);
236
237         ret = coda_bitstream_queue(ctx, src_buf);
238         if (ret < 0) {
239                 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
240                 return false;
241         }
242         /* Sync read pointer to device */
243         if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
244                 coda_kfifo_sync_to_device_write(ctx);
245
246         ctx->hold = false;
247
248         return true;
249 }
250
/*
 * Move pending source buffers into the bitstream ring buffer.
 *
 * Source buffers are taken from the m2m source queue one at a time and handed
 * to coda_bitstream_try_queue() until the queue is empty or a buffer cannot
 * be queued. For every buffer that was accepted, a coda_buffer_meta entry
 * recording its sequence number, timecode, timestamp and FIFO start/end
 * offsets is appended to ctx->buffer_meta_list.
 *
 * If @buffer_list is non-NULL, accepted buffers and dropped invalid JPEG
 * buffers are appended to it instead of being completed here; otherwise they
 * are completed immediately with v4l2_m2m_buf_done(). Buffers with an empty
 * payload are always completed immediately.
 *
 * Nothing is queued once CODA_BIT_STREAM_END_FLAG is set in
 * ctx->bit_stream_param.
 */
void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list)
{
        struct vb2_v4l2_buffer *src_buf;
        struct coda_buffer_meta *meta;
        unsigned long flags;
        u32 start;

        if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)
                return;

        while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
                /*
                 * Only queue a single JPEG into the bitstream buffer, except
                 * to increase payload over 512 bytes or if in hold state.
                 */
                if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
                    (coda_get_bitstream_payload(ctx) >= 512) && !ctx->hold)
                        break;

                /* Peek at the next buffer; it is only removed once handled */
                src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);

                /* Drop frames that do not start/end with a SOI/EOI markers */
                if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
                    !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) {
                        v4l2_err(&ctx->dev->v4l2_dev,
                                 "dropping invalid JPEG frame %d\n",
                                 ctx->qsequence);
                        src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
                        if (buffer_list) {
                                struct v4l2_m2m_buffer *m2m_buf;

                                /* Defer completion to the caller's list */
                                m2m_buf = container_of(src_buf,
                                                       struct v4l2_m2m_buffer,
                                                       vb);
                                list_add_tail(&m2m_buf->list, buffer_list);
                        } else {
                                v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
                        }
                        continue;
                }

                /* Dump empty buffers */
                if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) {
                        src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
                        v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
                        continue;
                }

                /* Buffer start position */
                start = ctx->bitstream_fifo.kfifo.in &
                        ctx->bitstream_fifo.kfifo.mask;

                if (coda_bitstream_try_queue(ctx, src_buf)) {
                        /*
                         * Source buffer is queued in the bitstream ringbuffer;
                         * queue the timestamp and mark source buffer as done
                         */
                        src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);

                        /*
                         * If this allocation fails, the buffer is still
                         * completed below, just without a metadata entry.
                         */
                        meta = kmalloc(sizeof(*meta), GFP_KERNEL);
                        if (meta) {
                                meta->sequence = src_buf->sequence;
                                meta->timecode = src_buf->timecode;
                                meta->timestamp = src_buf->vb2_buf.timestamp;
                                meta->start = start;
                                /* FIFO write offset after queueing the buffer */
                                meta->end = ctx->bitstream_fifo.kfifo.in &
                                            ctx->bitstream_fifo.kfifo.mask;
                                spin_lock_irqsave(&ctx->buffer_meta_lock,
                                                  flags);
                                list_add_tail(&meta->list,
                                              &ctx->buffer_meta_list);
                                ctx->num_metas++;
                                spin_unlock_irqrestore(&ctx->buffer_meta_lock,
                                                       flags);

                                trace_coda_bit_queue(ctx, src_buf, meta);
                        }

                        if (buffer_list) {
                                struct v4l2_m2m_buffer *m2m_buf;

                                /* Defer completion to the caller's list */
                                m2m_buf = container_of(src_buf,
                                                       struct v4l2_m2m_buffer,
                                                       vb);
                                list_add_tail(&m2m_buf->list, buffer_list);
                        } else {
                                v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
                        }
                } else {
                        /* Buffer could not be queued; leave it on the queue */
                        break;
                }
        }
}
344
345 void coda_bit_stream_end_flag(struct coda_ctx *ctx)
346 {
347         struct coda_dev *dev = ctx->dev;
348
349         ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
350
351         /* If this context is currently running, update the hardware flag */
352         if ((dev->devtype->product == CODA_960) &&
353             coda_isbusy(dev) &&
354             (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
355                 coda_write(dev, ctx->bit_stream_param,
356                            CODA_REG_BIT_BIT_STREAM_PARAM);
357         }
358 }
359
360 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
361 {
362         struct coda_dev *dev = ctx->dev;
363         u32 *p = ctx->parabuf.vaddr;
364
365         if (dev->devtype->product == CODA_DX6)
366                 p[index] = value;
367         else
368                 p[index ^ 1] = value;
369 }
370
371 static inline int coda_alloc_context_buf(struct coda_ctx *ctx,
372                                          struct coda_aux_buf *buf, size_t size,
373                                          const char *name)
374 {
375         return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry);
376 }
377
378
379 static void coda_free_framebuffers(struct coda_ctx *ctx)
380 {
381         int i;
382
383         for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
384                 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
385 }
386
387 static int coda_alloc_framebuffers(struct coda_ctx *ctx,
388                                    struct coda_q_data *q_data, u32 fourcc)
389 {
390         struct coda_dev *dev = ctx->dev;
391         int width, height;
392         int ysize;
393         int ret;
394         int i;
395
396         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
397             ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 ||
398             ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4) {
399                 width = round_up(q_data->width, 16);
400                 height = round_up(q_data->height, 16);
401         } else {
402                 width = round_up(q_data->width, 8);
403                 height = q_data->height;
404         }
405         ysize = width * height;
406
407         /* Allocate frame buffers */
408         for (i = 0; i < ctx->num_internal_frames; i++) {
409                 size_t size;
410                 char *name;
411
412                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
413                         size = round_up(ysize, 4096) + ysize / 2;
414                 else
415                         size = ysize + ysize / 2;
416                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
417                     dev->devtype->product != CODA_DX6)
418                         size += ysize / 4;
419                 name = kasprintf(GFP_KERNEL, "fb%d", i);
420                 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
421                                              size, name);
422                 kfree(name);
423                 if (ret < 0) {
424                         coda_free_framebuffers(ctx);
425                         return ret;
426                 }
427         }
428
429         /* Register frame buffers in the parameter buffer */
430         for (i = 0; i < ctx->num_internal_frames; i++) {
431                 u32 y, cb, cr, mvcol;
432
433                 /* Start addresses of Y, Cb, Cr planes */
434                 y = ctx->internal_frames[i].paddr;
435                 cb = y + ysize;
436                 cr = y + ysize + ysize/4;
437                 mvcol = y + ysize + ysize/4 + ysize/4;
438                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) {
439                         cb = round_up(cb, 4096);
440                         mvcol = cb + ysize/2;
441                         cr = 0;
442                         /* Packed 20-bit MSB of base addresses */
443                         /* YYYYYCCC, CCyyyyyc, cccc.... */
444                         y = (y & 0xfffff000) | cb >> 20;
445                         cb = (cb & 0x000ff000) << 12;
446                 }
447                 coda_parabuf_write(ctx, i * 3 + 0, y);
448                 coda_parabuf_write(ctx, i * 3 + 1, cb);
449                 coda_parabuf_write(ctx, i * 3 + 2, cr);
450
451                 /* mvcol buffer for h.264 */
452                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
453                     dev->devtype->product != CODA_DX6)
454                         coda_parabuf_write(ctx, 96 + i, mvcol);
455         }
456
457         /* mvcol buffer for mpeg4 */
458         if ((dev->devtype->product != CODA_DX6) &&
459             (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4))
460                 coda_parabuf_write(ctx, 97, ctx->internal_frames[0].paddr +
461                                             ysize + ysize/4 + ysize/4);
462
463         return 0;
464 }
465
466 static void coda_free_context_buffers(struct coda_ctx *ctx)
467 {
468         struct coda_dev *dev = ctx->dev;
469
470         coda_free_aux_buf(dev, &ctx->slicebuf);
471         coda_free_aux_buf(dev, &ctx->psbuf);
472         if (dev->devtype->product != CODA_DX6)
473                 coda_free_aux_buf(dev, &ctx->workbuf);
474         coda_free_aux_buf(dev, &ctx->parabuf);
475 }
476
477 static int coda_alloc_context_buffers(struct coda_ctx *ctx,
478                                       struct coda_q_data *q_data)
479 {
480         struct coda_dev *dev = ctx->dev;
481         size_t size;
482         int ret;
483
484         if (!ctx->parabuf.vaddr) {
485                 ret = coda_alloc_context_buf(ctx, &ctx->parabuf,
486                                              CODA_PARA_BUF_SIZE, "parabuf");
487                 if (ret < 0)
488                         return ret;
489         }
490
491         if (dev->devtype->product == CODA_DX6)
492                 return 0;
493
494         if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) {
495                 /* worst case slice size */
496                 size = (DIV_ROUND_UP(q_data->width, 16) *
497                         DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
498                 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
499                                              "slicebuf");
500                 if (ret < 0)
501                         goto err;
502         }
503
504         if (!ctx->psbuf.vaddr && dev->devtype->product == CODA_7541) {
505                 ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
506                                              CODA7_PS_BUF_SIZE, "psbuf");
507                 if (ret < 0)
508                         goto err;
509         }
510
511         if (!ctx->workbuf.vaddr) {
512                 size = dev->devtype->workbuf_size;
513                 if (dev->devtype->product == CODA_960 &&
514                     q_data->fourcc == V4L2_PIX_FMT_H264)
515                         size += CODA9_PS_SAVE_SIZE;
516                 ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size,
517                                              "workbuf");
518                 if (ret < 0)
519                         goto err;
520         }
521
522         return 0;
523
524 err:
525         coda_free_context_buffers(ctx);
526         return ret;
527 }
528
529 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf,
530                               int header_code, u8 *header, int *size)
531 {
532         struct vb2_buffer *vb = &buf->vb2_buf;
533         struct coda_dev *dev = ctx->dev;
534         size_t bufsize;
535         int ret;
536         int i;
537
538         if (dev->devtype->product == CODA_960)
539                 memset(vb2_plane_vaddr(vb, 0), 0, 64);
540
541         coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0),
542                    CODA_CMD_ENC_HEADER_BB_START);
543         bufsize = vb2_plane_size(vb, 0);
544         if (dev->devtype->product == CODA_960)
545                 bufsize /= 1024;
546         coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
547         coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
548         ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
549         if (ret < 0) {
550                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
551                 return ret;
552         }
553
554         if (dev->devtype->product == CODA_960) {
555                 for (i = 63; i > 0; i--)
556                         if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0)
557                                 break;
558                 *size = i + 1;
559         } else {
560                 *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
561                         coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
562         }
563         memcpy(header, vb2_plane_vaddr(vb, 0), *size);
564
565         return 0;
566 }
567
568 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
569 {
570         phys_addr_t ret;
571
572         size = round_up(size, 1024);
573         if (size > iram->remaining)
574                 return 0;
575         iram->remaining -= size;
576
577         ret = iram->next_paddr;
578         iram->next_paddr += size;
579
580         return ret;
581 }
582
/*
 * Partition the device's IRAM among this context's auxiliary buffers and
 * record in iram_info->axi_sram_use which of them are enabled.
 *
 * iram_info is reset first. Buffers are then allocated in priority order;
 * as soon as one allocation fails, the remaining buffers are left
 * unallocated and their enable bits stay clear. Nothing is allocated when
 * the device has no IRAM mapping or on CODA_DX6.
 */
static void coda_setup_iram(struct coda_ctx *ctx)
{
        struct coda_iram_info *iram_info = &ctx->iram_info;
        struct coda_dev *dev = ctx->dev;
        int w64, w128;
        int mb_width;
        int dbk_bits;
        int bit_bits;
        int ip_bits;

        memset(iram_info, 0, sizeof(*iram_info));
        iram_info->next_paddr = dev->iram.paddr;
        iram_info->remaining = dev->iram.size;

        if (!dev->iram.vaddr)
                return;

        /* Select the product specific enable bits for each buffer kind */
        switch (dev->devtype->product) {
        case CODA_7541:
                dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
                bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
                ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
                break;
        case CODA_960:
                dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
                bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
                ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
                break;
        default: /* CODA_DX6 */
                return;
        }

        if (ctx->inst_type == CODA_INST_ENCODER) {
                struct coda_q_data *q_data_src;

                /* Buffer sizes scale with the frame width in macroblocks */
                q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
                mb_width = DIV_ROUND_UP(q_data_src->width, 16);
                w128 = mb_width * 128;
                w64 = mb_width * 64;

                /* Prioritize in case IRAM is too small for everything */
                if (dev->devtype->product == CODA_7541) {
                        iram_info->search_ram_size = round_up(mb_width * 16 *
                                                              36 + 2048, 1024);
                        iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
                                                iram_info->search_ram_size);
                        if (!iram_info->search_ram_paddr) {
                                pr_err("IRAM is smaller than the search ram size\n");
                                goto out;
                        }
                        iram_info->axi_sram_use |= CODA7_USE_HOST_ME_ENABLE |
                                                   CODA7_USE_ME_ENABLE;
                }

                /* Only H.264BP and H.263P3 are considered */
                /*
                 * Both requests have the same size and a failed allocation
                 * leaves iram_info unchanged, so checking the second result
                 * also covers a failure of the first.
                 */
                iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
                iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
                if (!iram_info->buf_dbk_c_use)
                        goto out;
                iram_info->axi_sram_use |= dbk_bits;

                iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
                if (!iram_info->buf_bit_use)
                        goto out;
                iram_info->axi_sram_use |= bit_bits;

                iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
                if (!iram_info->buf_ip_ac_dc_use)
                        goto out;
                iram_info->axi_sram_use |= ip_bits;

                /* OVL and BTP disabled for encoder */
        } else if (ctx->inst_type == CODA_INST_DECODER) {
                struct coda_q_data *q_data_dst;

                /* Buffer sizes scale with the frame width in macroblocks */
                q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
                mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
                w128 = mb_width * 128;

                /* Same-size pair again: checking the second result suffices */
                iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
                iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
                if (!iram_info->buf_dbk_c_use)
                        goto out;
                iram_info->axi_sram_use |= dbk_bits;

                iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
                if (!iram_info->buf_bit_use)
                        goto out;
                iram_info->axi_sram_use |= bit_bits;

                iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
                if (!iram_info->buf_ip_ac_dc_use)
                        goto out;
                iram_info->axi_sram_use |= ip_bits;

                /* OVL and BTP unused as there is no VC1 support yet */
        }

out:
        /*
         * The IP buffer is allocated last, so a missing IP enable bit means
         * at least one allocation failed.
         * NOTE(review): this tests the CODA7 host IP bit even on CODA960,
         * where ip_bits is built from CODA9_USE_HOST_IP_ENABLE - confirm the
         * two constants share a value.
         */
        if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
                v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
                         "IRAM smaller than needed\n");

        if (dev->devtype->product == CODA_7541) {
                /* TODO - Enabling these causes picture errors on CODA7541 */
                if (ctx->inst_type == CODA_INST_DECODER) {
                        /* fw 1.4.50 */
                        iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
                                                     CODA7_USE_IP_ENABLE);
                } else {
                        /* fw 13.4.29 */
                        iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
                                                     CODA7_USE_HOST_DBK_ENABLE |
                                                     CODA7_USE_IP_ENABLE |
                                                     CODA7_USE_DBK_ENABLE);
                }
        }
}
701
702 static u32 coda_supported_firmwares[] = {
703         CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
704         CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
705         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
706 };
707
708 static bool coda_firmware_supported(u32 vernum)
709 {
710         int i;
711
712         for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
713                 if (vernum == coda_supported_firmwares[i])
714                         return true;
715         return false;
716 }
717
718 int coda_check_firmware(struct coda_dev *dev)
719 {
720         u16 product, major, minor, release;
721         u32 data;
722         int ret;
723
724         ret = clk_prepare_enable(dev->clk_per);
725         if (ret)
726                 goto err_clk_per;
727
728         ret = clk_prepare_enable(dev->clk_ahb);
729         if (ret)
730                 goto err_clk_ahb;
731
732         coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
733         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
734         coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
735         coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
736         coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
737         if (coda_wait_timeout(dev)) {
738                 v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
739                 ret = -EIO;
740                 goto err_run_cmd;
741         }
742
743         if (dev->devtype->product == CODA_960) {
744                 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
745                 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
746                           data);
747         }
748
749         /* Check we are compatible with the loaded firmware */
750         data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
751         product = CODA_FIRMWARE_PRODUCT(data);
752         major = CODA_FIRMWARE_MAJOR(data);
753         minor = CODA_FIRMWARE_MINOR(data);
754         release = CODA_FIRMWARE_RELEASE(data);
755
756         clk_disable_unprepare(dev->clk_per);
757         clk_disable_unprepare(dev->clk_ahb);
758
759         if (product != dev->devtype->product) {
760                 v4l2_err(&dev->v4l2_dev,
761                          "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
762                          coda_product_name(dev->devtype->product),
763                          coda_product_name(product), major, minor, release);
764                 return -EINVAL;
765         }
766
767         v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
768                   coda_product_name(product));
769
770         if (coda_firmware_supported(data)) {
771                 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
772                           major, minor, release);
773         } else {
774                 v4l2_warn(&dev->v4l2_dev,
775                           "Unsupported firmware version: %u.%u.%u\n",
776                           major, minor, release);
777         }
778
779         return 0;
780
781 err_run_cmd:
782         clk_disable_unprepare(dev->clk_ahb);
783 err_clk_ahb:
784         clk_disable_unprepare(dev->clk_per);
785 err_clk_per:
786         return ret;
787 }
788
789 static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc)
790 {
791         u32 cache_size, cache_config;
792
793         if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) {
794                 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */
795                 cache_size = 0x20262024;
796                 cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET;
797         } else {
798                 /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */
799                 cache_size = 0x02440243;
800                 cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET;
801         }
802         coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE);
803         if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) {
804                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
805                                 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
806                                 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
807         } else {
808                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
809                                 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
810                                 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
811         }
812         coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG);
813 }
814
815 /*
816  * Encoder context operations
817  */
818
819 static int coda_encoder_reqbufs(struct coda_ctx *ctx,
820                                 struct v4l2_requestbuffers *rb)
821 {
822         struct coda_q_data *q_data_src;
823         int ret;
824
825         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
826                 return 0;
827
828         if (rb->count) {
829                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
830                 ret = coda_alloc_context_buffers(ctx, q_data_src);
831                 if (ret < 0)
832                         return ret;
833         } else {
834                 coda_free_context_buffers(ctx);
835         }
836
837         return 0;
838 }
839
840 static int coda_start_encoding(struct coda_ctx *ctx)
841 {
842         struct coda_dev *dev = ctx->dev;
843         struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
844         struct coda_q_data *q_data_src, *q_data_dst;
845         u32 bitstream_buf, bitstream_size;
846         struct vb2_v4l2_buffer *buf;
847         int gamma, ret, value;
848         u32 dst_fourcc;
849         int num_fb;
850         u32 stride;
851
852         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
853         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
854         dst_fourcc = q_data_dst->fourcc;
855
856         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
857         bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0);
858         bitstream_size = q_data_dst->sizeimage;
859
860         if (!coda_is_initialized(dev)) {
861                 v4l2_err(v4l2_dev, "coda is not initialized.\n");
862                 return -EFAULT;
863         }
864
865         if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
866                 if (!ctx->params.jpeg_qmat_tab[0])
867                         ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
868                 if (!ctx->params.jpeg_qmat_tab[1])
869                         ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
870                 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
871         }
872
873         mutex_lock(&dev->coda_mutex);
874
875         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
876         coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
877         coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
878         switch (dev->devtype->product) {
879         case CODA_DX6:
880                 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
881                         CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
882                 break;
883         case CODA_960:
884                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
885                 /* fallthrough */
886         case CODA_7541:
887                 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
888                         CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
889                 break;
890         }
891
892         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
893                                  CODA9_FRAME_TILED2LINEAR);
894         if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
895                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
896         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
897                 ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
898         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
899
900         if (dev->devtype->product == CODA_DX6) {
901                 /* Configure the coda */
902                 coda_write(dev, dev->iram.paddr,
903                            CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
904         }
905
906         /* Could set rotation here if needed */
907         value = 0;
908         switch (dev->devtype->product) {
909         case CODA_DX6:
910                 value = (q_data_src->width & CODADX6_PICWIDTH_MASK)
911                         << CODADX6_PICWIDTH_OFFSET;
912                 value |= (q_data_src->height & CODADX6_PICHEIGHT_MASK)
913                          << CODA_PICHEIGHT_OFFSET;
914                 break;
915         case CODA_7541:
916                 if (dst_fourcc == V4L2_PIX_FMT_H264) {
917                         value = (round_up(q_data_src->width, 16) &
918                                  CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
919                         value |= (round_up(q_data_src->height, 16) &
920                                  CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
921                         break;
922                 }
923                 /* fallthrough */
924         case CODA_960:
925                 value = (q_data_src->width & CODA7_PICWIDTH_MASK)
926                         << CODA7_PICWIDTH_OFFSET;
927                 value |= (q_data_src->height & CODA7_PICHEIGHT_MASK)
928                          << CODA_PICHEIGHT_OFFSET;
929         }
930         coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
931         if (dst_fourcc == V4L2_PIX_FMT_JPEG)
932                 ctx->params.framerate = 0;
933         coda_write(dev, ctx->params.framerate,
934                    CODA_CMD_ENC_SEQ_SRC_F_RATE);
935
936         ctx->params.codec_mode = ctx->codec->mode;
937         switch (dst_fourcc) {
938         case V4L2_PIX_FMT_MPEG4:
939                 if (dev->devtype->product == CODA_960)
940                         coda_write(dev, CODA9_STD_MPEG4,
941                                    CODA_CMD_ENC_SEQ_COD_STD);
942                 else
943                         coda_write(dev, CODA_STD_MPEG4,
944                                    CODA_CMD_ENC_SEQ_COD_STD);
945                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
946                 break;
947         case V4L2_PIX_FMT_H264:
948                 if (dev->devtype->product == CODA_960)
949                         coda_write(dev, CODA9_STD_H264,
950                                    CODA_CMD_ENC_SEQ_COD_STD);
951                 else
952                         coda_write(dev, CODA_STD_H264,
953                                    CODA_CMD_ENC_SEQ_COD_STD);
954                 if (ctx->params.h264_deblk_enabled) {
955                         value = ((ctx->params.h264_deblk_alpha &
956                                   CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
957                                  CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
958                                 ((ctx->params.h264_deblk_beta &
959                                   CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
960                                  CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
961                 } else {
962                         value = 1 << CODA_264PARAM_DISABLEDEBLK_OFFSET;
963                 }
964                 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
965                 break;
966         case V4L2_PIX_FMT_JPEG:
967                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
968                 coda_write(dev, ctx->params.jpeg_restart_interval,
969                                 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
970                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
971                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
972                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
973
974                 coda_jpeg_write_tables(ctx);
975                 break;
976         default:
977                 v4l2_err(v4l2_dev,
978                          "dst format (0x%08x) invalid.\n", dst_fourcc);
979                 ret = -EINVAL;
980                 goto out;
981         }
982
983         /*
984          * slice mode and GOP size registers are used for thumb size/offset
985          * in JPEG mode
986          */
987         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
988                 switch (ctx->params.slice_mode) {
989                 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
990                         value = 0;
991                         break;
992                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
993                         value  = (ctx->params.slice_max_mb &
994                                   CODA_SLICING_SIZE_MASK)
995                                  << CODA_SLICING_SIZE_OFFSET;
996                         value |= (1 & CODA_SLICING_UNIT_MASK)
997                                  << CODA_SLICING_UNIT_OFFSET;
998                         value |=  1 & CODA_SLICING_MODE_MASK;
999                         break;
1000                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
1001                         value  = (ctx->params.slice_max_bits &
1002                                   CODA_SLICING_SIZE_MASK)
1003                                  << CODA_SLICING_SIZE_OFFSET;
1004                         value |= (0 & CODA_SLICING_UNIT_MASK)
1005                                  << CODA_SLICING_UNIT_OFFSET;
1006                         value |=  1 & CODA_SLICING_MODE_MASK;
1007                         break;
1008                 }
1009                 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
1010                 value = ctx->params.gop_size;
1011                 coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
1012         }
1013
1014         if (ctx->params.bitrate) {
1015                 /* Rate control enabled */
1016                 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
1017                         << CODA_RATECONTROL_BITRATE_OFFSET;
1018                 value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
1019                 value |= (ctx->params.vbv_delay &
1020                           CODA_RATECONTROL_INITIALDELAY_MASK)
1021                          << CODA_RATECONTROL_INITIALDELAY_OFFSET;
1022                 if (dev->devtype->product == CODA_960)
1023                         value |= BIT(31); /* disable autoskip */
1024         } else {
1025                 value = 0;
1026         }
1027         coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
1028
1029         coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
1030         coda_write(dev, ctx->params.intra_refresh,
1031                    CODA_CMD_ENC_SEQ_INTRA_REFRESH);
1032
1033         coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
1034         coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
1035
1036
1037         value = 0;
1038         if (dev->devtype->product == CODA_960)
1039                 gamma = CODA9_DEFAULT_GAMMA;
1040         else
1041                 gamma = CODA_DEFAULT_GAMMA;
1042         if (gamma > 0) {
1043                 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
1044                            CODA_CMD_ENC_SEQ_RC_GAMMA);
1045         }
1046
1047         if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
1048                 coda_write(dev,
1049                            ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
1050                            ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
1051                            CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
1052         }
1053         if (dev->devtype->product == CODA_960) {
1054                 if (ctx->params.h264_max_qp)
1055                         value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
1056                 if (CODA_DEFAULT_GAMMA > 0)
1057                         value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
1058         } else {
1059                 if (CODA_DEFAULT_GAMMA > 0) {
1060                         if (dev->devtype->product == CODA_DX6)
1061                                 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
1062                         else
1063                                 value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
1064                 }
1065                 if (ctx->params.h264_min_qp)
1066                         value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
1067                 if (ctx->params.h264_max_qp)
1068                         value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
1069         }
1070         coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
1071
1072         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
1073
1074         coda_setup_iram(ctx);
1075
1076         if (dst_fourcc == V4L2_PIX_FMT_H264) {
1077                 switch (dev->devtype->product) {
1078                 case CODA_DX6:
1079                         value = FMO_SLICE_SAVE_BUF_SIZE << 7;
1080                         coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
1081                         break;
1082                 case CODA_7541:
1083                         coda_write(dev, ctx->iram_info.search_ram_paddr,
1084                                         CODA7_CMD_ENC_SEQ_SEARCH_BASE);
1085                         coda_write(dev, ctx->iram_info.search_ram_size,
1086                                         CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
1087                         break;
1088                 case CODA_960:
1089                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
1090                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
1091                 }
1092         }
1093
1094         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1095         if (ret < 0) {
1096                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1097                 goto out;
1098         }
1099
1100         if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
1101                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
1102                 ret = -EFAULT;
1103                 goto out;
1104         }
1105         ctx->initialized = 1;
1106
1107         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1108                 if (dev->devtype->product == CODA_960)
1109                         ctx->num_internal_frames = 4;
1110                 else
1111                         ctx->num_internal_frames = 2;
1112                 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
1113                 if (ret < 0) {
1114                         v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
1115                         goto out;
1116                 }
1117                 num_fb = 2;
1118                 stride = q_data_src->bytesperline;
1119         } else {
1120                 ctx->num_internal_frames = 0;
1121                 num_fb = 0;
1122                 stride = 0;
1123         }
1124         coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
1125         coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
1126
1127         if (dev->devtype->product == CODA_7541) {
1128                 coda_write(dev, q_data_src->bytesperline,
1129                                 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
1130         }
1131         if (dev->devtype->product != CODA_DX6) {
1132                 coda_write(dev, ctx->iram_info.buf_bit_use,
1133                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1134                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1135                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1136                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1137                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1138                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1139                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1140                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1141                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1142                 if (dev->devtype->product == CODA_960) {
1143                         coda_write(dev, ctx->iram_info.buf_btp_use,
1144                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1145
1146                         coda9_set_frame_cache(ctx, q_data_src->fourcc);
1147
1148                         /* FIXME */
1149                         coda_write(dev, ctx->internal_frames[2].paddr,
1150                                    CODA9_CMD_SET_FRAME_SUBSAMP_A);
1151                         coda_write(dev, ctx->internal_frames[3].paddr,
1152                                    CODA9_CMD_SET_FRAME_SUBSAMP_B);
1153                 }
1154         }
1155
1156         ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1157         if (ret < 0) {
1158                 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1159                 goto out;
1160         }
1161
1162         /* Save stream headers */
1163         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1164         switch (dst_fourcc) {
1165         case V4L2_PIX_FMT_H264:
1166                 /*
1167                  * Get SPS in the first frame and copy it to an
1168                  * intermediate buffer.
1169                  */
1170                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1171                                          &ctx->vpu_header[0][0],
1172                                          &ctx->vpu_header_size[0]);
1173                 if (ret < 0)
1174                         goto out;
1175
1176                 /*
1177                  * Get PPS in the first frame and copy it to an
1178                  * intermediate buffer.
1179                  */
1180                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1181                                          &ctx->vpu_header[1][0],
1182                                          &ctx->vpu_header_size[1]);
1183                 if (ret < 0)
1184                         goto out;
1185
1186                 /*
1187                  * Length of H.264 headers is variable and thus it might not be
1188                  * aligned for the coda to append the encoded frame. In that is
1189                  * the case a filler NAL must be added to header 2.
1190                  */
1191                 ctx->vpu_header_size[2] = coda_h264_padding(
1192                                         (ctx->vpu_header_size[0] +
1193                                          ctx->vpu_header_size[1]),
1194                                          ctx->vpu_header[2]);
1195                 break;
1196         case V4L2_PIX_FMT_MPEG4:
1197                 /*
1198                  * Get VOS in the first frame and copy it to an
1199                  * intermediate buffer
1200                  */
1201                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1202                                          &ctx->vpu_header[0][0],
1203                                          &ctx->vpu_header_size[0]);
1204                 if (ret < 0)
1205                         goto out;
1206
1207                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1208                                          &ctx->vpu_header[1][0],
1209                                          &ctx->vpu_header_size[1]);
1210                 if (ret < 0)
1211                         goto out;
1212
1213                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1214                                          &ctx->vpu_header[2][0],
1215                                          &ctx->vpu_header_size[2]);
1216                 if (ret < 0)
1217                         goto out;
1218                 break;
1219         default:
1220                 /* No more formats need to save headers at the moment */
1221                 break;
1222         }
1223
1224 out:
1225         mutex_unlock(&dev->coda_mutex);
1226         return ret;
1227 }
1228
1229 static int coda_prepare_encode(struct coda_ctx *ctx)
1230 {
1231         struct coda_q_data *q_data_src, *q_data_dst;
1232         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1233         struct coda_dev *dev = ctx->dev;
1234         int force_ipicture;
1235         int quant_param = 0;
1236         u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1237         u32 rot_mode = 0;
1238         u32 dst_fourcc;
1239         u32 reg;
1240
1241         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1242         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1243         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1244         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1245         dst_fourcc = q_data_dst->fourcc;
1246
1247         src_buf->sequence = ctx->osequence;
1248         dst_buf->sequence = ctx->osequence;
1249         ctx->osequence++;
1250
1251         force_ipicture = ctx->params.force_ipicture;
1252         if (force_ipicture)
1253                 ctx->params.force_ipicture = false;
1254         else if (ctx->params.gop_size != 0 &&
1255                  (src_buf->sequence % ctx->params.gop_size) == 0)
1256                 force_ipicture = 1;
1257
1258         /*
1259          * Workaround coda firmware BUG that only marks the first
1260          * frame as IDR. This is a problem for some decoders that can't
1261          * recover when a frame is lost.
1262          */
1263         if (!force_ipicture) {
1264                 src_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1265                 src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1266         } else {
1267                 src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1268                 src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1269         }
1270
1271         if (dev->devtype->product == CODA_960)
1272                 coda_set_gdi_regs(ctx);
1273
1274         /*
1275          * Copy headers in front of the first frame and forced I frames for
1276          * H.264 only. In MPEG4 they are already copied by the CODA.
1277          */
1278         if (src_buf->sequence == 0 || force_ipicture) {
1279                 pic_stream_buffer_addr =
1280                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) +
1281                         ctx->vpu_header_size[0] +
1282                         ctx->vpu_header_size[1] +
1283                         ctx->vpu_header_size[2];
1284                 pic_stream_buffer_size = q_data_dst->sizeimage -
1285                         ctx->vpu_header_size[0] -
1286                         ctx->vpu_header_size[1] -
1287                         ctx->vpu_header_size[2];
1288                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0),
1289                        &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1290                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1291                         + ctx->vpu_header_size[0], &ctx->vpu_header[1][0],
1292                         ctx->vpu_header_size[1]);
1293                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1294                         + ctx->vpu_header_size[0] + ctx->vpu_header_size[1],
1295                         &ctx->vpu_header[2][0], ctx->vpu_header_size[2]);
1296         } else {
1297                 pic_stream_buffer_addr =
1298                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1299                 pic_stream_buffer_size = q_data_dst->sizeimage;
1300         }
1301
1302         if (force_ipicture) {
1303                 switch (dst_fourcc) {
1304                 case V4L2_PIX_FMT_H264:
1305                         quant_param = ctx->params.h264_intra_qp;
1306                         break;
1307                 case V4L2_PIX_FMT_MPEG4:
1308                         quant_param = ctx->params.mpeg4_intra_qp;
1309                         break;
1310                 case V4L2_PIX_FMT_JPEG:
1311                         quant_param = 30;
1312                         break;
1313                 default:
1314                         v4l2_warn(&ctx->dev->v4l2_dev,
1315                                 "cannot set intra qp, fmt not supported\n");
1316                         break;
1317                 }
1318         } else {
1319                 switch (dst_fourcc) {
1320                 case V4L2_PIX_FMT_H264:
1321                         quant_param = ctx->params.h264_inter_qp;
1322                         break;
1323                 case V4L2_PIX_FMT_MPEG4:
1324                         quant_param = ctx->params.mpeg4_inter_qp;
1325                         break;
1326                 default:
1327                         v4l2_warn(&ctx->dev->v4l2_dev,
1328                                 "cannot set inter qp, fmt not supported\n");
1329                         break;
1330                 }
1331         }
1332
1333         /* submit */
1334         if (ctx->params.rot_mode)
1335                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1336         coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1337         coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1338
1339         if (dev->devtype->product == CODA_960) {
1340                 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1341                 coda_write(dev, q_data_src->width, CODA9_CMD_ENC_PIC_SRC_STRIDE);
1342                 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1343
1344                 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1345         } else {
1346                 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1347         }
1348         coda_write_base(ctx, q_data_src, src_buf, reg);
1349
1350         coda_write(dev, force_ipicture << 1 & 0x2,
1351                    CODA_CMD_ENC_PIC_OPTION);
1352
1353         coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1354         coda_write(dev, pic_stream_buffer_size / 1024,
1355                    CODA_CMD_ENC_PIC_BB_SIZE);
1356
1357         if (!ctx->streamon_out) {
1358                 /* After streamoff on the output side, set stream end flag */
1359                 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1360                 coda_write(dev, ctx->bit_stream_param,
1361                            CODA_REG_BIT_BIT_STREAM_PARAM);
1362         }
1363
1364         if (dev->devtype->product != CODA_DX6)
1365                 coda_write(dev, ctx->iram_info.axi_sram_use,
1366                                 CODA7_REG_BIT_AXI_SRAM_USE);
1367
1368         trace_coda_enc_pic_run(ctx, src_buf);
1369
1370         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1371
1372         return 0;
1373 }
1374
1375 static void coda_finish_encode(struct coda_ctx *ctx)
1376 {
1377         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1378         struct coda_dev *dev = ctx->dev;
1379         u32 wr_ptr, start_ptr;
1380
1381         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1382         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1383
1384         trace_coda_enc_pic_done(ctx, dst_buf);
1385
1386         /* Get results from the coda */
1387         start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1388         wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1389
1390         /* Calculate bytesused field */
1391         if (dst_buf->sequence == 0 ||
1392             src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) {
1393                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr +
1394                                         ctx->vpu_header_size[0] +
1395                                         ctx->vpu_header_size[1] +
1396                                         ctx->vpu_header_size[2]);
1397         } else {
1398                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1399         }
1400
1401         v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1402                  wr_ptr - start_ptr);
1403
1404         coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1405         coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1406
1407         if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1408                 dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1409                 dst_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1410         } else {
1411                 dst_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1412                 dst_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1413         }
1414
1415         dst_buf->vb2_buf.timestamp = src_buf->vb2_buf.timestamp;
1416         dst_buf->field = src_buf->field;
1417         dst_buf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1418         dst_buf->flags |=
1419                 src_buf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1420         dst_buf->timecode = src_buf->timecode;
1421
1422         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1423
1424         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1425         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
1426
1427         ctx->gopcounter--;
1428         if (ctx->gopcounter < 0)
1429                 ctx->gopcounter = ctx->params.gop_size - 1;
1430
1431         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1432                 "job finished: encoding frame (%d) (%s)\n",
1433                 dst_buf->sequence,
1434                 (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
1435                 "KEYFRAME" : "PFRAME");
1436 }
1437
1438 static void coda_seq_end_work(struct work_struct *work)
1439 {
1440         struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1441         struct coda_dev *dev = ctx->dev;
1442
1443         mutex_lock(&ctx->buffer_mutex);
1444         mutex_lock(&dev->coda_mutex);
1445
1446         if (ctx->initialized == 0)
1447                 goto out;
1448
1449         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1450                  "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1451                  __func__);
1452         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1453                 v4l2_err(&dev->v4l2_dev,
1454                          "CODA_COMMAND_SEQ_END failed\n");
1455         }
1456
1457         /*
1458          * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing
1459          * from the output stream after the h.264 decoder has run. Resetting the
1460          * hardware after the decoder has finished seems to help.
1461          */
1462         if (dev->devtype->product == CODA_960)
1463                 coda_hw_reset(ctx);
1464
1465         kfifo_init(&ctx->bitstream_fifo,
1466                 ctx->bitstream.vaddr, ctx->bitstream.size);
1467
1468         coda_free_framebuffers(ctx);
1469
1470         ctx->initialized = 0;
1471
1472 out:
1473         mutex_unlock(&dev->coda_mutex);
1474         mutex_unlock(&ctx->buffer_mutex);
1475 }
1476
1477 static void coda_bit_release(struct coda_ctx *ctx)
1478 {
1479         mutex_lock(&ctx->buffer_mutex);
1480         coda_free_framebuffers(ctx);
1481         coda_free_context_buffers(ctx);
1482         coda_free_bitstream_buffer(ctx);
1483         mutex_unlock(&ctx->buffer_mutex);
1484 }
1485
/*
 * Context operations for encoder instances. Not static, so the table is
 * visible outside this file. coda_encoder_queue_init is not defined in the
 * part of the file shown here; the remaining callbacks are the encoder
 * functions defined above.
 */
const struct coda_context_ops coda_bit_encode_ops = {
	.queue_init = coda_encoder_queue_init,
	.reqbufs = coda_encoder_reqbufs,
	.start_streaming = coda_start_encoding,
	.prepare_run = coda_prepare_encode,
	.finish_run = coda_finish_encode,
	.seq_end_work = coda_seq_end_work,
	.release = coda_bit_release,
};
1495
1496 /*
1497  * Decoder context operations
1498  */
1499
/*
 * Allocate the bitstream ringbuffer of a context and attach the kfifo to it.
 *
 * The buffer size is twice q_data->sizeimage, rounded up to a power of two.
 * If ctx->bitstream.vaddr is already set, nothing is allocated.
 *
 * Return: 0 on success or if the buffer already exists, -ENOMEM if the
 * write-combined DMA allocation fails.
 */
1500 static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx,
1501                                        struct coda_q_data *q_data)
1502 {
1503         if (ctx->bitstream.vaddr)
1504                 return 0;
1505
1506         ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
1507         ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
1508                                             ctx->bitstream.size,
1509                                             &ctx->bitstream.paddr, GFP_KERNEL);
1510         if (!ctx->bitstream.vaddr) {
                     /* Terminate the message so the log line is complete. */
1511                 v4l2_err(&ctx->dev->v4l2_dev,
1512                          "failed to allocate bitstream ringbuffer\n");
1513                 return -ENOMEM;
1514         }
1515         kfifo_init(&ctx->bitstream_fifo,
1516                    ctx->bitstream.vaddr, ctx->bitstream.size);
1517
1518         return 0;
1519 }
1520
/*
 * Free the bitstream ringbuffer if one is allocated, then re-initialize the
 * kfifo with a NULL buffer of size 0 so that it no longer points at the freed
 * memory. Does nothing when ctx->bitstream.vaddr is NULL.
 */
1521 static void coda_free_bitstream_buffer(struct coda_ctx *ctx)
1522 {
1523         if (ctx->bitstream.vaddr) {
1524                 dma_free_wc(&ctx->dev->plat_dev->dev,
1525                             ctx->bitstream.size, ctx->bitstream.vaddr,
1526                             ctx->bitstream.paddr);
                     /* A NULL vaddr is what marks the buffer as absent. */
1527                 ctx->bitstream.vaddr = NULL;
1528                 kfifo_init(&ctx->bitstream_fifo, NULL, 0);
1529         }
1530 }
1531
/*
 * reqbufs handler of the decoder ops.
 *
 * Only requests on the OUTPUT queue are acted upon. A non-zero buffer count
 * allocates the context buffers and the bitstream ringbuffer; a count of zero
 * frees both again.
 *
 * Return: 0 on success or for other queue types, otherwise the negative error
 * code of the allocation that failed.
 */
1532 static int coda_decoder_reqbufs(struct coda_ctx *ctx,
1533                                 struct v4l2_requestbuffers *rb)
1534 {
1535         struct coda_q_data *out_q;
1536         int err;
1537
1538         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
1539                 return 0;
1540
             /* Buffers are being released: drop our allocations as well. */
1541         if (rb->count == 0) {
1542                 coda_free_bitstream_buffer(ctx);
1543                 coda_free_context_buffers(ctx);
1544                 return 0;
1545         }
1546
1547         out_q = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1548         err = coda_alloc_context_buffers(ctx, out_q);
1549         if (err < 0)
1550                 return err;
             /* Undo the context buffer allocation if the ringbuffer fails. */
1551         err = coda_alloc_bitstream_buffer(ctx, out_q);
1552         if (err < 0) {
1553                 coda_free_context_buffers(ctx);
1554                 return err;
1555         }
1556         return 0;
1557 }
1558
/*
 * Decide whether CODA_REORDER_ENABLE should be set for the sequence.
 *
 * Returns false on products other than CODA_7541 / CODA_960 and for JPEG
 * sources, true for every other non-h.264 source. For h.264 the profile and
 * level stored in ctx->params are translated first; an invalid value yields a
 * warning and false, otherwise reordering is enabled for every profile above
 * Baseline.
 */
1559 static bool coda_reorder_enable(struct coda_ctx *ctx)
1560 {
1561         struct coda_dev *dev = ctx->dev;
1562         const char * const *prof_menu;
1563         const char * const *lvl_menu;
1564         int prof, lvl;
1565
1566         if (!(dev->devtype->product == CODA_7541 ||
1567               dev->devtype->product == CODA_960))
1568                 return false;
1569
1570         switch (ctx->codec->src_fourcc) {
1571         case V4L2_PIX_FMT_JPEG:
1572                 return false;
1573         case V4L2_PIX_FMT_H264:
1574                 break;
1575         default:
1576                 return true;
1577         }
1578
1579         prof = coda_h264_profile(ctx->params.h264_profile_idc);
1580         if (prof < 0) {
1581                 v4l2_warn(&dev->v4l2_dev, "Invalid H264 Profile: %d\n",
1582                           ctx->params.h264_profile_idc);
1583                 return false;
1584         }
1585
1586         lvl = coda_h264_level(ctx->params.h264_level_idc);
1587         if (lvl < 0) {
1588                 v4l2_warn(&dev->v4l2_dev, "Invalid H264 Level: %d\n",
1589                           ctx->params.h264_level_idc);
1590                 return false;
1591         }
1592
1593         prof_menu = v4l2_ctrl_get_menu(V4L2_CID_MPEG_VIDEO_H264_PROFILE);
1594         lvl_menu = v4l2_ctrl_get_menu(V4L2_CID_MPEG_VIDEO_H264_LEVEL);
1595         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "H264 Profile/Level: %s L%s\n",
1596                  prof_menu[prof], lvl_menu[lvl]);
             /* Baseline profile does not support reordering */
1597         return prof > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
1598 }
1599
/*
 * __coda_start_decoding() - run sequence initialization for a decoder context
 * and register its internal frame buffers with the device.
 *
 * Programs the parameter buffer, the bitstream ringbuffer and the sequence
 * options, issues CODA_COMMAND_SEQ_INIT, checks the stream size reported by
 * the device against the capture queue format, allocates the internal
 * framebuffers and finally issues CODA_COMMAND_SET_FRAME_BUF.
 *
 * This function does not take dev->coda_mutex itself; coda_start_decoding()
 * wraps it in that mutex.
 *
 * Return: 0 on success, -ETIMEDOUT if one of the two synchronous commands
 * fails, -EAGAIN if the device reports that SEQ_INIT was not successful,
 * -EINVAL if the stream is larger than the capture format or needs more than
 * CODA_MAX_FRAMEBUFFERS frames, or the negative error returned by
 * coda_alloc_framebuffers().
 */
1600 static int __coda_start_decoding(struct coda_ctx *ctx)
1601 {
1602         struct coda_q_data *q_data_src, *q_data_dst;
1603         u32 bitstream_buf, bitstream_size;
1604         struct coda_dev *dev = ctx->dev;
1605         int width, height;
1606         u32 src_fourcc, dst_fourcc;
1607         u32 val;
1608         int ret;
1609
1610         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1611                  "Video Data Order Adapter: %s\n",
1612                  ctx->use_vdoa ? "Enabled" : "Disabled");
1613
1614         /* Start decoding */
1615         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1616         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1617         bitstream_buf = ctx->bitstream.paddr;
1618         bitstream_size = ctx->bitstream.size;
1619         src_fourcc = q_data_src->fourcc;
1620         dst_fourcc = q_data_dst->fourcc;
1621
1622         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
1623
1624         /* Update coda bitstream read and write pointers from kfifo */
1625         coda_kfifo_sync_to_device_full(ctx);
1626
             /*
              * Clear the chroma-interleave, (0x3 << 9) and tiled-to-linear bits
              * and set them again according to the capture format, the tiled
              * map type and whether the VDOA is in use.
              */
1627         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
1628                                  CODA9_FRAME_TILED2LINEAR);
1629         if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV)
1630                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
1631         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
1632                 ctx->frame_mem_ctrl |= (0x3 << 9) |
1633                         ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR);
1634         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
1635
             /* No display frame yet and no frame marked as in display. */
1636         ctx->display_idx = -1;
1637         ctx->frm_dis_flg = 0;
1638         coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1639
1640         coda_write(dev, CODA_BIT_DEC_SEQ_INIT_ESCAPE,
1641                         CODA_REG_BIT_BIT_STREAM_PARAM);
1642
             /* The ringbuffer size is passed to the device in units of 1024. */
1643         coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
1644         coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
1645         val = 0;
1646         if (coda_reorder_enable(ctx))
1647                 val |= CODA_REORDER_ENABLE;
1648         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1649                 val |= CODA_NO_INT_ENABLE;
1650         coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
1651
1652         ctx->params.codec_mode = ctx->codec->mode;
1653         if (dev->devtype->product == CODA_960 &&
1654             src_fourcc == V4L2_PIX_FMT_MPEG4)
1655                 ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
1656         else
1657                 ctx->params.codec_mode_aux = 0;
1658         if (src_fourcc == V4L2_PIX_FMT_H264) {
1659                 if (dev->devtype->product == CODA_7541) {
1660                         coda_write(dev, ctx->psbuf.paddr,
1661                                         CODA_CMD_DEC_SEQ_PS_BB_START);
1662                         coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
1663                                         CODA_CMD_DEC_SEQ_PS_BB_SIZE);
1664                 }
1665                 if (dev->devtype->product == CODA_960) {
1666                         coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
1667                         coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
1668                 }
1669         }
1670         if (dev->devtype->product != CODA_960)
1671                 coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);
1672
1673         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT)) {
1674                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1675                 coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
1676                 return -ETIMEDOUT;
1677         }
             /*
              * NOTE(review): the context is marked initialized as soon as the
              * command completed; every error return below leaves the flag
              * set. Presumably this is so that SEQ_END is still issued on
              * teardown - confirm.
              */
1678         ctx->initialized = 1;
1679
1680         /* Update kfifo out pointer from coda bitstream read pointer */
1681         coda_kfifo_sync_from_device(ctx);
1682
1683         coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
1684
1685         if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
1686                 v4l2_err(&dev->v4l2_dev,
1687                         "CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
1688                         coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
1689                 return -EAGAIN;
1690         }
1691
             /* Width and height share one register; the layout is per product. */
1692         val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
1693         if (dev->devtype->product == CODA_DX6) {
1694                 width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
1695                 height = val & CODADX6_PICHEIGHT_MASK;
1696         } else {
1697                 width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
1698                 height = val & CODA7_PICHEIGHT_MASK;
1699         }
1700
             /*
              * NOTE(review): the stream width is compared against bytesperline
              * of the capture format, not against a width in pixels - confirm
              * that this is intended for all capture formats.
              */
1701         if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
1702                 v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
1703                          width, height, q_data_dst->bytesperline,
1704                          q_data_dst->height);
1705                 return -EINVAL;
1706         }
1707
             /* From here on both dimensions are multiples of 16. */
1708         width = round_up(width, 16);
1709         height = round_up(height, 16);
1710
1711         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
1712                  __func__, ctx->idx, width, height);
1713
1714         ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
1715         /*
1716          * If the VDOA is used, the decoder needs one additional frame,
1717          * because the frames are freed when the next frame is decoded.
1718          * Otherwise there are visible errors in the decoded frames (green
1719          * regions in displayed frames) and a broken order of frames (earlier
1720          * frames are sporadically displayed after later frames).
1721          */
1722         if (ctx->use_vdoa)
1723                 ctx->num_internal_frames += 1;
1724         if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
1725                 v4l2_err(&dev->v4l2_dev,
1726                          "not enough framebuffers to decode (%d < %d)\n",
1727                          CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
1728                 return -EINVAL;
1729         }
1730
             /*
              * For h.264 derive the crop rectangle from the sequence crop
              * registers. Each register holds two 10-bit values: bits 19:10
              * give the left/top offset, bits 9:0 are subtracted from the
              * rounded-up width/height.
              */
1731         if (src_fourcc == V4L2_PIX_FMT_H264) {
1732                 u32 left_right;
1733                 u32 top_bottom;
1734
1735                 left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
1736                 top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);
1737
1738                 q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
1739                 q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
1740                 q_data_dst->rect.width = width - q_data_dst->rect.left -
1741                                          (left_right & 0x3ff);
1742                 q_data_dst->rect.height = height - q_data_dst->rect.top -
1743                                           (top_bottom & 0x3ff);
1744         }
1745
1746         ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
1747         if (ret < 0) {
1748                 v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
1749                 return ret;
1750         }
1751
1752         /* Tell the decoder how many frame buffers we allocated. */
1753         coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
1754         coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
1755
1756         if (dev->devtype->product != CODA_DX6) {
1757                 /* Set secondary AXI IRAM */
1758                 coda_setup_iram(ctx);
1759
1760                 coda_write(dev, ctx->iram_info.buf_bit_use,
1761                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1762                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1763                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1764                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1765                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1766                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1767                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1768                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1769                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1770                 if (dev->devtype->product == CODA_960) {
1771                         coda_write(dev, ctx->iram_info.buf_btp_use,
1772                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1773
1774                         coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
1775                         coda9_set_frame_cache(ctx, dst_fourcc);
1776                 }
1777         }
1778
             /* h.264 additionally needs the slice buffer (size in units of 1024). */
1779         if (src_fourcc == V4L2_PIX_FMT_H264) {
1780                 coda_write(dev, ctx->slicebuf.paddr,
1781                                 CODA_CMD_SET_FRAME_SLICE_BB_START);
1782                 coda_write(dev, ctx->slicebuf.size / 1024,
1783                                 CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
1784         }
1785
             /*
              * Maximum decode size: 1920x1088 expressed in 16x16 macroblocks,
              * packed as total count << 16 | columns << 8 | rows. Both products
              * use the same value but a different register.
              */
1786         if (dev->devtype->product == CODA_7541) {
1787                 int max_mb_x = 1920 / 16;
1788                 int max_mb_y = 1088 / 16;
1789                 int max_mb_num = max_mb_x * max_mb_y;
1790
1791                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1792                                 CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
1793         } else if (dev->devtype->product == CODA_960) {
1794                 int max_mb_x = 1920 / 16;
1795                 int max_mb_y = 1088 / 16;
1796                 int max_mb_num = max_mb_x * max_mb_y;
1797
1798                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1799                                 CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
1800         }
1801
1802         if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
1803                 v4l2_err(&ctx->dev->v4l2_dev,
1804                          "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1805                 return -ETIMEDOUT;
1806         }
1807
1808         return 0;
1809 }
1810
/*
 * start_streaming handler of the decoder ops: runs __coda_start_decoding()
 * with the device-wide coda_mutex held and passes its result through.
 */
1811 static int coda_start_decoding(struct coda_ctx *ctx)
1812 {
1813         struct mutex *hw_lock = &ctx->dev->coda_mutex;
1814         int err;
1815
1816         mutex_lock(hw_lock);
1817         err = __coda_start_decoding(ctx);
1818         mutex_unlock(hw_lock);
1819
1820         return err;
1821 }
1822
/*
 * coda_prepare_decode() - set up and start one decoder picture run.
 *
 * Refills the bitstream ringbuffer from the queued source buffers, runs the
 * sequence initialization if the context is not initialized yet, programs
 * either the VDOA or the rotator output for the next capture buffer and then
 * issues CODA_COMMAND_PIC_RUN asynchronously.
 *
 * Return: 0 if the picture run was started. -EAGAIN if fewer than 512 bytes
 * of bitstream are available without the stream-end flag, or if the sequence
 * initialization failed; in both cases the m2m job has already been finished
 * through v4l2_m2m_job_finish().
 */
1823 static int coda_prepare_decode(struct coda_ctx *ctx)
1824 {
1825         struct vb2_v4l2_buffer *dst_buf;
1826         struct coda_dev *dev = ctx->dev;
1827         struct coda_q_data *q_data_dst;
1828         struct coda_buffer_meta *meta;
1829         unsigned long flags;
1830         u32 rot_mode = 0;
1831         u32 reg_addr, reg_stride;
1832
1833         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1834         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1835
1836         /* Try to copy source buffer contents into the bitstream ringbuffer */
1837         mutex_lock(&ctx->bitstream_mutex);
1838         coda_fill_bitstream(ctx, NULL);
1839         mutex_unlock(&ctx->bitstream_mutex);
1840
             /* Too little data and the stream has not ended: skip this run. */
1841         if (coda_get_bitstream_payload(ctx) < 512 &&
1842             (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
1843                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1844                          "bitstream payload: %d, skipping\n",
1845                          coda_get_bitstream_payload(ctx));
1846                 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1847                 return -EAGAIN;
1848         }
1849
1850         /* Run coda_start_decoding (again) if not yet initialized */
1851         if (!ctx->initialized) {
1852                 int ret = __coda_start_decoding(ctx);
1853
1854                 if (ret < 0) {
1855                         v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
1856                         v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1857                         return -EAGAIN;
1858                 } else {
                             /*
                              * NOTE(review): __coda_start_decoding() has already
                              * set ctx->initialized on its success path, so
                              * this store looks redundant.
                              */
1859                         ctx->initialized = 1;
1860                 }
1861         }
1862
1863         if (dev->devtype->product == CODA_960)
1864                 coda_set_gdi_regs(ctx);
1865
             /*
              * With the VDOA in use and a valid display frame pending, the VDOA
              * is started on that internal frame and the capture buffer, and
              * rot_mode stays 0. Otherwise the rotator is pointed at the
              * capture buffer.
              */
1866         if (ctx->use_vdoa &&
1867             ctx->display_idx >= 0 &&
1868             ctx->display_idx < ctx->num_internal_frames) {
1869                 vdoa_device_run(ctx->vdoa,
1870                                 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0),
1871                                 ctx->internal_frames[ctx->display_idx].paddr);
1872         } else {
1873                 if (dev->devtype->product == CODA_960) {
1874                         /*
1875                          * The CODA960 seems to have an internal list of
1876                          * buffers with 64 entries that includes the
1877                          * registered frame buffers as well as the rotator
1878                          * buffer output.
1879                          *
1880                          * ROT_INDEX needs to be < 0x40, but >
1881                          * ctx->num_internal_frames.
1882                          */
1883                         coda_write(dev,
1884                                    CODA_MAX_FRAMEBUFFERS + dst_buf->vb2_buf.index,
1885                                    CODA9_CMD_DEC_PIC_ROT_INDEX);
1886
1887                         reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
1888                         reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
1889                 } else {
1890                         reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
1891                         reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
1892                 }
1893                 coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
1894                 coda_write(dev, q_data_dst->bytesperline, reg_stride);
1895
1896                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1897         }
1898
1899         coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE);
1900
1901         switch (dev->devtype->product) {
1902         case CODA_DX6:
1903                 /* TBD; deliberately falls through to the CODA_7541 case */
1904         case CODA_7541:
1905                 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
1906                 break;
1907         case CODA_960:
1908                 /* 'hardcode to use interrupt disable mode'? */
1909                 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
1910                 break;
1911         }
1912
1913         coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
1914
1915         coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
1916         coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
1917
1918         if (dev->devtype->product != CODA_DX6)
1919                 coda_write(dev, ctx->iram_info.axi_sram_use,
1920                                 CODA7_REG_BIT_AXI_SRAM_USE);
1921
             /* Peek at (do not remove) the oldest queued buffer metadata. */
1922         spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
1923         meta = list_first_entry_or_null(&ctx->buffer_meta_list,
1924                                         struct coda_buffer_meta, list);
1925
1926         if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {
1927
1928                 /* If this is the last buffer in the bitstream, add padding */
1929                 if (meta->end == (ctx->bitstream_fifo.kfifo.in &
1930                                   ctx->bitstream_fifo.kfifo.mask)) {
1931                         static unsigned char buf[512];
1932                         unsigned int pad;
1933
1934                         /* Pad to multiple of 256 and then add 256 more */
1935                         pad = ((0 - meta->end) & 0xff) + 256;
1936
1937                         memset(buf, 0xff, sizeof(buf));
1938
                             /*
                              * pad is at most 511, so it always fits in buf.
                              * NOTE(review): the kfifo_in() result is not
                              * checked here.
                              */
1939                         kfifo_in(&ctx->bitstream_fifo, buf, pad);
1940                 }
1941         }
1942         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
1943
1944         coda_kfifo_sync_to_device_full(ctx);
1945
1946         /* Clear decode success flag */
1947         coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS);
1948
1949         trace_coda_dec_pic_run(ctx, meta);
1950
             /* Asynchronous: the results are collected in coda_finish_decode(). */
1951         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1952
1953         return 0;
1954 }
1955
/*
 * coda_finish_decode() - collect the results of a finished decoder picture
 * run.
 *
 * Syncs the bitstream read pointer back from the device, evaluates the
 * success and error registers, updates the h.264 crop rectangle, stores the
 * metadata, type and error count of the frame that was just decoded and, if
 * ctx->display_idx refers to a valid internal frame from the previous run,
 * completes the corresponding capture buffer. The display index reported by
 * this run is then saved in ctx->display_idx for the next run.
 */
1956 static void coda_finish_decode(struct coda_ctx *ctx)
1957 {
1958         struct coda_dev *dev = ctx->dev;
1959         struct coda_q_data *q_data_src;
1960         struct coda_q_data *q_data_dst;
1961         struct vb2_v4l2_buffer *dst_buf;
1962         struct coda_buffer_meta *meta;
1963         unsigned long payload;
1964         unsigned long flags;
1965         int width, height;
1966         int decoded_idx;
1967         int display_idx;
1968         u32 src_fourcc;
1969         int success;
1970         u32 err_mb;
1971         int err_vdoa = 0;
1972         u32 val;
1973
1974         /* Update kfifo out pointer from coda bitstream read pointer */
1975         coda_kfifo_sync_from_device(ctx);
1976
1977         /*
1978          * in stream-end mode, the read pointer can overshoot the write pointer
1979          * by up to 512 bytes
1980          */
1981         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
                     /* An apparently almost full fifo here means overshoot: reset it. */
1982                 if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512)
1983                         kfifo_init(&ctx->bitstream_fifo,
1984                                 ctx->bitstream.vaddr, ctx->bitstream.size);
1985         }
1986
1987         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1988         src_fourcc = q_data_src->fourcc;
1989
             /*
              * NOTE(review): this message goes through pr_err() while the rest
              * of the function reports through v4l2_err().
              */
1990         val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
1991         if (val != 1)
1992                 pr_err("DEC_PIC_SUCCESS = %d\n", val);
1993
             /* Bit 0 is the success flag; decoding problems are only logged. */
1994         success = val & 0x1;
1995         if (!success)
1996                 v4l2_err(&dev->v4l2_dev, "decode failed\n");
1997
             /* For h.264, bits 3 and 2 report PS / slice buffer overflows. */
1998         if (src_fourcc == V4L2_PIX_FMT_H264) {
1999                 if (val & (1 << 3))
2000                         v4l2_err(&dev->v4l2_dev,
2001                                  "insufficient PS buffer space (%d bytes)\n",
2002                                  ctx->psbuf.size);
2003                 if (val & (1 << 2))
2004                         v4l2_err(&dev->v4l2_dev,
2005                                  "insufficient slice buffer space (%d bytes)\n",
2006                                  ctx->slicebuf.size);
2007         }
2008
             /* Picture size: width in the upper, height in the lower 16 bits. */
2009         val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
2010         width = (val >> 16) & 0xffff;
2011         height = val & 0xffff;
2012
2013         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
2014
2015         /* frame crop information */
2016         if (src_fourcc == V4L2_PIX_FMT_H264) {
2017                 u32 left_right;
2018                 u32 top_bottom;
2019
2020                 left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
2021                 top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);
2022
2023                 if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
2024                         /* Keep current crop information */
2025                 } else {
2026                         struct v4l2_rect *rect = &q_data_dst->rect;
2027
                             /*
                              * Upper 16 bits: left/top offset. Lower 16 bits:
                              * amount taken off the right/bottom edge.
                              */
2028                         rect->left = left_right >> 16 & 0xffff;
2029                         rect->top = top_bottom >> 16 & 0xffff;
2030                         rect->width = width - rect->left -
2031                                       (left_right & 0xffff);
2032                         rect->height = height - rect->top -
2033                                        (top_bottom & 0xffff);
2034                 }
2035         } else {
2036                 /* no cropping */
2037         }
2038
2039         err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
2040         if (err_mb > 0)
2041                 v4l2_err(&dev->v4l2_dev,
2042                          "errors in %d macroblocks\n", err_mb);
2043
             /*
              * On CODA_7541 a zero option register is treated as a failed
              * prescan: hold the context and return without touching any
              * display or capture buffer state.
              */
2044         if (dev->devtype->product == CODA_7541) {
2045                 val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
2046                 if (val == 0) {
2047                         /* not enough bitstream data */
2048                         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2049                                  "prescan failed: %d\n", val);
2050                         ctx->hold = true;
2051                         return;
2052                 }
2053         }
2054
2055         /* Wait until the VDOA finished writing the previous display frame */
2056         if (ctx->use_vdoa &&
2057             ctx->display_idx >= 0 &&
2058             ctx->display_idx < ctx->num_internal_frames) {
2059                 err_vdoa = vdoa_wait_for_completion(ctx->vdoa);
2060         }
2061
2062         ctx->frm_dis_flg = coda_read(dev,
2063                                      CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
2064
2065         /* The previous display frame was copied out and can be overwritten */
2066         if (ctx->display_idx >= 0 &&
2067             ctx->display_idx < ctx->num_internal_frames) {
2068                 ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
2069                 coda_write(dev, ctx->frm_dis_flg,
2070                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
2071         }
2072
2073         /*
2074          * The index of the last decoded frame, not necessarily in
2075          * display order, and the index of the next display frame.
2076          * The latter could have been decoded in a previous run.
2077          */
2078         decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
2079         display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
2080
2081         if (decoded_idx == -1) {
2082                 /* no frame was decoded, but we might have a display frame */
2083                 if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
2084                         ctx->sequence_offset++;
2085                 else if (ctx->display_idx < 0)
2086                         ctx->hold = true;
2087         } else if (decoded_idx == -2) {
2088                 /* no frame was decoded, we still return remaining buffers */
2089         } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
2090                 v4l2_err(&dev->v4l2_dev,
2091                          "decoded frame index out of range: %d\n", decoded_idx);
2092         } else {
                     /*
                      * A frame was decoded into a valid internal buffer: take
                      * the oldest metadata entry off the list and store it for
                      * that buffer index.
                      */
2093                 val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM) - 1;
2094                 val -= ctx->sequence_offset;
2095                 spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
2096                 if (!list_empty(&ctx->buffer_meta_list)) {
2097                         meta = list_first_entry(&ctx->buffer_meta_list,
2098                                               struct coda_buffer_meta, list);
2099                         list_del(&meta->list);
2100                         ctx->num_metas--;
2101                         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
2102                         /*
2103                          * Clamp counters to 16 bits for comparison, as the HW
2104                          * counter rolls over at this point for h.264. This
2105                          * may be different for other formats, but using 16 bits
2106                          * should be enough to detect most errors and saves us
2107                          * from doing different things based on the format.
2108                          */
2109                         if ((val & 0xffff) != (meta->sequence & 0xffff)) {
2110                                 v4l2_err(&dev->v4l2_dev,
2111                                          "sequence number mismatch (%d(%d) != %d)\n",
2112                                          val, ctx->sequence_offset,
2113                                          meta->sequence);
2114                         }
                             /* Copy by value; the list entry itself is freed. */
2115                         ctx->frame_metas[decoded_idx] = *meta;
2116                         kfree(meta);
2117                 } else {
2118                         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
2119                         v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
2120                         memset(&ctx->frame_metas[decoded_idx], 0,
2121                                sizeof(struct coda_buffer_meta));
2122                         ctx->frame_metas[decoded_idx].sequence = val;
2123                         ctx->sequence_offset++;
2124                 }
2125
2126                 trace_coda_dec_pic_done(ctx, &ctx->frame_metas[decoded_idx]);
2127
                     /* Picture type in the low 3 bits: 0 = key, 1 = P, others = B. */
2128                 val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
2129                 if (val == 0)
2130                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
2131                 else if (val == 1)
2132                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
2133                 else
2134                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;
2135
2136                 ctx->frame_errors[decoded_idx] = err_mb;
2137         }
2138
2139         if (display_idx == -1) {
2140                 /*
2141                  * no more frames to be decoded, but there could still
2142                  * be rotator output to dequeue
2143                  */
2144                 ctx->hold = true;
2145         } else if (display_idx == -3) {
2146                 /* possibly prescan failure */
2147         } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
2148                 v4l2_err(&dev->v4l2_dev,
2149                          "presentation frame index out of range: %d\n",
2150                          display_idx);
2151         }
2152
2153         /* If a frame was copied out, return it */
2154         if (ctx->display_idx >= 0 &&
2155             ctx->display_idx < ctx->num_internal_frames) {
                     /*
                      * NOTE(review): the result of v4l2_m2m_dst_buf_remove() is
                      * dereferenced without a NULL check - presumably a capture
                      * buffer is always queued while a job runs; confirm.
                      */
2156                 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2157                 dst_buf->sequence = ctx->osequence++;
2158
2159                 dst_buf->field = V4L2_FIELD_NONE;
2160                 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
2161                                              V4L2_BUF_FLAG_PFRAME |
2162                                              V4L2_BUF_FLAG_BFRAME);
2163                 dst_buf->flags |= ctx->frame_types[ctx->display_idx];
2164                 meta = &ctx->frame_metas[ctx->display_idx];
2165                 dst_buf->timecode = meta->timecode;
2166                 dst_buf->vb2_buf.timestamp = meta->timestamp;
2167
2168                 trace_coda_dec_rot_done(ctx, dst_buf, meta);
2169
                     /*
                      * Payload from the picture size read above: 2 bytes per
                      * pixel for YUYV and YUV422P, 1.5 for everything else.
                      */
2170                 switch (q_data_dst->fourcc) {
2171                 case V4L2_PIX_FMT_YUYV:
2172                         payload = width * height * 2;
2173                         break;
2174                 case V4L2_PIX_FMT_YUV420:
2175                 case V4L2_PIX_FMT_YVU420:
2176                 case V4L2_PIX_FMT_NV12:
2177                 default:
2178                         payload = width * height * 3 / 2;
2179                         break;
2180                 case V4L2_PIX_FMT_YUV422P:
2181                         payload = width * height * 2;
2182                         break;
2183                 }
2184                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload);
2185
                     /* Macroblock errors in this frame or a VDOA error fail it. */
2186                 if (ctx->frame_errors[ctx->display_idx] || err_vdoa)
2187                         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2188                 else
2189                         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
2190
                     /*
                      * NOTE(review): only the KEYFRAME flag is tested, so
                      * B-frames are reported as "PFRAME" in this message.
                      */
2191                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2192                         "job finished: decoding frame (%d) (%s)\n",
2193                         dst_buf->sequence,
2194                         (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
2195                         "KEYFRAME" : "PFRAME");
2196         } else {
2197                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2198                         "job finished: no frame decoded\n");
2199         }
2200
2201         /* The rotator will copy the current display frame next time */
2202         ctx->display_idx = display_idx;
2203 }
2204
2205 static void coda_decode_timeout(struct coda_ctx *ctx)
2206 {
2207         struct vb2_v4l2_buffer *dst_buf;
2208
2209         /*
2210          * For now this only handles the case where we would deadlock with
2211          * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS,
2212          * but after a failed decode run we would hold the context and wait for
2213          * userspace to queue more buffers.
2214          */
2215         if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))
2216                 return;
2217
2218         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2219         dst_buf->sequence = ctx->qsequence - 1;
2220
2221         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2222 }
2223
2224 const struct coda_context_ops coda_bit_decode_ops = {
2225         .queue_init = coda_decoder_queue_init,
2226         .reqbufs = coda_decoder_reqbufs,
2227         .start_streaming = coda_start_decoding,
2228         .prepare_run = coda_prepare_decode,
2229         .finish_run = coda_finish_decode,
2230         .run_timeout = coda_decode_timeout,
2231         .seq_end_work = coda_seq_end_work,
2232         .release = coda_bit_release,
2233 };
2234
2235 irqreturn_t coda_irq_handler(int irq, void *data)
2236 {
2237         struct coda_dev *dev = data;
2238         struct coda_ctx *ctx;
2239
2240         /* read status register to attend the IRQ */
2241         coda_read(dev, CODA_REG_BIT_INT_STATUS);
2242         coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
2243                       CODA_REG_BIT_INT_CLEAR);
2244
2245         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
2246         if (ctx == NULL) {
2247                 v4l2_err(&dev->v4l2_dev,
2248                          "Instance released before the end of transaction\n");
2249                 mutex_unlock(&dev->coda_mutex);
2250                 return IRQ_HANDLED;
2251         }
2252
2253         trace_coda_bit_done(ctx);
2254
2255         if (ctx->aborting) {
2256                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2257                          "task has been aborted\n");
2258         }
2259
2260         if (coda_isbusy(ctx->dev)) {
2261                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2262                          "coda is still busy!!!!\n");
2263                 return IRQ_NONE;
2264         }
2265
2266         complete(&ctx->completion);
2267
2268         return IRQ_HANDLED;
2269 }