drm/nv98/crypt: non-stub implementation of the engine hooks
author Ben Skeggs <bskeggs@redhat.com>
Wed, 2 May 2012 11:00:20 +0000 (21:00 +1000)
committer Ben Skeggs <bskeggs@redhat.com>
Thu, 24 May 2012 06:56:13 +0000 (16:56 +1000)
fuc is from pscnv driver.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
drivers/gpu/drm/nouveau/nv98_crypt.c
drivers/gpu/drm/nouveau/nv98_crypt.fuc [new file with mode: 0644]
drivers/gpu/drm/nouveau/nv98_crypt.fuc.h [new file with mode: 0644]

index db94ff0..e25e13f 100644 (file)
  */
 
 #include "drmP.h"
+
 #include "nouveau_drv.h"
 #include "nouveau_util.h"
 #include "nouveau_vm.h"
 #include "nouveau_ramht.h"
 
-struct nv98_crypt_engine {
+#include "nv98_crypt.fuc.h"
+
+struct nv98_crypt_priv {
        struct nouveau_exec_engine base;
 };
 
+struct nv98_crypt_chan {
+       struct nouveau_gpuobj *mem;
+};
+
 static int
-nv98_crypt_fini(struct drm_device *dev, int engine, bool suspend)
+nv98_crypt_context_new(struct nouveau_channel *chan, int engine)
+{
+       struct drm_device *dev = chan->dev;
+       struct drm_nouveau_private *dev_priv = dev->dev_private;
+       struct nv98_crypt_priv *priv = nv_engine(dev, engine);
+       struct nv98_crypt_chan *cctx;
+       int ret;
+
+       cctx = chan->engctx[engine] = kzalloc(sizeof(*cctx), GFP_KERNEL);
+       if (!cctx)
+               return -ENOMEM;
+
+       atomic_inc(&chan->vm->engref[engine]);
+
+       ret = nouveau_gpuobj_new(dev, chan, 256, 0, NVOBJ_FLAG_ZERO_ALLOC |
+                                NVOBJ_FLAG_ZERO_FREE, &cctx->mem);
+       if (ret)
+               goto error;
+
+       nv_wo32(chan->ramin, 0xa0, 0x00190000);
+       nv_wo32(chan->ramin, 0xa4, cctx->mem->vinst + cctx->mem->size - 1);
+       nv_wo32(chan->ramin, 0xa8, cctx->mem->vinst);
+       nv_wo32(chan->ramin, 0xac, 0x00000000);
+       nv_wo32(chan->ramin, 0xb0, 0x00000000);
+       nv_wo32(chan->ramin, 0xb4, 0x00000000);
+       dev_priv->engine.instmem.flush(dev);
+
+error:
+       if (ret)
+               priv->base.context_del(chan, engine);
+       return ret;
+}
+
+static void
+nv98_crypt_context_del(struct nouveau_channel *chan, int engine)
+{
+       struct nv98_crypt_chan *cctx = chan->engctx[engine];
+       int i;
+
+       for (i = 0xa0; i < 0xb4; i += 4)
+               nv_wo32(chan->ramin, i, 0x00000000);
+
+       nouveau_gpuobj_ref(NULL, &cctx->mem);
+
+       atomic_dec(&chan->vm->engref[engine]);
+       chan->engctx[engine] = NULL;
+       kfree(cctx);
+}
+
+static int
+nv98_crypt_object_new(struct nouveau_channel *chan, int engine,
+                    u32 handle, u16 class)
 {
-       if (!(nv_rd32(dev, 0x000200) & 0x00004000))
-               return 0;
+       struct nv98_crypt_chan *cctx = chan->engctx[engine];
+
+       /* fuc engine doesn't need an object, our ramht code does.. */
+       cctx->mem->engine = 5;
+       cctx->mem->class  = class;
+       return nouveau_ramht_insert(chan, handle, cctx->mem);
+}
 
+static void
+nv98_crypt_tlb_flush(struct drm_device *dev, int engine)
+{
+       nv50_vm_flush_engine(dev, 0x0a);
+}
+
+static int
+nv98_crypt_fini(struct drm_device *dev, int engine, bool suspend)
+{
        nv_mask(dev, 0x000200, 0x00004000, 0x00000000);
        return 0;
 }
@@ -45,34 +117,100 @@ nv98_crypt_fini(struct drm_device *dev, int engine, bool suspend)
 static int
 nv98_crypt_init(struct drm_device *dev, int engine)
 {
+       int i;
+
+       /* reset! */
        nv_mask(dev, 0x000200, 0x00004000, 0x00000000);
        nv_mask(dev, 0x000200, 0x00004000, 0x00004000);
+
+       /* wait for exit interrupt to signal */
+       nv_wait(dev, 0x087008, 0x00000010, 0x00000010);
+       nv_wr32(dev, 0x087004, 0x00000010);
+
+       /* upload microcode code and data segments */
+       nv_wr32(dev, 0x087ff8, 0x00100000);
+       for (i = 0; i < ARRAY_SIZE(nv98_pcrypt_code); i++)
+               nv_wr32(dev, 0x087ff4, nv98_pcrypt_code[i]);
+
+       nv_wr32(dev, 0x087ff8, 0x00000000);
+       for (i = 0; i < ARRAY_SIZE(nv98_pcrypt_data); i++)
+               nv_wr32(dev, 0x087ff4, nv98_pcrypt_data[i]);
+
+       /* start it running */
+       nv_wr32(dev, 0x08710c, 0x00000000);
+       nv_wr32(dev, 0x087104, 0x00000000); /* ENTRY */
+       nv_wr32(dev, 0x087100, 0x00000002); /* TRIGGER */
        return 0;
 }
 
+static struct nouveau_enum nv98_crypt_isr_error_name[] = {
+       { 0x0000, "ILLEGAL_MTHD" },
+       { 0x0001, "INVALID_BITFIELD" },
+       { 0x0002, "INVALID_ENUM" },
+       { 0x0003, "QUERY" },
+       {}
+};
+
+static void
+nv98_crypt_isr(struct drm_device *dev)
+{
+       u32 disp = nv_rd32(dev, 0x08701c);
+       u32 stat = nv_rd32(dev, 0x087008) & disp & ~(disp >> 16);
+       u32 inst = nv_rd32(dev, 0x087050) & 0x3fffffff;
+       u32 ssta = nv_rd32(dev, 0x087040) & 0x0000ffff;
+       u32 addr = nv_rd32(dev, 0x087040) >> 16;
+       u32 mthd = (addr & 0x07ff) << 2;
+       u32 subc = (addr & 0x3800) >> 11;
+       u32 data = nv_rd32(dev, 0x087044);
+       int chid = nv50_graph_isr_chid(dev, inst);
+
+       if (stat & 0x00000040) {
+               NV_INFO(dev, "PCRYPT: DISPATCH_ERROR [");
+               nouveau_enum_print(nv98_crypt_isr_error_name, ssta);
+               printk("] ch %d [0x%08x] subc %d mthd 0x%04x data 0x%08x\n",
+                       chid, inst, subc, mthd, data);
+               nv_wr32(dev, 0x087004, 0x00000040);
+               stat &= ~0x00000040;
+       }
+
+       if (stat) {
+               NV_INFO(dev, "PCRYPT: unhandled intr 0x%08x\n", stat);
+               nv_wr32(dev, 0x087004, stat);
+       }
+
+       nv50_fb_vm_trap(dev, 1);
+}
+
 static void
 nv98_crypt_destroy(struct drm_device *dev, int engine)
 {
-       struct nv98_crypt_engine *pcrypt = nv_engine(dev, engine);
+       struct nv98_crypt_priv *priv = nv_engine(dev, engine);
 
+       nouveau_irq_unregister(dev, 14);
        NVOBJ_ENGINE_DEL(dev, CRYPT);
-
-       kfree(pcrypt);
+       kfree(priv);
 }
 
 int
 nv98_crypt_create(struct drm_device *dev)
 {
-       struct nv98_crypt_engine *pcrypt;
+       struct nv98_crypt_priv *priv;
 
-       pcrypt = kzalloc(sizeof(*pcrypt), GFP_KERNEL);
-       if (!pcrypt)
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv)
                return -ENOMEM;
 
-       pcrypt->base.destroy = nv98_crypt_destroy;
-       pcrypt->base.init = nv98_crypt_init;
-       pcrypt->base.fini = nv98_crypt_fini;
+       priv->base.destroy = nv98_crypt_destroy;
+       priv->base.init = nv98_crypt_init;
+       priv->base.fini = nv98_crypt_fini;
+       priv->base.context_new = nv98_crypt_context_new;
+       priv->base.context_del = nv98_crypt_context_del;
+       priv->base.object_new = nv98_crypt_object_new;
+       priv->base.tlb_flush = nv98_crypt_tlb_flush;
+
+       nouveau_irq_register(dev, 14, nv98_crypt_isr);
 
-       NVOBJ_ENGINE_ADD(dev, CRYPT, &pcrypt->base);
+       NVOBJ_ENGINE_ADD(dev, CRYPT, &priv->base);
+       NVOBJ_CLASS(dev, 0x88b4, CRYPT);
        return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv98_crypt.fuc b/drivers/gpu/drm/nouveau/nv98_crypt.fuc
new file mode 100644 (file)
index 0000000..7393813
--- /dev/null
@@ -0,0 +1,698 @@
+/*
+ *  fuc microcode for nv98 pcrypt engine
+ *  Copyright (C) 2010  Marcin Kościelnicki
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+.section #nv98_pcrypt_data
+
+ctx_dma:
+ctx_dma_query:         .b32 0
+ctx_dma_src:           .b32 0
+ctx_dma_dst:           .b32 0
+.equ #dma_count 3
+ctx_query_address_high:        .b32 0
+ctx_query_address_low: .b32 0
+ctx_query_counter:     .b32 0
+ctx_cond_address_high: .b32 0
+ctx_cond_address_low:  .b32 0
+ctx_cond_off:          .b32 0
+ctx_src_address_high:  .b32 0
+ctx_src_address_low:   .b32 0
+ctx_dst_address_high:  .b32 0
+ctx_dst_address_low:   .b32 0
+ctx_mode:              .b32 0
+.align 16
+ctx_key:               .skip 16
+ctx_iv:                        .skip 16
+
+.align 0x80
+swap:
+.skip 32
+
+.align 8
+common_cmd_dtable:
+.b32 #ctx_query_address_high + 0x20000 ~0xff
+.b32 #ctx_query_address_low + 0x20000 ~0xfffffff0
+.b32 #ctx_query_counter + 0x20000 ~0xffffffff
+.b32 #cmd_query_get + 0x00000 ~1
+.b32 #ctx_cond_address_high + 0x20000 ~0xff
+.b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0
+.b32 #cmd_cond_mode + 0x00000 ~7
+.b32 #cmd_wrcache_flush + 0x00000 ~0
+.equ #common_cmd_max 0x88
+
+
+.align 8
+engine_cmd_dtable:
+.b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0xc + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff
+.b32 #ctx_src_address_high + 0x20000 ~0xff
+.b32 #ctx_src_address_low + 0x20000 ~0xfffffff0
+.b32 #ctx_dst_address_high + 0x20000 ~0xff
+.b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0
+.b32 #crypt_cmd_mode + 0x00000 ~0xf
+.b32 #crypt_cmd_length + 0x10000 ~0x0ffffff0
+.equ #engine_cmd_max 0xce
+
+.align 4
+crypt_dtable:
+.b16 #crypt_copy_prep #crypt_do_inout
+.b16 #crypt_store_prep #crypt_do_out
+.b16 #crypt_ecb_e_prep #crypt_do_inout
+.b16 #crypt_ecb_d_prep #crypt_do_inout
+.b16 #crypt_cbc_e_prep #crypt_do_inout
+.b16 #crypt_cbc_d_prep #crypt_do_inout
+.b16 #crypt_pcbc_e_prep #crypt_do_inout
+.b16 #crypt_pcbc_d_prep #crypt_do_inout
+.b16 #crypt_cfb_e_prep #crypt_do_inout
+.b16 #crypt_cfb_d_prep #crypt_do_inout
+.b16 #crypt_ofb_prep #crypt_do_inout
+.b16 #crypt_ctr_prep #crypt_do_inout
+.b16 #crypt_cbc_mac_prep #crypt_do_in
+.b16 #crypt_cmac_finish_complete_prep #crypt_do_in
+.b16 #crypt_cmac_finish_partial_prep #crypt_do_in
+
+.align 0x100
+
+.section #nv98_pcrypt_code
+
+       // $r0 is always set to 0 in our code - this allows some space savings.
+       clear b32 $r0
+
+       // set up the interrupt handler
+       mov $r1 #ih
+       mov $iv0 $r1
+
+       // init stack pointer
+       mov $sp $r0
+
+       // set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host
+       movw $r1 0xfff0
+       sethi $r1 0
+       mov $r2 0x400
+       iowr I[$r2 + 0x300] $r1
+
+       // enable the interrupts
+       or $r1 0xc
+       iowr I[$r2] $r1
+
+       // enable fifo access and context switching
+       mov $r1 3
+       mov $r2 0x1200
+       iowr I[$r2] $r1
+
+       // enable i0 delivery
+       bset $flags ie0
+
+       // sleep forever, waking only for interrupts.
+       bset $flags $p0
+       spin:
+       sleep $p0
+       bra #spin
+
+// i0 handler
+ih:
+       // see which interrupts we got
+       iord $r1 I[$r0 + 0x200]
+
+       and $r2 $r1 0x8
+       cmpu b32 $r2 0
+       bra e #noctx
+
+               // context switch... prepare the regs for xfer
+               mov $r2 0x7700
+               mov $xtargets $r2
+               mov $xdbase $r0
+               // 128-byte context.
+               mov $r2 0
+               sethi $r2 0x50000
+
+               // read current channel
+               mov $r3 0x1400
+               iord $r4 I[$r3]
+               // if bit 30 set, it's active, so we have to unload it first.
+               shl b32 $r5 $r4 1
+               cmps b32 $r5 0
+               bra nc #ctxload
+
+                       // unload the current channel - save the context
+                       xdst $r0 $r2
+                       xdwait
+                       // and clear bit 30, then write back
+                       bclr $r4 0x1e
+                       iowr I[$r3] $r4
+                       // tell PFIFO we unloaded
+                       mov $r4 1
+                       iowr I[$r3 + 0x200] $r4
+
+               bra #noctx
+
+               ctxload:
+                       // no channel loaded - perhaps we're requested to load one
+                       iord $r4 I[$r3 + 0x100]
+                       shl b32 $r15 $r4 1
+                       cmps b32 $r15 0
+                       // if bit 30 of next channel not set, probably PFIFO is just
+                       // killing a context. do a faux load, without the active bit.
+                       bra nc #dummyload
+
+                               // ok, do a real context load.
+                               xdld $r0 $r2
+                               xdwait
+                               mov $r5 #ctx_dma
+                               mov $r6 #dma_count - 1
+                               ctxload_dma_loop:
+                                       ld b32 $r7 D[$r5 + $r6 * 4]
+                                       add b32 $r8 $r6 0x180
+                                       shl b32 $r8 8
+                                       iowr I[$r8] $r7
+                                       sub b32 $r6 1
+                               bra nc #ctxload_dma_loop
+
+                       dummyload:
+                       // tell PFIFO we're done
+                       mov $r5 2
+                       iowr I[$r3 + 0x200] $r5
+
+       noctx:
+       and $r2 $r1 0x4
+       cmpu b32 $r2 0
+       bra e #nocmd
+
+               // incoming fifo command.
+               mov $r3 0x1900
+               iord $r2 I[$r3 + 0x100]
+               iord $r3 I[$r3]
+               // extract the method
+               and $r4 $r2 0x7ff
+               // shift the addr to proper position if we need to interrupt later
+               shl b32 $r2 0x10
+
+               // mthd 0 and 0x100 [NAME, NOP]: ignore
+               and $r5 $r4 0x7bf
+               cmpu b32 $r5 0
+               bra e #cmddone
+
+               mov $r5 #engine_cmd_dtable - 0xc0 * 8
+               mov $r6 #engine_cmd_max
+               cmpu b32 $r4 0xc0
+               bra nc #dtable_cmd
+               mov $r5 #common_cmd_dtable - 0x80 * 8
+               mov $r6 #common_cmd_max
+               cmpu b32 $r4 0x80
+               bra nc #dtable_cmd
+               cmpu b32 $r4 0x60
+               bra nc #dma_cmd
+               cmpu b32 $r4 0x50
+               bra ne #illegal_mthd
+
+                       // mthd 0x140: PM_TRIGGER
+                       mov $r2 0x2200
+                       clear b32 $r3
+                       sethi $r3 0x20000
+                       iowr I[$r2] $r3
+                       bra #cmddone
+
+               dma_cmd:
+                       // mthd 0x180...: DMA_*
+                       cmpu b32 $r4 0x60+#dma_count
+                       bra nc #illegal_mthd
+                       shl b32 $r5 $r4 2
+                       add b32 $r5 (#ctx_dma - 0x60 * 4) & 0xffff
+                       bset $r3 0x1e
+                       st b32 D[$r5] $r3
+                       add b32 $r4 0x180 - 0x60
+                       shl b32 $r4 8
+                       iowr I[$r4] $r3
+                       bra #cmddone
+
+               dtable_cmd:
+                       cmpu b32 $r4 $r6
+                       bra nc #illegal_mthd
+                       shl b32 $r4 3
+                       add b32 $r4 $r5
+                       ld b32 $r5 D[$r4 + 4]
+                       and $r5 $r3
+                       cmpu b32 $r5 0
+                       bra ne #invalid_bitfield
+                       ld b16 $r5 D[$r4]
+                       ld b16 $r6 D[$r4 + 2]
+                       cmpu b32 $r6 2
+                       bra e #cmd_setctx
+                       ld b32 $r7 D[$r0 + #ctx_cond_off]
+                       and $r6 $r7
+                       cmpu b32 $r6 1
+                       bra e #cmddone
+                       call $r5
+                       bra $p1 #dispatch_error
+                       bra #cmddone
+
+               cmd_setctx:
+                       st b32 D[$r5] $r3
+                       bra #cmddone
+
+
+               invalid_bitfield:
+                       or $r2 1
+               dispatch_error:
+               illegal_mthd:
+                       mov $r4 0x1000
+                       iowr I[$r4] $r2
+                       iowr I[$r4 + 0x100] $r3
+                       mov $r4 0x40
+                       iowr I[$r0] $r4
+
+                       im_loop:
+                               iord $r4 I[$r0 + 0x200]
+                               and $r4 0x40
+                               cmpu b32 $r4 0
+                       bra ne #im_loop
+
+               cmddone:
+               // remove the command from FIFO
+               mov $r3 0x1d00
+               mov $r4 1
+               iowr I[$r3] $r4
+
+       nocmd:
+       // ack the processed interrupts
+       and $r1 $r1 0xc
+       iowr I[$r0 + 0x100] $r1
+iret
+
+cmd_query_get:
+       // if bit 0 of param set, trigger interrupt afterwards.
+       setp $p1 $r3
+       or $r2 3
+
+       // read PTIMER, beware of races...
+       mov $r4 0xb00
+       ptimer_retry:
+               iord $r6 I[$r4 + 0x100]
+               iord $r5 I[$r4]
+               iord $r7 I[$r4 + 0x100]
+               cmpu b32 $r6 $r7
+       bra ne #ptimer_retry
+
+       // prepare the query structure
+       ld b32 $r4 D[$r0 + #ctx_query_counter]
+       st b32 D[$r0 + #swap + 0x0] $r4
+       st b32 D[$r0 + #swap + 0x4] $r0
+       st b32 D[$r0 + #swap + 0x8] $r5
+       st b32 D[$r0 + #swap + 0xc] $r6
+
+       // will use target 0, DMA_QUERY.
+       mov $xtargets $r0
+
+       ld b32 $r4 D[$r0 + #ctx_query_address_high]
+       shl b32 $r4 0x18
+       mov $xdbase $r4
+
+       ld b32 $r4 D[$r0 + #ctx_query_address_low]
+       mov $r5 #swap
+       sethi $r5 0x20000
+       xdst $r4 $r5
+       xdwait
+
+       ret
+
+cmd_cond_mode:
+       // if >= 5, INVALID_ENUM
+       bset $flags $p1
+       or $r2 2
+       cmpu b32 $r3 5
+       bra nc #return
+
+       // otherwise, no error.
+       bclr $flags $p1
+
+       // if < 2, no QUERY object is involved
+       cmpu b32 $r3 2
+       bra nc #cmd_cond_mode_queryful
+
+               xor $r3 1
+               st b32 D[$r0 + #ctx_cond_off] $r3
+       return:
+               ret
+
+       cmd_cond_mode_queryful:
+       // ok, will need to pull a QUERY object, prepare offsets
+       ld b32 $r4 D[$r0 + #ctx_cond_address_high]
+       ld b32 $r5 D[$r0 + #ctx_cond_address_low]
+       and $r6 $r5 0xff
+       shr b32 $r5 8
+       shl b32 $r4 0x18
+       or $r4 $r5
+       mov $xdbase $r4
+       mov $xtargets $r0
+
+       // pull the first one
+       mov $r5 #swap
+       sethi $r5 0x20000
+       xdld $r6 $r5
+
+       // if == 2, only a single QUERY is involved...
+       cmpu b32 $r3 2
+       bra ne #cmd_cond_mode_double
+
+               xdwait
+               ld b32 $r4 D[$r0 + #swap + 4]
+               cmpu b32 $r4 0
+               xbit $r4 $flags z
+               st b32 D[$r0 + #ctx_cond_off] $r4
+               ret
+
+       // ok, we'll need to pull second one too
+       cmd_cond_mode_double:
+       add b32 $r6 0x10
+       add b32 $r5 0x10
+       xdld $r6 $r5
+       xdwait
+
+       // compare COUNTERs
+       ld b32 $r5 D[$r0 + #swap + 0x00]
+       ld b32 $r6 D[$r0 + #swap + 0x10]
+       cmpu b32 $r5 $r6
+       xbit $r4 $flags z
+
+       // compare RESen
+       ld b32 $r5 D[$r0 + #swap + 0x04]
+       ld b32 $r6 D[$r0 + #swap + 0x14]
+       cmpu b32 $r5 $r6
+       xbit $r5 $flags z
+       and $r4 $r5
+
+       // and negate or not, depending on mode
+       cmpu b32 $r3 3
+       xbit $r5 $flags z
+       xor $r4 $r5
+       st b32 D[$r0 + #ctx_cond_off] $r4
+       ret
+
+cmd_wrcache_flush:
+       bclr $flags $p1
+       mov $r2 0x2200
+       clear b32 $r3
+       sethi $r3 0x10000
+       iowr I[$r2] $r3
+       ret
+
+crypt_cmd_mode:
+       // if >= 0xf, INVALID_ENUM
+       bset $flags $p1
+       or $r2 2
+       cmpu b32 $r3 0xf
+       bra nc #crypt_cmd_mode_return
+
+               bclr $flags $p1
+               st b32 D[$r0 + #ctx_mode] $r3
+
+       crypt_cmd_mode_return:
+       ret
+
+crypt_cmd_length:
+       // nop if length == 0
+       cmpu b32 $r3 0
+       bra e #crypt_cmd_mode_return
+
+       // init key, IV
+       cxset 3
+       mov $r4 #ctx_key
+       sethi $r4 0x70000
+       xdst $r0 $r4
+       mov $r4 #ctx_iv
+       sethi $r4 0x60000
+       xdst $r0 $r4
+       xdwait
+       ckeyreg $c7
+
+       // prepare the targets
+       mov $r4 0x2100
+       mov $xtargets $r4
+
+       // prepare src address
+       ld b32 $r4 D[$r0 + #ctx_src_address_high]
+       ld b32 $r5 D[$r0 + #ctx_src_address_low]
+       shr b32 $r8 $r5 8
+       shl b32 $r4 0x18
+       or $r4 $r8
+       and $r5 $r5 0xff
+
+       // prepare dst address
+       ld b32 $r6 D[$r0 + #ctx_dst_address_high]
+       ld b32 $r7 D[$r0 + #ctx_dst_address_low]
+       shr b32 $r8 $r7 8
+       shl b32 $r6 0x18
+       or $r6 $r8
+       and $r7 $r7 0xff
+
+       // find the proper prep & do functions
+       ld b32 $r8 D[$r0 + #ctx_mode]
+       shl b32 $r8 2
+
+       // run prep
+       ld b16 $r9 D[$r8 + #crypt_dtable]
+       call $r9
+
+       // do it
+       ld b16 $r9 D[$r8 + #crypt_dtable + 2]
+       call $r9
+       cxset 1
+       xdwait
+       cxset 0x61
+       xdwait
+       xdwait
+
+       // update src address
+       shr b32 $r8 $r4 0x18
+       shl b32 $r9 $r4 8
+       add b32 $r9 $r5
+       adc b32 $r8 0
+       st b32 D[$r0 + #ctx_src_address_high] $r8
+       st b32 D[$r0 + #ctx_src_address_low] $r9
+
+       // update dst address
+       shr b32 $r8 $r6 0x18
+       shl b32 $r9 $r6 8
+       add b32 $r9 $r7
+       adc b32 $r8 0
+       st b32 D[$r0 + #ctx_dst_address_high] $r8
+       st b32 D[$r0 + #ctx_dst_address_low] $r9
+
+       // pull updated IV
+       cxset 2
+       mov $r4 #ctx_iv
+       sethi $r4 0x60000
+       xdld $r0 $r4
+       xdwait
+
+       ret
+
+
+crypt_copy_prep:
+       cs0begin 2
+               cxsin $c0
+               cxsout $c0
+       ret
+
+crypt_store_prep:
+       cs0begin 1
+               cxsout $c6
+       ret
+
+crypt_ecb_e_prep:
+       cs0begin 3
+               cxsin $c0
+               cenc $c0 $c0
+               cxsout $c0
+       ret
+
+crypt_ecb_d_prep:
+       ckexp $c7 $c7
+       cs0begin 3
+               cxsin $c0
+               cdec $c0 $c0
+               cxsout $c0
+       ret
+
+crypt_cbc_e_prep:
+       cs0begin 4
+               cxsin $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+               cxsout $c6
+       ret
+
+crypt_cbc_d_prep:
+       ckexp $c7 $c7
+       cs0begin 5
+               cmov $c2 $c6
+               cxsin $c6
+               cdec $c0 $c6
+               cxor $c0 $c2
+               cxsout $c0
+       ret
+
+crypt_pcbc_e_prep:
+       cs0begin 5
+               cxsin $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+               cxsout $c6
+               cxor $c6 $c0
+       ret
+
+crypt_pcbc_d_prep:
+       ckexp $c7 $c7
+       cs0begin 5
+               cxsin $c0
+               cdec $c1 $c0
+               cxor $c6 $c1
+               cxsout $c6
+               cxor $c6 $c0
+       ret
+
+crypt_cfb_e_prep:
+       cs0begin 4
+               cenc $c6 $c6
+               cxsin $c0
+               cxor $c6 $c0
+               cxsout $c6
+       ret
+
+crypt_cfb_d_prep:
+       cs0begin 4
+               cenc $c0 $c6
+               cxsin $c6
+               cxor $c0 $c6
+               cxsout $c0
+       ret
+
+crypt_ofb_prep:
+       cs0begin 4
+               cenc $c6 $c6
+               cxsin $c0
+               cxor $c0 $c6
+               cxsout $c0
+       ret
+
+crypt_ctr_prep:
+       cs0begin 5
+               cenc $c1 $c6
+               cadd $c6 1
+               cxsin $c0
+               cxor $c0 $c1
+               cxsout $c0
+       ret
+
+crypt_cbc_mac_prep:
+       cs0begin 3
+               cxsin $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+       ret
+
+crypt_cmac_finish_complete_prep:
+       cs0begin 7
+               cxsin $c0
+               cxor $c6 $c0
+               cxor $c0 $c0
+               cenc $c0 $c0
+               cprecmac $c0 $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+       ret
+
+crypt_cmac_finish_partial_prep:
+       cs0begin 8
+               cxsin $c0
+               cxor $c6 $c0
+               cxor $c0 $c0
+               cenc $c0 $c0
+               cprecmac $c0 $c0
+               cprecmac $c0 $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+       ret
+
+// TODO
+crypt_do_in:
+       add b32 $r3 $r5
+       mov $xdbase $r4
+       mov $r9 #swap
+       sethi $r9 0x20000
+       crypt_do_in_loop:
+               xdld $r5 $r9
+               xdwait
+               cxset 0x22
+               xdst $r0 $r9
+               cs0exec 1
+               xdwait
+               add b32 $r5 0x10
+               cmpu b32 $r5 $r3
+       bra ne #crypt_do_in_loop
+       cxset 1
+       xdwait
+       ret
+
+crypt_do_out:
+       add b32 $r3 $r7
+       mov $xdbase $r6
+       mov $r9 #swap
+       sethi $r9 0x20000
+       crypt_do_out_loop:
+               cs0exec 1
+               cxset 0x61
+               xdld $r7 $r9
+               xdst $r7 $r9
+               cxset 1
+               xdwait
+               add b32 $r7 0x10
+               cmpu b32 $r7 $r3
+       bra ne #crypt_do_out_loop
+       ret
+
+crypt_do_inout:
+       add b32 $r3 $r5
+       mov $r9 #swap
+       sethi $r9 0x20000
+       crypt_do_inout_loop:
+               mov $xdbase $r4
+               xdld $r5 $r9
+               xdwait
+               cxset 0x21
+               xdst $r0 $r9
+               cs0exec 1
+               cxset 0x61
+               mov $xdbase $r6
+               xdld $r7 $r9
+               xdst $r7 $r9
+               cxset 1
+               xdwait
+               add b32 $r5 0x10
+               add b32 $r7 0x10
+               cmpu b32 $r5 $r3
+       bra ne #crypt_do_inout_loop
+       ret
+
+.align 0x100
diff --git a/drivers/gpu/drm/nouveau/nv98_crypt.fuc.h b/drivers/gpu/drm/nouveau/nv98_crypt.fuc.h
new file mode 100644 (file)
index 0000000..38676c7
--- /dev/null
@@ -0,0 +1,584 @@
+uint32_t nv98_pcrypt_data[] = {
+/* 0x0000: ctx_dma */
+/* 0x0000: ctx_dma_query */
+       0x00000000,
+/* 0x0004: ctx_dma_src */
+       0x00000000,
+/* 0x0008: ctx_dma_dst */
+       0x00000000,
+/* 0x000c: ctx_query_address_high */
+       0x00000000,
+/* 0x0010: ctx_query_address_low */
+       0x00000000,
+/* 0x0014: ctx_query_counter */
+       0x00000000,
+/* 0x0018: ctx_cond_address_high */
+       0x00000000,
+/* 0x001c: ctx_cond_address_low */
+       0x00000000,
+/* 0x0020: ctx_cond_off */
+       0x00000000,
+/* 0x0024: ctx_src_address_high */
+       0x00000000,
+/* 0x0028: ctx_src_address_low */
+       0x00000000,
+/* 0x002c: ctx_dst_address_high */
+       0x00000000,
+/* 0x0030: ctx_dst_address_low */
+       0x00000000,
+/* 0x0034: ctx_mode */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+/* 0x0040: ctx_key */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+/* 0x0050: ctx_iv */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+/* 0x0080: swap */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+/* 0x00a0: common_cmd_dtable */
+       0x0002000c,
+       0xffffff00,
+       0x00020010,
+       0x0000000f,
+       0x00020014,
+       0x00000000,
+       0x00000192,
+       0xfffffffe,
+       0x00020018,
+       0xffffff00,
+       0x0002001c,
+       0x0000000f,
+       0x000001d7,
+       0xfffffff8,
+       0x00000260,
+       0xffffffff,
+/* 0x00e0: engine_cmd_dtable */
+       0x00020040,
+       0x00000000,
+       0x00020044,
+       0x00000000,
+       0x00020048,
+       0x00000000,
+       0x0002004c,
+       0x00000000,
+       0x00020050,
+       0x00000000,
+       0x00020054,
+       0x00000000,
+       0x00020058,
+       0x00000000,
+       0x0002005c,
+       0x00000000,
+       0x00020024,
+       0xffffff00,
+       0x00020028,
+       0x0000000f,
+       0x0002002c,
+       0xffffff00,
+       0x00020030,
+       0x0000000f,
+       0x00000271,
+       0xfffffff0,
+       0x00010285,
+       0xf000000f,
+/* 0x0150: crypt_dtable */
+       0x04db0321,
+       0x04b1032f,
+       0x04db0339,
+       0x04db034b,
+       0x04db0361,
+       0x04db0377,
+       0x04db0395,
+       0x04db03af,
+       0x04db03cd,
+       0x04db03e3,
+       0x04db03f9,
+       0x04db040f,
+       0x04830429,
+       0x0483043b,
+       0x0483045d,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+};
+
+uint32_t nv98_pcrypt_code[] = {
+       0x17f004bd,
+       0x0010fe35,
+       0xf10004fe,
+       0xf0fff017,
+       0x27f10013,
+       0x21d00400,
+       0x0c15f0c0,
+       0xf00021d0,
+       0x27f10317,
+       0x21d01200,
+       0x1031f400,
+/* 0x002f: spin */
+       0xf40031f4,
+       0x0ef40028,
+/* 0x0035: ih */
+       0x8001cffd,
+       0xb00812c4,
+       0x0bf40024,
+       0x0027f167,
+       0x002bfe77,
+       0xf00007fe,
+       0x23f00027,
+       0x0037f105,
+       0x0034cf14,
+       0xb0014594,
+       0x18f40055,
+       0x0602fa17,
+       0x4af003f8,
+       0x0034d01e,
+       0xd00147f0,
+       0x0ef48034,
+/* 0x0075: ctxload */
+       0x4034cf33,
+       0xb0014f94,
+       0x18f400f5,
+       0x0502fa21,
+       0x57f003f8,
+       0x0267f000,
+/* 0x008c: ctxload_dma_loop */
+       0xa07856bc,
+       0xb6018068,
+       0x87d00884,
+       0x0162b600,
+/* 0x009f: dummyload */
+       0xf0f018f4,
+       0x35d00257,
+/* 0x00a5: noctx */
+       0x0412c480,
+       0xf50024b0,
+       0xf100df0b,
+       0xcf190037,
+       0x33cf4032,
+       0xff24e400,
+       0x1024b607,
+       0x07bf45e4,
+       0xf50054b0,
+       0xf100b90b,
+       0xf1fae057,
+       0xb000ce67,
+       0x18f4c044,
+       0xa057f14d,
+       0x8867f1fc,
+       0x8044b000,
+       0xb03f18f4,
+       0x18f46044,
+       0x5044b019,
+       0xf1741bf4,
+       0xbd220027,
+       0x0233f034,
+       0xf50023d0,
+/* 0x0103: dma_cmd */
+       0xb000810e,
+       0x18f46344,
+       0x0245945e,
+       0xfe8050b7,
+       0x801e39f0,
+       0x40b70053,
+       0x44b60120,
+       0x0043d008,
+/* 0x0123: dtable_cmd */
+       0xb8600ef4,
+       0x18f40446,
+       0x0344b63e,
+       0x980045bb,
+       0x53fd0145,
+       0x0054b004,
+       0x58291bf4,
+       0x46580045,
+       0x0264b001,
+       0x98170bf4,
+       0x67fd0807,
+       0x0164b004,
+       0xf9300bf4,
+       0x0f01f455,
+/* 0x015b: cmd_setctx */
+       0x80280ef4,
+       0x0ef40053,
+/* 0x0161: invalid_bitfield */
+       0x0125f022,
+/* 0x0164: dispatch_error */
+/* 0x0164: illegal_mthd */
+       0x100047f1,
+       0xd00042d0,
+       0x47f04043,
+       0x0004d040,
+/* 0x0174: im_loop */
+       0xf08004cf,
+       0x44b04044,
+       0xf71bf400,
+/* 0x0180: cmddone */
+       0x1d0037f1,
+       0xd00147f0,
+/* 0x018a: nocmd */
+       0x11c40034,
+       0x4001d00c,
+/* 0x0192: cmd_query_get */
+       0x38f201f8,
+       0x0325f001,
+       0x0b0047f1,
+/* 0x019c: ptimer_retry */
+       0xcf4046cf,
+       0x47cf0045,
+       0x0467b840,
+       0x98f41bf4,
+       0x04800504,
+       0x21008020,
+       0x80220580,
+       0x0bfe2306,
+       0x03049800,
+       0xfe1844b6,
+       0x04980047,
+       0x8057f104,
+       0x0253f000,
+       0xf80645fa,
+/* 0x01d7: cmd_cond_mode */
+       0xf400f803,
+       0x25f00131,
+       0x0534b002,
+       0xf41218f4,
+       0x34b00132,
+       0x0b18f402,
+       0x800136f0,
+/* 0x01f2: return */
+       0x00f80803,
+/* 0x01f4: cmd_cond_mode_queryful */
+       0x98060498,
+       0x56c40705,
+       0x0855b6ff,
+       0xfd1844b6,
+       0x47fe0545,
+       0x000bfe00,
+       0x008057f1,
+       0xfa0253f0,
+       0x34b00565,
+       0x131bf402,
+       0x049803f8,
+       0x0044b021,
+       0x800b4cf0,
+       0x00f80804,
+/* 0x022c: cmd_cond_mode_double */
+       0xb61060b6,
+       0x65fa1050,
+       0x9803f805,
+       0x06982005,
+       0x0456b824,
+       0x980b4cf0,
+       0x06982105,
+       0x0456b825,
+       0xfd0b5cf0,
+       0x34b00445,
+       0x0b5cf003,
+       0x800645fd,
+       0x00f80804,
+/* 0x0260: cmd_wrcache_flush */
+       0xf10132f4,
+       0xbd220027,
+       0x0133f034,
+       0xf80023d0,
+/* 0x0271: crypt_cmd_mode */
+       0x0131f400,
+       0xb00225f0,
+       0x18f40f34,
+       0x0132f409,
+/* 0x0283: crypt_cmd_mode_return */
+       0xf80d0380,
+/* 0x0285: crypt_cmd_length */
+       0x0034b000,
+       0xf4fb0bf4,
+       0x47f0033c,
+       0x0743f040,
+       0xf00604fa,
+       0x43f05047,
+       0x0604fa06,
+       0x3cf503f8,
+       0x47f1c407,
+       0x4bfe2100,
+       0x09049800,
+       0x950a0598,
+       0x44b60858,
+       0x0548fd18,
+       0x98ff55c4,
+       0x07980b06,
+       0x0878950c,
+       0xfd1864b6,
+       0x77c40568,
+       0x0d0898ff,
+       0x580284b6,
+       0x95f9a889,
+       0xf9a98958,
+       0x013cf495,
+       0x3cf403f8,
+       0xf803f861,
+       0x18489503,
+       0xbb084994,
+       0x81b60095,
+       0x09088000,
+       0x950a0980,
+       0x69941868,
+       0x0097bb08,
+       0x800081b6,
+       0x09800b08,
+       0x023cf40c,
+       0xf05047f0,
+       0x04fa0643,
+       0xf803f805,
+/* 0x0321: crypt_copy_prep */
+       0x203cf500,
+       0x003cf594,
+       0x003cf588,
+/* 0x032f: crypt_store_prep */
+       0xf500f88c,
+       0xf594103c,
+       0xf88c063c,
+/* 0x0339: crypt_ecb_e_prep */
+       0x303cf500,
+       0x003cf594,
+       0x003cf588,
+       0x003cf5d0,
+/* 0x034b: crypt_ecb_d_prep */
+       0xf500f88c,
+       0xf5c8773c,
+       0xf594303c,
+       0xf588003c,
+       0xf5d4003c,
+       0xf88c003c,
+/* 0x0361: crypt_cbc_e_prep */
+       0x403cf500,
+       0x003cf594,
+       0x063cf588,
+       0x663cf5ac,
+       0x063cf5d0,
+/* 0x0377: crypt_cbc_d_prep */
+       0xf500f88c,
+       0xf5c8773c,
+       0xf594503c,
+       0xf584623c,
+       0xf588063c,
+       0xf5d4603c,
+       0xf5ac203c,
+       0xf88c003c,
+/* 0x0395: crypt_pcbc_e_prep */
+       0x503cf500,
+       0x003cf594,
+       0x063cf588,
+       0x663cf5ac,
+       0x063cf5d0,
+       0x063cf58c,
+/* 0x03af: crypt_pcbc_d_prep */
+       0xf500f8ac,
+       0xf5c8773c,
+       0xf594503c,
+       0xf588003c,
+       0xf5d4013c,
+       0xf5ac163c,
+       0xf58c063c,
+       0xf8ac063c,
+/* 0x03cd: crypt_cfb_e_prep */
+       0x403cf500,
+       0x663cf594,
+       0x003cf5d0,
+       0x063cf588,
+       0x063cf5ac,
+/* 0x03e3: crypt_cfb_d_prep */
+       0xf500f88c,
+       0xf594403c,
+       0xf5d0603c,
+       0xf588063c,
+       0xf5ac603c,
+       0xf88c003c,
+/* 0x03f9: crypt_ofb_prep */
+       0x403cf500,
+       0x663cf594,
+       0x003cf5d0,
+       0x603cf588,
+       0x003cf5ac,
+/* 0x040f: crypt_ctr_prep */
+       0xf500f88c,
+       0xf594503c,
+       0xf5d0613c,
+       0xf5b0163c,
+       0xf588003c,
+       0xf5ac103c,
+       0xf88c003c,
+/* 0x0429: crypt_cbc_mac_prep */
+       0x303cf500,
+       0x003cf594,
+       0x063cf588,
+       0x663cf5ac,
+/* 0x043b: crypt_cmac_finish_complete_prep */
+       0xf500f8d0,
+       0xf594703c,
+       0xf588003c,
+       0xf5ac063c,
+       0xf5ac003c,
+       0xf5d0003c,
+       0xf5bc003c,
+       0xf5ac063c,
+       0xf8d0663c,
+/* 0x045d: crypt_cmac_finish_partial_prep */
+       0x803cf500,
+       0x003cf594,
+       0x063cf588,
+       0x003cf5ac,
+       0x003cf5ac,
+       0x003cf5d0,
+       0x003cf5bc,
+       0x063cf5bc,
+       0x663cf5ac,
+/* 0x0483: crypt_do_in */
+       0xbb00f8d0,
+       0x47fe0035,
+       0x8097f100,
+       0x0293f000,
+/* 0x0490: crypt_do_in_loop */
+       0xf80559fa,
+       0x223cf403,
+       0xf50609fa,
+       0xf898103c,
+       0x1050b603,
+       0xf40453b8,
+       0x3cf4e91b,
+       0xf803f801,
+/* 0x04b1: crypt_do_out */
+       0x0037bb00,
+       0xf10067fe,
+       0xf0008097,
+/* 0x04be: crypt_do_out_loop */
+       0x3cf50293,
+       0x3cf49810,
+       0x0579fa61,
+       0xf40679fa,
+       0x03f8013c,
+       0xb81070b6,
+       0x1bf40473,
+/* 0x04db: crypt_do_inout */
+       0xbb00f8e8,
+       0x97f10035,
+       0x93f00080,
+/* 0x04e5: crypt_do_inout_loop */
+       0x0047fe02,
+       0xf80559fa,
+       0x213cf403,
+       0xf50609fa,
+       0xf498103c,
+       0x67fe613c,
+       0x0579fa00,
+       0xf40679fa,
+       0x03f8013c,
+       0xb61050b6,
+       0x53b81070,
+       0xd41bf404,
+       0x000000f8,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+};