From: Ben Skeggs Date: Fri, 28 Oct 2011 02:06:42 +0000 (+1000) Subject: drm/nvc0/gr: update fuc source to assemble with latest envyas X-Git-Tag: v3.3-rc1~121^2~51^2~55 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=be7f2615d7d14221a106e6c4ec3a64558e6190ed;p=platform%2Fupstream%2Fkernel-adaptation-pc.git drm/nvc0/gr: update fuc source to assemble with latest envyas Signed-off-by: Ben Skeggs --- diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.fuc b/drivers/gpu/drm/nouveau/nvc0_graph.fuc index 2a4b6dc..e6b2288 100644 --- a/drivers/gpu/drm/nouveau/nvc0_graph.fuc +++ b/drivers/gpu/drm/nouveau/nvc0_graph.fuc @@ -71,9 +71,9 @@ queue_put: ld b32 $r9 D[$r13 + 0x4] // PUT xor $r8 8 cmpu b32 $r8 $r9 - bra ne queue_put_next + bra ne #queue_put_next mov $r15 E_CMD_OVERFLOW - call error + call #error ret // store cmd/data on queue @@ -104,7 +104,7 @@ queue_get: ld b32 $r8 D[$r13 + 0x0] // GET ld b32 $r9 D[$r13 + 0x4] // PUT cmpu b32 $r8 $r9 - bra e queue_get_done + bra e #queue_get_done // fetch first cmd/data pair and $r9 $r8 7 shl b32 $r9 3 @@ -135,9 +135,9 @@ nv_rd32: nv_rd32_wait: iord $r12 I[$r11 + 0x000] xbit $r12 $r12 31 - bra ne nv_rd32_wait + bra ne #nv_rd32_wait mov $r10 6 // DONE_MMIO_RD - call wait_doneo + call #wait_doneo iord $r15 I[$r11 + 0x100] // MMIO_RDVAL ret @@ -157,7 +157,7 @@ nv_wr32: nv_wr32_wait: iord $r12 I[$r11 + 0x000] xbit $r12 $r12 31 - bra ne nv_wr32_wait + bra ne #nv_wr32_wait ret // (re)set watchdog timer @@ -193,7 +193,7 @@ $1: shl b32 $r8 6 iord $r8 I[$r8 + 0x000] // DONE xbit $r8 $r8 $r10 - bra $2 wait_done_$1 + bra $2 #wait_done_$1 trace_clr(T_WAIT) ret ') @@ -216,7 +216,7 @@ mmctx_size: add b32 $r9 $r8 add b32 $r14 4 cmpu b32 $r14 $r15 - bra ne nv_mmctx_size_loop + bra ne #nv_mmctx_size_loop mov b32 $r15 $r9 ret @@ -238,12 +238,12 @@ mmctx_xfer: shl b32 $r8 6 clear b32 $r9 or $r11 $r11 - bra e mmctx_base_disabled + bra e #mmctx_base_disabled iowr I[$r8 + 0x000] $r11 // MMCTX_BASE bset $r9 0 // BASE_EN mmctx_base_disabled: or $r14 $r14 - bra e mmctx_multi_disabled + bra e #mmctx_multi_disabled iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK bset $r9 1 // MULTI_EN @@ -264,7 +264,7 @@ mmctx_xfer: mmctx_wait_free: iord $r14 I[$r8 + 0x000] // MMCTX_CTRL and $r14 0x1f - bra e mmctx_wait_free + bra e #mmctx_wait_free // queue up an entry ld b32 $r14 D[$r12] @@ -272,19 +272,19 @@ mmctx_xfer: iowr I[$r8 + 0x300] $r14 add b32 $r12 4 cmpu b32 $r12 $r13 - bra ne mmctx_exec_loop + bra ne #mmctx_exec_loop xbit $r11 $r10 2 - bra ne mmctx_stop + bra ne #mmctx_stop // wait for queue to empty mmctx_fini_wait: iord $r11 I[$r8 + 0x000] // MMCTX_CTRL and $r11 0x1f cmpu b32 $r11 0x10 - bra ne mmctx_fini_wait + bra ne #mmctx_fini_wait mov $r10 2 // DONE_MMCTX - call wait_donez - bra mmctx_done + call #wait_donez + bra #mmctx_done mmctx_stop: xbit $r11 $r10 0 shl b32 $r11 16 // DIR @@ -295,7 +295,7 @@ mmctx_xfer: // wait for STOP_TRIGGER to clear iord $r11 I[$r8 + 0x000] // MMCTX_CTRL xbit $r11 $r11 18 - bra ne mmctx_stop_wait + bra ne #mmctx_stop_wait mmctx_done: trace_clr(T_MMCTX) ret @@ -305,7 +305,7 @@ mmctx_xfer: strand_wait: push $r10 mov $r10 2 - call wait_donez + call #wait_donez pop $r10 ret @@ -316,7 +316,7 @@ strand_pre: sethi $r8 0x20000 mov $r9 0xc iowr I[$r8] $r9 - call strand_wait + call #strand_wait ret // unknown - call after issuing strand commands @@ -326,7 +326,7 @@ strand_post: sethi $r8 0x20000 mov $r9 0xd iowr I[$r8] $r9 - call strand_wait + call #strand_wait ret // Selects strand set?! @@ -341,11 +341,11 @@ strand_set: iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf mov $r12 0xb iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb - call strand_wait + call #strand_wait iowr I[$r10 + 0x000] $r14 // 0x93c = mov $r12 0xa iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa - call strand_wait + call #strand_wait ret // Initialise strand context data @@ -357,22 +357,22 @@ strand_set: // strand_ctx_init: trace_set(T_STRINIT) - call strand_pre + call #strand_pre mov $r14 3 - call strand_set + call #strand_set mov $r10 0x46fc sethi $r10 0x20000 add b32 $r11 $r10 0x400 iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0 mov $r12 1 iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE - call strand_wait + call #strand_wait sub b32 $r12 $r0 1 iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff mov $r12 2 iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT - call strand_wait - call strand_post + call #strand_wait + call #strand_post // read the size of each strand, poke the context offset of // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry @@ -391,7 +391,7 @@ strand_ctx_init: add b32 $r14 $r10 add b32 $r8 4 sub b32 $r9 1 - bra ne ctx_init_strand_loop + bra ne #ctx_init_strand_loop shl b32 $r14 8 sub b32 $r15 $r14 $r15 diff --git a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc index 06f5e26..a9e93c8 100644 --- a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc +++ b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc @@ -32,7 +32,7 @@ * - watchdog timer around ctx operations */ -.section nvc0_grgpc_data +.section #nvc0_grgpc_data include(`nvc0_graph.fuc') gpc_id: .b32 0 gpc_mmio_list_head: .b32 0 @@ -48,40 +48,40 @@ cmd_queue: queue_init // chipset descriptions chipsets: .b8 0xc0 0 0 0 -.b16 nvc0_gpc_mmio_head -.b16 nvc0_gpc_mmio_tail -.b16 nvc0_tpc_mmio_head -.b16 nvc0_tpc_mmio_tail +.b16 #nvc0_gpc_mmio_head +.b16 #nvc0_gpc_mmio_tail +.b16 #nvc0_tpc_mmio_head +.b16 #nvc0_tpc_mmio_tail .b8 0xc1 0 0 0 -.b16 nvc0_gpc_mmio_head -.b16 nvc1_gpc_mmio_tail -.b16 nvc0_tpc_mmio_head -.b16 nvc1_tpc_mmio_tail +.b16 #nvc0_gpc_mmio_head +.b16 #nvc1_gpc_mmio_tail +.b16 #nvc0_tpc_mmio_head +.b16 #nvc1_tpc_mmio_tail .b8 0xc3 0 0 0 -.b16 nvc0_gpc_mmio_head -.b16 nvc0_gpc_mmio_tail -.b16 nvc0_tpc_mmio_head -.b16 nvc3_tpc_mmio_tail +.b16 #nvc0_gpc_mmio_head +.b16 #nvc0_gpc_mmio_tail +.b16 #nvc0_tpc_mmio_head +.b16 #nvc3_tpc_mmio_tail .b8 0xc4 0 0 0 -.b16 nvc0_gpc_mmio_head -.b16 nvc0_gpc_mmio_tail -.b16 nvc0_tpc_mmio_head -.b16 nvc3_tpc_mmio_tail +.b16 #nvc0_gpc_mmio_head +.b16 #nvc0_gpc_mmio_tail +.b16 #nvc0_tpc_mmio_head +.b16 #nvc3_tpc_mmio_tail .b8 0xc8 0 0 0 -.b16 nvc0_gpc_mmio_head -.b16 nvc0_gpc_mmio_tail -.b16 nvc0_tpc_mmio_head -.b16 nvc0_tpc_mmio_tail +.b16 #nvc0_gpc_mmio_head +.b16 #nvc0_gpc_mmio_tail +.b16 #nvc0_tpc_mmio_head +.b16 #nvc0_tpc_mmio_tail .b8 0xce 0 0 0 -.b16 nvc0_gpc_mmio_head -.b16 nvc0_gpc_mmio_tail -.b16 nvc0_tpc_mmio_head -.b16 nvc3_tpc_mmio_tail +.b16 #nvc0_gpc_mmio_head +.b16 #nvc0_gpc_mmio_tail +.b16 #nvc0_tpc_mmio_head +.b16 #nvc3_tpc_mmio_tail .b8 0xcf 0 0 0 -.b16 nvc0_gpc_mmio_head -.b16 nvc0_gpc_mmio_tail -.b16 nvc0_tpc_mmio_head -.b16 nvcf_tpc_mmio_tail +.b16 #nvc0_gpc_mmio_head +.b16 #nvc0_gpc_mmio_tail +.b16 #nvc0_tpc_mmio_head +.b16 #nvcf_tpc_mmio_tail .b8 0 0 0 0 // GPC mmio lists @@ -147,8 +147,8 @@ mmctx_data(0x000544, 1) nvc1_tpc_mmio_tail: -.section nvc0_grgpc_code -bra init +.section #nvc0_grgpc_code +bra #init define(`include_code') include(`nvc0_graph.fuc') @@ -160,10 +160,10 @@ error: push $r14 mov $r14 -0x67ec // 0x9814 sethi $r14 0x400000 - call nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code + call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code add b32 $r14 0x41c mov $r15 1 - call nv_wr32 // HUB_CTXCTL_INTR_UP_SET + call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET pop $r14 ret @@ -190,7 +190,7 @@ init: iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE // setup i0 handler, and route all interrupts to it - mov $r1 ih + mov $r1 #ih mov $iv0 $r1 mov $r1 0x400 iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH @@ -210,24 +210,24 @@ init: and $r2 0x1f shl b32 $r3 $r2 sub b32 $r3 1 - st b32 D[$r0 + tpc_count] $r2 - st b32 D[$r0 + tpc_mask] $r3 + st b32 D[$r0 + #tpc_count] $r2 + st b32 D[$r0 + #tpc_mask] $r3 add b32 $r1 0x400 iord $r2 I[$r1 + 0x000] // MYINDEX - st b32 D[$r0 + gpc_id] $r2 + st b32 D[$r0 + #gpc_id] $r2 // find context data for this chipset mov $r2 0x800 shl b32 $r2 6 iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] - mov $r1 chipsets - 12 + mov $r1 #chipsets - 12 init_find_chipset: add b32 $r1 12 ld b32 $r3 D[$r1 + 0x00] cmpu b32 $r3 $r2 - bra e init_context + bra e #init_context cmpu b32 $r3 0 - bra ne init_find_chipset + bra ne #init_find_chipset // unknown chipset ret @@ -253,19 +253,19 @@ init: clear b32 $r15 ld b16 $r14 D[$r1 + 4] ld b16 $r15 D[$r1 + 6] - st b16 D[$r0 + gpc_mmio_list_head] $r14 - st b16 D[$r0 + gpc_mmio_list_tail] $r15 - call mmctx_size + st b16 D[$r0 + #gpc_mmio_list_head] $r14 + st b16 D[$r0 + #gpc_mmio_list_tail] $r15 + call #mmctx_size add b32 $r2 $r15 add b32 $r3 $r15 // calculate per-TPC mmio context size, store the list pointers ld b16 $r14 D[$r1 + 8] ld b16 $r15 D[$r1 + 10] - st b16 D[$r0 + tpc_mmio_list_head] $r14 - st b16 D[$r0 + tpc_mmio_list_tail] $r15 - call mmctx_size - ld b32 $r14 D[$r0 + tpc_count] + st b16 D[$r0 + #tpc_mmio_list_head] $r14 + st b16 D[$r0 + #tpc_mmio_list_tail] $r15 + call #mmctx_size + ld b32 $r14 D[$r0 + #tpc_count] mulu $r14 $r15 add b32 $r2 $r14 add b32 $r3 $r14 @@ -283,7 +283,7 @@ init: // calculate size of strand context data mov b32 $r15 $r2 - call strand_ctx_init + call #strand_ctx_init add b32 $r3 $r15 // save context size, and tell HUB we're done @@ -301,13 +301,13 @@ init: main: bset $flags $p0 sleep $p0 - mov $r13 cmd_queue - call queue_get - bra $p1 main + mov $r13 #cmd_queue + call #queue_get + bra $p1 #main // 0x0000-0x0003 are all context transfers cmpu b32 $r14 0x04 - bra nc main_not_ctx_xfer + bra nc #main_not_ctx_xfer // fetch $flags and mask off $p1/$p2 mov $r1 $flags mov $r2 0x0006 @@ -318,14 +318,14 @@ main: or $r1 $r14 mov $flags $r1 // transfer context data - call ctx_xfer - bra main + call #ctx_xfer + bra #main main_not_ctx_xfer: shl b32 $r15 $r14 16 or $r15 E_BAD_COMMAND - call error - bra main + call #error + bra #main // interrupt handler ih: @@ -342,13 +342,13 @@ ih: // incoming fifo command? iord $r10 I[$r0 + 0x200] // INTR and $r11 $r10 0x00000004 - bra e ih_no_fifo + bra e #ih_no_fifo // queue incoming fifo command for later processing mov $r11 0x1900 - mov $r13 cmd_queue + mov $r13 #cmd_queue iord $r14 I[$r11 + 0x100] // FIFO_CMD iord $r15 I[$r11 + 0x000] // FIFO_DATA - call queue_put + call #queue_put add b32 $r11 0x400 mov $r14 1 iowr I[$r11 + 0x000] $r14 // FIFO_ACK @@ -374,11 +374,11 @@ ih: // hub_barrier_done: mov $r15 1 - ld b32 $r14 D[$r0 + gpc_id] + ld b32 $r14 D[$r0 + #gpc_id] shl b32 $r15 $r14 mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET sethi $r14 0x400000 - call nv_wr32 + call #nv_wr32 ret // Disables various things, waits a bit, and re-enables them.. @@ -395,7 +395,7 @@ ctx_redswitch: mov $r15 8 ctx_redswitch_delay: sub b32 $r15 1 - bra ne ctx_redswitch_delay + bra ne #ctx_redswitch_delay mov $r15 0xa20 iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER ret @@ -413,8 +413,8 @@ ctx_xfer: mov $r1 0xa04 shl b32 $r1 6 iowr I[$r1 + 0x000] $r15// MEM_BASE - bra not $p1 ctx_xfer_not_load - call ctx_redswitch + bra not $p1 #ctx_xfer_not_load + call #ctx_redswitch ctx_xfer_not_load: // strands @@ -422,7 +422,7 @@ ctx_xfer: sethi $r1 0x20000 mov $r2 0xc iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c - call strand_wait + call #strand_wait mov $r2 0x47fc sethi $r2 0x20000 iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 @@ -435,46 +435,46 @@ ctx_xfer: or $r10 2 // first mov $r11 0x0000 sethi $r11 0x500000 - ld b32 $r12 D[$r0 + gpc_id] + ld b32 $r12 D[$r0 + #gpc_id] shl b32 $r12 15 add b32 $r11 $r12 // base = NV_PGRAPH_GPCn - ld b32 $r12 D[$r0 + gpc_mmio_list_head] - ld b32 $r13 D[$r0 + gpc_mmio_list_tail] + ld b32 $r12 D[$r0 + #gpc_mmio_list_head] + ld b32 $r13 D[$r0 + #gpc_mmio_list_tail] mov $r14 0 // not multi - call mmctx_xfer + call #mmctx_xfer // per-TPC mmio context xbit $r10 $flags $p1 // direction or $r10 4 // last mov $r11 0x4000 sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0 - ld b32 $r12 D[$r0 + gpc_id] + ld b32 $r12 D[$r0 + #gpc_id] shl b32 $r12 15 add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0 - ld b32 $r12 D[$r0 + tpc_mmio_list_head] - ld b32 $r13 D[$r0 + tpc_mmio_list_tail] - ld b32 $r15 D[$r0 + tpc_mask] + ld b32 $r12 D[$r0 + #tpc_mmio_list_head] + ld b32 $r13 D[$r0 + #tpc_mmio_list_tail] + ld b32 $r15 D[$r0 + #tpc_mask] mov $r14 0x800 // stride = 0x800 - call mmctx_xfer + call #mmctx_xfer // wait for strands to finish - call strand_wait + call #strand_wait // if load, or a save without a load following, do some // unknown stuff that's done after finishing a block of // strand commands - bra $p1 ctx_xfer_post - bra not $p2 ctx_xfer_done + bra $p1 #ctx_xfer_post + bra not $p2 #ctx_xfer_done ctx_xfer_post: mov $r1 0x4afc sethi $r1 0x20000 mov $r2 0xd iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d - call strand_wait + call #strand_wait // mark completion in HUB's barrier ctx_xfer_done: - call hub_barrier_done + call #hub_barrier_done ret .align 256 diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc index e4f8c7e..3ea3196 100644 --- a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc +++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc @@ -27,7 +27,7 @@ * m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h */ -.section nvc0_grhub_data +.section #nvc0_grhub_data include(`nvc0_graph.fuc') gpc_count: .b32 0 rop_count: .b32 0 @@ -39,26 +39,26 @@ ctx_current: .b32 0 chipsets: .b8 0xc0 0 0 0 -.b16 nvc0_hub_mmio_head -.b16 nvc0_hub_mmio_tail +.b16 #nvc0_hub_mmio_head +.b16 #nvc0_hub_mmio_tail .b8 0xc1 0 0 0 -.b16 nvc0_hub_mmio_head -.b16 nvc1_hub_mmio_tail +.b16 #nvc0_hub_mmio_head +.b16 #nvc1_hub_mmio_tail .b8 0xc3 0 0 0 -.b16 nvc0_hub_mmio_head -.b16 nvc0_hub_mmio_tail +.b16 #nvc0_hub_mmio_head +.b16 #nvc0_hub_mmio_tail .b8 0xc4 0 0 0 -.b16 nvc0_hub_mmio_head -.b16 nvc0_hub_mmio_tail +.b16 #nvc0_hub_mmio_head +.b16 #nvc0_hub_mmio_tail .b8 0xc8 0 0 0 -.b16 nvc0_hub_mmio_head -.b16 nvc0_hub_mmio_tail +.b16 #nvc0_hub_mmio_head +.b16 #nvc0_hub_mmio_tail .b8 0xce 0 0 0 -.b16 nvc0_hub_mmio_head -.b16 nvc0_hub_mmio_tail +.b16 #nvc0_hub_mmio_head +.b16 #nvc0_hub_mmio_tail .b8 0xcf 0 0 0 -.b16 nvc0_hub_mmio_head -.b16 nvc0_hub_mmio_tail +.b16 #nvc0_hub_mmio_head +.b16 #nvc0_hub_mmio_tail .b8 0 0 0 0 nvc0_hub_mmio_head: @@ -113,8 +113,8 @@ chan_mmio_address: .b32 0 .align 256 xfer_data: .b32 0 -.section nvc0_grhub_code -bra init +.section #nvc0_grhub_code +bra #init define(`include_code') include(`nvc0_graph.fuc') @@ -157,7 +157,7 @@ init: iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE // setup i0 handler, and route all interrupts to it - mov $r1 ih + mov $r1 #ih mov $iv0 $r1 mov $r1 0x400 iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH @@ -201,11 +201,11 @@ init: // fetch enabled GPC/ROP counts mov $r14 -0x69fc // 0x409604 sethi $r14 0x400000 - call nv_rd32 + call #nv_rd32 extr $r1 $r15 16:20 - st b32 D[$r0 + rop_count] $r1 + st b32 D[$r0 + #rop_count] $r1 and $r15 0x1f - st b32 D[$r0 + gpc_count] $r15 + st b32 D[$r0 + #gpc_count] $r15 // set BAR_REQMASK to GPC mask mov $r1 1 @@ -220,14 +220,14 @@ init: mov $r2 0x800 shl b32 $r2 6 iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] - mov $r15 chipsets - 8 + mov $r15 #chipsets - 8 init_find_chipset: add b32 $r15 8 ld b32 $r3 D[$r15 + 0x00] cmpu b32 $r3 $r2 - bra e init_context + bra e #init_context cmpu b32 $r3 0 - bra ne init_find_chipset + bra ne #init_find_chipset // unknown chipset ret @@ -239,9 +239,9 @@ init: ld b16 $r14 D[$r15 + 4] ld b16 $r15 D[$r15 + 6] sethi $r14 0 - st b32 D[$r0 + hub_mmio_list_head] $r14 - st b32 D[$r0 + hub_mmio_list_tail] $r15 - call mmctx_size + st b32 D[$r0 + #hub_mmio_list_head] $r14 + st b32 D[$r0 + #hub_mmio_list_tail] $r15 + call #mmctx_size // set mmctx base addresses now so we don't have to do it later, // they don't (currently) ever change @@ -260,7 +260,7 @@ init: add b32 $r1 1 shl b32 $r1 8 mov b32 $r15 $r1 - call strand_ctx_init + call #strand_ctx_init add b32 $r1 $r15 // initialise each GPC in sequence by passing in the offset of its @@ -271,40 +271,40 @@ init: // when it has completed, and return the size of its context data // in GPCn_CC_SCRATCH[1] // - ld b32 $r3 D[$r0 + gpc_count] + ld b32 $r3 D[$r0 + #gpc_count] mov $r4 0x2000 sethi $r4 0x500000 init_gpc: // setup, and start GPC ucode running add b32 $r14 $r4 0x804 mov b32 $r15 $r1 - call nv_wr32 // CC_SCRATCH[1] = ctx offset + call #nv_wr32 // CC_SCRATCH[1] = ctx offset add b32 $r14 $r4 0x800 mov b32 $r15 $r2 - call nv_wr32 // CC_SCRATCH[0] = chipset + call #nv_wr32 // CC_SCRATCH[0] = chipset add b32 $r14 $r4 0x10c clear b32 $r15 - call nv_wr32 + call #nv_wr32 add b32 $r14 $r4 0x104 - call nv_wr32 // ENTRY + call #nv_wr32 // ENTRY add b32 $r14 $r4 0x100 mov $r15 2 // CTRL_START_TRIGGER - call nv_wr32 // CTRL + call #nv_wr32 // CTRL // wait for it to complete, and adjust context size add b32 $r14 $r4 0x800 init_gpc_wait: - call nv_rd32 + call #nv_rd32 xbit $r15 $r15 31 - bra e init_gpc_wait + bra e #init_gpc_wait add b32 $r14 $r4 0x804 - call nv_rd32 + call #nv_rd32 add b32 $r1 $r15 // next! add b32 $r4 0x8000 sub b32 $r3 1 - bra ne init_gpc + bra ne #init_gpc // save context size, and tell host we're ready mov $r2 0x800 @@ -322,13 +322,13 @@ main: // sleep until we have something to do bset $flags $p0 sleep $p0 - mov $r13 cmd_queue - call queue_get - bra $p1 main + mov $r13 #cmd_queue + call #queue_get + bra $p1 #main // context switch, requested by GPU? cmpu b32 $r14 0x4001 - bra ne main_not_ctx_switch + bra ne #main_not_ctx_switch trace_set(T_AUTO) mov $r1 0xb00 shl b32 $r1 6 @@ -336,39 +336,39 @@ main: iord $r1 I[$r1 + 0x000] // CHAN_CUR xbit $r3 $r1 31 - bra e chsw_no_prev + bra e #chsw_no_prev xbit $r3 $r2 31 - bra e chsw_prev_no_next + bra e #chsw_prev_no_next push $r2 mov b32 $r2 $r1 trace_set(T_SAVE) bclr $flags $p1 bset $flags $p2 - call ctx_xfer + call #ctx_xfer trace_clr(T_SAVE); pop $r2 trace_set(T_LOAD); bset $flags $p1 - call ctx_xfer + call #ctx_xfer trace_clr(T_LOAD); - bra chsw_done + bra #chsw_done chsw_prev_no_next: push $r2 mov b32 $r2 $r1 bclr $flags $p1 bclr $flags $p2 - call ctx_xfer + call #ctx_xfer pop $r2 mov $r1 0xb00 shl b32 $r1 6 iowr I[$r1] $r2 - bra chsw_done + bra #chsw_done chsw_no_prev: xbit $r3 $r2 31 - bra e chsw_done + bra e #chsw_done bset $flags $p1 bclr $flags $p2 - call ctx_xfer + call #ctx_xfer // ack the context switch request chsw_done: @@ -377,32 +377,32 @@ main: mov $r2 1 iowr I[$r1 + 0x000] $r2 // 0x409b0c trace_clr(T_AUTO) - bra main + bra #main // request to set current channel? (*not* a context switch) main_not_ctx_switch: cmpu b32 $r14 0x0001 - bra ne main_not_ctx_chan + bra ne #main_not_ctx_chan mov b32 $r2 $r15 - call ctx_chan - bra main_done + call #ctx_chan + bra #main_done // request to store current channel context? main_not_ctx_chan: cmpu b32 $r14 0x0002 - bra ne main_not_ctx_save + bra ne #main_not_ctx_save trace_set(T_SAVE) bclr $flags $p1 bclr $flags $p2 - call ctx_xfer + call #ctx_xfer trace_clr(T_SAVE) - bra main_done + bra #main_done main_not_ctx_save: shl b32 $r15 $r14 16 or $r15 E_BAD_COMMAND - call error - bra main + call #error + bra #main main_done: mov $r1 0x820 @@ -410,7 +410,7 @@ main: clear b32 $r2 bset $r2 31 iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 - bra main + bra #main // interrupt handler ih: @@ -427,13 +427,13 @@ ih: // incoming fifo command? iord $r10 I[$r0 + 0x200] // INTR and $r11 $r10 0x00000004 - bra e ih_no_fifo + bra e #ih_no_fifo // queue incoming fifo command for later processing mov $r11 0x1900 - mov $r13 cmd_queue + mov $r13 #cmd_queue iord $r14 I[$r11 + 0x100] // FIFO_CMD iord $r15 I[$r11 + 0x000] // FIFO_DATA - call queue_put + call #queue_put add b32 $r11 0x400 mov $r14 1 iowr I[$r11 + 0x000] $r14 // FIFO_ACK @@ -441,18 +441,18 @@ ih: // context switch request? ih_no_fifo: and $r11 $r10 0x00000100 - bra e ih_no_ctxsw + bra e #ih_no_ctxsw // enqueue a context switch for later processing - mov $r13 cmd_queue + mov $r13 #cmd_queue mov $r14 0x4001 - call queue_put + call #queue_put // anything we didn't handle, bring it to the host's attention ih_no_ctxsw: mov $r11 0x104 not b32 $r11 and $r11 $r10 $r11 - bra e ih_no_other + bra e #ih_no_other mov $r10 0xc1c shl b32 $r10 6 iowr I[$r10] $r11 // INTR_UP_SET @@ -478,11 +478,11 @@ ctx_4160s: mov $r14 0x4160 sethi $r14 0x400000 mov $r15 1 - call nv_wr32 + call #nv_wr32 ctx_4160s_wait: - call nv_rd32 + call #nv_rd32 xbit $r15 $r15 4 - bra e ctx_4160s_wait + bra e #ctx_4160s_wait ret // Without clearing again at end of xfer, some things cause PGRAPH @@ -492,7 +492,7 @@ ctx_4160c: mov $r14 0x4160 sethi $r14 0x400000 clear b32 $r15 - call nv_wr32 + call #nv_wr32 ret // Again, not real sure @@ -503,7 +503,7 @@ ctx_4170s: mov $r14 0x4170 sethi $r14 0x400000 or $r15 0x10 - call nv_wr32 + call #nv_wr32 ret // Waits for a ctx_4170s() call to complete @@ -511,9 +511,9 @@ ctx_4170s: ctx_4170w: mov $r14 0x4170 sethi $r14 0x400000 - call nv_rd32 + call #nv_rd32 and $r15 0x10 - bra ne ctx_4170w + bra ne #ctx_4170w ret // Disables various things, waits a bit, and re-enables them.. @@ -530,7 +530,7 @@ ctx_redswitch: mov $r15 8 ctx_redswitch_delay: sub b32 $r15 1 - bra ne ctx_redswitch_delay + bra ne #ctx_redswitch_delay mov $r15 0x770 iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL ret @@ -546,10 +546,10 @@ ctx_86c: iowr I[$r14] $r15 // HUB(0x86c) = val mov $r14 -0x75ec sethi $r14 0x400000 - call nv_wr32 // ROP(0xa14) = val + call #nv_wr32 // ROP(0xa14) = val mov $r14 -0x5794 sethi $r14 0x410000 - call nv_wr32 // GPC(0x86c) = val + call #nv_wr32 // GPC(0x86c) = val ret // ctx_load - load's a channel's ctxctl data, and selects its vm @@ -561,7 +561,7 @@ ctx_load: // switch to channel, somewhat magic in parts.. mov $r10 12 // DONE_UNK12 - call wait_donez + call #wait_donez mov $r1 0xa24 shl b32 $r1 6 iowr I[$r1 + 0x000] $r0 // 0x409a24 @@ -576,7 +576,7 @@ ctx_load: ctx_chan_wait_0: iord $r4 I[$r1 + 0x100] and $r4 0x1f - bra ne ctx_chan_wait_0 + bra ne #ctx_chan_wait_0 iowr I[$r3 + 0x000] $r2 // CHAN_CUR // load channel header, fetch PGRAPH context pointer @@ -595,19 +595,19 @@ ctx_load: sethi $r2 0x80000000 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram mov $r1 0x10 // chan + 0x0210 - mov $r2 xfer_data + mov $r2 #xfer_data sethi $r2 0x00020000 // 16 bytes xdld $r1 $r2 xdwait trace_clr(T_LCHAN) // update current context - ld b32 $r1 D[$r0 + xfer_data + 4] + ld b32 $r1 D[$r0 + #xfer_data + 4] shl b32 $r1 24 - ld b32 $r2 D[$r0 + xfer_data + 0] + ld b32 $r2 D[$r0 + #xfer_data + 0] shr b32 $r2 8 or $r1 $r2 - st b32 D[$r0 + ctx_current] $r1 + st b32 D[$r0 + #ctx_current] $r1 // set transfer base to start of context, and fetch context header trace_set(T_LCTXH) @@ -618,7 +618,7 @@ ctx_load: mov $r1 0xa20 shl b32 $r1 6 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm - mov $r1 chan_data + mov $r1 #chan_data sethi $r1 0x00060000 // 256 bytes xdld $r0 $r1 xdwait @@ -635,10 +635,10 @@ ctx_load: // In: $r2 channel address // ctx_chan: - call ctx_4160s - call ctx_load + call #ctx_4160s + call #ctx_load mov $r10 12 // DONE_UNK12 - call wait_donez + call #wait_donez mov $r1 0xa10 shl b32 $r1 6 mov $r2 5 @@ -646,8 +646,8 @@ ctx_chan: ctx_chan_wait: iord $r2 I[$r1 + 0x000] or $r2 $r2 - bra ne ctx_chan_wait - call ctx_4160c + bra ne #ctx_chan_wait + call #ctx_4160c ret // Execute per-context state overrides list @@ -661,7 +661,7 @@ ctx_chan: // ctx_mmio_exec: // set transfer base to be the mmio list - ld b32 $r3 D[$r0 + chan_mmio_address] + ld b32 $r3 D[$r0 + #chan_mmio_address] mov $r2 0xa04 shl b32 $r2 6 iowr I[$r2 + 0x000] $r3 // MEM_BASE @@ -670,31 +670,31 @@ ctx_mmio_exec: ctx_mmio_loop: // fetch next 256 bytes of mmio list if necessary and $r4 $r3 0xff - bra ne ctx_mmio_pull - mov $r5 xfer_data + bra ne #ctx_mmio_pull + mov $r5 #xfer_data sethi $r5 0x00060000 // 256 bytes xdld $r3 $r5 xdwait // execute a single list entry ctx_mmio_pull: - ld b32 $r14 D[$r4 + xfer_data + 0x00] - ld b32 $r15 D[$r4 + xfer_data + 0x04] - call nv_wr32 + ld b32 $r14 D[$r4 + #xfer_data + 0x00] + ld b32 $r15 D[$r4 + #xfer_data + 0x04] + call #nv_wr32 // next! add b32 $r3 8 sub b32 $r1 1 - bra ne ctx_mmio_loop + bra ne #ctx_mmio_loop // set transfer base back to the current context ctx_mmio_done: - ld b32 $r3 D[$r0 + ctx_current] + ld b32 $r3 D[$r0 + #ctx_current] iowr I[$r2 + 0x000] $r3 // MEM_BASE // disable the mmio list now, we don't need/want to execute it again - st b32 D[$r0 + chan_mmio_count] $r0 - mov $r1 chan_data + st b32 D[$r0 + #chan_mmio_count] $r0 + mov $r1 #chan_data sethi $r1 0x00060000 // 256 bytes xdst $r0 $r1 xdwait @@ -709,46 +709,46 @@ ctx_mmio_exec: // on load it means: "a save preceeded this load" // ctx_xfer: - bra not $p1 ctx_xfer_pre - bra $p2 ctx_xfer_pre_load + bra not $p1 #ctx_xfer_pre + bra $p2 #ctx_xfer_pre_load ctx_xfer_pre: mov $r15 0x10 - call ctx_86c - call ctx_4160s - bra not $p1 ctx_xfer_exec + call #ctx_86c + call #ctx_4160s + bra not $p1 #ctx_xfer_exec ctx_xfer_pre_load: mov $r15 2 - call ctx_4170s - call ctx_4170w - call ctx_redswitch + call #ctx_4170s + call #ctx_4170w + call #ctx_redswitch clear b32 $r15 - call ctx_4170s - call ctx_load + call #ctx_4170s + call #ctx_load // fetch context pointer, and initiate xfer on all GPCs ctx_xfer_exec: - ld b32 $r1 D[$r0 + ctx_current] + ld b32 $r1 D[$r0 + #ctx_current] mov $r2 0x414 shl b32 $r2 6 iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset mov $r14 -0x5b00 sethi $r14 0x410000 mov b32 $r15 $r1 - call nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer + call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer add b32 $r14 4 xbit $r15 $flags $p1 xbit $r2 $flags $p2 shl b32 $r2 1 or $r15 $r2 - call nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) + call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) // strands mov $r1 0x4afc sethi $r1 0x20000 mov $r2 0xc iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c - call strand_wait + call #strand_wait mov $r2 0x47fc sethi $r2 0x20000 iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 @@ -760,22 +760,22 @@ ctx_xfer: xbit $r10 $flags $p1 // direction or $r10 6 // first, last mov $r11 0 // base = 0 - ld b32 $r12 D[$r0 + hub_mmio_list_head] - ld b32 $r13 D[$r0 + hub_mmio_list_tail] + ld b32 $r12 D[$r0 + #hub_mmio_list_head] + ld b32 $r13 D[$r0 + #hub_mmio_list_tail] mov $r14 0 // not multi - call mmctx_xfer + call #mmctx_xfer // wait for GPCs to all complete mov $r10 8 // DONE_BAR - call wait_doneo + call #wait_doneo // wait for strand xfer to complete - call strand_wait + call #strand_wait // post-op - bra $p1 ctx_xfer_post + bra $p1 #ctx_xfer_post mov $r10 12 // DONE_UNK12 - call wait_donez + call #wait_donez mov $r1 0xa10 shl b32 $r1 6 mov $r2 5 @@ -783,27 +783,27 @@ ctx_xfer: ctx_xfer_post_save_wait: iord $r2 I[$r1] or $r2 $r2 - bra ne ctx_xfer_post_save_wait + bra ne #ctx_xfer_post_save_wait - bra $p2 ctx_xfer_done + bra $p2 #ctx_xfer_done ctx_xfer_post: mov $r15 2 - call ctx_4170s + call #ctx_4170s clear b32 $r15 - call ctx_86c - call strand_post - call ctx_4170w + call #ctx_86c + call #strand_post + call #ctx_4170w clear b32 $r15 - call ctx_4170s + call #ctx_4170s - bra not $p1 ctx_xfer_no_post_mmio - ld b32 $r1 D[$r0 + chan_mmio_count] + bra not $p1 #ctx_xfer_no_post_mmio + ld b32 $r1 D[$r0 + #chan_mmio_count] or $r1 $r1 - bra e ctx_xfer_no_post_mmio - call ctx_mmio_exec + bra e #ctx_xfer_no_post_mmio + call #ctx_mmio_exec ctx_xfer_no_post_mmio: - call ctx_4160c + call #ctx_4160c ctx_xfer_done: ret