r600g: add initial cayman acceleration support.
authorDave Airlie <airlied@redhat.com>
Thu, 10 Mar 2011 02:54:13 +0000 (12:54 +1000)
committerDave Airlie <airlied@redhat.com>
Wed, 25 May 2011 01:42:45 +0000 (11:42 +1000)
Cayman is the RadeonHD 69xx series of GPUs. This adds support for
3D acceleration to the r600g driver.

Major changes:
Some context registers moved around - mainly MSAA and clipping/guardband related.
GPR allocation is fully dynamic.
No vertex cache - everything is unified in the texture cache.
Shader engines go from 5-wide to 4-wide (no scalar or trans slot):
- some changes to how instructions are placed into slots
- removal of END_OF_PROGRAM bit in favour of END flow control clause
- no vertex fetch clause - the TC clause accepts both vertex and texture fetches

Signed-off-by: Dave Airlie <airlied@redhat.com>
12 files changed:
src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/evergreend.h
src/gallium/drivers/r600/r600.h
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_opcodes.h
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/winsys/r600/drm/evergreen_hw_context.c
src/gallium/winsys/r600/drm/r600_drm.c
src/gallium/winsys/r600/drm/radeon_pciid.c

index 3793b91..fb0b0f1 100644 (file)
@@ -62,14 +62,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
-               bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+               bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
-                       S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
-                       S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+                       S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst);
+               if (bc->chiprev == CHIPREV_EVERGREEN) /* no EOP on cayman */
+                       bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+               id++;
+
                break;
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
@@ -80,6 +83,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+       case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
                bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
                bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
                                        S_SQ_CF_WORD1_BARRIER(1) |
index 187f00e..dcb2806 100644 (file)
@@ -71,15 +71,19 @@ static void evergreen_set_blend_color(struct pipe_context *ctx,
 static void *evergreen_create_blend_state(struct pipe_context *ctx,
                                        const struct pipe_blend_state *state)
 {
+       struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend);
        struct r600_pipe_state *rstate;
        u32 color_control, target_mask;
        /* FIXME there is more then 8 framebuffer */
        unsigned blend_cntl[8];
+       enum radeon_family family;
 
        if (blend == NULL) {
                return NULL;
        }
+
+       family = r600_get_family(rctx->radeon);
        rstate = &blend->rstate;
 
        rstate->id = R600_PIPE_STATE_BLEND;
@@ -102,9 +106,16 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
                }
        }
        blend->cb_target_mask = target_mask;
+       
        r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
                                color_control, 0xFFFFFFFD, NULL);
-       r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+
+       if (family != CHIP_CAYMAN)
+               r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+       else {
+               r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+       }
 
        for (int i = 0; i < 8; i++) {
                /* state->rt entries > 0 only written if independent blending */
@@ -229,11 +240,15 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 static void *evergreen_create_rs_state(struct pipe_context *ctx,
                                        const struct pipe_rasterizer_state *state)
 {
+       struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer);
        struct r600_pipe_state *rstate;
        unsigned tmp;
        unsigned prov_vtx = 1, polygon_dual_mode;
        unsigned clip_rule;
+       enum radeon_family family;
+
+       family = r600_get_family(rctx->radeon);
 
        if (rs == NULL) {
                return NULL;
@@ -290,17 +305,30 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
        tmp = (unsigned)state->line_width * 8;
        r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL);
 
-       r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL);
+       if (family == CHIP_CAYMAN) {
+               r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL,
+                                       S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
+                                       0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
 
-       r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL,
-                               S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
-                               0xFFFFFFFF, NULL);
 
+       } else {
+               r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+
+               r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+
+               r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL,
+                                       S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
+                                       0xFFFFFFFF, NULL);
+       }
+       r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
        return rstate;
 }
@@ -832,10 +860,13 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
        struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
        u32 shader_mask, tl, br, target_mask;
+       enum radeon_family family;
 
        if (rstate == NULL)
                return;
 
+       family = r600_get_family(rctx->radeon);
+
        evergreen_context_flush_dest_caches(&rctx->ctx);
        rctx->ctx.num_dest_buffers = state->nr_cbufs;
 
@@ -898,10 +929,17 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
                                0x00000000, target_mask, NULL);
        r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK,
                                shader_mask, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
-                               0x00000000, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
-                               0x00000000, 0xFFFFFFFF, NULL);
+
+
+       if (family == CHIP_CAYMAN) {
+               r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG,
+                                       0x00000000, 0xFFFFFFFF, NULL);
+       } else {
+               r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
+                                       0x00000000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
+                                       0x00000000, 0xFFFFFFFF, NULL);
+       }
 
        free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
        rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
@@ -968,6 +1006,85 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
        rctx->context.texture_barrier = evergreen_texture_barrier;
 }
 
+static void cayman_init_config(struct r600_pipe_context *rctx)
+{
+       struct r600_pipe_state *rstate = &rctx->config;
+       unsigned tmp;
+       /* Fill rctx->config with the initial register values used on Cayman, then submit it at the end. */
+       tmp = 0x00000000;
+       tmp |= S_008C00_EXPORT_SRC_C(1);
+       r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+       /* GPR-related SQ registers; raw field values, meaning not visible here -- TODO confirm against the register spec. */
+       r600_pipe_state_add_reg(rstate, CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, (4 << 28), 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL);
+       /* Both PA_SC_MODE_CNTL registers start out zeroed. */
+       r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL);
+       /* Zero the VGT registers listed below; PA_CL_ENHANCE at the end of the group gets (3 << 1) | 1. */
+       r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL);
+       /* Clear all 32 SQ_VTX_SEMANTIC_n registers (0..31). */
+       r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
+       /* PA_CL_CLIP_CNTL starts out zeroed. */
+       r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL);
+       /* Centroid priority: the two words hold nibbles 0x0..0xf in ascending order (presumably sample indices -- TODO confirm). */
+       r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, 0);
+       r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, 0);
+       /* Both LDS allocation registers are written as 0. */
+       r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xffffffff, NULL);
+       r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xffffffff, NULL);
+       /* DB_EQAA: 0x110000 is a raw value -- TODO document its fields. Afterwards hand the whole state to the context. */
+       r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xffffffff, NULL);
+       r600_context_pipe_state_set(&rctx->ctx, rstate);
+}
+
 void evergreen_init_config(struct r600_pipe_context *rctx)
 {
        struct r600_pipe_state *rstate = &rctx->config;
@@ -999,6 +1116,12 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
        unsigned tmp;
 
        family = r600_get_family(rctx->radeon);
+
+       if (family == CHIP_CAYMAN) {
+               cayman_init_config(rctx);
+               return;
+       }
+               
        ps_prio = 0;
        vs_prio = 1;
        gs_prio = 2;
index 3e87810..ee0c7c9 100644 (file)
 #define EVERGREEN_CONTEXT_REG_END                   0X00029000
 #define EVERGREEN_RESOURCE_OFFSET                   0x00030000
 #define EVERGREEN_RESOURCE_END                      0x00034000
+#define CAYMAN_RESOURCE_END                         0x00038000
 #define EVERGREEN_LOOP_CONST_OFFSET                 0x0003A200
 #define EVERGREEN_LOOP_CONST_END                    0x0003A26C
 #define EVERGREEN_BOOL_CONST_OFFSET                 0x0003A500
 #define EVERGREEN_BOOL_CONST_END                    0x0003A506
+#define CAYMAN_BOOL_CONST_END                       0x0003A518
 #define EVERGREEN_SAMPLER_OFFSET                    0X0003C000
 #define EVERGREEN_SAMPLER_END                       0X0003CFF0
+#define CAYMAN_SAMPLER_END                          0X0003C600
 
 #define EVERGREEN_CTL_CONST_OFFSET                  0x0003CFF0
 #define EVERGREEN_CTL_CONST_END                     0x0003E200
+#define CAYMAN_CTL_CONST_END                        0x0003FF0C
 
 #define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
 #define EVENT_TYPE_ZPASS_DONE                  0x15
 #define ENDIAN_8IN32   2
 #define ENDIAN_8IN64   3
 
+#define CM_R_0288E8_SQ_LDS_ALLOC                     0x000288E8
+
+#define CM_R_028804_DB_EQAA 0x00028804
+
+#define CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0        0x00028BD4
+#define CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1        0x00028BD8
+#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc
+#define CM_R_028BE0_PA_SC_AA_CONFIG 0x28be0
+#define CM_R_028BE4_PA_SU_VTX_CNTL 0x28be4
+#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8
+#define CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ 0x28bec
+#define CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ 0x28bf0
+#define CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ 0x28bf4
+
+#define CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 0x28bf8
+#define CM_R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 0x28bfc
+#define CM_R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 0x28c00
+#define CM_R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 0x28c04
+
+#define CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 0x28c08
+#define CM_R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 0x28c0c
+#define CM_R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 0x28c10
+#define CM_R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 0x28c14
+
+#define CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 0x28c18
+#define CM_R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 0x28c1c
+#define CM_R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 0x28c20
+#define CM_R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 0x28c24
+
+#define CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 0x28c28
+#define CM_R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 0x28c2c
+#define CM_R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 0x28c30
+#define CM_R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 0x28c34
+
+#define CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 0x28c38
+#define CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 0x28c3c
+
+#define CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1              0x00008C10
+#define CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2              0x00008C14
 #endif
index 7b57fc8..996418a 100644 (file)
@@ -95,6 +95,7 @@ enum radeon_family {
        CHIP_BARTS,
        CHIP_TURKS,
        CHIP_CAICOS,
+       CHIP_CAYMAN,
        CHIP_LAST,
 };
 
@@ -102,6 +103,7 @@ enum chip_class {
        R600,
        R700,
        EVERGREEN,
+       CAYMAN,
 };
 
 struct r600_tiling_info {
index 033e846..00572cb 100644 (file)
@@ -94,6 +94,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
                }
                break;
        case CHIPREV_EVERGREEN:
+       case CHIPREV_CAYMAN:
                switch (alu->inst) {
                case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
                        return 0;
@@ -226,6 +227,9 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
        case CHIP_CAICOS:
                bc->chiprev = CHIPREV_EVERGREEN;
                break;
+       case CHIP_CAYMAN:
+               bc->chiprev = CHIPREV_CAYMAN;
+               break;
        default:
                R600_ERR("unknown family %d\n", bc->family);
                return -EINVAL;
@@ -334,6 +338,7 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
                        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
                        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
        case CHIPREV_EVERGREEN:
+       case CHIPREV_CAYMAN:
        default:
                return !alu->is_op3 && (
                        alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
@@ -384,6 +389,7 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
                        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
                        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
        case CHIPREV_EVERGREEN:
+       case CHIPREV_CAYMAN:
        default:
                return !alu->is_op3 && (
                        alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
@@ -401,6 +407,7 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
                return !alu->is_op3 &&
                        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
        case CHIPREV_EVERGREEN:
+       case CHIPREV_CAYMAN:
        default:
                return !alu->is_op3 &&
                        alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
@@ -417,6 +424,7 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
                        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
                        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
        case CHIPREV_EVERGREEN:
+       case CHIPREV_CAYMAN:
        default:
                return !alu->is_op3 && (
                        alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
@@ -469,6 +477,7 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
                                alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
                                alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
        case CHIPREV_EVERGREEN:
+       case CHIPREV_CAYMAN:
        default:
                if (!alu->is_op3)
                        /* Note that FLT_TO_INT_* instructions are vector-only instructions
@@ -514,13 +523,16 @@ static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
 {
        struct r600_bc_alu *alu;
        unsigned i, chan, trans;
+       int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
 
-       for (i = 0; i < 5; i++)
+       for (i = 0; i < max_slots; i++)
                assignment[i] = NULL;
 
        for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
                chan = alu->dst.chan;
-               if (is_alu_trans_unit_inst(bc, alu))
+               if (max_slots == 4)
+                       trans = 0;
+               else if (is_alu_trans_unit_inst(bc, alu))
                        trans = 1;
                else if (is_alu_vec_unit_inst(bc, alu))
                        trans = 0;
@@ -719,8 +731,10 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
        struct alu_bank_swizzle bs;
        int bank_swizzle[5];
        int i, r = 0, forced = 0;
-       boolean scalar_only = true;
-       for (i = 0; i < 5; i++) {
+       boolean scalar_only = bc->chiprev == CHIPREV_CAYMAN ? false : true;
+       int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
+
+       for (i = 0; i < max_slots; i++) {
                if (slots[i] && slots[i]->bank_swizzle_force) {
                        slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
                        forced = 1;
@@ -737,6 +751,13 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
                bank_swizzle[i] = SQ_ALU_VEC_012;
        bank_swizzle[4] = SQ_ALU_SCL_210;
        while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
+
+               if (max_slots == 4) {
+                       for (i = 0; i < max_slots; i++) {
+                               if (bank_swizzle[i] == SQ_ALU_VEC_210)
+                                 return -1;
+                       }
+               }
                init_bank_swizzle(&bs);
                if (scalar_only == false) {
                        for (i = 0; i < 4; i++) {
@@ -749,11 +770,11 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
                } else
                        r = 0;
 
-               if (!r && slots[4]) {
+               if (!r && slots[4] && max_slots == 5) {
                        r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
                }
                if (!r) {
-                       for (i = 0; i < 5; i++) {
+                       for (i = 0; i < max_slots; i++) {
                                if (slots[i])
                                        slots[i]->bank_swizzle = bank_swizzle[i];
                        }
@@ -763,7 +784,7 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
                if (scalar_only) {
                        bank_swizzle[4]++;
                } else {
-                       for (i = 0; i < 5; i++) {
+                       for (i = 0; i < max_slots; i++) {
                                bank_swizzle[i]++;
                                if (bank_swizzle[i] <= SQ_ALU_VEC_210)
                                        break;
@@ -783,12 +804,13 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
        struct r600_bc_alu *prev[5];
        int gpr[5], chan[5];
        int i, j, r, src, num_src;
+       int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
 
        r = assign_alu_units(bc, alu_prev, prev);
        if (r)
                return r;
 
-       for (i = 0; i < 5; ++i) {
+       for (i = 0; i < max_slots; ++i) {
                if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
                        gpr[i] = prev[i]->dst.sel;
                        /* cube writes more than PV.X */
@@ -800,7 +822,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
                        gpr[i] = -1;
        }
 
-       for (i = 0; i < 5; ++i) {
+       for (i = 0; i < max_slots; ++i) {
                struct r600_bc_alu *alu = slots[i];
                if(!alu)
                        continue;
@@ -810,11 +832,13 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
                        if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
                                continue;
 
-                       if (alu->src[src].sel == gpr[4] &&
-                               alu->src[src].chan == chan[4]) {
-                               alu->src[src].sel = V_SQ_ALU_SRC_PS;
-                               alu->src[src].chan = 0;
-                               continue;
+                       if (bc->chiprev < CHIPREV_CAYMAN) {
+                               if (alu->src[src].sel == gpr[4] &&
+                                   alu->src[src].chan == chan[4]) {
+                                       alu->src[src].sel = V_SQ_ALU_SRC_PS;
+                                       alu->src[src].chan = 0;
+                                       continue;
+                               }
                        }
 
                        for (j = 0; j < 4; ++j) {
@@ -922,12 +946,13 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
        int i, j, r, src, num_src;
        int num_once_inst = 0;
        int have_mova = 0, have_rel = 0;
+       int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
 
        r = assign_alu_units(bc, alu_prev, prev);
        if (r)
                return r;
 
-       for (i = 0; i < 5; ++i) {
+       for (i = 0; i < max_slots; ++i) {
                struct r600_bc_alu *alu;
 
                /* check number of literals */
@@ -951,7 +976,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
                        result[i] = prev[i];
                        continue;
                } else if (prev[i] && slots[i]) {
-                       if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
+                       if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
                                /* Trans unit is still free try to use it. */
                                if (is_alu_any_unit_inst(bc, slots[i])) {
                                        result[i] = prev[i];
@@ -991,7 +1016,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
                        if (!is_gpr(alu->src[src].sel))
                                continue;
 
-                       for (j = 0; j < 5; ++j) {
+                       for (j = 0; j < max_slots; ++j) {
                                if (!prev[j] || !prev[j]->dst.write)
                                        continue;
 
@@ -1019,7 +1044,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
        bc->cf_last->ndw -= align(prev_nliteral, 2);
 
        /* sort instructions */
-       for (i = 0; i < 5; ++i) {
+       for (i = 0; i < max_slots; ++i) {
                slots[i] = result[i];
                if (result[i]) {
                        LIST_DEL(&result[i]->list);
@@ -1032,7 +1057,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
        LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
 
        /* determine new first instruction */
-       for (i = 0; i < 5; ++i) {
+       for (i = 0; i < max_slots; ++i) {
                if (result[i]) {
                        bc->cf_last->curr_bs_head = result[i];
                        break;
@@ -1225,6 +1250,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
                uint32_t literal[4];
                unsigned nliteral;
                struct r600_bc_alu *slots[5];
+               int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;
                r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
                if (r)
                        return r;
@@ -1245,7 +1271,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
                if (r)
                        return r;
 
-               for (i = 0, nliteral = 0; i < 5; i++) {
+               for (i = 0, nliteral = 0; i < max_slots; i++) {
                        if (slots[i]) {
                                r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral);
                                if (r)
@@ -1282,6 +1308,7 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
                return 16;
 
        case CHIPREV_EVERGREEN:
+       case CHIPREV_CAYMAN:
                return 64;
 
        default:
@@ -1290,6 +1317,19 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
        }
 }
 
+static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) /* NOTE: name reads inverted - returns TRUE when bc->cf_last is NOT a clause a vertex fetch can be appended to */
+{ /* bc->cf_last is dereferenced without a NULL check; r600_bc_add_vtx() tests it for NULL before calling */
+       if (bc->chiprev == CHIPREV_CAYMAN) {
+               if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) /* Cayman: only a TC clause is accepted */
+                       return TRUE;
+       } else {
+               if (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && /* other chips: VTX or VTX_TC is accepted */
+                   bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)
+                       return TRUE;
+       }
+       return FALSE; /* last CF is already a suitable fetch clause */
+}
+
 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
 {
        struct r600_bc_vtx *nvtx = r600_bc_vtx();
@@ -1301,15 +1341,17 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
 
        /* cf can contains only alu or only vtx or only tex */
        if (bc->cf_last == NULL ||
-               (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX &&
-                bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) ||
-                bc->force_add_cf) {
+           last_inst_was_vtx_fetch(bc) ||
+           bc->force_add_cf) {
                r = r600_bc_add_cf(bc);
                if (r) {
                        free(nvtx);
                        return r;
                }
-               bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX;
+               if (bc->chiprev == CHIPREV_CAYMAN)
+                       bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC;
+               else
+                       bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX;
        }
        LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
        /* each fetch use 4 dwords */
@@ -1379,14 +1421,21 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
        return 0;
 }
 
+int cm_bc_add_cf_end(struct r600_bc *bc) /* Append a Cayman CF END instruction to bc */
+{
+       return r600_bc_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); /* result of r600_bc_add_cfinst() is passed straight back to the caller */
+}
+
 /* common to all 3 families */
 static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
 {
-       bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+       bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
                        S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
                        S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
-                       S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) |
-                       S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
+                       S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
+       if (bc->chiprev < CHIPREV_CAYMAN)
+               bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
+       id++;
        bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
                                S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
                                S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
@@ -1397,9 +1446,11 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
                                S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
                                S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
                                S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
-       bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) |
-                               S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian) |
-                               S_SQ_VTX_WORD2_MEGA_FETCH(1);
+       bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)|
+                               S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian);
+       if (bc->chiprev < CHIPREV_CAYMAN)
+               bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1);
+       id++;
        bc->bytecode[id++] = 0;
        return 0;
 }
@@ -1601,6 +1652,7 @@ int r600_bc_build(struct r600_bc *bc)
                case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
                case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
                case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+               case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
                        break;
                default:
                        R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -1616,7 +1668,7 @@ int r600_bc_build(struct r600_bc *bc)
                return -ENOMEM;
        LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
                addr = cf->addr;
-               if (bc->chiprev == CHIPREV_EVERGREEN)
+               if (bc->chiprev >= CHIPREV_EVERGREEN)
                        r = eg_bc_cf_build(bc, cf);
                else
                        r = r600_bc_cf_build(bc, cf);
@@ -1640,6 +1692,7 @@ int r600_bc_build(struct r600_bc *bc)
                                        break;
                                case CHIPREV_R700:
                                case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */
+                               case CHIPREV_CAYMAN: /* eg alu is same encoding as r700 */
                                        r = r700_bc_alu_build(bc, alu, addr);
                                        break;
                                default:
@@ -1668,6 +1721,14 @@ int r600_bc_build(struct r600_bc *bc)
                        }
                        break;
                case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+                       if (bc->chiprev == CHIPREV_CAYMAN) {
+                               LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+                                       r = r600_bc_vtx_build(bc, vtx, addr);
+                                       if (r)
+                                               return r;
+                                       addr += 4;
+                               }
+                       }
                        LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
                                r = r600_bc_tex_build(bc, tex, addr);
                                if (r)
@@ -1688,6 +1749,7 @@ int r600_bc_build(struct r600_bc *bc)
                case V_SQ_CF_WORD1_SQ_CF_INST_POP:
                case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
                case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+               case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
                        break;
                default:
                        R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -1752,6 +1814,9 @@ void r600_bc_dump(struct r600_bc *bc)
        case 2:
                chip = 'E';
                break;
+       case 3:
+               chip = 'C';
+               break;
        case 0:
        default:
                chip = '6';
@@ -1818,6 +1883,7 @@ void r600_bc_dump(struct r600_bc *bc)
                case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
                case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
                case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+               case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
                        fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
                        fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
                        id++;
@@ -1920,7 +1986,10 @@ void r600_bc_dump(struct r600_bc *bc)
                        fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
                        fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr);
                        fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x);
-                       fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count);
+                       if (bc->chiprev < CHIPREV_CAYMAN)
+                               fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count);
+                       else
+                               fprintf(stderr, "SEL_Y:%d) ", 0);
                        fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr);
                        fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x);
                        fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y);
index 26d337f..540f45b 100644 (file)
@@ -205,6 +205,8 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
 void r600_bc_dump(struct r600_bc *bc);
 
+int cm_bc_add_cf_end(struct r600_bc *bc);
+
 int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
 
 /* r700_asm.c */
index a85d0bb..b19cc2b 100644 (file)
 #define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT             0x00000027
 #define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE        0x00000028
 
+/* cayman doesn't have VTX */
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_NOP                             0x00000000
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX                             0x00000001
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_TC                              0x00000001
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX                             0x00000002
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_2                          0x00000002
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_GDS                             0x00000003
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START                      0x00000004
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END                        0x00000005
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_WAIT_ACK                        0x0000001a
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_TC_ACK                          0x0000001b
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_VC_ACK                          0x0000001c
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_28                         0x0000001c
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMPTABLE                       0x0000001d
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_GLOBAL_WAVE_SYNC                0x0000001e
 #define     EG_V_SQ_CF_WORD1_SQ_CF_INST_HALT                            0x0000001f
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_HALT                            0x0000001f
+
+/* cayman extras */
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_END                             0x00000020
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_LDS_DEALLOC                     0x00000021
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_PUSH_WQM                        0x00000022
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_POP_WQM                         0x00000023
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_ELSE_WQM                        0x00000024
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_JUMP_ANY                        0x00000025
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE                      0x00000026
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE_WQM                  0x00000027
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT                       0x00000028
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT_AND_SLEEP             0x00000029
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_SET_PRIORITY                    0x00000030
 
 #define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU                         0x00000008
 #define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE             0x00000009
 #define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER               0x0000000A
 #define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER              0x0000000B
 #define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_EXTENDED                    0x0000000C
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE                0x0000000D
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK                   0x0000000E
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE                0x0000000D /* different on CAYMAN */
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK                   0x0000000E /* different on CAYMAN */
 #define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER              0x0000000F
 
+#define     CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_REACTIVATE_BEFORE       0x0000000D
+#define     CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_VALID_PIXEL_MODE        0x0000000E
+
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD                       0x00000000
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL                       0x00000001
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE                  0x00000002
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADDC_UINT                 0x00000052
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUBB_UINT                 0x00000053
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_BARRIER             0x00000054
-#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_BEGIN           0x00000055
-#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_END             0x00000056
+#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_BEGIN           0x00000055 /* not on CAYMAN */
+#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_END             0x00000056 /* not on CAYMAN */
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_MODE                  0x00000057
-#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX0               0x00000058
-#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX1               0x00000059
+#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX0               0x00000058 /* not on CAYMAN */
+#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX1               0x00000059 /* not on CAYMAN */
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_LDS_SIZE              0x0000005A
 
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE                  0x00000081
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT                 0x00000090
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT                0x00000091
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT                0x00000092
-#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT                 0x00000093
-#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT                0x00000094
+#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT                 0x00000093 /* not on CAYMAN */
+#define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT                0x00000094 /* not on CAYMAN */
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_64                  0x00000095
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED_64          0x00000096
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_64              0x00000097
 #define CHIPREV_R600      0
 #define CHIPREV_R700      1
 #define CHIPREV_EVERGREEN 2
+#define CHIPREV_CAYMAN    3
 
-#define BC_INST(bc, x) ((bc)->chiprev == CHIPREV_EVERGREEN ? EG_##x : x)
+#define BC_INST(bc, x) ((bc)->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x)
 
-#define CTX_INST(x) (ctx->bc->chiprev == CHIPREV_EVERGREEN ? EG_##x : x)
+#define CTX_INST(x) (ctx->bc->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x)
 
 #endif
index 080180f..402ccb2 100644 (file)
@@ -262,6 +262,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
        case CHIP_BARTS:
        case CHIP_TURKS:
        case CHIP_CAICOS:
+       case CHIP_CAYMAN:
                evergreen_init_state_functions(rctx);
                if (evergreen_context_init(&rctx->ctx, rctx->radeon)) {
                        r600_destroy_context(&rctx->context);
@@ -336,6 +337,7 @@ static const char *r600_get_family_name(enum radeon_family family)
        case CHIP_BARTS: return "AMD BARTS";
        case CHIP_TURKS: return "AMD TURKS";
        case CHIP_CAICOS: return "AMD CAICOS";
+       case CHIP_CAYMAN: return "AMD CAYMAN";
        default: return "AMD unknown";
        }
 }
index a27f495..39e6d85 100644 (file)
 #include <errno.h>
 #include <byteswap.h>
 
+/* CAYMAN notes
+This explains why many instructions are emitted in loops on CAYMAN.
+
+These 8xx t-slot only ops are implemented in all vector slots:
+    MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
+These 8xx t-slot only opcodes become vector ops, with all four
+slots expecting the arguments on sources a and b. The result is
+broadcast to all channels:
+    MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
+These 8xx t-slot only opcodes become vector ops in the z, y, and
+x slots:
+    EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
+    RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
+    SQRT_IEEE/_64
+    SIN/COS
+The w slot may have an independent co-issued operation, or if the
+result is required to be in the w slot, the opcode above may be
+issued in the w slot as well.
+The compiler must issue the source argument to slots z, y, and x.
+*/
+
+
 int r600_find_vs_semantic_index(struct r600_shader *vs,
                                struct r600_shader *ps, int id)
 {
@@ -185,7 +207,7 @@ struct r600_shader_tgsi_instruction {
        int (*process)(struct r600_shader_ctx *ctx);
 };
 
-static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
+static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
 
 static int tgsi_is_supported(struct r600_shader_ctx *ctx)
@@ -296,7 +318,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
                ctx->shader->input[i].centroid = d->Declaration.Centroid;
                ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
-               if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
+               if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) {
                        /* turn input into interpolate on EG */
                        if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
                                if (ctx->shader->input[i].interpolate > 0) {
@@ -623,13 +645,13 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
        }
        if (ctx.type == TGSI_PROCESSOR_VERTEX) {
                ctx.file_offset[TGSI_FILE_INPUT] = 1;
-               if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
+               if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) {
                        r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
                } else {
                        r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
                }
        }
-       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
+       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) {
                ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
        }
        ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
@@ -683,7 +705,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
                                goto out_err;
                        if ((r = tgsi_split_literal_constant(&ctx)))
                                goto out_err;
-                       if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
+                       if (ctx.bc->chiprev == CHIPREV_CAYMAN)
+                               ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
+                       else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN)
                                ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
                        else
                                ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
@@ -804,8 +828,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
        }
        /* set export done on last export of each type */
        for (i = noutput - 1, output_done = 0; i >= 0; i--) {
-               if (i == (noutput - 1)) {
-                       output[i].end_of_program = 1;
+               if (ctx.bc->chiprev < CHIPREV_CAYMAN) {
+                       if (i == (noutput - 1)) {
+                               output[i].end_of_program = 1;
+                       }
                }
                if (!(output_done & (1 << output[i].type))) {
                        output_done |= (1 << output[i].type);
@@ -818,6 +844,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
                if (r)
                        goto out_err;
        }
+       /* add program end */
+       if (ctx.bc->chiprev == CHIPREV_CAYMAN)
+               cm_bc_add_cf_end(ctx.bc);
+
        free(ctx.literals);
        tgsi_parse_free(&ctx.parse);
        return 0;
@@ -937,6 +967,31 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
        return tgsi_op2_s(ctx, 1);
 }
 
+static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) /* Emit ctx->inst_info->r600_opcode once per slot (3 slots, or 4 when W is written); returns 0 or the first r600_bc_add_alu() error */
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int i, j, r;
+       struct r600_bc_alu alu;
+       int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; /* bit 3 of the writemask selects a fourth slot */
+       
+       for (i = 0 ; i < last_slot; i++) {
+               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               alu.inst = ctx->inst_info->r600_opcode;
+               for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                       r600_bc_src(&alu.src[j], &ctx->src[j], 0); /* every slot is given the same sources, built with index 0 */
+               }
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* slot i writes only if bit i of the writemask is set */
+
+               if (i == last_slot - 1)
+                       alu.last = 1; /* flag the final slot emitted by this loop */
+               r = r600_bc_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
 /*
  * r600 - trunc to -PI..PI range
  * r700 - normalize by dividing by 2PI
@@ -1017,6 +1072,37 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+static int cayman_trig(struct r600_shader_ctx *ctx) /* Cayman trig op: run tgsi_setup_trig(), then emit the opcode in 3 slots (4 when W is written); returns 0 or the first error */
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bc_alu alu;
+       int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; /* bit 3 of the writemask selects a fourth slot */
+       int i, r;
+
+       r = tgsi_setup_trig(ctx);
+       if (r)
+               return r;
+
+
+       for (i = 0; i < last_slot; i++) {
+               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               alu.inst = ctx->inst_info->r600_opcode;
+               alu.dst.chan = i; /* NOTE(review): tgsi_dst() below receives the same index i and presumably sets dst.chan itself - confirm whether this is redundant */
+
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* slot i writes only if bit i of the writemask is set */
+
+               alu.src[0].sel = ctx->temp_reg; /* every slot reads channel 0 of temp_reg; presumably the angle prepared by tgsi_setup_trig() - confirm */
+               alu.src[0].chan = 0;
+               if (i == last_slot - 1)
+                       alu.last = 1; /* flag the final slot emitted by this loop */
+               r = r600_bc_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
 static int tgsi_trig(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -1064,7 +1150,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
        struct r600_bc_alu alu;
-       int r;
+       int i, r;
 
        /* We'll only need the trig stuff if we are going to write to the
         * X or Y components of the destination vector.
@@ -1077,30 +1163,69 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 
        /* dst.x = COS */
        if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
-               tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0 ; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
+                               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+                               if (i == 0)
+                                       alu.dst.write = 1;
+                               else
+                                       alu.dst.write = 0;
+                               alu.src[0].sel = ctx->temp_reg;
+                               alu.src[0].chan = 0;
+                               if (i == 2)
+                                       alu.last = 1;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
+                       tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
 
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 0;
-               alu.last = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = 0;
+                       alu.last = 1;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
        }
 
        /* dst.y = SIN */
        if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
-               tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0 ; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
+                               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                               if (i == 1)
+                                       alu.dst.write = 1;
+                               else
+                                       alu.dst.write = 0;
+                               alu.src[0].sel = ctx->temp_reg;
+                               alu.src[0].chan = 0;
+                               if (i == 2)
+                                       alu.last = 1;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
+                       tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
 
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 0;
-               alu.last = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = 0;
+                       alu.last = 1;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
        }
 
        /* dst.z = 0.0; */
@@ -1220,16 +1345,36 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
        {
                int chan;
                int sel;
+               int i;
 
-               /* dst.z = log(src.y) */
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
-               r600_bc_src(&alu.src[0], &ctx->src[0], 1);
-               tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
-               alu.last = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               /* dst.z = log(src.y) */
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
+                               r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+                               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                               if (i == 2) {
+                                       alu.dst.write = 1;
+                                       alu.last = 1;
+                               } else
+                                       alu.dst.write = 0;
+                               
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       /* dst.z = log(src.y) */
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
+                       r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+                       tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
+                       alu.last = 1;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
 
                chan = alu.dst.chan;
                sel = alu.dst.sel;
@@ -1251,16 +1396,35 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
                if (r)
                        return r;
 
-               /* dst.z = exp(tmp.x) */
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 0;
-               tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
-               alu.last = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               /* dst.z = exp(tmp.x) */
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+                               alu.src[0].sel = ctx->temp_reg;
+                               alu.src[0].chan = 0;
+                               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                               if (i == 2) {
+                                       alu.dst.write = 1;
+                                       alu.last = 1;
+                               } else
+                                       alu.dst.write = 0;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       /* dst.z = exp(tmp.x) */
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = 0;
+                       tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
+                       alu.last = 1;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
        }
        return 0;
 }
@@ -1336,6 +1500,56 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
        return tgsi_helper_tempx_replicate(ctx);
 }
 
+static int cayman_pow(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int i, r;
+       struct r600_bc_alu alu;
+       int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
+       /*
+        * Emit TGSI POW for Cayman as POW(a, b) = EXP2(b * LOG2(a)), where a
+        * comes from ctx->src[0] and b from ctx->src[1] (both passed to
+        * r600_bc_src() with a trailing 0 - presumably channel x; confirm).
+        *
+        * last_slot is 4 when bit 3 of the destination write mask is set and
+        * 3 otherwise; it is the number of EXP_IEEE instructions emitted by
+        * the final loop.
+        *
+        * Returns 0 on success, or the first non-zero value returned by
+        * r600_bc_add_alu().
+        *
+        * NOTE(review): the commit description says Cayman has no trans
+        * slot; presumably that is why LOG_IEEE and EXP_IEEE are issued once
+        * per slot instead of once - confirm against the ISA documentation.
+        *
+        * Step 1: three LOG_IEEE instructions with the same source, writing
+        * temp_reg channels 0, 1 and 2; only the third has alu.last set.
+        */
+       for (i = 0; i < 3; i++) {
+               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.chan = i;
+               alu.dst.write = 1;
+               if (i == 2)
+                       alu.last = 1;
+               r = r600_bc_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       /* Step 2: temp = b * LOG2(a); src[1].chan and dst.chan are left at 0 by the memset */
+       memset(&alu, 0, sizeof(struct r600_bc_alu));
+       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
+       r600_bc_src(&alu.src[0], &ctx->src[1], 0);
+       alu.src[1].sel = ctx->temp_reg;
+       alu.dst.sel = ctx->temp_reg;
+       alu.dst.write = 1;
+       alu.last = 1;
+       r = r600_bc_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+
+       for (i = 0; i < last_slot; i++) {
+               /* Step 3: POW(a,b) = EXP2(b * LOG2(a)), read from temp_reg channel 0 (chan left at 0 by the memset) */
+               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+               alu.src[0].sel = ctx->temp_reg;
+               /* tgsi_dst() presumably fills alu.dst for channel i (confirm); dst.write is then set from write mask bit i */
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
+               if (i == last_slot - 1)
+                       alu.last = 1;
+               r = r600_bc_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
 static int tgsi_pow(struct r600_shader_ctx *ctx)
 {
        struct r600_bc_alu alu;
@@ -1554,24 +1768,46 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
 
        if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
+               int out_chan;
                /* Add perspective divide */
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
-               r600_bc_src(&alu.src[0], &ctx->src[0], 3);
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       out_chan = 2;
+                       for (i = 0; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+                               r600_bc_src(&alu.src[0], &ctx->src[0], 3);
 
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 3;
-               alu.last = 1;
-               alu.dst.write = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               if (i == 2)
+                                       alu.last = 1;
+                               if (out_chan == i)
+                                       alu.dst.write = 1;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+
+               } else {
+                       out_chan = 3;
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+                       r600_bc_src(&alu.src[0], &ctx->src[0], 3);
+
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = out_chan;
+                       alu.last = 1;
+                       alu.dst.write = 1;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
 
                for (i = 0; i < 3; i++) {
                        memset(&alu, 0, sizeof(struct r600_bc_alu));
                        alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
                        alu.src[0].sel = ctx->temp_reg;
-                       alu.src[0].chan = 3;
+                       alu.src[0].chan = out_chan;
                        r600_bc_src(&alu.src[1], &ctx->src[0], i);
                        alu.dst.sel = ctx->temp_reg;
                        alu.dst.chan = i;
@@ -1616,18 +1852,37 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                }
 
                /* tmp1.z = RCP_e(|tmp1.z|) */
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 2;
-               alu.src[0].abs = 1;
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 2;
-               alu.dst.write = 1;
-               alu.last = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+                               alu.src[0].sel = ctx->temp_reg;
+                               alu.src[0].chan = 2;
+                               alu.src[0].abs = 1;
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               if (i == 2)
+                                       alu.dst.write = 1;
+                               if (i == 2)
+                                       alu.last = 1;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = 2;
+                       alu.src[0].abs = 1;
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = 2;
+                       alu.dst.write = 1;
+                       alu.last = 1;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
 
                /* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
                 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
@@ -1962,6 +2217,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
        struct r600_bc_alu alu;
        int r;
+       int i;
 
        /* result.x = 2^floor(src); */
        if (inst->Dst[0].Register.WriteMask & 1) {
@@ -1978,20 +2234,38 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
                if (r)
                        return r;
 
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 0;
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+                               alu.src[0].sel = ctx->temp_reg;
+                               alu.src[0].chan = 0;
 
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 0;
-               alu.dst.write = 1;
-               alu.last = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
-       }
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               if (i == 0)
+                                       alu.dst.write = 1;
+                               if (i == 2)
+                                       alu.last = 1;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = 0;
 
-       /* result.y = tmp - floor(tmp); */
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = 0;
+                       alu.dst.write = 1;
+                       alu.last = 1;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+       }
+
+       /* result.y = tmp - floor(tmp); */
        if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
                memset(&alu, 0, sizeof(struct r600_bc_alu));
 
@@ -2016,19 +2290,38 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 
        /* result.z = RoughApprox2ToX(tmp);*/
        if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
-               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+                               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
 
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.write = 1;
-               alu.dst.chan = 2;
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               if (i == 2) {
+                                       alu.dst.write = 1;
+                                       alu.last = 1;
+                               }
 
-               alu.last = 1;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+                       r600_bc_src(&alu.src[0], &ctx->src[0], 0);
 
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.write = 1;
+                       alu.dst.chan = 2;
+
+                       alu.last = 1;
+
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
        }
 
        /* result.w = 1.0;*/
@@ -2055,21 +2348,42 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
        struct r600_bc_alu alu;
        int r;
+       int i;
 
        /* result.x = floor(log2(src)); */
        if (inst->Dst[0].Register.WriteMask & 1) {
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
 
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+                               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+                       
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               if (i == 0) 
+                                       alu.dst.write = 1;
+                               if (i == 2)
+                                       alu.last = 1;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
 
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 0;
-               alu.dst.write = 1;
-               alu.last = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+                       r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+                       
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = 0;
+                       alu.dst.write = 1;
+                       alu.last = 1;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
 
                alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
                alu.src[0].sel = ctx->temp_reg;
@@ -2087,19 +2401,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 
        /* result.y = src.x / (2 ^ floor(log2(src.x))); */
        if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
 
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
 
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 1;
-               alu.dst.write = 1;
-               alu.last = 1;
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+                               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
 
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               if (i == 1)
+                                       alu.dst.write = 1;
+                               if (i == 2)
+                                       alu.last = 1;
+                               
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;       
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+                       r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = 1;
+                       alu.dst.write = 1;
+                       alu.last = 1;
+
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
 
                memset(&alu, 0, sizeof(struct r600_bc_alu));
 
@@ -2116,35 +2451,73 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
                if (r)
                        return r;
 
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 1;
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+                               alu.src[0].sel = ctx->temp_reg;
+                               alu.src[0].chan = 1;
+
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               if (i == 1)
+                                       alu.dst.write = 1;
+                               if (i == 2)
+                                       alu.last = 1;
 
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 1;
-               alu.dst.write = 1;
-               alu.last = 1;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = 1;
 
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = 1;
+                       alu.dst.write = 1;
+                       alu.last = 1;
 
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
 
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 1;
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+                               alu.src[0].sel = ctx->temp_reg;
+                               alu.src[0].chan = 1;
+
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               if (i == 1)
+                                       alu.dst.write = 1;
+                               if (i == 2)
+                                       alu.last = 1;
+                               
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = 1;
 
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 1;
-               alu.dst.write = 1;
-               alu.last = 1;
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = 1;
+                       alu.dst.write = 1;
+                       alu.last = 1;
 
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
 
                memset(&alu, 0, sizeof(struct r600_bc_alu));
 
@@ -2167,19 +2540,39 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 
        /* result.z = log2(src);*/
        if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
+                       for (i = 0; i < 3; i++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
 
-               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+                               alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+                               r600_bc_src(&alu.src[0], &ctx->src[0], 0);
 
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.write = 1;
-               alu.dst.chan = 2;
-               alu.last = 1;
+                               alu.dst.sel = ctx->temp_reg;
+                               if (i == 2)
+                                       alu.dst.write = 1;
+                               alu.dst.chan = i;
+                               if (i == 2)
+                                       alu.last = 1;
 
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+               } else {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+                       alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
+                       r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.write = 1;
+                       alu.dst.chan = 2;
+                       alu.last = 1;
+
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
        }
 
        /* result.w = 1.0; */
@@ -2950,3 +3343,161 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_LAST,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 };
+
+static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
+       {TGSI_OPCODE_ARL,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
+       {TGSI_OPCODE_MOV,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
+       {TGSI_OPCODE_LIT,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
+       {TGSI_OPCODE_RCP,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
+       {TGSI_OPCODE_RSQ,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
+       {TGSI_OPCODE_EXP,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
+       {TGSI_OPCODE_LOG,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
+       {TGSI_OPCODE_MUL,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
+       {TGSI_OPCODE_ADD,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
+       {TGSI_OPCODE_DP3,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+       {TGSI_OPCODE_DP4,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+       {TGSI_OPCODE_DST,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
+       {TGSI_OPCODE_MIN,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
+       {TGSI_OPCODE_MAX,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
+       {TGSI_OPCODE_SLT,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
+       {TGSI_OPCODE_SGE,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
+       {TGSI_OPCODE_MAD,       1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
+       {TGSI_OPCODE_SUB,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
+       {TGSI_OPCODE_LRP,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
+       {TGSI_OPCODE_CND,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       /* gap */
+       {20,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_DP2A,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       /* gap */
+       {22,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {23,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_FRC,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
+       {TGSI_OPCODE_CLAMP,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_FLR,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
+       {TGSI_OPCODE_ROUND,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_EX2,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
+       {TGSI_OPCODE_LG2,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
+       {TGSI_OPCODE_POW,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
+       {TGSI_OPCODE_XPD,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
+       /* gap */
+       {32,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ABS,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
+       {TGSI_OPCODE_RCC,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_DPH,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+       {TGSI_OPCODE_COS,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
+       {TGSI_OPCODE_DDX,       0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
+       {TGSI_OPCODE_DDY,       0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
+       {TGSI_OPCODE_KILP,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
+       {TGSI_OPCODE_PK2H,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_PK2US,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_PK4B,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_PK4UB,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_RFL,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_SEQ,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
+       {TGSI_OPCODE_SFL,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_SGT,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
+       {TGSI_OPCODE_SIN,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
+       {TGSI_OPCODE_SLE,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
+       {TGSI_OPCODE_SNE,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
+       {TGSI_OPCODE_STR,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_TEX,       0, SQ_TEX_INST_SAMPLE, tgsi_tex},
+       {TGSI_OPCODE_TXD,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_TXP,       0, SQ_TEX_INST_SAMPLE, tgsi_tex},
+       {TGSI_OPCODE_UP2H,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UP2US,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UP4B,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UP4UB,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_X2D,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ARA,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ARR,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
+       {TGSI_OPCODE_BRA,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_CAL,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_RET,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_SSG,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
+       {TGSI_OPCODE_CMP,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
+       {TGSI_OPCODE_SCS,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
+       {TGSI_OPCODE_TXB,       0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
+       {TGSI_OPCODE_NRM,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_DIV,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_DP2,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+       {TGSI_OPCODE_TXL,       0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
+       {TGSI_OPCODE_BRK,       0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
+       {TGSI_OPCODE_IF,        0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
+       /* gap */
+       {75,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {76,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ELSE,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
+       {TGSI_OPCODE_ENDIF,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
+       /* gap */
+       {79,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {80,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_PUSHA,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_POPA,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_CEIL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_I2F,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_NOT,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_TRUNC,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
+       {TGSI_OPCODE_SHL,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       /* gap */
+       {88,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_AND,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_OR,        0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_MOD,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_XOR,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_SAD,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_TXF,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_TXQ,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_CONT,      0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
+       {TGSI_OPCODE_EMIT,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ENDPRIM,   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_BGNLOOP,   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
+       {TGSI_OPCODE_BGNSUB,    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ENDLOOP,   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
+       {TGSI_OPCODE_ENDSUB,    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       /* gap */
+       {103,                   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {104,                   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {105,                   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {106,                   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_NOP,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       /* gap */
+       {108,                   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {109,                   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {110,                   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {111,                   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_NRM4,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_CALLNZ,    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_IFC,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_BREAKC,    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_KIL,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
+       {TGSI_OPCODE_END,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
+       /* gap */
+       {118,                   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_F2I,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_IDIV,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_IMAX,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_IMIN,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_INEG,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ISGE,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ISHR,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ISLT,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_F2U,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_U2F,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UADD,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UDIV,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UMAD,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UMAX,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UMIN,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UMOD,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UMUL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_USEQ,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_USGE,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_USHR,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_USLT,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_USNE,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_SWITCH,    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_CASE,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_DEFAULT,   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_LAST,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+};
index 2a2c37f..6fcb54f 100644 (file)
@@ -57,6 +57,19 @@ static const struct r600_reg evergreen_config_reg_list[] = {
        {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0},
 };
 
+
+static const struct r600_reg cayman_config_reg_list[] = { /* Config registers for CHIP_CAYMAN; evergreen_context_init() passes this list to r600_context_add_block() with PKT3_SET_CONFIG_REG and EVERGREEN_CONFIG_REG_OFFSET. */
+       {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0},
+       {R_008A14_PA_CL_ENHANCE, 0, 0, 0},
+       {R_008C00_SQ_CONFIG, 0, 0, 0},
+       {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0},
+       {CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0, 0}, /* CM_ prefix: presumably a Cayman-only register definition -- confirm in evergreend.h */
+       {CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0, 0}, /* CM_ prefix: see note on the previous entry */
+       {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0},
+       {R_009100_SPI_CONFIG_CNTL, 0, 0, 0},
+       {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0},
+};
+
 static const struct r600_reg evergreen_ctl_const_list[] = {
        {R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0, 0},
        {R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0, 0},
@@ -421,6 +434,385 @@ static const struct r600_reg evergreen_context_reg_list[] = {
        {R_028EAC_CB_COLOR11_DIM, 0, 0, 0},
 };
 
+static const struct r600_reg cayman_context_reg_list[] = { /* Context registers for CHIP_CAYMAN; evergreen_context_init() passes this list to r600_context_add_block() with PKT3_SET_CONTEXT_REG and EVERGREEN_CONTEXT_REG_OFFSET. */
+       {R_028000_DB_RENDER_CONTROL, 0, 0, 0},
+       {R_028004_DB_COUNT_CONTROL, 0, 0, 0},
+       {R_028008_DB_DEPTH_VIEW, 0, 0, 0},
+       {R_02800C_DB_RENDER_OVERRIDE, 0, 0, 0},
+       {R_028010_DB_RENDER_OVERRIDE2, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, /* separator entry, not a register; presumably makes r600_context_add_block() start a new block here -- confirm */
+       {R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0, 0}, /* REG_FLAG_NEED_BO: presumably the register needs a buffer-object relocation -- confirm against r600_context_add_block() */
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028028_DB_STENCIL_CLEAR, 0, 0, 0},
+       {R_02802C_DB_DEPTH_CLEAR, 0, 0, 0},
+       {R_028030_PA_SC_SCREEN_SCISSOR_TL, 0, 0, 0},
+       {R_028034_PA_SC_SCREEN_SCISSOR_BR, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028040_DB_Z_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, /* NOTE(review): the meaning of the 3rd/4th fields is defined by struct r600_reg, which is not visible here */
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028044_DB_STENCIL_INFO, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028048_DB_Z_READ_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_02804C_DB_STENCIL_READ_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028050_DB_Z_WRITE_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028054_DB_STENCIL_WRITE_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028058_DB_DEPTH_SIZE, 0, 0, 0},
+       {R_02805C_DB_DEPTH_SLICE, 0, 0, 0},
+       {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0}, /* REG_FLAG_DIRTY_ALWAYS: presumably re-emitted on every flush -- confirm */
+       {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0},
+       {R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0},
+       {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0},
+       {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0},
+       {R_02820C_PA_SC_CLIPRECT_RULE, 0, 0, 0},
+       {R_028210_PA_SC_CLIPRECT_0_TL, 0, 0, 0},
+       {R_028214_PA_SC_CLIPRECT_0_BR, 0, 0, 0},
+       {R_028218_PA_SC_CLIPRECT_1_TL, 0, 0, 0},
+       {R_02821C_PA_SC_CLIPRECT_1_BR, 0, 0, 0},
+       {R_028220_PA_SC_CLIPRECT_2_TL, 0, 0, 0},
+       {R_028224_PA_SC_CLIPRECT_2_BR, 0, 0, 0},
+       {R_028228_PA_SC_CLIPRECT_3_TL, 0, 0, 0},
+       {R_02822C_PA_SC_CLIPRECT_3_BR, 0, 0, 0},
+       {R_028230_PA_SC_EDGERULE, 0, 0, 0},
+       {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0, 0},
+       {R_028238_CB_TARGET_MASK, 0, 0, 0},
+       {R_02823C_CB_SHADER_MASK, 0, 0, 0},
+       {R_028240_PA_SC_GENERIC_SCISSOR_TL, 0, 0, 0},
+       {R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 0},
+       {R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0},
+       {R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0},
+       {R_028350_SX_MISC, 0, 0, 0},
+       {R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0},
+       {R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0},
+       {R_028388_SQ_VTX_SEMANTIC_2, 0, 0, 0},
+       {R_02838C_SQ_VTX_SEMANTIC_3, 0, 0, 0},
+       {R_028390_SQ_VTX_SEMANTIC_4, 0, 0, 0},
+       {R_028394_SQ_VTX_SEMANTIC_5, 0, 0, 0},
+       {R_028398_SQ_VTX_SEMANTIC_6, 0, 0, 0},
+       {R_02839C_SQ_VTX_SEMANTIC_7, 0, 0, 0},
+       {R_0283A0_SQ_VTX_SEMANTIC_8, 0, 0, 0},
+       {R_0283A4_SQ_VTX_SEMANTIC_9, 0, 0, 0},
+       {R_0283A8_SQ_VTX_SEMANTIC_10, 0, 0, 0},
+       {R_0283AC_SQ_VTX_SEMANTIC_11, 0, 0, 0},
+       {R_0283B0_SQ_VTX_SEMANTIC_12, 0, 0, 0},
+       {R_0283B4_SQ_VTX_SEMANTIC_13, 0, 0, 0},
+       {R_0283B8_SQ_VTX_SEMANTIC_14, 0, 0, 0},
+       {R_0283BC_SQ_VTX_SEMANTIC_15, 0, 0, 0},
+       {R_0283C0_SQ_VTX_SEMANTIC_16, 0, 0, 0},
+       {R_0283C4_SQ_VTX_SEMANTIC_17, 0, 0, 0},
+       {R_0283C8_SQ_VTX_SEMANTIC_18, 0, 0, 0},
+       {R_0283CC_SQ_VTX_SEMANTIC_19, 0, 0, 0},
+       {R_0283D0_SQ_VTX_SEMANTIC_20, 0, 0, 0},
+       {R_0283D4_SQ_VTX_SEMANTIC_21, 0, 0, 0},
+       {R_0283D8_SQ_VTX_SEMANTIC_22, 0, 0, 0},
+       {R_0283DC_SQ_VTX_SEMANTIC_23, 0, 0, 0},
+       {R_0283E0_SQ_VTX_SEMANTIC_24, 0, 0, 0},
+       {R_0283E4_SQ_VTX_SEMANTIC_25, 0, 0, 0},
+       {R_0283E8_SQ_VTX_SEMANTIC_26, 0, 0, 0},
+       {R_0283EC_SQ_VTX_SEMANTIC_27, 0, 0, 0},
+       {R_0283F0_SQ_VTX_SEMANTIC_28, 0, 0, 0},
+       {R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0},
+       {R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0},
+       {R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0},
+       {R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, /* note: 0x0282D0/0x0282D4 are listed after the 0x0283xx range, i.e. not in address order */
+       {R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0},
+       {R_028400_VGT_MAX_VTX_INDX, 0, 0, 0},
+       {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0},
+       {R_028408_VGT_INDX_OFFSET, 0, 0, 0},
+       {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0},
+       {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0},
+       {R_028414_CB_BLEND_RED, 0, 0, 0},
+       {R_028418_CB_BLEND_GREEN, 0, 0, 0},
+       {R_02841C_CB_BLEND_BLUE, 0, 0, 0},
+       {R_028420_CB_BLEND_ALPHA, 0, 0, 0},
+       {R_028430_DB_STENCILREFMASK, 0, 0, 0},
+       {R_028434_DB_STENCILREFMASK_BF, 0, 0, 0},
+       {R_028438_SX_ALPHA_REF, 0, 0, 0},
+       {R_02843C_PA_CL_VPORT_XSCALE_0, 0, 0, 0},
+       {R_028440_PA_CL_VPORT_XOFFSET_0, 0, 0, 0},
+       {R_028444_PA_CL_VPORT_YSCALE_0, 0, 0, 0},
+       {R_028448_PA_CL_VPORT_YOFFSET_0, 0, 0, 0},
+       {R_02844C_PA_CL_VPORT_ZSCALE_0, 0, 0, 0},
+       {R_028450_PA_CL_VPORT_ZOFFSET_0, 0, 0, 0},
+       {R_0285BC_PA_CL_UCP0_X, 0, 0, 0},
+       {R_0285C0_PA_CL_UCP0_Y, 0, 0, 0},
+       {R_0285C4_PA_CL_UCP0_Z, 0, 0, 0},
+       {R_0285C8_PA_CL_UCP0_W, 0, 0, 0},
+       {R_0285CC_PA_CL_UCP1_X, 0, 0, 0},
+       {R_0285D0_PA_CL_UCP1_Y, 0, 0, 0},
+       {R_0285D4_PA_CL_UCP1_Z, 0, 0, 0},
+       {R_0285D8_PA_CL_UCP1_W, 0, 0, 0},
+       {R_0285DC_PA_CL_UCP2_X, 0, 0, 0},
+       {R_0285E0_PA_CL_UCP2_Y, 0, 0, 0},
+       {R_0285E4_PA_CL_UCP2_Z, 0, 0, 0},
+       {R_0285E8_PA_CL_UCP2_W, 0, 0, 0},
+       {R_0285EC_PA_CL_UCP3_X, 0, 0, 0},
+       {R_0285F0_PA_CL_UCP3_Y, 0, 0, 0},
+       {R_0285F4_PA_CL_UCP3_Z, 0, 0, 0},
+       {R_0285F8_PA_CL_UCP3_W, 0, 0, 0},
+       {R_0285FC_PA_CL_UCP4_X, 0, 0, 0},
+       {R_028600_PA_CL_UCP4_Y, 0, 0, 0},
+       {R_028604_PA_CL_UCP4_Z, 0, 0, 0},
+       {R_028608_PA_CL_UCP4_W, 0, 0, 0},
+       {R_02860C_PA_CL_UCP5_X, 0, 0, 0},
+       {R_028610_PA_CL_UCP5_Y, 0, 0, 0},
+       {R_028614_PA_CL_UCP5_Z, 0, 0, 0},
+       {R_028618_PA_CL_UCP5_W, 0, 0, 0},
+       {R_02861C_SPI_VS_OUT_ID_0, 0, 0, 0},
+       {R_028620_SPI_VS_OUT_ID_1, 0, 0, 0},
+       {R_028624_SPI_VS_OUT_ID_2, 0, 0, 0},
+       {R_028628_SPI_VS_OUT_ID_3, 0, 0, 0},
+       {R_02862C_SPI_VS_OUT_ID_4, 0, 0, 0},
+       {R_028630_SPI_VS_OUT_ID_5, 0, 0, 0},
+       {R_028634_SPI_VS_OUT_ID_6, 0, 0, 0},
+       {R_028638_SPI_VS_OUT_ID_7, 0, 0, 0},
+       {R_02863C_SPI_VS_OUT_ID_8, 0, 0, 0},
+       {R_028640_SPI_VS_OUT_ID_9, 0, 0, 0},
+       {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0, 0},
+       {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0, 0},
+       {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0, 0},
+       {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0, 0},
+       {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0, 0},
+       {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0, 0},
+       {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0, 0},
+       {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0, 0},
+       {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0, 0},
+       {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0, 0},
+       {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0, 0},
+       {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0, 0},
+       {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0, 0},
+       {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0, 0},
+       {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0, 0},
+       {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0, 0},
+       {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0, 0},
+       {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0, 0},
+       {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0, 0},
+       {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0, 0},
+       {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0, 0},
+       {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0, 0},
+       {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0, 0},
+       {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0, 0},
+       {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0, 0},
+       {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0, 0},
+       {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0, 0},
+       {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0, 0},
+       {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0, 0},
+       {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0, 0},
+       {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0, 0},
+       {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0, 0},
+       {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0},
+       {R_0286C8_SPI_THREAD_GROUPING, 0, 0, 0},
+       {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0, 0},
+       {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0},
+       {R_0286D4_SPI_INTERP_CONTROL_0, 0, 0, 0},
+       {R_0286D8_SPI_INPUT_Z, 0, 0, 0},
+       {R_0286DC_SPI_FOG_CNTL, 0, 0, 0},
+       {R_0286E0_SPI_BARYC_CNTL, 0, 0, 0},
+       {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0, 0},
+       {R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0, 0, 0},
+       {R_028780_CB_BLEND0_CONTROL, 0, 0, 0},
+       {R_028784_CB_BLEND1_CONTROL, 0, 0, 0},
+       {R_028788_CB_BLEND2_CONTROL, 0, 0, 0},
+       {R_02878C_CB_BLEND3_CONTROL, 0, 0, 0},
+       {R_028790_CB_BLEND4_CONTROL, 0, 0, 0},
+       {R_028794_CB_BLEND5_CONTROL, 0, 0, 0},
+       {R_028798_CB_BLEND6_CONTROL, 0, 0, 0},
+       {R_02879C_CB_BLEND7_CONTROL, 0, 0, 0},
+       {R_028800_DB_DEPTH_CONTROL, 0, 0, 0},
+       {CM_R_028804_DB_EQAA, 0, 0, 0}, /* CM_ prefix: presumably a Cayman-only register definition -- confirm in evergreend.h */
+       {R_028808_CB_COLOR_CONTROL, 0, 0, 0},
+       {R_02880C_DB_SHADER_CONTROL, 0, 0, 0},
+       {R_028810_PA_CL_CLIP_CNTL, 0, 0, 0},
+       {R_028814_PA_SU_SC_MODE_CNTL, 0, 0, 0},
+       {R_028818_PA_CL_VTE_CNTL, 0, 0, 0},
+       {R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0},
+       {R_028820_PA_CL_NANINF_CNTL, 0, 0, 0},
+       {R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0, 0, 0},
+       {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, /* shader program start entries carry S_0085F0_SH_ACTION_ENA(1) in the 3rd field; NOTE(review): field semantics are defined by struct r600_reg, not visible here */
+       {R_028844_SQ_PGM_RESOURCES_PS, 0, 0, 0},
+       {R_028848_SQ_PGM_RESOURCES_2_PS, 0, 0, 0},
+       {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0, 0},
+       {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_028860_SQ_PGM_RESOURCES_VS, 0, 0, 0},
+       {R_028864_SQ_PGM_RESOURCES_2_VS, 0, 0, 0},
+       {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_0288A8_SQ_PGM_RESOURCES_FS, 0, 0, 0},
+       {CM_R_0288E8_SQ_LDS_ALLOC, 0, 0, 0},
+       {R_0288EC_SQ_LDS_ALLOC_PS, 0, 0, 0},
+       {R_028900_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0},
+       {R_028904_SQ_GSVS_RING_ITEMSIZE, 0, 0, 0},
+       {R_028908_SQ_ESTMP_RING_ITEMSIZE, 0, 0, 0},
+       {R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0, 0, 0},
+       {R_028910_SQ_VSTMP_RING_ITEMSIZE, 0, 0, 0},
+       {R_028914_SQ_PSTMP_RING_ITEMSIZE, 0, 0, 0},
+       {R_02891C_SQ_GS_VERT_ITEMSIZE, 0, 0, 0},
+       {R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0, 0},
+       {R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0},
+       {R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0},
+       {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF},
+       {R_028A00_PA_SU_POINT_SIZE, 0, 0, 0},
+       {R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0},
+       {R_028A08_PA_SU_LINE_CNTL, 0, 0, 0},
+       {R_028A10_VGT_OUTPUT_PATH_CNTL, 0, 0, 0},
+       {R_028A14_VGT_HOS_CNTL, 0, 0, 0},
+       {R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0, 0, 0},
+       {R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0, 0, 0},
+       {R_028A20_VGT_HOS_REUSE_DEPTH, 0, 0, 0},
+       {R_028A24_VGT_GROUP_PRIM_TYPE, 0, 0, 0},
+       {R_028A28_VGT_GROUP_FIRST_DECR, 0, 0, 0},
+       {R_028A2C_VGT_GROUP_DECR, 0, 0, 0},
+       {R_028A30_VGT_GROUP_VECT_0_CNTL, 0, 0, 0},
+       {R_028A34_VGT_GROUP_VECT_1_CNTL, 0, 0, 0},
+       {R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0, 0, 0},
+       {R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0, 0, 0},
+       {R_028A40_VGT_GS_MODE, 0, 0, 0},
+       {R_028A48_PA_SC_MODE_CNTL_0, 0, 0, 0},
+       {R_028A4C_PA_SC_MODE_CNTL_1, 0, 0, 0},
+       {R_028AB4_VGT_REUSE_OFF, 0, 0, 0},
+       {R_028AB8_VGT_VTX_CNT_EN, 0, 0, 0},
+       {R_028ABC_DB_HTILE_SURFACE, 0, 0, 0},
+       {R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0, 0, 0},
+       {R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0, 0, 0},
+       {R_028AC8_DB_PRELOAD_CONTROL, 0, 0, 0},
+       {R_028B54_VGT_SHADER_STAGES_EN, 0, 0, 0},
+       {R_028B70_DB_ALPHA_TO_MASK, 0, 0, 0},
+       {R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0, 0},
+       {R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0, 0, 0},
+       {R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0, 0},
+       {R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0, 0},
+       {R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0, 0},
+       {R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0, 0},
+       {R_028B94_VGT_STRMOUT_CONFIG, 0, 0, 0},
+       {R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0, 0, 0},
+       {CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0, 0, 0}, /* start of a contiguous CM_ run (0x028BD4..0x028C3C); presumably the MSAA and clipping/guardband registers the commit message says moved on Cayman -- confirm */
+       {CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0, 0, 0},
+       {CM_R_028BDC_PA_SC_LINE_CNTL, 0, 0, 0},
+       {CM_R_028BE0_PA_SC_AA_CONFIG, 0, 0, 0},
+       {CM_R_028BE4_PA_SU_VTX_CNTL, 0, 0, 0},
+       {CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0, 0, 0},
+       {CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0, 0, 0},
+       {CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0, 0, 0},
+       {CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0, 0, 0},
+       {CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0, 0, 0},
+       {CM_R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, 0, 0, 0},
+       {CM_R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 0, 0, 0},
+       {CM_R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 0, 0, 0},
+       {CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0, 0, 0},
+       {CM_R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, 0, 0, 0},
+       {CM_R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 0, 0, 0},
+       {CM_R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 0, 0, 0},
+       {CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0, 0, 0},
+       {CM_R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, 0, 0, 0},
+       {CM_R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 0, 0, 0},
+       {CM_R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 0, 0, 0},
+       {CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0, 0, 0},
+       {CM_R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, 0, 0, 0},
+       {CM_R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, 0, 0, 0},
+       {CM_R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, 0, 0, 0},
+       {CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0, 0, 0},
+       {CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, /* each CB_COLORn group below is preceded by a GROUP_FORCE_NEW_BLOCK entry */
+       {R_028C60_CB_COLOR0_BASE, REG_FLAG_NEED_BO, 0, 0}, /* in every CB_COLORn group, BASE, INFO and ATTRIB are the entries flagged REG_FLAG_NEED_BO */
+       {R_028C64_CB_COLOR0_PITCH, 0, 0, 0},
+       {R_028C68_CB_COLOR0_SLICE, 0, 0, 0},
+       {R_028C6C_CB_COLOR0_VIEW, 0, 0, 0},
+       {R_028C70_CB_COLOR0_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028C74_CB_COLOR0_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028C78_CB_COLOR0_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028C9C_CB_COLOR1_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028CA0_CB_COLOR1_PITCH, 0, 0, 0},
+       {R_028CA4_CB_COLOR1_SLICE, 0, 0, 0},
+       {R_028CA8_CB_COLOR1_VIEW, 0, 0, 0},
+       {R_028CAC_CB_COLOR1_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028CB0_CB_COLOR1_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028CB4_CB_COLOR1_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028CD8_CB_COLOR2_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028CDC_CB_COLOR2_PITCH, 0, 0, 0},
+       {R_028CE0_CB_COLOR2_SLICE, 0, 0, 0},
+       {R_028CE4_CB_COLOR2_VIEW, 0, 0, 0},
+       {R_028CE8_CB_COLOR2_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028CEC_CB_COLOR2_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028CF0_CB_COLOR2_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028D14_CB_COLOR3_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028D18_CB_COLOR3_PITCH, 0, 0, 0},
+       {R_028D1C_CB_COLOR3_SLICE, 0, 0, 0},
+       {R_028D20_CB_COLOR3_VIEW, 0, 0, 0},
+       {R_028D24_CB_COLOR3_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028D28_CB_COLOR3_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028D2C_CB_COLOR3_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028D50_CB_COLOR4_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028D54_CB_COLOR4_PITCH, 0, 0, 0},
+       {R_028D58_CB_COLOR4_SLICE, 0, 0, 0},
+       {R_028D5C_CB_COLOR4_VIEW, 0, 0, 0},
+       {R_028D60_CB_COLOR4_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028D64_CB_COLOR4_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028D68_CB_COLOR4_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028D8C_CB_COLOR5_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028D90_CB_COLOR5_PITCH, 0, 0, 0},
+       {R_028D94_CB_COLOR5_SLICE, 0, 0, 0},
+       {R_028D98_CB_COLOR5_VIEW, 0, 0, 0},
+       {R_028D9C_CB_COLOR5_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028DA0_CB_COLOR5_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028DA4_CB_COLOR5_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028DC8_CB_COLOR6_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028DCC_CB_COLOR6_PITCH, 0, 0, 0},
+       {R_028DD0_CB_COLOR6_SLICE, 0, 0, 0},
+       {R_028DD4_CB_COLOR6_VIEW, 0, 0, 0},
+       {R_028DD8_CB_COLOR6_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028DDC_CB_COLOR6_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028DE0_CB_COLOR6_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028E04_CB_COLOR7_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028E08_CB_COLOR7_PITCH, 0, 0, 0},
+       {R_028E0C_CB_COLOR7_SLICE, 0, 0, 0},
+       {R_028E10_CB_COLOR7_VIEW, 0, 0, 0},
+       {R_028E14_CB_COLOR7_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028E18_CB_COLOR7_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028E1C_CB_COLOR7_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028E40_CB_COLOR8_BASE, REG_FLAG_NEED_BO, 0, 0}, /* from COLOR8 on, the groups are packed back to back at a 0x1C stride (0x028E40, 0x028E5C, 0x028E78, 0x028E94) */
+       {R_028E44_CB_COLOR8_PITCH, 0, 0, 0},
+       {R_028E48_CB_COLOR8_SLICE, 0, 0, 0},
+       {R_028E4C_CB_COLOR8_VIEW, 0, 0, 0},
+       {R_028E50_CB_COLOR8_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028E54_CB_COLOR8_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028E58_CB_COLOR8_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028E5C_CB_COLOR9_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028E60_CB_COLOR9_PITCH, 0, 0, 0},
+       {R_028E64_CB_COLOR9_SLICE, 0, 0, 0},
+       {R_028E68_CB_COLOR9_VIEW, 0, 0, 0},
+       {R_028E6C_CB_COLOR9_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028E70_CB_COLOR9_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028E74_CB_COLOR9_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028E78_CB_COLOR10_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028E7C_CB_COLOR10_PITCH, 0, 0, 0},
+       {R_028E80_CB_COLOR10_SLICE, 0, 0, 0},
+       {R_028E84_CB_COLOR10_VIEW, 0, 0, 0},
+       {R_028E88_CB_COLOR10_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028E8C_CB_COLOR10_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028E90_CB_COLOR10_DIM, 0, 0, 0},
+       {GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
+       {R_028E94_CB_COLOR11_BASE, REG_FLAG_NEED_BO, 0, 0},
+       {R_028E98_CB_COLOR11_PITCH, 0, 0, 0},
+       {R_028E9C_CB_COLOR11_SLICE, 0, 0, 0},
+       {R_028EA0_CB_COLOR11_VIEW, 0, 0, 0},
+       {R_028EA4_CB_COLOR11_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+       {R_028EA8_CB_COLOR11_ATTRIB, REG_FLAG_NEED_BO, 0, 0},
+       {R_028EAC_CB_COLOR11_DIM, 0, 0, 0},
+};
+
 /* SHADER RESOURCE R600/R700 */
 static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset)
 {
@@ -519,12 +911,20 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
        }
 
        /* add blocks */
-       r = r600_context_add_block(ctx, evergreen_config_reg_list,
-                                  Elements(evergreen_config_reg_list), PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET);
+       if (r600_get_family(radeon) == CHIP_CAYMAN) 
+               r = r600_context_add_block(ctx, cayman_config_reg_list,
+                                          Elements(cayman_config_reg_list), PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET);
+       else
+               r = r600_context_add_block(ctx, evergreen_config_reg_list,
+                                          Elements(evergreen_config_reg_list), PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET);
        if (r)
                goto out_err;
-       r = r600_context_add_block(ctx, evergreen_context_reg_list,
-                                  Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
+       if (r600_get_family(radeon) == CHIP_CAYMAN) 
+               r = r600_context_add_block(ctx, cayman_context_reg_list,
+                                          Elements(cayman_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
+       else
+               r = r600_context_add_block(ctx, evergreen_context_reg_list,
+                                          Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
        if (r)
                goto out_err;
        r = r600_context_add_block(ctx, evergreen_ctl_const_list,
index 311324f..cbed89d 100644 (file)
@@ -327,6 +327,11 @@ static struct radeon *radeon_new(int fd, unsigned device)
                /* set default group bytes, overridden by tiling info ioctl */
                radeon->tiling_info.group_bytes = 512;
                break;
+       case CHIP_CAYMAN:
+               radeon->chip_class = CAYMAN;
+               /* set default group bytes, overridden by tiling info ioctl */
+               radeon->tiling_info.group_bytes = 512;
+               break;
        default:
                fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n",
                        __func__, radeon->device);
index 35db37a..b999e45 100644 (file)
@@ -451,6 +451,21 @@ static const struct pci_id radeon_pci_id[] = {
        {0x1002, 0x9805, CHIP_PALM},
        {0x1002, 0x9806, CHIP_PALM},
        {0x1002, 0x9807, CHIP_PALM},
+       {0x1002, 0x6700, CHIP_CAYMAN},
+       {0x1002, 0x6701, CHIP_CAYMAN},
+       {0x1002, 0x6702, CHIP_CAYMAN},
+       {0x1002, 0x6703, CHIP_CAYMAN},
+       {0x1002, 0x6704, CHIP_CAYMAN},
+       {0x1002, 0x6705, CHIP_CAYMAN},
+       {0x1002, 0x6706, CHIP_CAYMAN},
+       {0x1002, 0x6707, CHIP_CAYMAN},
+       {0x1002, 0x6708, CHIP_CAYMAN},
+       {0x1002, 0x6709, CHIP_CAYMAN},
+       {0x1002, 0x6718, CHIP_CAYMAN},
+       {0x1002, 0x6719, CHIP_CAYMAN},
+       {0x1002, 0x671C, CHIP_CAYMAN},
+       {0x1002, 0x671D, CHIP_CAYMAN},
+       {0x1002, 0x671F, CHIP_CAYMAN},
        {0x1002, 0x6720, CHIP_BARTS},
        {0x1002, 0x6721, CHIP_BARTS},
        {0x1002, 0x6722, CHIP_BARTS},