llvmpipe/gallivm/draw: introduce a buffer type.
[platform/upstream/mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27
28 #include "draw_llvm.h"
29
30 #include "draw_context.h"
31 #include "draw_vs.h"
32 #include "draw_gs.h"
33
34 #include "gallivm/lp_bld_arit.h"
35 #include "gallivm/lp_bld_arit_overflow.h"
36 #include "gallivm/lp_bld_bitarit.h"
37 #include "gallivm/lp_bld_gather.h"
38 #include "gallivm/lp_bld_logic.h"
39 #include "gallivm/lp_bld_const.h"
40 #include "gallivm/lp_bld_coro.h"
41 #include "gallivm/lp_bld_swizzle.h"
42 #include "gallivm/lp_bld_struct.h"
43 #include "gallivm/lp_bld_type.h"
44 #include "gallivm/lp_bld_flow.h"
45 #include "gallivm/lp_bld_debug.h"
46 #include "gallivm/lp_bld_tgsi.h"
47 #include "gallivm/lp_bld_nir.h"
48 #include "gallivm/lp_bld_printf.h"
49 #include "gallivm/lp_bld_intr.h"
50 #include "gallivm/lp_bld_init.h"
51 #include "gallivm/lp_bld_type.h"
52 #include "gallivm/lp_bld_pack.h"
53 #include "gallivm/lp_bld_format.h"
54 #include "gallivm/lp_bld_misc.h"
55 #include "tgsi/tgsi_exec.h"
56 #include "tgsi/tgsi_dump.h"
57
58 #include "util/u_math.h"
59 #include "util/u_pointer.h"
60 #include "util/u_string.h"
61 #include "nir_serialize.h"
62 #include "util/mesa-sha1.h"
63 #define DEBUG_STORE 0
64
65
/*
 * Forward declaration.  The function is static, so its definition must
 * live in this translation unit.
 */
static void draw_llvm_generate(struct draw_llvm *llvm,
                               struct draw_llvm_variant *var);
68
69
70 struct draw_gs_llvm_iface {
71    struct lp_build_gs_iface base;
72
73    struct draw_gs_llvm_variant *variant;
74    LLVMValueRef input;
75 };
76
/**
 * Reinterpret a generic GS interface pointer as the draw-specific wrapper.
 *
 * This is a plain pointer cast; no checking is performed.
 */
static inline const struct draw_gs_llvm_iface *
draw_gs_llvm_iface(const struct lp_build_gs_iface *iface)
{
   const struct draw_gs_llvm_iface *gs_iface =
      (const struct draw_gs_llvm_iface *)iface;

   return gs_iface;
}
82
83 struct draw_tcs_llvm_iface {
84    struct lp_build_tcs_iface base;
85
86    struct draw_tcs_llvm_variant *variant;
87    LLVMValueRef input;
88    LLVMValueRef output;
89 };
90
/**
 * Reinterpret a generic TCS interface pointer as the draw-specific wrapper.
 *
 * This is a plain pointer cast; no checking is performed.
 */
static inline const struct draw_tcs_llvm_iface *
draw_tcs_llvm_iface(const struct lp_build_tcs_iface *iface)
{
   const struct draw_tcs_llvm_iface *tcs_iface =
      (const struct draw_tcs_llvm_iface *)iface;

   return tcs_iface;
}
96
97 struct draw_tes_llvm_iface {
98    struct lp_build_tes_iface base;
99
100    struct draw_tes_llvm_variant *variant;
101    LLVMValueRef input;
102 };
103
/**
 * Reinterpret a generic TES interface pointer as the draw-specific wrapper.
 *
 * This is a plain pointer cast; no checking is performed.
 */
static inline const struct draw_tes_llvm_iface *
draw_tes_llvm_iface(const struct lp_build_tes_iface *iface)
{
   const struct draw_tes_llvm_iface *tes_iface =
      (const struct draw_tes_llvm_iface *)iface;

   return tes_iface;
}
109
110 /**
111  * Create LLVM type for draw_vertex_buffer.
112  */
113 static LLVMTypeRef
114 create_jit_dvbuffer_type(struct gallivm_state *gallivm,
115                          const char *struct_name)
116 {
117    LLVMTargetDataRef target = gallivm->target;
118    LLVMTypeRef dvbuffer_type;
119    LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
120    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
121
122    elem_types[DRAW_JIT_DVBUFFER_MAP] =
123       LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
124    elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
125
126    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
127                                            ARRAY_SIZE(elem_types), 0);
128
129    (void) target; /* silence unused var warning for non-debug build */
130    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
131                           target, dvbuffer_type,
132                           DRAW_JIT_DVBUFFER_MAP);
133    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
134                           target, dvbuffer_type,
135                           DRAW_JIT_DVBUFFER_SIZE);
136
137    return dvbuffer_type;
138 }
139
140 /**
141  * Create LLVM type for struct draw_jit_texture
142  */
143 static LLVMTypeRef
144 create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
145 {
146    LLVMTargetDataRef target = gallivm->target;
147    LLVMTypeRef texture_type;
148    LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
149    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
150
151    elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
152    elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
153    elem_types[DRAW_JIT_TEXTURE_DEPTH] =
154    elem_types[DRAW_JIT_TEXTURE_NUM_SAMPLES] =
155    elem_types[DRAW_JIT_TEXTURE_SAMPLE_STRIDE] =
156    elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] =
157    elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
158    elem_types[DRAW_JIT_TEXTURE_BASE] =
159       LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
160    elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
161    elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
162    elem_types[DRAW_JIT_TEXTURE_MIP_OFFSETS] =
163       LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
164
165    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
166                                           ARRAY_SIZE(elem_types), 0);
167
168    (void) target; /* silence unused var warning for non-debug build */
169    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
170                           target, texture_type,
171                           DRAW_JIT_TEXTURE_WIDTH);
172    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
173                           target, texture_type,
174                           DRAW_JIT_TEXTURE_HEIGHT);
175    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
176                           target, texture_type,
177                           DRAW_JIT_TEXTURE_DEPTH);
178    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, base,
179                           target, texture_type,
180                           DRAW_JIT_TEXTURE_BASE);
181    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
182                           target, texture_type,
183                           DRAW_JIT_TEXTURE_ROW_STRIDE);
184    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
185                           target, texture_type,
186                           DRAW_JIT_TEXTURE_IMG_STRIDE);
187    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level,
188                           target, texture_type,
189                           DRAW_JIT_TEXTURE_FIRST_LEVEL);
190    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
191                           target, texture_type,
192                           DRAW_JIT_TEXTURE_LAST_LEVEL);
193    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, mip_offsets,
194                           target, texture_type,
195                           DRAW_JIT_TEXTURE_MIP_OFFSETS);
196    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, num_samples,
197                           target, texture_type,
198                           DRAW_JIT_TEXTURE_NUM_SAMPLES);
199    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, sample_stride,
200                           target, texture_type,
201                           DRAW_JIT_TEXTURE_SAMPLE_STRIDE);
202
203    LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
204
205    return texture_type;
206 }
207
208
209 /**
210  * Create LLVM type for struct draw_jit_sampler
211  */
212 static LLVMTypeRef
213 create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
214 {
215    LLVMTargetDataRef target = gallivm->target;
216    LLVMTypeRef sampler_type;
217    LLVMTypeRef elem_types[DRAW_JIT_SAMPLER_NUM_FIELDS];
218
219    elem_types[DRAW_JIT_SAMPLER_MIN_LOD] =
220    elem_types[DRAW_JIT_SAMPLER_MAX_LOD] =
221    elem_types[DRAW_JIT_SAMPLER_LOD_BIAS] =
222    elem_types[DRAW_JIT_SAMPLER_MAX_ANISO] = LLVMFloatTypeInContext(gallivm->context);
223    elem_types[DRAW_JIT_SAMPLER_BORDER_COLOR] =
224       LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
225
226    sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
227                                           ARRAY_SIZE(elem_types), 0);
228
229    (void) target; /* silence unused var warning for non-debug build */
230    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
231                           target, sampler_type,
232                           DRAW_JIT_SAMPLER_MIN_LOD);
233    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_lod,
234                           target, sampler_type,
235                           DRAW_JIT_SAMPLER_MAX_LOD);
236    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, lod_bias,
237                           target, sampler_type,
238                           DRAW_JIT_SAMPLER_LOD_BIAS);
239    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, border_color,
240                           target, sampler_type,
241                           DRAW_JIT_SAMPLER_BORDER_COLOR);
242    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_aniso,
243                           target, sampler_type,
244                           DRAW_JIT_SAMPLER_MAX_ANISO);
245
246    LP_CHECK_STRUCT_SIZE(struct draw_jit_sampler, target, sampler_type);
247
248    return sampler_type;
249 }
250
251 /**
252  * Create LLVM type for struct draw_jit_texture
253  */
254 static LLVMTypeRef
255 create_jit_image_type(struct gallivm_state *gallivm, const char *struct_name)
256 {
257    LLVMTargetDataRef target = gallivm->target;
258    LLVMTypeRef image_type;
259    LLVMTypeRef elem_types[DRAW_JIT_IMAGE_NUM_FIELDS];
260    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
261
262    elem_types[DRAW_JIT_IMAGE_WIDTH]  =
263    elem_types[DRAW_JIT_IMAGE_HEIGHT] =
264    elem_types[DRAW_JIT_IMAGE_DEPTH] =
265    elem_types[DRAW_JIT_IMAGE_ROW_STRIDE] =
266    elem_types[DRAW_JIT_IMAGE_IMG_STRIDE] =
267    elem_types[DRAW_JIT_IMAGE_NUM_SAMPLES] =
268    elem_types[DRAW_JIT_IMAGE_SAMPLE_STRIDE] = int32_type;
269    elem_types[DRAW_JIT_IMAGE_BASE] =
270       LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
271
272    image_type = LLVMStructTypeInContext(gallivm->context, elem_types,
273                                           ARRAY_SIZE(elem_types), 0);
274
275    (void) target; /* silence unused var warning for non-debug build */
276    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, width,
277                           target, image_type,
278                           DRAW_JIT_IMAGE_WIDTH);
279    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, height,
280                           target, image_type,
281                           DRAW_JIT_IMAGE_HEIGHT);
282    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, depth,
283                           target, image_type,
284                           DRAW_JIT_IMAGE_DEPTH);
285    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, base,
286                           target, image_type,
287                           DRAW_JIT_IMAGE_BASE);
288    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, row_stride,
289                           target, image_type,
290                           DRAW_JIT_IMAGE_ROW_STRIDE);
291    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, img_stride,
292                           target, image_type,
293                           DRAW_JIT_IMAGE_IMG_STRIDE);
294    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, num_samples,
295                           target, image_type,
296                           DRAW_JIT_IMAGE_NUM_SAMPLES);
297    LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, sample_stride,
298                           target, image_type,
299                           DRAW_JIT_IMAGE_SAMPLE_STRIDE);
300
301    LP_CHECK_STRUCT_SIZE(struct draw_jit_image, target, image_type);
302
303    return image_type;
304 }
305
306 /**
307  * Create LLVM type for struct draw_jit_context
308  */
309 static LLVMTypeRef
310 create_jit_context_type(struct gallivm_state *gallivm, const char *struct_name)
311 {
312    LLVMTypeRef buffer_type = lp_build_create_jit_buffer_type(gallivm);
313    LLVMTypeRef texture_type = create_jit_texture_type(gallivm, "texture");
314    LLVMTypeRef sampler_type = create_jit_sampler_type(gallivm, "sampler");
315    LLVMTypeRef image_type = create_jit_image_type(gallivm, "image");
316
317    LLVMTargetDataRef target = gallivm->target;
318    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
319    LLVMTypeRef elem_types[DRAW_JIT_CTX_NUM_FIELDS];
320
321    elem_types[DRAW_JIT_CTX_CONSTANTS] = LLVMArrayType(buffer_type, LP_MAX_TGSI_CONST_BUFFERS);
322    elem_types[DRAW_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES), 0);
323    elem_types[DRAW_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0);
324    elem_types[DRAW_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, PIPE_MAX_SHADER_SAMPLER_VIEWS);
325    elem_types[DRAW_JIT_CTX_SAMPLERS] = LLVMArrayType(sampler_type, PIPE_MAX_SAMPLERS);
326    elem_types[DRAW_JIT_CTX_IMAGES] = LLVMArrayType(image_type, PIPE_MAX_SHADER_IMAGES);
327    elem_types[DRAW_JIT_CTX_SSBOS] = LLVMArrayType(buffer_type, LP_MAX_TGSI_SHADER_BUFFERS);
328    elem_types[DRAW_JIT_CTX_ANISO_FILTER_TABLE] = LLVMPointerType(float_type, 0);
329    LLVMTypeRef context_type = LLVMStructTypeInContext(gallivm->context, elem_types, ARRAY_SIZE(elem_types), 0);
330
331    (void) target; /* silence unused var warning for non-debug build */
332    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, constants,
333                           target, context_type, DRAW_JIT_CTX_CONSTANTS);
334    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
335                           target, context_type, DRAW_JIT_CTX_PLANES);
336    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewports,
337                           target, context_type, DRAW_JIT_CTX_VIEWPORT);
338    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
339                           target, context_type,
340                           DRAW_JIT_CTX_TEXTURES);
341    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, samplers,
342                           target, context_type,
343                           DRAW_JIT_CTX_SAMPLERS);
344    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, images,
345                           target, context_type, DRAW_JIT_CTX_IMAGES);
346    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, ssbos,
347                           target, context_type, DRAW_JIT_CTX_SSBOS);
348    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, aniso_filter_table,
349                           target, context_type, DRAW_JIT_CTX_ANISO_FILTER_TABLE);
350    LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
351                         target, context_type);
352
353    return context_type;
354 }
355
356
357 /**
358  * Create LLVM type for struct draw_gs_jit_context
359  */
360 static LLVMTypeRef
361 create_gs_jit_context_type(struct gallivm_state *gallivm,
362                            unsigned vector_length,
363                            LLVMTypeRef buffer_type,
364                            LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
365                            LLVMTypeRef image_type,
366                            const char *struct_name)
367 {
368    LLVMTargetDataRef target = gallivm->target;
369    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
370    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
371    LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
372    LLVMTypeRef context_type;
373
374    elem_types[DRAW_GS_JIT_CTX_CONSTANTS] = LLVMArrayType(buffer_type, /* constants */
375                                                          LP_MAX_TGSI_CONST_BUFFERS);
376    elem_types[DRAW_GS_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
377                                                                       DRAW_TOTAL_CLIP_PLANES), 0);
378    elem_types[DRAW_GS_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0); /* viewports */
379
380    elem_types[DRAW_GS_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
381                                                         PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
382    elem_types[DRAW_GS_JIT_CTX_SAMPLERS] = LLVMArrayType(sampler_type,
383                                                         PIPE_MAX_SAMPLERS); /* samplers */
384    elem_types[DRAW_GS_JIT_CTX_IMAGES] = LLVMArrayType(image_type,
385                                                       PIPE_MAX_SHADER_IMAGES); /* images */
386    elem_types[DRAW_GS_JIT_CTX_PRIM_LENGTHS] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
387    elem_types[DRAW_GS_JIT_CTX_EMITTED_VERTICES] = LLVMPointerType(LLVMVectorType(int_type,
388                                                                                  vector_length), 0);
389    elem_types[DRAW_GS_JIT_CTX_EMITTED_PRIMS] = LLVMPointerType(LLVMVectorType(int_type,
390                                                                               vector_length), 0);
391
392    elem_types[DRAW_GS_JIT_CTX_SSBOS] = LLVMArrayType(buffer_type, /* ssbos */
393                                                      LP_MAX_TGSI_SHADER_BUFFERS);
394    elem_types[DRAW_GS_JIT_CTX_ANISO_FILTER_TABLE] = LLVMPointerType(float_type, 0); /* aniso table */
395
396    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
397                                           ARRAY_SIZE(elem_types), 0);
398
399    (void) target; /* silence unused var warning for non-debug build */
400    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,
401                           target, context_type, DRAW_GS_JIT_CTX_CONSTANTS);
402    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
403                           target, context_type, DRAW_GS_JIT_CTX_PLANES);
404    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,
405                           target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
406    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
407                           target, context_type,
408                           DRAW_GS_JIT_CTX_TEXTURES);
409    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
410                           target, context_type,
411                           DRAW_GS_JIT_CTX_SAMPLERS);
412    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
413                           target, context_type,
414                           DRAW_GS_JIT_CTX_PRIM_LENGTHS);
415    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
416                           target, context_type,
417                           DRAW_GS_JIT_CTX_EMITTED_VERTICES);
418    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
419                           target, context_type,
420                           DRAW_GS_JIT_CTX_EMITTED_PRIMS);
421    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, ssbos,
422                           target, context_type, DRAW_GS_JIT_CTX_SSBOS);
423    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, images,
424                           target, context_type, DRAW_GS_JIT_CTX_IMAGES);
425    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, aniso_filter_table,
426                           target, context_type, DRAW_GS_JIT_CTX_ANISO_FILTER_TABLE);
427    LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
428                         target, context_type);
429
430    return context_type;
431 }
432
433
434 static LLVMTypeRef
435 create_gs_jit_input_type(struct gallivm_state *gallivm)
436 {
437    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
438    LLVMTypeRef input_array;
439
440    input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
441    input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
442    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
443    input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
444
445    return input_array;
446 }
447
448 /**
449  * Create LLVM type for struct pipe_vertex_buffer
450  */
451 static LLVMTypeRef
452 create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
453                               const char *struct_name)
454 {
455    LLVMTargetDataRef target = gallivm->target;
456    LLVMTypeRef elem_types[4];
457    LLVMTypeRef vb_type;
458
459    elem_types[0] = LLVMInt16TypeInContext(gallivm->context);
460    elem_types[1] = LLVMInt8TypeInContext(gallivm->context);
461    elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
462    elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
463
464    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
465                                      ARRAY_SIZE(elem_types), 0);
466
467    (void) target; /* silence unused var warning for non-debug build */
468    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
469                           target, vb_type, 0);
470    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer,
471                           target, vb_type, 1);
472    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
473                           target, vb_type, 2);
474    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource,
475                           target, vb_type, 3);
476
477    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
478
479    return vb_type;
480 }
481
482
483 /**
484  * Create LLVM type for struct vertex_header;
485  */
486 static LLVMTypeRef
487 create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
488 {
489    LLVMTargetDataRef target = gallivm->target;
490    LLVMTypeRef elem_types[3];
491    LLVMTypeRef vertex_header;
492    char struct_name[24];
493
494    snprintf(struct_name, 23, "vertex_header%d", data_elems);
495
496    elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
497    elem_types[DRAW_JIT_VERTEX_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
498    elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);
499
500    vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
501                                            ARRAY_SIZE(elem_types), 0);
502
503    /* these are bit-fields and we can't take address of them
504       LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
505       target, vertex_header,
506       DRAW_JIT_VERTEX_CLIPMASK);
507       LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
508       target, vertex_header,
509       DRAW_JIT_VERTEX_EDGEFLAG);
510       LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
511       target, vertex_header,
512       DRAW_JIT_VERTEX_PAD);
513       LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
514       target, vertex_header,
515       DRAW_JIT_VERTEX_VERTEX_ID);
516    */
517    (void) target; /* silence unused var warning for non-debug build */
518    LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip_pos,
519                           target, vertex_header,
520                           DRAW_JIT_VERTEX_CLIP_POS);
521    LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
522                           target, vertex_header,
523                           DRAW_JIT_VERTEX_DATA);
524
525    assert(LLVMABISizeOfType(target, vertex_header) ==
526           offsetof(struct vertex_header, data[data_elems]));
527
528    return vertex_header;
529 }
530
531 /**
532  * Create LLVM type for struct draw_tcs_jit_context
533  */
534 static LLVMTypeRef
535 create_tcs_jit_context_type(struct gallivm_state *gallivm,
536                             unsigned vector_length,
537                             LLVMTypeRef buffer_type,
538                             LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
539                             LLVMTypeRef image_type,
540                             const char *struct_name)
541 {
542    LLVMTargetDataRef target = gallivm->target;
543    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
544    LLVMTypeRef elem_types[DRAW_TCS_JIT_CTX_NUM_FIELDS];
545    LLVMTypeRef context_type;
546
547
548    elem_types[DRAW_TCS_JIT_CTX_CONSTANTS] = LLVMArrayType(buffer_type, /* constants */
549                                                           LP_MAX_TGSI_CONST_BUFFERS);
550    elem_types[DRAW_TCS_JIT_CTX_DUMMY1] = LLVMInt32TypeInContext(gallivm->context);
551    elem_types[DRAW_TCS_JIT_CTX_DUMMY2] = LLVMInt32TypeInContext(gallivm->context);
552
553    elem_types[DRAW_TCS_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
554                                                          PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
555    elem_types[DRAW_TCS_JIT_CTX_SAMPLERS] = LLVMArrayType(sampler_type,
556                                                          PIPE_MAX_SAMPLERS); /* samplers */
557    elem_types[DRAW_TCS_JIT_CTX_IMAGES] = LLVMArrayType(image_type,
558                                                        PIPE_MAX_SHADER_IMAGES); /* images */
559
560    elem_types[DRAW_TCS_JIT_CTX_SSBOS] = LLVMArrayType(buffer_type, /* ssbos */
561                                                       LP_MAX_TGSI_SHADER_BUFFERS);
562    elem_types[DRAW_TCS_JIT_CTX_ANISO_FILTER_TABLE] = LLVMPointerType(float_type, 0); /* aniso table */
563
564    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
565                                           ARRAY_SIZE(elem_types), 0);
566
567    (void) target; /* silence unused var warning for non-debug build */
568    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, constants,
569                           target, context_type, DRAW_TCS_JIT_CTX_CONSTANTS);
570    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, textures,
571                           target, context_type,
572                           DRAW_TCS_JIT_CTX_TEXTURES);
573    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, samplers,
574                           target, context_type,
575                           DRAW_TCS_JIT_CTX_SAMPLERS);
576    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, ssbos,
577                           target, context_type, DRAW_TCS_JIT_CTX_SSBOS);
578    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, images,
579                           target, context_type, DRAW_TCS_JIT_CTX_IMAGES);
580    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, aniso_filter_table,
581                           target, context_type, DRAW_TCS_JIT_CTX_ANISO_FILTER_TABLE);
582    LP_CHECK_STRUCT_SIZE(struct draw_tcs_jit_context,
583                         target, context_type);
584
585    return context_type;
586 }
587
588 static LLVMTypeRef
589 create_tcs_jit_input_type(struct gallivm_state *gallivm)
590 {
591    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
592    LLVMTypeRef input_array;
593
594    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
595    input_array = LLVMArrayType(input_array, NUM_TCS_INPUTS); /* num attrs per vertex */
596    input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
597
598    return input_array;
599 }
600
601 static LLVMTypeRef
602 create_tcs_jit_output_type(struct gallivm_state *gallivm)
603 {
604    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
605    LLVMTypeRef output_array;
606
607    output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
608    output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
609    output_array = LLVMPointerType(output_array, 0); /* num vertices per prim */
610
611    return output_array;
612 }
613
614 static LLVMTypeRef
615 create_tes_jit_input_deref_type(struct gallivm_state *gallivm)
616 {
617    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
618    LLVMTypeRef input_array;
619
620    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
621    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
622
623    return input_array;
624 }
625
626 /**
627  * Create LLVM type for struct draw_tes_jit_context
628  */
629 static LLVMTypeRef
630 create_tes_jit_context_type(struct gallivm_state *gallivm,
631                             unsigned vector_length,
632                             LLVMTypeRef buffer_type,
633                             LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
634                             LLVMTypeRef image_type,
635                             const char *struct_name)
636 {
637    LLVMTargetDataRef target = gallivm->target;
638    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
639    LLVMTypeRef elem_types[DRAW_TES_JIT_CTX_NUM_FIELDS];
640    LLVMTypeRef context_type;
641
642    elem_types[DRAW_TES_JIT_CTX_CONSTANTS] = LLVMArrayType(buffer_type, /* constants */
643                                                           LP_MAX_TGSI_CONST_BUFFERS);
644    elem_types[DRAW_TES_JIT_CTX_DUMMY1] = LLVMInt32TypeInContext(gallivm->context);
645    elem_types[DRAW_TES_JIT_CTX_DUMMY2] = LLVMInt32TypeInContext(gallivm->context);
646
647    elem_types[DRAW_TES_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
648                                                          PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
649    elem_types[DRAW_TES_JIT_CTX_SAMPLERS] = LLVMArrayType(sampler_type,
650                                                          PIPE_MAX_SAMPLERS); /* samplers */
651    elem_types[DRAW_TES_JIT_CTX_IMAGES] = LLVMArrayType(image_type,
652                                                        PIPE_MAX_SHADER_IMAGES); /* images */
653
654    elem_types[DRAW_TES_JIT_CTX_SSBOS] = LLVMArrayType(buffer_type, /* ssbos */
655                                                       LP_MAX_TGSI_SHADER_BUFFERS);
656    elem_types[DRAW_TES_JIT_CTX_ANISO_FILTER_TABLE] = LLVMPointerType(float_type, 0); /* aniso table */
657
658    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
659                                           ARRAY_SIZE(elem_types), 0);
660
661    (void) target; /* silence unused var warning for non-debug build */
662    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, constants,
663                           target, context_type, DRAW_TES_JIT_CTX_CONSTANTS);
664    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, textures,
665                           target, context_type,
666                           DRAW_TES_JIT_CTX_TEXTURES);
667    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, samplers,
668                           target, context_type,
669                           DRAW_TES_JIT_CTX_SAMPLERS);
670    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, ssbos,
671                           target, context_type, DRAW_TES_JIT_CTX_SSBOS);
672    LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, images,
673                           target, context_type, DRAW_TES_JIT_CTX_IMAGES);
674    LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, aniso_filter_table,
675                           target, context_type, DRAW_TES_JIT_CTX_ANISO_FILTER_TABLE);
676    LP_CHECK_STRUCT_SIZE(struct draw_tes_jit_context,
677                         target, context_type);
678
679    return context_type;
680 }
681
682 /**
683  * Create LLVM types for various structures.
684  */
685 static void
686 create_jit_types(struct draw_llvm_variant *variant)
687 {
688    struct gallivm_state *gallivm = variant->gallivm;
689
690    variant->context_type = create_jit_context_type(gallivm, "draw_jit_context");
691    variant->context_ptr_type = LLVMPointerType(variant->context_type, 0);
692
693    variant->buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
694    variant->buffer_ptr_type = LLVMPointerType(variant->buffer_type, 0);
695
696    variant->vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
697    variant->vb_ptr_type = LLVMPointerType(variant->vb_type, 0);
698 }
699
700
701 static LLVMTypeRef
702 get_context_ptr_type(struct draw_llvm_variant *variant)
703 {
704    if (!variant->context_ptr_type)
705       create_jit_types(variant);
706    return variant->context_ptr_type;
707 }
708
709
710 static LLVMTypeRef
711 get_buffer_ptr_type(struct draw_llvm_variant *variant)
712 {
713    if (!variant->buffer_ptr_type)
714       create_jit_types(variant);
715    return variant->buffer_ptr_type;
716 }
717
718
719 static LLVMTypeRef
720 get_vb_ptr_type(struct draw_llvm_variant *variant)
721 {
722    if (!variant->vb_ptr_type)
723       create_jit_types(variant);
724    return variant->vb_ptr_type;
725 }
726
727 static LLVMTypeRef
728 get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
729 {
730    assert(variant->vertex_header_ptr_type);
731    return variant->vertex_header_ptr_type;
732 }
733
734
735 /**
736  * Create per-context LLVM info.
737  */
738 struct draw_llvm *
739 draw_llvm_create(struct draw_context *draw, LLVMContextRef context)
740 {
741    struct draw_llvm *llvm;
742
743    if (!lp_build_init())
744       return NULL;
745
746    llvm = CALLOC_STRUCT( draw_llvm );
747    if (!llvm)
748       return NULL;
749
750    llvm->draw = draw;
751
752    llvm->context = context;
753    if (!llvm->context) {
754       llvm->context = LLVMContextCreate();
755
756 #if LLVM_VERSION_MAJOR >= 15
757       LLVMContextSetOpaquePointers(llvm->context, false);
758 #endif
759
760       llvm->context_owned = true;
761    }
762    if (!llvm->context)
763       goto fail;
764
765    llvm->nr_variants = 0;
766    list_inithead(&llvm->vs_variants_list.list);
767
768    llvm->nr_gs_variants = 0;
769    list_inithead(&llvm->gs_variants_list.list);
770
771    llvm->nr_tcs_variants = 0;
772    list_inithead(&llvm->tcs_variants_list.list);
773
774    llvm->nr_tes_variants = 0;
775    list_inithead(&llvm->tes_variants_list.list);
776
777    return llvm;
778
779 fail:
780    draw_llvm_destroy(llvm);
781    return NULL;
782 }
783
784
785 /**
786  * Free per-context LLVM info.
787  */
788 void
789 draw_llvm_destroy(struct draw_llvm *llvm)
790 {
791    if (llvm->context_owned)
792       LLVMContextDispose(llvm->context);
793    llvm->context = NULL;
794
795    /* XXX free other draw_llvm data? */
796    FREE(llvm);
797 }
798
799 static void
800 draw_get_ir_cache_key(struct nir_shader *nir,
801                       const void *key, size_t key_size,
802                       uint32_t val_32bit,
803                       unsigned char ir_sha1_cache_key[20])
804 {
805    struct blob blob = { 0 };
806    unsigned ir_size;
807    void *ir_binary;
808
809    blob_init(&blob);
810    nir_serialize(&blob, nir, true);
811    ir_binary = blob.data;
812    ir_size = blob.size;
813
814    struct mesa_sha1 ctx;
815    _mesa_sha1_init(&ctx);
816    _mesa_sha1_update(&ctx, key, key_size);
817    _mesa_sha1_update(&ctx, ir_binary, ir_size);
818    _mesa_sha1_update(&ctx, &val_32bit, 4);
819    _mesa_sha1_final(&ctx, ir_sha1_cache_key);
820
821    blob_finish(&blob);
822 }
823
824 /**
825  * Create LLVM-generated code for a vertex shader.
826  */
827 struct draw_llvm_variant *
828 draw_llvm_create_variant(struct draw_llvm *llvm,
829                          unsigned num_inputs,
830                          const struct draw_llvm_variant_key *key)
831 {
832    struct draw_llvm_variant *variant;
833    struct llvm_vertex_shader *shader =
834       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
835    char module_name[64];
836    unsigned char ir_sha1_cache_key[20];
837    struct lp_cached_code cached = { 0 };
838    bool needs_caching = false;
839    variant = MALLOC(sizeof *variant +
840                     shader->variant_key_size -
841                     sizeof variant->key);
842    if (!variant)
843       return NULL;
844
845    variant->llvm = llvm;
846    variant->shader = shader;
847    memcpy(&variant->key, key, shader->variant_key_size);
848
849    snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",
850             variant->shader->variants_cached);
851
852    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
853       draw_get_ir_cache_key(shader->base.state.ir.nir,
854                             key,
855                             shader->variant_key_size,
856                             num_inputs,
857                             ir_sha1_cache_key);
858
859       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
860                                          &cached,
861                                          ir_sha1_cache_key);
862       if (!cached.data_size)
863          needs_caching = true;
864    }
865    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
866
867    create_jit_types(variant);
868
869    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
870       if (llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_TGSI)
871          tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);
872       else
873          nir_print_shader(llvm->draw->vs.vertex_shader->state.ir.nir, stderr);
874       draw_llvm_dump_variant_key(&variant->key);
875    }
876
877    variant->vertex_header_type = create_jit_vertex_header(variant->gallivm, num_inputs);
878    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
879
880    draw_llvm_generate(llvm, variant);
881
882    gallivm_compile_module(variant->gallivm);
883
884    variant->jit_func = (draw_jit_vert_func)
885          gallivm_jit_function(variant->gallivm, variant->function);
886
887    if (needs_caching)
888       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
889                                            &cached,
890                                            ir_sha1_cache_key);
891    gallivm_free_ir(variant->gallivm);
892
893    variant->list_item_global.base = variant;
894    variant->list_item_local.base = variant;
895    /*variant->no = */shader->variants_created++;
896    variant->list_item_global.base = variant;
897
898    return variant;
899 }
900
901 static void
902 do_clamp_vertex_color(struct gallivm_state *gallivm,
903                       struct lp_type type,
904                       const struct tgsi_shader_info *info,
905                       LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
906 {
907    LLVMBuilderRef builder = gallivm->builder;
908    LLVMValueRef out;
909    unsigned chan, attrib;
910    struct lp_build_context bld;
911    lp_build_context_init(&bld, gallivm, type);
912
913    for (attrib = 0; attrib < info->num_outputs; ++attrib) {
914       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
915          if (outputs[attrib][chan]) {
916             switch (info->output_semantic_name[attrib]) {
917             case TGSI_SEMANTIC_COLOR:
918             case TGSI_SEMANTIC_BCOLOR:
919                out = LLVMBuildLoad2(builder, LLVMTypeOf(bld.zero), outputs[attrib][chan], "");
920                out = lp_build_clamp(&bld, out, bld.zero, bld.one);
921                LLVMBuildStore(builder, out, outputs[attrib][chan]);
922                break;
923             }
924          }
925       }
926    }
927 }
928
929 static void
930 generate_vs(struct draw_llvm_variant *variant,
931             LLVMBuilderRef builder,
932             struct lp_type vs_type,
933             LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
934             const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
935             const struct lp_bld_tgsi_system_values *system_values,
936             LLVMValueRef context_ptr,
937             const struct lp_build_sampler_soa *draw_sampler,
938             const struct lp_build_image_soa *draw_image,
939             boolean clamp_vertex_color,
940             struct lp_build_mask_context *bld_mask)
941 {
942    struct draw_llvm *llvm = variant->llvm;
943    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
944    LLVMValueRef consts_ptr =
945       draw_jit_context_constants(variant, context_ptr);
946    LLVMValueRef ssbos_ptr =
947       draw_jit_context_ssbos(variant, context_ptr);
948
949    struct lp_build_tgsi_params params;
950    memset(&params, 0, sizeof(params));
951
952    params.type = vs_type;
953    params.mask = bld_mask;
954    params.consts_ptr = consts_ptr;
955    params.system_values = system_values;
956    params.inputs = inputs;
957    params.context_ptr = context_ptr;
958    params.sampler = draw_sampler;
959    params.info = &llvm->draw->vs.vertex_shader->info;
960    params.ssbo_ptr = ssbos_ptr;
961    params.image = draw_image;
962    params.aniso_filter_table = draw_jit_context_aniso_filter_table(variant, context_ptr);
963
964    if (llvm->draw->vs.vertex_shader->state.ir.nir &&
965        llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_NIR)
966       lp_build_nir_soa(variant->gallivm,
967                        llvm->draw->vs.vertex_shader->state.ir.nir,
968                        &params,
969                        outputs);
970    else
971       lp_build_tgsi_soa(variant->gallivm,
972                         tokens,
973                         &params,
974                         outputs);
975
976    if (clamp_vertex_color) {
977       const struct tgsi_shader_info *info = &llvm->draw->vs.vertex_shader->info;
978       do_clamp_vertex_color(variant->gallivm,
979                             vs_type, info,
980                             outputs);
981    }
982 }
983
984
985 static void
986 fetch_instanced(struct gallivm_state *gallivm,
987                 const struct util_format_description *format_desc,
988                 struct lp_type vs_type,
989                 LLVMValueRef vb_stride,
990                 LLVMValueRef map_ptr,
991                 LLVMValueRef buffer_size_adj,
992                 LLVMValueRef *inputs,
993                 LLVMValueRef index)
994 {
995    LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
996    LLVMTypeRef aosf_t, aosi_t;
997    LLVMValueRef zero = LLVMConstNull(i32_t);
998    LLVMBuilderRef builder = gallivm->builder;
999    LLVMValueRef stride, buffer_overflowed, aos, index_valid;
1000    unsigned i;
1001
1002    aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
1003    aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
1004
1005    /* This mul can overflow. Wraparound is ok. */
1006    stride = LLVMBuildMul(builder, vb_stride, index, "");
1007
1008    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
1009                                      stride, buffer_size_adj,
1010                                      "buffer_overflowed");
1011
1012    if (0) {
1013       lp_build_print_value(gallivm, "   instance index = ", index);
1014       lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
1015    }
1016
1017    index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
1018    index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
1019    stride = LLVMBuildAnd(builder, stride, index_valid, "");
1020
1021    aos = lp_build_fetch_rgba_aos(gallivm,
1022                                  format_desc,
1023                                  lp_float32_vec4_type(),
1024                                  FALSE,
1025                                  map_ptr,
1026                                  stride, zero, zero,
1027                                  NULL);
1028
1029    index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
1030    aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
1031    aos = LLVMBuildAnd(builder, aos, index_valid, "");
1032    aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
1033
1034    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1035       LLVMValueRef index = lp_build_const_int32(gallivm, i);
1036       inputs[i] = lp_build_extract_broadcast(gallivm,
1037                                              lp_float32_vec4_type(),
1038                                              vs_type, aos, index);
1039    }
1040 }
1041
1042
1043 static void
1044 fetch_vector(struct gallivm_state *gallivm,
1045              const struct util_format_description *format_desc,
1046              struct lp_type vs_type,
1047              LLVMValueRef vb_stride,
1048              LLVMValueRef map_ptr,
1049              LLVMValueRef buffer_size_adj,
1050              LLVMValueRef *inputs,
1051              LLVMValueRef indices)
1052 {
1053    LLVMBuilderRef builder = gallivm->builder;
1054    struct lp_build_context blduivec;
1055    struct lp_type fetch_type = vs_type;
1056    LLVMValueRef offset, valid_mask;
1057    unsigned i;
1058
1059    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
1060
1061    vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
1062    buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
1063
1064    /* This mul can overflow. Wraparound is ok. */
1065    offset = lp_build_mul(&blduivec, vb_stride, indices);
1066
1067    valid_mask = lp_build_compare(gallivm, blduivec.type,
1068                                  PIPE_FUNC_LESS, offset, buffer_size_adj);
1069
1070    /* not valid elements use offset 0 */
1071    offset = LLVMBuildAnd(builder, offset, valid_mask, "");
1072
1073    if (0) {
1074       lp_build_print_value(gallivm, "   indices = ", indices);
1075       lp_build_print_value(gallivm, "   offsets = ", offset);
1076       lp_build_print_value(gallivm, "   valid_mask = ", valid_mask);
1077    }
1078
1079    /*
1080     * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
1081     * This should always produce better code.
1082     */
1083
1084    /* The type handling is annoying here... */
1085    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
1086        format_desc->channel[0].pure_integer) {
1087       if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
1088          fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
1089       }
1090       else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1091          fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
1092       }
1093    }
1094
1095    lp_build_fetch_rgba_soa(gallivm, format_desc,
1096                            fetch_type, FALSE, map_ptr, offset,
1097                            blduivec.zero, blduivec.zero,
1098                            NULL, inputs);
1099
1100    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1101       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
1102                                    lp_build_vec_type(gallivm, vs_type), "");
1103    }
1104
1105    /* out-of-bound fetches return all zeros */
1106    for (i = 0; i < format_desc->nr_channels; i++) {
1107       inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
1108       inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
1109       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
1110                                    lp_build_vec_type(gallivm, vs_type), "");
1111    }
1112 }
1113
1114
1115 static void
1116 store_aos(struct gallivm_state *gallivm,
1117           LLVMTypeRef io_type,
1118           LLVMValueRef io_ptr,
1119           LLVMValueRef index,
1120           LLVMValueRef value)
1121 {
1122    LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
1123    LLVMBuilderRef builder = gallivm->builder;
1124    LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_type, io_ptr);
1125    LLVMValueRef indices[3];
1126
1127    indices[0] = lp_build_const_int32(gallivm, 0);
1128    indices[1] = index;
1129    indices[2] = lp_build_const_int32(gallivm, 0);
1130
1131    data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
1132    data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
1133
1134 #if DEBUG_STORE
1135    lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
1136 #endif
1137
1138    /* Unaligned store due to the vertex header */
1139    LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
1140 }
1141
1142 /**
1143  * Adjust the mask to architecture endianess. The mask will the store in struct:
1144  *
1145  * struct vertex_header {
1146  *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
1147  *    unsigned edgeflag:1;
1148  *    unsigned pad:1;
1149  *    unsigned vertex_id:16;
1150  *    [...]
1151  * }
1152  *
1153  * On little-endian machine nothing needs to done, however on bit-endian machine
1154  * the mask's fields need to be adjusted with the algorithm:
1155  *
1156  * uint32_t reverse (uint32_t x)
1157  * {
1158  *   return (x >> 16) |              // vertex_id
1159  *          ((x & 0x3fff) << 18) |   // clipmask
1160  *          ((x & 0x4000) << 3) |    // edgeflag
1161  *          ((x & 0x8000) << 1);     // pad
1162  * }
1163  */
1164 static LLVMValueRef
1165 adjust_mask(struct gallivm_state *gallivm,
1166             LLVMValueRef mask)
1167 {
1168 #if UTIL_ARCH_BIG_ENDIAN
1169    LLVMBuilderRef builder = gallivm->builder;
1170    LLVMValueRef vertex_id;
1171    LLVMValueRef clipmask;
1172    LLVMValueRef pad;
1173    LLVMValueRef edgeflag;
1174
1175    vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
1176    clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
1177    clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
1178    if (0) {
1179       pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
1180       pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 1), "");
1181    }
1182    edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
1183    edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 3), "");
1184
1185    mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
1186    if (0) {
1187       mask = LLVMBuildOr(builder, mask, pad, "");
1188    }
1189    mask = LLVMBuildOr(builder, mask, edgeflag, "");
1190 #endif
1191    return mask;
1192 }
1193
1194 static void
1195 store_aos_array(struct gallivm_state *gallivm,
1196                 struct lp_type soa_type,
1197                 LLVMTypeRef io_type,
1198                 LLVMValueRef io_ptr,
1199                 LLVMValueRef *indices,
1200                 LLVMValueRef* aos,
1201                 int attrib,
1202                 int num_outputs,
1203                 LLVMValueRef clipmask,
1204                 boolean need_edgeflag)
1205 {
1206    LLVMBuilderRef builder = gallivm->builder;
1207    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
1208    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1209    LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
1210    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1211    int vector_length = soa_type.length;
1212    int i;
1213
1214    assert(TGSI_NUM_CHANNELS == 4);
1215
1216    for (i = 0; i < vector_length; i++) {
1217       linear_inds[i] = lp_build_const_int32(gallivm, i);
1218       if (indices) {
1219          inds[i] = indices[i];
1220       } else {
1221          inds[i] = linear_inds[i];
1222       }
1223       io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
1224    }
1225
1226    if (attrib == 0) {
1227       /* store vertex header for each of the n vertices */
1228       LLVMValueRef val, cliptmp;
1229       int vertex_id_pad_edgeflag;
1230
1231       /* If this assertion fails, it means we need to update the bit twidding
1232        * code here.  See struct vertex_header in draw_private.h.
1233        */
1234       assert(DRAW_TOTAL_CLIP_PLANES==14);
1235       /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
1236       if (!need_edgeflag) {
1237          vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
1238       }
1239       else {
1240          vertex_id_pad_edgeflag = (0xffff << 16);
1241       }
1242       val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
1243                                    vertex_id_pad_edgeflag);
1244       /* OR with the clipmask */
1245       cliptmp = LLVMBuildOr(builder, val, clipmask, "");
1246       for (i = 0; i < vector_length; i++) {
1247          LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_type, io_ptrs[i]);
1248          val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
1249          val = adjust_mask(gallivm, val);
1250 #if DEBUG_STORE
1251          lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
1252                          io_ptrs[i], inds[i], val);
1253 #endif
1254          LLVMBuildStore(builder, val, id_ptr);
1255       }
1256    }
1257
1258    /* store for each of the n vertices */
1259    for (i = 0; i < vector_length; i++) {
1260       store_aos(gallivm, io_type, io_ptrs[i], attr_index, aos[i]);
1261    }
1262 }
1263
1264
1265 static void
1266 convert_to_aos(struct gallivm_state *gallivm,
1267                LLVMTypeRef io_type,
1268                LLVMValueRef io,
1269                LLVMValueRef *indices,
1270                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1271                LLVMValueRef clipmask,
1272                int num_outputs,
1273                struct lp_type soa_type,
1274                boolean need_edgeflag)
1275 {
1276    LLVMBuilderRef builder = gallivm->builder;
1277    unsigned chan, attrib, i;
1278
1279 #if DEBUG_STORE
1280    lp_build_printf(gallivm, "   # storing begin\n");
1281 #endif
1282    for (attrib = 0; attrib < num_outputs; ++attrib) {
1283       LLVMValueRef soa[TGSI_NUM_CHANNELS];
1284       LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
1285       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1286          if (outputs[attrib][chan]) {
1287             LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
1288             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
1289 #if DEBUG_STORE
1290             lp_build_printf(gallivm, "output %d : %d ",
1291                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
1292                                          attrib, 0),
1293                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
1294                                          chan, 0));
1295             lp_build_print_value(gallivm, "val = ", out);
1296             {
1297                LLVMValueRef iv =
1298                   LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
1299
1300                lp_build_print_value(gallivm, "  ival = ", iv);
1301             }
1302 #endif
1303             soa[chan] = out;
1304          }
1305          else {
1306             soa[chan] = 0;
1307          }
1308       }
1309
1310
1311       if (soa_type.length == TGSI_NUM_CHANNELS) {
1312          lp_build_transpose_aos(gallivm, soa_type, soa, aos);
1313       } else {
1314          lp_build_transpose_aos(gallivm, soa_type, soa, soa);
1315
1316          for (i = 0; i < soa_type.length; ++i) {
1317             aos[i] = lp_build_extract_range(gallivm,
1318                                             soa[i % TGSI_NUM_CHANNELS],
1319                                             (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1320                                             TGSI_NUM_CHANNELS);
1321          }
1322       }
1323
1324       store_aos_array(gallivm,
1325                       soa_type,
1326                       io_type,
1327                       io,
1328                       indices,
1329                       aos,
1330                       attrib,
1331                       num_outputs,
1332                       clipmask,
1333                       need_edgeflag);
1334    }
1335 #if DEBUG_STORE
1336    lp_build_printf(gallivm, "   # storing end\n");
1337 #endif
1338 }
1339
1340
1341 /**
1342  * Stores original vertex positions in clip coordinates
1343  */
1344 static void
1345 store_clip(struct gallivm_state *gallivm,
1346            const struct lp_type vs_type,
1347            LLVMTypeRef io_type,
1348            LLVMValueRef io_ptr,
1349            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1350            int idx)
1351 {
1352    LLVMBuilderRef builder = gallivm->builder;
1353    LLVMValueRef soa[4];
1354    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
1355    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1356    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1357    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1358    LLVMTypeRef clip_ptr_type =
1359       LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
1360                                      4), 0);
1361    int i, j;
1362
1363    for (i = 0; i < vs_type.length; i++) {
1364       inds[i] = lp_build_const_int32(gallivm, i);
1365       io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
1366    }
1367
1368    soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/
1369    soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/
1370    soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
1371    soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/
1372
1373    for (i = 0; i < vs_type.length; i++) {
1374       clip_ptrs[i] = draw_jit_header_clip_pos(gallivm, io_type, io_ptrs[i]);
1375    }
1376
1377    lp_build_transpose_aos(gallivm, vs_type, soa, soa);
1378    for (i = 0; i < vs_type.length; ++i) {
1379       aos[i] = lp_build_extract_range(gallivm,
1380                                       soa[i % TGSI_NUM_CHANNELS],
1381                                       (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1382                                       TGSI_NUM_CHANNELS);
1383    }
1384
1385    for (j = 0; j < vs_type.length; j++) {
1386       LLVMValueRef clip_ptr;
1387
1388       clip_ptr = LLVMBuildPointerCast(builder, clip_ptrs[j], clip_ptr_type, "");
1389
1390       /* Unaligned store */
1391       LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
1392    }
1393 }
1394
1395
1396 /**
1397  * Transforms the outputs for viewport mapping
1398  */
1399 static void
1400 generate_viewport(struct draw_llvm_variant *variant,
1401                   LLVMBuilderRef builder,
1402                   struct lp_type vs_type,
1403                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1404                   LLVMValueRef context_ptr)
1405 {
1406    int i;
1407    struct gallivm_state *gallivm = variant->gallivm;
1408    struct lp_type f32_type = vs_type;
1409    const unsigned pos = variant->llvm->draw->vs.position_output;
1410    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1411    LLVMValueRef out3 = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][3], ""); /*w0 w1 .. wn*/
1412    LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
1413    LLVMValueRef vp_ptr = draw_jit_context_viewports(variant, context_ptr);
1414
1415    /* We treat pipe_viewport_state as a float array */
1416    const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
1417    const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);
1418
1419    /* for 1/w convention*/
1420    out3 = LLVMBuildFDiv(builder, const1, out3, "");
1421    LLVMBuildStore(builder, out3, outputs[pos][3]);
1422
1423    LLVMTypeRef elem_type = lp_build_elem_type(gallivm, vs_type);
1424
1425    /* Viewport Mapping */
1426    for (i=0; i<3; i++) {
1427       LLVMValueRef out = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][i], ""); /*x0 x1 .. xn*/
1428       LLVMValueRef scale;
1429       LLVMValueRef trans;
1430       LLVMValueRef scale_i;
1431       LLVMValueRef trans_i;
1432       LLVMValueRef index;
1433
1434       index = lp_build_const_int32(gallivm, i + scale_index_offset);
1435       scale_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1436
1437       index = lp_build_const_int32(gallivm, i + trans_index_offset);
1438       trans_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1439
1440       scale = lp_build_broadcast(gallivm, vs_type_llvm,
1441                                  LLVMBuildLoad2(builder, elem_type, scale_i, "scale"));
1442       trans = lp_build_broadcast(gallivm, vs_type_llvm,
1443                                  LLVMBuildLoad2(builder, elem_type, trans_i, "trans"));
1444
1445       /* divide by w */
1446       out = LLVMBuildFMul(builder, out, out3, "");
1447       /* mult by scale, add translation */
1448       out = lp_build_fmuladd(builder, out, scale, trans);
1449
1450       /* store transformed outputs */
1451       LLVMBuildStore(builder, out, outputs[pos][i]);
1452    }
1453
1454 }
1455
1456
1457 /**
1458  * Returns clipmask as nxi32 bitmask for the n vertices
1459  */
1460 static LLVMValueRef
1461 generate_clipmask(struct draw_llvm *llvm,
1462                   struct gallivm_state *gallivm,
1463                   struct lp_type vs_type,
1464                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1465                   struct draw_llvm_variant_key *key,
1466                   LLVMTypeRef context_type,
1467                   LLVMValueRef context_ptr,
1468                   boolean *have_clipdist)
1469 {
1470    LLVMBuilderRef builder = gallivm->builder;
1471    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
1472    LLVMValueRef test, temp;
1473    LLVMValueRef zero, shift;
1474    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
1475    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
1476    LLVMValueRef plane1, planes, plane_ptr, sum;
1477    struct lp_type f32_type = vs_type;
1478    struct lp_type i32_type = lp_int_type(vs_type);
1479    const unsigned pos = llvm->draw->vs.position_output;
1480    const unsigned cv = llvm->draw->vs.clipvertex_output;
1481    int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
1482    boolean have_cd = false;
1483    boolean clip_user = key->clip_user;
1484    unsigned ucp_enable = key->ucp_enable;
1485    unsigned cd[2];
1486
1487    cd[0] = llvm->draw->vs.ccdistance_output[0];
1488    cd[1] = llvm->draw->vs.ccdistance_output[1];
1489
1490    if (cd[0] != pos || cd[1] != pos)
1491       have_cd = true;
1492
1493    if (num_written_clipdistance && !clip_user) {
1494       clip_user = true;
1495       ucp_enable = (1 << num_written_clipdistance) - 1;
1496    }
1497
1498    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
1499    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
1500    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
1501    shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
1502
1503    LLVMTypeRef vec_type = LLVMTypeOf(zero);
1504
1505    /*
1506     * load clipvertex and position from correct locations.
1507     * if they are the same just load them once.
1508     */
1509    pos_x = LLVMBuildLoad2(builder, vec_type, outputs[pos][0], ""); /*x0 x1 .. xn */
1510    pos_y = LLVMBuildLoad2(builder, vec_type, outputs[pos][1], ""); /*y0 y1 .. yn */
1511    pos_z = LLVMBuildLoad2(builder, vec_type, outputs[pos][2], ""); /*z0 z1 .. zn */
1512    pos_w = LLVMBuildLoad2(builder, vec_type, outputs[pos][3], ""); /*w0 w1 .. wn */
1513
1514    if (clip_user && cv != pos) {
1515       cv_x = LLVMBuildLoad2(builder, vec_type, outputs[cv][0], ""); /*x0 x1 .. xn */
1516       cv_y = LLVMBuildLoad2(builder, vec_type, outputs[cv][1], ""); /*y0 y1 .. yn */
1517       cv_z = LLVMBuildLoad2(builder, vec_type, outputs[cv][2], ""); /*z0 z1 .. zn */
1518       cv_w = LLVMBuildLoad2(builder, vec_type, outputs[cv][3], ""); /*w0 w1 .. wn */
1519    } else {
1520       cv_x = pos_x;
1521       cv_y = pos_y;
1522       cv_z = pos_z;
1523       cv_w = pos_w;
1524    }
1525
1526    /*
1527     * Be careful with the comparisons and NaNs (using llvm's unordered
1528     * comparisons here).
1529     */
1530    /* Cliptest, for hardwired planes */
1531    /*
1532     * XXX should take guardband into account (currently not in key).
1533     * Otherwise might run the draw pipeline stages for nothing.
1534     */
1535    if (key->clip_xy) {
1536       /* plane 1 */
1537       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
1538       temp = shift;
1539       test = LLVMBuildAnd(builder, test, temp, "");
1540       mask = test;
1541
1542       /* plane 2 */
1543       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
1544       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1545       temp = LLVMBuildShl(builder, temp, shift, "");
1546       test = LLVMBuildAnd(builder, test, temp, "");
1547       mask = LLVMBuildOr(builder, mask, test, "");
1548
1549       /* plane 3 */
1550       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1551       temp = LLVMBuildShl(builder, temp, shift, "");
1552       test = LLVMBuildAnd(builder, test, temp, "");
1553       mask = LLVMBuildOr(builder, mask, test, "");
1554
1555       /* plane 4 */
1556       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1557       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1558       temp = LLVMBuildShl(builder, temp, shift, "");
1559       test = LLVMBuildAnd(builder, test, temp, "");
1560       mask = LLVMBuildOr(builder, mask, test, "");
1561    }
1562
1563    if (key->clip_z) {
1564       temp = lp_build_const_int_vec(gallivm, i32_type, 16);
1565       if (key->clip_halfz) {
1566          /* plane 5 */
1567          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1568          test = LLVMBuildAnd(builder, test, temp, "");
1569          mask = LLVMBuildOr(builder, mask, test, "");
1570       }
1571       else {
1572          /* plane 5 */
1573          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1574          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1575          test = LLVMBuildAnd(builder, test, temp, "");
1576          mask = LLVMBuildOr(builder, mask, test, "");
1577       }
1578       /* plane 6 */
1579       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1580       temp = LLVMBuildShl(builder, temp, shift, "");
1581       test = LLVMBuildAnd(builder, test, temp, "");
1582       mask = LLVMBuildOr(builder, mask, test, "");
1583    }
1584
1585    if (clip_user) {
1586       LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_type, context_ptr);
1587       LLVMValueRef indices[3];
1588       LLVMValueRef is_nan_or_inf;
1589
1590       /* userclip planes */
1591       while (ucp_enable) {
1592          unsigned plane_idx = ffs(ucp_enable)-1;
1593          ucp_enable &= ~(1 << plane_idx);
1594          plane_idx += 6;
1595
1596          if (have_cd && num_written_clipdistance) {
1597             LLVMValueRef clipdist;
1598             int i;
1599             i = plane_idx - 6;
1600
1601             *have_clipdist = TRUE;
1602             if (i < 4) {
1603                clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[0]][i], "");
1604             } else {
1605                clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[1]][i-4], "");
1606             }
1607             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
1608             is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
1609             test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
1610             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1611             test = LLVMBuildAnd(builder, test, temp, "");
1612             mask = LLVMBuildOr(builder, mask, test, "");
1613          } else {
1614             LLVMTypeRef vs_elem_type = lp_build_elem_type(gallivm, vs_type);
1615             LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1616             indices[0] = lp_build_const_int32(gallivm, 0);
1617             indices[1] = lp_build_const_int32(gallivm, plane_idx);
1618
1619             for (int i = 0; i < 4; ++i) {
1620                indices[2] = lp_build_const_int32(gallivm, i);
1621                plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1622                plane1 = LLVMBuildLoad2(builder, vs_elem_type, plane_ptr,
1623                                        (const char *[]){"plane_x", "plane_y", "plane_z", "plane_w"}[i]);
1624                planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1625                if (i == 0) {
1626                   sum = LLVMBuildFMul(builder, planes, cv_x, "");
1627                } else {
1628                   sum = lp_build_fmuladd(builder, planes,
1629                                          (LLVMValueRef[]){cv_x, cv_y, cv_z, cv_w}[i], sum);
1630                }
1631             }
1632
1633             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1634             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1635             test = LLVMBuildAnd(builder, test, temp, "");
1636             mask = LLVMBuildOr(builder, mask, test, "");
1637          }
1638       }
1639    }
1640    if (key->need_edgeflags) {
1641       /*
1642        * This isn't really part of clipmask but stored the same in vertex
1643        * header later, so do it here.
1644        */
1645       unsigned edge_attr = llvm->draw->vs.edgeflag_output;
1646       LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
1647       LLVMValueRef edgeflag = LLVMBuildLoad2(builder, vec_type, outputs[edge_attr][0], "");
1648       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
1649       temp = lp_build_const_int_vec(gallivm, i32_type,
1650                                     1LL << DRAW_TOTAL_CLIP_PLANES);
1651       test = LLVMBuildAnd(builder, test, temp, "");
1652       mask = LLVMBuildOr(builder, mask, test, "");
1653    }
1654    return mask;
1655 }
1656
1657
1658 /**
1659  * Returns boolean if any clipping has occurred
1660  * Used zero/one i8 value to represent boolean
1661  */
1662 static LLVMValueRef
1663 clipmask_booli8(struct gallivm_state *gallivm,
1664                 const struct lp_type vs_type,
1665                 LLVMValueRef clipmask_bool_ptr,
1666                 boolean edgeflag_in_clipmask)
1667 {
1668    LLVMBuilderRef builder = gallivm->builder;
1669    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
1670    LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
1671    LLVMValueRef ret;
1672    struct lp_build_context bldivec;
1673
1674    lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
1675
1676    /*
1677     * We need to invert the edgeflag bit from the clipmask here
1678     * (because the result is really if we want to run the pipeline or not
1679     * and we (may) need it if edgeflag was 0).
1680     */
1681    if (edgeflag_in_clipmask) {
1682       LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
1683                                                  1LL << DRAW_TOTAL_CLIP_PLANES);
1684       clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
1685    }
1686
1687    /*
1688     * XXX: probably should mask off bits from the mask which come from
1689     * vertices which were beyond the count (i.e. indices_valid for
1690     * linear fetches, for elts ones we don't have the correct mask
1691     * right now). Otherwise might run the pipeline for nothing,
1692     * though everything should still work.
1693     */
1694    ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
1695    ret = LLVMBuildZExt(builder, ret, int8_type, "");
1696    return ret;
1697 }
1698
1699 static LLVMValueRef
1700 draw_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
1701                          struct lp_build_context * bld,
1702                          boolean is_vindex_indirect,
1703                          LLVMValueRef vertex_index,
1704                          boolean is_aindex_indirect,
1705                          LLVMValueRef attrib_index,
1706                          LLVMValueRef swizzle_index)
1707 {
1708    const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
1709    struct gallivm_state *gallivm = bld->gallivm;
1710    LLVMBuilderRef builder = gallivm->builder;
1711    LLVMValueRef indices[3];
1712    LLVMValueRef res;
1713    struct lp_type type = bld->type;
1714
1715    if (is_vindex_indirect || is_aindex_indirect) {
1716       int i;
1717       res = bld->zero;
1718       for (i = 0; i < type.length; ++i) {
1719          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
1720          LLVMValueRef vert_chan_index = vertex_index;
1721          LLVMValueRef attr_chan_index = attrib_index;
1722          LLVMValueRef channel_vec, value;
1723
1724          if (is_vindex_indirect) {
1725             vert_chan_index = LLVMBuildExtractElement(builder,
1726                                                       vertex_index, idx, "");
1727          }
1728          if (is_aindex_indirect) {
1729             attr_chan_index = LLVMBuildExtractElement(builder,
1730                                                       attrib_index, idx, "");
1731          }
1732
1733          indices[0] = vert_chan_index;
1734          indices[1] = attr_chan_index;
1735          indices[2] = swizzle_index;
1736
1737          channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");
1738          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
1739          value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
1740
1741          res = LLVMBuildInsertElement(builder, res, value, idx, "");
1742       }
1743    } else {
1744       indices[0] = vertex_index;
1745       indices[1] = attrib_index;
1746       indices[2] = swizzle_index;
1747
1748       res = LLVMBuildGEP(builder, gs->input, indices, 3, "");
1749       res = LLVMBuildLoad(builder, res, "");
1750    }
1751
1752    return res;
1753 }
1754
1755 static void
1756 draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
1757                          struct lp_build_context * bld,
1758                          LLVMValueRef (*outputs)[4],
1759                          LLVMValueRef emitted_vertices_vec,
1760                          LLVMValueRef mask_vec, LLVMValueRef stream_id)
1761 {
1762    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1763    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1764    struct gallivm_state *gallivm = variant->gallivm;
1765    LLVMBuilderRef builder = gallivm->builder;
1766    struct lp_type gs_type = bld->type;
1767    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
1768                                                   lp_int_type(gs_type), 0);
1769    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
1770    LLVMValueRef next_prim_offset =
1771       lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
1772    LLVMValueRef io = variant->io_ptr;
1773    unsigned i;
1774    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
1775
1776    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1777    for (i = 0; i < gs_type.length; ++i) {
1778       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1779       LLVMValueRef currently_emitted =
1780          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
1781       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
1782       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
1783       indices[i] = LLVMBuildSelect(builder, LLVMBuildExtractElement(builder, cond, ind, ""), indices[i],
1784                                    lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary - 1), "");
1785    }
1786
1787    LLVMValueRef stream_idx = LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), "");
1788    LLVMValueRef cnd = LLVMBuildICmp(builder, LLVMIntULT, stream_idx, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1789    struct lp_build_if_state if_ctx;
1790    lp_build_if(&if_ctx, gallivm, cnd);
1791    io = lp_build_pointer_get(builder, io, LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), ""));
1792
1793    if (variant->key.clamp_vertex_color) {
1794       do_clamp_vertex_color(gallivm, gs_type,
1795                             gs_info, outputs);
1796    }
1797    convert_to_aos(gallivm, variant->vertex_header_type,
1798                   io, indices,
1799                   outputs, clipmask,
1800                   gs_info->num_outputs, gs_type,
1801                   FALSE);
1802    lp_build_endif(&if_ctx);
1803 }
1804
1805 static void
1806 draw_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
1807                            struct lp_build_context * bld,
1808                            LLVMValueRef total_emitted_vertices_vec_ptr,
1809                            LLVMValueRef verts_per_prim_vec,
1810                            LLVMValueRef emitted_prims_vec,
1811                            LLVMValueRef mask_vec, unsigned stream)
1812 {
1813    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1814    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1815    struct gallivm_state *gallivm = variant->gallivm;
1816    LLVMBuilderRef builder = gallivm->builder;
1817    LLVMValueRef prim_lengts_ptr =
1818       draw_gs_jit_prim_lengths(variant, variant->context_ptr);
1819    unsigned i;
1820
1821    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1822    for (i = 0; i < bld->type.length; ++i) {
1823       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1824       LLVMValueRef prims_emitted =
1825          LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
1826       LLVMValueRef store_ptr;
1827       LLVMValueRef num_vertices =
1828          LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
1829
1830       LLVMValueRef this_cond = LLVMBuildExtractElement(gallivm->builder, cond, ind, "");
1831       struct lp_build_if_state ifthen;
1832       lp_build_if(&ifthen, gallivm, this_cond);
1833       prims_emitted = LLVMBuildMul(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1834       prims_emitted = LLVMBuildAdd(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, stream), "");
1835       store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, "");
1836       store_ptr = LLVMBuildLoad(builder, store_ptr, "");
1837       store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
1838       LLVMBuildStore(builder, num_vertices, store_ptr);
1839       lp_build_endif(&ifthen);
1840    }
1841 }
1842
1843 static void
1844 draw_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
1845                       LLVMValueRef total_emitted_vertices_vec,
1846                       LLVMValueRef emitted_prims_vec, unsigned stream)
1847 {
1848    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1849    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1850    struct gallivm_state *gallivm = variant->gallivm;
1851    LLVMBuilderRef builder = gallivm->builder;
1852    LLVMValueRef emitted_verts_ptr =
1853       draw_gs_jit_emitted_vertices(variant, variant->context_ptr);
1854    LLVMValueRef emitted_prims_ptr =
1855       draw_gs_jit_emitted_prims(variant, variant->context_ptr);
1856    LLVMValueRef stream_val = lp_build_const_int32(gallivm, stream);
1857
1858    emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &stream_val, 1, "");
1859    emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &stream_val, 1, "");
1860
1861    LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
1862    LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
1863 }
1864
1865 static void
1866 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1867 {
1868    struct gallivm_state *gallivm = variant->gallivm;
1869    LLVMContextRef context = gallivm->context;
1870    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1871    LLVMTypeRef arg_types[13];
1872    unsigned num_arg_types = ARRAY_SIZE(arg_types);
1873    LLVMTypeRef func_type;
1874    LLVMValueRef context_ptr;
1875    LLVMBasicBlockRef block;
1876    LLVMBuilderRef builder;
1877    char func_name[64];
1878    struct lp_type vs_type;
1879    LLVMValueRef count, fetch_elts, start_or_maxelt;
1880    LLVMValueRef vertex_id_offset;
1881    LLVMValueRef stride, step, io_itr;
1882    LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
1883    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1884    LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
1885    LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
1886    LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
1887    LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
1888    LLVMValueRef fake_buf_ptr, fake_buf;
1889
1890    struct draw_context *draw = llvm->draw;
1891    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1892    unsigned i, j;
1893    struct lp_build_context bld, blduivec;
1894    struct lp_build_loop_state lp_loop;
1895    struct lp_build_if_state if_ctx;
1896    const int vector_length = lp_native_vector_width / 32;
1897    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1898    struct lp_build_sampler_soa *sampler = 0;
1899    struct lp_build_image_soa *image = NULL;
1900    LLVMValueRef ret, clipmask_bool_ptr;
1901    struct draw_llvm_variant_key *key = &variant->key;
1902    /* If geometry shader is present we need to skip both the viewport
1903     * transformation and clipping otherwise the inputs to the geometry
1904     * shader will be incorrect.
1905     * The code can't handle vp transform when vs writes vp index neither
1906     * (though this would be fixable here, but couldn't just broadcast
1907     * the values).
1908     */
1909    const boolean bypass_viewport = key->has_gs_or_tes || key->bypass_viewport ||
1910                                    vs_info->writes_viewport_index;
1911    const boolean enable_cliptest = !key->has_gs_or_tes && (key->clip_xy ||
1912                                                     key->clip_z ||
1913                                                     key->clip_user ||
1914                                                     key->need_edgeflags);
1915    LLVMValueRef variant_func;
1916    const unsigned pos = draw->vs.position_output;
1917    const unsigned cv = draw->vs.clipvertex_output;
1918    boolean have_clipdist = FALSE;
1919    struct lp_bld_tgsi_system_values system_values;
1920
1921    memset(&system_values, 0, sizeof(system_values));
1922    memset(&outputs, 0, sizeof(outputs));
1923    snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant");
1924
1925    i = 0;
1926    arg_types[i++] = get_context_ptr_type(variant);       /* context */
1927    arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
1928    arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
1929    arg_types[i++] = int32_type;                          /* count */
1930    arg_types[i++] = int32_type;                          /* start/fetch_elt_max */
1931    arg_types[i++] = int32_type;                          /* stride */
1932    arg_types[i++] = get_vb_ptr_type(variant);            /* pipe_vertex_buffer's */
1933    arg_types[i++] = int32_type;                          /* instance_id */
1934    arg_types[i++] = int32_type;                          /* vertex_id_offset */
1935    arg_types[i++] = int32_type;                          /* start_instance */
1936    arg_types[i++] = LLVMPointerType(int32_type, 0);      /* fetch_elts  */
1937    arg_types[i++] = int32_type;                          /* draw_id */
1938    arg_types[i++] = int32_type;                          /* view_id */
1939
1940    func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
1941                                 arg_types, num_arg_types, 0);
1942
1943    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
1944    variant->function = variant_func;
1945
1946    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
1947    for (i = 0; i < num_arg_types; ++i)
1948       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1949          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
1950
1951    if (gallivm->cache && gallivm->cache->data_size)
1952       return;
1953    context_ptr               = LLVMGetParam(variant_func, 0);
1954    io_ptr                    = LLVMGetParam(variant_func, 1);
1955    vbuffers_ptr              = LLVMGetParam(variant_func, 2);
1956    count                     = LLVMGetParam(variant_func, 3);
1957    /*
1958     * XXX: the maxelt part is unused. Not really useful, since we cannot
1959     * get index buffer overflows due to vsplit (which provides its own
1960     * elts buffer, with a different size than what's passed in here).
1961     */
1962    start_or_maxelt           = LLVMGetParam(variant_func, 4);
1963    /*
1964     * XXX: stride is actually unused. The stride we use is strictly calculated
1965     * from the number of outputs (including the draw_extra outputs).
1966     * Should probably fix some day (we need a new vs just because of extra
1967     * outputs which the generated vs won't touch).
1968     */
1969    stride                    = LLVMGetParam(variant_func, 5);
1970    vb_ptr                    = LLVMGetParam(variant_func, 6);
1971    system_values.instance_id = LLVMGetParam(variant_func, 7);
1972    vertex_id_offset          = LLVMGetParam(variant_func, 8);
1973    system_values.base_instance = LLVMGetParam(variant_func, 9);
1974    fetch_elts                = LLVMGetParam(variant_func, 10);
1975    system_values.draw_id     = LLVMGetParam(variant_func, 11);
1976    system_values.view_index  = LLVMGetParam(variant_func, 12);
1977
1978    lp_build_name(context_ptr, "context");
1979    lp_build_name(io_ptr, "io");
1980    lp_build_name(vbuffers_ptr, "vbuffers");
1981    lp_build_name(count, "count");
1982    lp_build_name(start_or_maxelt, "start_or_maxelt");
1983    lp_build_name(stride, "stride");
1984    lp_build_name(vb_ptr, "vb");
1985    lp_build_name(system_values.instance_id, "instance_id");
1986    lp_build_name(vertex_id_offset, "vertex_id_offset");
1987    lp_build_name(system_values.base_instance, "start_instance");
1988    lp_build_name(fetch_elts, "fetch_elts");
1989    lp_build_name(system_values.draw_id, "draw_id");
1990
1991    /*
1992     * Function body
1993     */
1994
1995    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
1996    builder = gallivm->builder;
1997    LLVMPositionBuilderAtEnd(builder, block);
1998
1999    memset(&vs_type, 0, sizeof vs_type);
2000    vs_type.floating = TRUE; /* floating point values */
2001    vs_type.sign = TRUE;     /* values are signed */
2002    vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
2003    vs_type.width = 32;      /* 32-bit float */
2004    vs_type.length = vector_length;
2005
2006    lp_build_context_init(&bld, gallivm, lp_type_uint(32));
2007    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
2008
2009    /* hold temporary "bool" clipmask */
2010    clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");
2011
2012    fake_buf = lp_build_alloca_undef(gallivm,
2013                  LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");
2014    fake_buf = LLVMBuildBitCast(builder, fake_buf,
2015                  LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
2016    fake_buf_ptr = LLVMBuildGEP2(builder, LLVMInt8TypeInContext(context), fake_buf, &bld.zero, 1, "");
2017
2018    /* code generated texture sampling */
2019    sampler = draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key),
2020                                           MAX2(key->nr_samplers,
2021                                                key->nr_sampler_views));
2022    image = draw_llvm_image_soa_create(draw_llvm_variant_key_images(key),
2023                                       key->nr_images);
2024
2025    step = lp_build_const_int32(gallivm, vector_length);
2026
2027    ind_vec = blduivec.undef;
2028    for (i = 0; i < vs_type.length; i++) {
2029       LLVMValueRef index = lp_build_const_int32(gallivm, i);
2030       ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
2031    }
2032
2033    have_elts = LLVMBuildICmp(builder, LLVMIntNE,
2034                              LLVMConstPointerNull(arg_types[10]), fetch_elts, "");
2035
2036    fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
2037    fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
2038    /*
2039     * Only needed for non-indexed path.
2040     */
2041    start_vec = lp_build_broadcast_scalar(&blduivec, start_or_maxelt);
2042
2043    /*
2044     * Pre-calculate everything which is constant per shader invocation.
2045     */
2046    for (j = 0; j < key->nr_vertex_elements; ++j) {
2047       LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;
2048       LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;
2049       struct pipe_vertex_element *velem = &key->vertex_element[j];
2050       LLVMValueRef vb_index =
2051          lp_build_const_int32(gallivm, velem->vertex_buffer_index);
2052       LLVMValueRef bsize = lp_build_const_int32(gallivm,
2053                                                 util_format_get_blocksize(velem->src_format));
2054       LLVMValueRef src_offset = lp_build_const_int32(gallivm,
2055                                                      velem->src_offset);
2056       struct lp_build_if_state if_ctx;
2057
2058       if (velem->src_format != PIPE_FORMAT_NONE) {
2059          vbuffer_ptr = LLVMBuildGEP2(builder, variant->buffer_type, vbuffers_ptr, &vb_index, 1, "");
2060          vb_info = LLVMBuildGEP2(builder, variant->vb_type, vb_ptr, &vb_index, 1, "");
2061          vb_stride[j] = draw_jit_vbuffer_stride(gallivm, variant->vb_type, vb_info);
2062          vb_stride[j] = LLVMBuildZExt(gallivm->builder, vb_stride[j],
2063                                       LLVMInt32TypeInContext(context), "");
2064          vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, variant->vb_type, vb_info);
2065          map_ptr[j] = draw_jit_dvbuffer_map(gallivm, variant->buffer_type, vbuffer_ptr);
2066          buffer_size = draw_jit_dvbuffer_size(gallivm, variant->buffer_type, vbuffer_ptr);
2067
2068          ofbit = NULL;
2069          /*
2070           * We'll set buffer_size_adj to zero if we have of, so it will
2071           * always overflow later automatically without having to keep ofbit.
2072           * Overflows (with normal wraparound) doing the actual offset
2073           * calculation should be ok, just not for the buffer size calc.
2074           * It would also be possible to detect such overflows and return
2075           * zeros if that happens, but this would be more complex.
2076           */
2077          buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
2078          tmp = lp_build_sub(&bld, bsize, bld.one);
2079          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
2080                                                      &ofbit);
2081          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
2082                                                      buf_offset, &ofbit);
2083
2084          /*
2085           * We can't easily set fake vertex buffers outside the generated code.
2086           * Hence, set fake vertex buffers here instead basically, so fetch
2087           * code can always fetch using offset 0, eliminating all control flow
2088           * inside the main loop.
2089           * (Alternatively, could have control flow per vector skipping fetch
2090           * if ofbit is true.)
2091           */
2092          if (velem->instance_divisor) {
2093             /*
2094              * Index is equal to the start instance plus the number of current
2095              * instance divided by the divisor. In this case we compute it as:
2096              * index = start_instance + (instance_id  / divisor).
2097              * Note we could actually do the fetch here, outside the loop -
2098              * it's all constant, hopefully llvm recognizes this.
2099              */
2100             LLVMValueRef current_instance;
2101             current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
2102                                              lp_build_const_int32(gallivm,
2103                                                                   velem->instance_divisor),
2104                                              "instance_divisor");
2105             instance_index[j] = lp_build_uadd_overflow(gallivm, system_values.base_instance,
2106                                                        current_instance, &ofbit);
2107          }
2108
2109          buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
2110                                               buffer_size_adj[j], "");
2111
2112          LLVMTypeRef byte_type = LLVMInt8TypeInContext(context);
2113          LLVMTypeRef byte_ptr_type = LLVMPointerType(byte_type, 0);
2114          temp_ptr = lp_build_alloca_undef(gallivm, byte_ptr_type, "");
2115
2116          lp_build_if(&if_ctx, gallivm, ofbit);
2117          {
2118             LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
2119          }
2120          lp_build_else(&if_ctx);
2121          {
2122             map_ptr[j] = LLVMBuildGEP2(builder, byte_type, map_ptr[j], &buf_offset, 1, "");
2123             LLVMBuildStore(builder, map_ptr[j], temp_ptr);
2124          }
2125          lp_build_endif(&if_ctx);
2126          map_ptr[j] = LLVMBuildLoad2(builder, byte_ptr_type, temp_ptr, "map_ptr");
2127
2128          if (0) {
2129             lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
2130                             lp_build_const_int32(gallivm, j),
2131                             vb_index, vb_stride[j]);
2132             lp_build_printf(gallivm,
2133                             "   vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",
2134                             vb_buffer_offset, src_offset, buf_offset);
2135             lp_build_printf(gallivm, "   buffer size = %u, blocksize = %u\n",
2136                             buffer_size, bsize);
2137             lp_build_printf(gallivm, "   instance_id = %u\n", system_values.instance_id);
2138          }
2139       }
2140    }
2141
2142    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
2143    {
2144       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
2145       LLVMValueRef io;
2146       LLVMValueRef clipmask;   /* holds the clipmask value */
2147       LLVMValueRef true_index_array, index_store;
2148       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
2149
2150       io_itr = lp_loop.counter;
2151
2152       io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &io_itr, 1, "");
2153 #if DEBUG_STORE
2154       lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
2155                       io_itr, io, lp_loop.counter);
2156 #endif
2157
2158       true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
2159       true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
2160
2161       LLVMValueRef exec_mask = lp_build_cmp(&blduivec, PIPE_FUNC_LEQUAL, true_index_array, fetch_max);
2162       /*
2163        * Limit indices to fetch_max, otherwise might try to access indices
2164        * beyond index buffer (or rather vsplit elt buffer) size.
2165        * Could probably safely (?) skip this for non-indexed draws and
2166        * simplify things minimally (by removing it could combine the ind_vec
2167        * and start_vec adds). I think the only effect for non-indexed draws will
2168        * be that for the invalid elements they will be all fetched from the
2169        * same location as the last valid one, but noone should really care.
2170        */
2171       true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
2172
2173       index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
2174
2175       lp_build_if(&if_ctx, gallivm, have_elts);
2176       {
2177          /*
2178           * Note: you'd expect some comparison/clamp against fetch_elt_max
2179           * here.
2180           * There used to be one here but it was incorrect: overflow was
2181           * detected if index > fetch_elt_max - but the correct condition
2182           * would be index >= fetch_elt_max (since this is just size of elts
2183           * buffer / element size).
2184           * Using the correct condition however will cause failures - due to
2185           * vsplit/vcache code which rebases indices. So, as an example, if
2186           * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
2187           * replace all invalid indices with 0 - which in case of elt_bias
2188           * not being zero will get a different fetch index than the valid
2189           * index 0. So, just rely on vsplit code preventing out-of-bounds
2190           * fetches. This is also why it's safe to do elts fetch even if there
2191           * was no index buffer bound - the real buffer is never seen here, at
2192           * least not if there are index buffer overflows...
2193           */
2194
2195          /*
2196           * XXX should not have to do this, as scale can be handled
2197           * natively by loads (hits asserts though).
2198           */
2199          tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
2200          fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
2201                                        LLVMPointerType(LLVMInt8TypeInContext(context),
2202                                                        0), "");
2203          tmp = lp_build_gather(gallivm, vs_type.length,
2204                                32, bld.type, TRUE,
2205                                fetch_elts, tmp, FALSE);
2206          LLVMBuildStore(builder, tmp, index_store);
2207       }
2208       lp_build_else(&if_ctx);
2209       {
2210          tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
2211          LLVMBuildStore(builder, tmp, index_store);
2212       }
2213       lp_build_endif(&if_ctx);
2214
2215       true_index_array = LLVMBuildLoad2(builder, blduivec.vec_type, index_store, "");
2216
2217       for (j = 0; j < key->nr_vertex_elements; ++j) {
2218          struct pipe_vertex_element *velem = &key->vertex_element[j];
2219          const struct util_format_description *format_desc =
2220             util_format_description(velem->src_format);
2221
2222          if (format_desc->format == PIPE_FORMAT_NONE) {
2223             for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
2224                inputs[j][i] = lp_build_zero(gallivm, vs_type);
2225             }
2226          }
2227          else if (velem->instance_divisor) {
2228             fetch_instanced(gallivm, format_desc, vs_type,
2229                             vb_stride[j], map_ptr[j],
2230                             buffer_size_adj[j],
2231                             inputs[j], instance_index[j]);
2232          }
2233          else {
2234             fetch_vector(gallivm, format_desc, vs_type,
2235                          vb_stride[j], map_ptr[j],
2236                          buffer_size_adj[j],
2237                          inputs[j], true_index_array);
2238          }
2239       }
2240
2241       struct lp_build_mask_context mask;
2242
2243       lp_build_mask_begin(&mask, gallivm, vs_type, exec_mask);
2244       /* In the paths with elts vertex id has to be unaffected by the
2245        * index bias and because indices inside our elements array have
2246        * already had index bias applied we need to subtract it here to
2247        * get back to the original index.
2248        * in the linear paths vertex id has to be unaffected by the
2249        * original start index and because we abuse the 'start' variable
2250        * to either represent the actual start index or the index at which
2251        * the primitive was split (we split rendering into chunks of at
2252        * most 4095-vertices) we need to back out the original start
2253        * index out of our vertex id here.
2254        * for ARB_shader_draw_parameters, base_vertex should be 0 for non-indexed draws.
2255        */
2256       LLVMValueRef base_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, lp_build_const_int32(gallivm, 0));
2257       system_values.basevertex = lp_build_broadcast_scalar(&blduivec, base_vertex);
2258       /* first vertex is for Vulkan base vertex support */
2259       LLVMValueRef first_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, start_or_maxelt);
2260       system_values.firstvertex = lp_build_broadcast_scalar(&blduivec, first_vertex);
2261       system_values.vertex_id = true_index_array;
2262       system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
2263                                                     lp_build_broadcast_scalar(&blduivec, vertex_id_offset), "");
2264
2265       ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
2266       generate_vs(variant,
2267                   builder,
2268                   vs_type,
2269                   outputs,
2270                   ptr_aos,
2271                   &system_values,
2272                   context_ptr,
2273                   sampler,
2274                   image,
2275                   key->clamp_vertex_color,
2276                   &mask);
2277
2278       lp_build_mask_end(&mask);
2279       if (pos != -1 && cv != -1) {
2280          /* store original positions in clip before further manipulation */
2281          store_clip(gallivm, vs_type, variant->vertex_header_type, io, outputs, pos);
2282
2283          /* do cliptest */
2284          if (enable_cliptest) {
2285             LLVMValueRef temp = LLVMBuildLoad2(builder, blduivec.vec_type, clipmask_bool_ptr, "");
2286             /* allocate clipmask, assign it integer type */
2287             clipmask = generate_clipmask(llvm,
2288                                          gallivm,
2289                                          vs_type,
2290                                          outputs,
2291                                          key,
2292                                          variant->context_type,
2293                                          context_ptr, &have_clipdist);
2294             temp = LLVMBuildOr(builder, clipmask, temp, "");
2295             /* store temporary clipping boolean value */
2296             LLVMBuildStore(builder, temp, clipmask_bool_ptr);
2297          }
2298          else {
2299             clipmask = blduivec.zero;
2300          }
2301
2302          /* do viewport mapping */
2303          if (!bypass_viewport) {
2304             generate_viewport(variant, builder, vs_type, outputs, context_ptr);
2305          }
2306       }
2307       else {
2308          clipmask = blduivec.zero;
2309       }
2310
2311       /* store clipmask in vertex header,
2312        * original positions in clip
2313        * and transformed positions in data
2314        */
2315       convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
2316                      vs_info->num_outputs, vs_type,
2317                      enable_cliptest && key->need_edgeflags);
2318    }
2319    lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
2320
2321    sampler->destroy(sampler);
2322    image->destroy(image);
2323
2324    /* return clipping boolean value for function */
2325    ret = clipmask_booli8(gallivm, vs_type, clipmask_bool_ptr,
2326                          enable_cliptest && key->need_edgeflags);
2327
2328    LLVMBuildRet(builder, ret);
2329
2330    gallivm_verify_function(gallivm, variant_func);
2331 }
2332
2333
2334 struct draw_llvm_variant_key *
2335 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2336 {
2337    unsigned i;
2338    struct draw_llvm_variant_key *key;
2339    struct draw_sampler_static_state *draw_sampler;
2340    struct draw_image_static_state *draw_image;
2341
2342    key = (struct draw_llvm_variant_key *)store;
2343
2344    memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));
2345
2346
2347    /* will have to rig this up properly later */
2348    key->clip_xy = llvm->draw->clip_xy;
2349    key->clip_z = llvm->draw->clip_z;
2350    key->clip_user = llvm->draw->clip_user;
2351    key->bypass_viewport = llvm->draw->bypass_viewport;
2352    key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
2353    /* XXX assumes edgeflag output not at 0 */
2354    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
2355    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
2356    key->has_gs_or_tes = llvm->draw->gs.geometry_shader != NULL || llvm->draw->tes.tess_eval_shader != NULL;
2357    key->num_outputs = draw_total_vs_outputs(llvm->draw);
2358
2359    key->clamp_vertex_color = !key->has_gs_or_tes &&
2360       llvm->draw->rasterizer->clamp_vertex_color;
2361
2362    /* All variants of this shader will have the same value for
2363     * nr_samplers.  Not yet trying to compact away holes in the
2364     * sampler array.
2365     */
2366    key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2367    if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2368       key->nr_sampler_views =
2369          llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2370    }
2371    else {
2372       key->nr_sampler_views = key->nr_samplers;
2373    }
2374
2375    key->nr_images = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2376
2377    /* Presumably all variants of the shader should have the same
2378     * number of vertex elements - ie the number of shader inputs.
2379     * NOTE: we NEED to store the needed number of needed inputs
2380     * here, not the number of provided elements to match keysize
2381     * (and the offset of sampler state in the key).
2382     * If we have excess number of vertex elements, this is valid,
2383     * but the excess ones don't matter.
2384     * If we don't have enough vertex elements (which looks not really
2385     * valid but we'll handle it gracefully) fill out missing ones with
2386     * zero (we'll recognize these later by PIPE_FORMAT_NONE).
2387     */
2388    key->nr_vertex_elements =
2389       llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
2390
2391    if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) {
2392       debug_printf("draw: vs with %d inputs but only have %d vertex elements\n",
2393                    key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements);
2394       memset(key->vertex_element, 0,
2395              sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
2396    }
2397    memcpy(key->vertex_element,
2398           llvm->draw->pt.vertex_element,
2399           sizeof(struct pipe_vertex_element) *
2400              MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements));
2401
2402    draw_sampler = draw_llvm_variant_key_samplers(key);
2403    memset(draw_sampler, 0,
2404           MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2405
2406    for (i = 0 ; i < key->nr_samplers; i++) {
2407       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2408                                       llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
2409    }
2410    for (i = 0 ; i < key->nr_sampler_views; i++) {
2411       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2412                                       llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
2413    }
2414
2415    draw_image = draw_llvm_variant_key_images(key);
2416    memset(draw_image, 0,
2417           key->nr_images * sizeof *draw_image);
2418    for (i = 0; i < key->nr_images; i++) {
2419       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2420                                             llvm->draw->images[PIPE_SHADER_VERTEX][i]);
2421    }
2422    return key;
2423 }
2424
2425
2426 void
2427 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
2428 {
2429    unsigned i;
2430    struct draw_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
2431    struct draw_image_static_state *image = draw_llvm_variant_key_images(key);
2432    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2433    debug_printf("clip_xy = %u\n", key->clip_xy);
2434    debug_printf("clip_z = %u\n", key->clip_z);
2435    debug_printf("clip_user = %u\n", key->clip_user);
2436    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
2437    debug_printf("clip_halfz = %u\n", key->clip_halfz);
2438    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
2439    debug_printf("has_gs_or_tes = %u\n", key->has_gs_or_tes);
2440    debug_printf("ucp_enable = %u\n", key->ucp_enable);
2441
2442    for (i = 0 ; i < key->nr_vertex_elements; i++) {
2443       debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
2444       debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
2445       debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
2446       debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
2447    }
2448
2449    for (i = 0 ; i < key->nr_sampler_views; i++) {
2450       debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
2451    }
2452
2453    for (i = 0 ; i < key->nr_images; i++)
2454       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2455 }
2456
2457
2458 void
2459 draw_llvm_set_mapped_texture(struct draw_context *draw,
2460                              enum pipe_shader_type shader_stage,
2461                              unsigned sview_idx,
2462                              uint32_t width, uint32_t height, uint32_t depth,
2463                              uint32_t first_level, uint32_t last_level,
2464                              uint32_t num_samples,
2465                              uint32_t sample_stride,
2466                              const void *base_ptr,
2467                              uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
2468                              uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
2469                              uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
2470 {
2471    unsigned j;
2472    struct draw_jit_texture *jit_tex;
2473
2474    switch (shader_stage) {
2475    case PIPE_SHADER_VERTEX:
2476       assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_context.textures));
2477       jit_tex = &draw->llvm->jit_context.textures[sview_idx];
2478       break;
2479    case PIPE_SHADER_GEOMETRY:
2480       assert(sview_idx < ARRAY_SIZE(draw->llvm->gs_jit_context.textures));
2481       jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];
2482       break;
2483    case PIPE_SHADER_TESS_CTRL:
2484       assert(sview_idx < ARRAY_SIZE(draw->llvm->tcs_jit_context.textures));
2485       jit_tex = &draw->llvm->tcs_jit_context.textures[sview_idx];
2486       break;
2487    case PIPE_SHADER_TESS_EVAL:
2488       assert(sview_idx < ARRAY_SIZE(draw->llvm->tes_jit_context.textures));
2489       jit_tex = &draw->llvm->tes_jit_context.textures[sview_idx];
2490       break;
2491    default:
2492       assert(0);
2493       return;
2494    }
2495
2496    jit_tex->width = width;
2497    jit_tex->height = height;
2498    jit_tex->depth = depth;
2499    jit_tex->first_level = first_level;
2500    jit_tex->last_level = last_level;
2501    jit_tex->base = base_ptr;
2502    jit_tex->num_samples = num_samples;
2503    jit_tex->sample_stride = sample_stride;
2504
2505    for (j = first_level; j <= last_level; j++) {
2506       jit_tex->mip_offsets[j] = mip_offsets[j];
2507       jit_tex->row_stride[j] = row_stride[j];
2508       jit_tex->img_stride[j] = img_stride[j];
2509    }
2510 }
2511
2512 void
2513 draw_llvm_set_mapped_image(struct draw_context *draw,
2514                            enum pipe_shader_type shader_stage,
2515                            unsigned idx,
2516                            uint32_t width, uint32_t height, uint32_t depth,
2517                            const void *base_ptr,
2518                            uint32_t row_stride,
2519                            uint32_t img_stride,
2520                            uint32_t num_samples,
2521                            uint32_t sample_stride)
2522 {
2523    struct draw_jit_image *jit_image;
2524
2525    switch (shader_stage) {
2526    case PIPE_SHADER_VERTEX:
2527       assert(idx < ARRAY_SIZE(draw->llvm->jit_context.images));
2528       jit_image = &draw->llvm->jit_context.images[idx];
2529       break;
2530    case PIPE_SHADER_GEOMETRY:
2531       assert(idx < ARRAY_SIZE(draw->llvm->gs_jit_context.images));
2532       jit_image = &draw->llvm->gs_jit_context.images[idx];
2533       break;
2534    case PIPE_SHADER_TESS_CTRL:
2535       assert(idx < ARRAY_SIZE(draw->llvm->tcs_jit_context.images));
2536       jit_image = &draw->llvm->tcs_jit_context.images[idx];
2537       break;
2538    case PIPE_SHADER_TESS_EVAL:
2539       assert(idx < ARRAY_SIZE(draw->llvm->tes_jit_context.images));
2540       jit_image = &draw->llvm->tes_jit_context.images[idx];
2541       break;
2542    default:
2543       assert(0);
2544       return;
2545    }
2546
2547    jit_image->width = width;
2548    jit_image->height = height;
2549    jit_image->depth = depth;
2550    jit_image->base = base_ptr;
2551
2552    jit_image->row_stride = row_stride;
2553    jit_image->img_stride = img_stride;
2554    jit_image->num_samples = num_samples;
2555    jit_image->sample_stride = sample_stride;
2556 }
2557
2558
2559 void
2560 draw_llvm_set_sampler_state(struct draw_context *draw,
2561                             enum pipe_shader_type shader_type)
2562 {
2563    unsigned i;
2564
2565    switch (shader_type) {
2566    case PIPE_SHADER_VERTEX:
2567       for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) {
2568          struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i];
2569
2570          if (draw->samplers[PIPE_SHADER_VERTEX][i]) {
2571             const struct pipe_sampler_state *s
2572                = draw->samplers[PIPE_SHADER_VERTEX][i];
2573             jit_sam->min_lod = s->min_lod;
2574             jit_sam->max_lod = s->max_lod;
2575             jit_sam->lod_bias = s->lod_bias;
2576             jit_sam->max_aniso = s->max_anisotropy;
2577             COPY_4V(jit_sam->border_color, s->border_color.f);
2578          }
2579       }
2580       break;
2581    case PIPE_SHADER_GEOMETRY:
2582       for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {
2583          struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];
2584
2585          if (draw->samplers[PIPE_SHADER_GEOMETRY][i]) {
2586             const struct pipe_sampler_state *s
2587                = draw->samplers[PIPE_SHADER_GEOMETRY][i];
2588             jit_sam->min_lod = s->min_lod;
2589             jit_sam->max_lod = s->max_lod;
2590             jit_sam->lod_bias = s->lod_bias;
2591             jit_sam->max_aniso = s->max_anisotropy;
2592             COPY_4V(jit_sam->border_color, s->border_color.f);
2593          }
2594       }
2595       break;
2596    case PIPE_SHADER_TESS_CTRL:
2597       for (i = 0; i < draw->num_samplers[PIPE_SHADER_TESS_CTRL]; i++) {
2598          struct draw_jit_sampler *jit_sam = &draw->llvm->tcs_jit_context.samplers[i];
2599
2600          if (draw->samplers[PIPE_SHADER_TESS_CTRL][i]) {
2601             const struct pipe_sampler_state *s
2602                = draw->samplers[PIPE_SHADER_TESS_CTRL][i];
2603             jit_sam->min_lod = s->min_lod;
2604             jit_sam->max_lod = s->max_lod;
2605             jit_sam->lod_bias = s->lod_bias;
2606             jit_sam->max_aniso = s->max_anisotropy;
2607             COPY_4V(jit_sam->border_color, s->border_color.f);
2608          }
2609       }
2610       break;
2611    case PIPE_SHADER_TESS_EVAL:
2612       for (i = 0; i < draw->num_samplers[PIPE_SHADER_TESS_EVAL]; i++) {
2613          struct draw_jit_sampler *jit_sam = &draw->llvm->tes_jit_context.samplers[i];
2614
2615          if (draw->samplers[PIPE_SHADER_TESS_EVAL][i]) {
2616             const struct pipe_sampler_state *s
2617                = draw->samplers[PIPE_SHADER_TESS_EVAL][i];
2618             jit_sam->min_lod = s->min_lod;
2619             jit_sam->max_lod = s->max_lod;
2620             jit_sam->lod_bias = s->lod_bias;
2621             jit_sam->max_aniso = s->max_anisotropy;
2622             COPY_4V(jit_sam->border_color, s->border_color.f);
2623          }
2624       }
2625       break;
2626    default:
2627       assert(0);
2628       break;
2629    }
2630 }
2631
2632
2633 void
2634 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
2635 {
2636    struct draw_llvm *llvm = variant->llvm;
2637
2638    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2639       debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",
2640                     variant->shader->variants_cached, llvm->nr_variants);
2641    }
2642
2643    gallivm_destroy(variant->gallivm);
2644
2645    list_del(&variant->list_item_local.list);
2646    variant->shader->variants_cached--;
2647    list_del(&variant->list_item_global.list);
2648    llvm->nr_variants--;
2649    FREE(variant);
2650 }
2651
2652
2653 /**
2654  * Create LLVM types for various structures.
2655  */
2656 static void
2657 create_gs_jit_types(struct draw_gs_llvm_variant *var)
2658 {
2659    struct gallivm_state *gallivm = var->gallivm;
2660    LLVMTypeRef texture_type, sampler_type, image_type, buffer_type;
2661
2662    texture_type = create_jit_texture_type(gallivm, "texture");
2663    sampler_type = create_jit_sampler_type(gallivm, "sampler");
2664    image_type = create_jit_image_type(gallivm, "image");
2665    buffer_type = lp_build_create_jit_buffer_type(gallivm);
2666
2667    var->context_type = create_gs_jit_context_type(gallivm,
2668                                              var->shader->base.vector_length,
2669                                              buffer_type,
2670                                              texture_type, sampler_type,
2671                                              image_type,
2672                                              "draw_gs_jit_context");
2673    var->context_ptr_type = LLVMPointerType(var->context_type, 0);
2674
2675    var->input_array_type = create_gs_jit_input_type(gallivm);
2676 }
2677
2678 static LLVMTypeRef
2679 get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
2680 {
2681    if (!variant->context_ptr_type)
2682       create_gs_jit_types(variant);
2683    return variant->context_ptr_type;
2684 }
2685
2686 static LLVMValueRef
2687 generate_mask_value(struct draw_gs_llvm_variant *variant,
2688                     struct lp_type gs_type)
2689 {
2690    struct gallivm_state *gallivm = variant->gallivm;
2691    LLVMBuilderRef builder = gallivm->builder;
2692    struct lp_type mask_type = lp_int_type(gs_type);
2693    LLVMValueRef num_prims;
2694    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2695    unsigned i;
2696
2697    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
2698                                   variant->num_prims);
2699    for (i = 0; i < gs_type.length; i++) {
2700       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2701       mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
2702    }
2703    mask_val = lp_build_compare(gallivm, mask_type,
2704                                PIPE_FUNC_GREATER, num_prims, mask_val);
2705
2706    return mask_val;
2707 }
2708
2709 static void
2710 draw_gs_llvm_generate(struct draw_llvm *llvm,
2711                       struct draw_gs_llvm_variant *variant)
2712 {
2713    struct gallivm_state *gallivm = variant->gallivm;
2714    LLVMContextRef context = gallivm->context;
2715    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2716    LLVMTypeRef arg_types[8];
2717    LLVMTypeRef func_type;
2718    LLVMValueRef variant_func;
2719    LLVMValueRef context_ptr;
2720    LLVMValueRef prim_id_ptr;
2721    LLVMBasicBlockRef block;
2722    LLVMBuilderRef builder;
2723    LLVMValueRef io_ptr, input_array, num_prims, mask_val;
2724    struct lp_build_sampler_soa *sampler = 0;
2725    struct lp_build_image_soa *image = NULL;
2726    struct lp_build_context bld;
2727    struct lp_bld_tgsi_system_values system_values;
2728    char func_name[64];
2729    struct lp_type gs_type;
2730    unsigned i;
2731    struct draw_gs_llvm_iface gs_iface;
2732    const struct tgsi_token *tokens = variant->shader->base.state.tokens;
2733    LLVMValueRef consts_ptr;
2734    LLVMValueRef ssbos_ptr;
2735    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2736    struct lp_build_mask_context mask;
2737    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
2738    unsigned vector_length = variant->shader->base.vector_length;
2739
2740    memset(&system_values, 0, sizeof(system_values));
2741    memset(&outputs, 0, sizeof(outputs));
2742
2743    snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant");
2744
2745    assert(variant->vertex_header_ptr_type);
2746
2747    LLVMTypeRef prim_id_type = LLVMVectorType(int32_type, vector_length);
2748    arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
2749    arg_types[1] = variant->input_array_type;           /* input */
2750    arg_types[2] = LLVMPointerType(variant->vertex_header_ptr_type, 0);     /* vertex_header */
2751    arg_types[3] = int32_type;                          /* num_prims */
2752    arg_types[4] = int32_type;                          /* instance_id */
2753    arg_types[5] = LLVMPointerType(prim_id_type, 0);    /* prim_id_ptr */
2754    arg_types[6] = int32_type;
2755    arg_types[7] = int32_type;
2756
2757    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
2758
2759    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2760
2761    variant->function = variant_func;
2762
2763    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2764
2765    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
2766       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
2767          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2768
2769    if (gallivm->cache && gallivm->cache->data_size)
2770       return;
2771    context_ptr               = LLVMGetParam(variant_func, 0);
2772    input_array               = LLVMGetParam(variant_func, 1);
2773    io_ptr                    = LLVMGetParam(variant_func, 2);
2774    num_prims                 = LLVMGetParam(variant_func, 3);
2775    system_values.instance_id = LLVMGetParam(variant_func, 4);
2776    prim_id_ptr               = LLVMGetParam(variant_func, 5);
2777    system_values.invocation_id = LLVMGetParam(variant_func, 6);
2778    system_values.view_index  = LLVMGetParam(variant_func, 7);
2779
2780    lp_build_name(context_ptr, "context");
2781    lp_build_name(input_array, "input");
2782    lp_build_name(io_ptr, "io");
2783    lp_build_name(num_prims, "num_prims");
2784    lp_build_name(system_values.instance_id, "instance_id");
2785    lp_build_name(prim_id_ptr, "prim_id_ptr");
2786    lp_build_name(system_values.invocation_id, "invocation_id");
2787    lp_build_name(system_values.view_index, "view_index");
2788
2789    variant->context_ptr = context_ptr;
2790    variant->io_ptr = io_ptr;
2791    variant->num_prims = num_prims;
2792
2793    gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
2794    gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
2795    gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
2796    gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
2797    gs_iface.input = input_array;
2798    gs_iface.variant = variant;
2799
2800    /*
2801     * Function body
2802     */
2803
2804    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2805    builder = gallivm->builder;
2806    LLVMPositionBuilderAtEnd(builder, block);
2807
2808    lp_build_context_init(&bld, gallivm, lp_type_int(32));
2809
2810    memset(&gs_type, 0, sizeof gs_type);
2811    gs_type.floating = TRUE; /* floating point values */
2812    gs_type.sign = TRUE;     /* values are signed */
2813    gs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
2814    gs_type.width = 32;      /* 32-bit float */
2815    gs_type.length = vector_length;
2816
2817    consts_ptr = draw_gs_jit_context_constants(variant, context_ptr);
2818
2819    ssbos_ptr = draw_gs_jit_context_ssbos(variant, context_ptr);
2820
2821    /* code generated texture sampling */
2822    sampler = draw_llvm_sampler_soa_create(variant->key.samplers,
2823                                           MAX2(variant->key.nr_samplers,
2824                                                variant->key.nr_sampler_views));
2825    image = draw_llvm_image_soa_create(draw_gs_llvm_variant_key_images(&variant->key),
2826                                       variant->key.nr_images);
2827    mask_val = generate_mask_value(variant, gs_type);
2828    lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
2829
2830    if (gs_info->uses_primid) {
2831       system_values.prim_id = LLVMBuildLoad2(builder, prim_id_type, prim_id_ptr, "prim_id");
2832    }
2833
2834    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2835       if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2836          tgsi_dump(tokens, 0);
2837       else
2838          nir_print_shader(llvm->draw->gs.geometry_shader->state.ir.nir, stderr);
2839       draw_gs_llvm_dump_variant_key(&variant->key);
2840    }
2841
2842    struct lp_build_tgsi_params params;
2843    memset(&params, 0, sizeof(params));
2844
2845    params.type = gs_type;
2846    params.mask = &mask;
2847    params.consts_ptr = consts_ptr;
2848    params.system_values = &system_values;
2849    params.context_ptr = context_ptr;
2850    params.sampler = sampler;
2851    params.info = &llvm->draw->gs.geometry_shader->info;
2852    params.gs_iface = (const struct lp_build_gs_iface *)&gs_iface;
2853    params.ssbo_ptr = ssbos_ptr;
2854    params.image = image;
2855    params.gs_vertex_streams = variant->shader->base.num_vertex_streams;
2856    params.aniso_filter_table = draw_gs_jit_context_aniso_filter_table(variant, context_ptr);
2857
2858    if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2859       lp_build_tgsi_soa(variant->gallivm,
2860                         tokens,
2861                         &params,
2862                         outputs);
2863    else
2864       lp_build_nir_soa(variant->gallivm,
2865                        llvm->draw->gs.geometry_shader->state.ir.nir,
2866                        &params,
2867                        outputs);
2868
2869    sampler->destroy(sampler);
2870    image->destroy(image);
2871
2872    lp_build_mask_end(&mask);
2873
2874    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
2875
2876    gallivm_verify_function(gallivm, variant_func);
2877 }
2878
2879 struct draw_gs_llvm_variant *
2880 draw_gs_llvm_create_variant(struct draw_llvm *llvm,
2881                             unsigned num_outputs,
2882                             const struct draw_gs_llvm_variant_key *key)
2883 {
2884    struct draw_gs_llvm_variant *variant;
2885    struct llvm_geometry_shader *shader =
2886       llvm_geometry_shader(llvm->draw->gs.geometry_shader);
2887    char module_name[64];
2888    unsigned char ir_sha1_cache_key[20];
2889    struct lp_cached_code cached = { 0 };
2890    bool needs_caching = false;
2891
2892    variant = MALLOC(sizeof *variant +
2893                     shader->variant_key_size -
2894                     sizeof variant->key);
2895    if (!variant)
2896       return NULL;
2897
2898    variant->llvm = llvm;
2899    variant->shader = shader;
2900
2901    snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",
2902             variant->shader->variants_cached);
2903
2904    memcpy(&variant->key, key, shader->variant_key_size);
2905
2906    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
2907       draw_get_ir_cache_key(shader->base.state.ir.nir,
2908                             key,
2909                             shader->variant_key_size,
2910                             num_outputs,
2911                             ir_sha1_cache_key);
2912
2913       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
2914                                          &cached,
2915                                          ir_sha1_cache_key);
2916       if (!cached.data_size)
2917          needs_caching = true;
2918    }
2919    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
2920
2921    create_gs_jit_types(variant);
2922
2923    variant->vertex_header_type = create_jit_vertex_header(variant->gallivm, num_outputs);
2924    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
2925
2926    draw_gs_llvm_generate(llvm, variant);
2927
2928    gallivm_compile_module(variant->gallivm);
2929
2930    variant->jit_func = (draw_gs_jit_func)
2931          gallivm_jit_function(variant->gallivm, variant->function);
2932
2933    if (needs_caching)
2934       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
2935                                            &cached,
2936                                            ir_sha1_cache_key);
2937    gallivm_free_ir(variant->gallivm);
2938
2939    variant->list_item_global.base = variant;
2940    variant->list_item_local.base = variant;
2941    /*variant->no = */shader->variants_created++;
2942    variant->list_item_global.base = variant;
2943
2944    return variant;
2945 }
2946
2947 void
2948 draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
2949 {
2950    struct draw_llvm *llvm = variant->llvm;
2951
2952    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2953       debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",
2954                     variant->shader->variants_cached, llvm->nr_gs_variants);
2955    }
2956
2957    gallivm_destroy(variant->gallivm);
2958
2959    list_del(&variant->list_item_local.list);
2960    variant->shader->variants_cached--;
2961    list_del(&variant->list_item_global.list);
2962    llvm->nr_gs_variants--;
2963    FREE(variant);
2964 }
2965
2966 struct draw_gs_llvm_variant_key *
2967 draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2968 {
2969    unsigned i;
2970    struct draw_gs_llvm_variant_key *key;
2971    struct draw_sampler_static_state *draw_sampler;
2972    struct draw_image_static_state *draw_image;
2973
2974    key = (struct draw_gs_llvm_variant_key *)store;
2975
2976    memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));
2977
2978    key->num_outputs = draw_total_gs_outputs(llvm->draw);
2979
2980    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color;
2981
2982    /* All variants of this shader will have the same value for
2983     * nr_samplers.  Not yet trying to compact away holes in the
2984     * sampler array.
2985     */
2986    key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2987    if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2988       key->nr_sampler_views =
2989          llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2990    }
2991    else {
2992       key->nr_sampler_views = key->nr_samplers;
2993    }
2994
2995    key->nr_images = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2996
2997    draw_sampler = key->samplers;
2998
2999    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3000
3001    for (i = 0 ; i < key->nr_samplers; i++) {
3002       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3003                                       llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
3004    }
3005    for (i = 0 ; i < key->nr_sampler_views; i++) {
3006       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3007                                       llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
3008    }
3009
3010    draw_image = draw_gs_llvm_variant_key_images(key);
3011    memset(draw_image, 0,
3012           key->nr_images * sizeof *draw_image);
3013    for (i = 0; i < key->nr_images; i++) {
3014       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3015                                             llvm->draw->images[PIPE_SHADER_GEOMETRY][i]);
3016    }
3017    return key;
3018 }
3019
3020 void
3021 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
3022 {
3023    unsigned i;
3024    struct draw_sampler_static_state *sampler = key->samplers;
3025    struct draw_image_static_state *image = draw_gs_llvm_variant_key_images(key);
3026
3027    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
3028    for (i = 0 ; i < key->nr_sampler_views; i++) {
3029       debug_printf("sampler[%i].src_format = %s\n", i,
3030                    util_format_name(sampler[i].texture_state.format));
3031    }
3032
3033    for (i = 0 ; i < key->nr_images; i++)
3034       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3035
3036 }
3037
3038 static void
3039 create_tcs_jit_types(struct draw_tcs_llvm_variant *var)
3040 {
3041    struct gallivm_state *gallivm = var->gallivm;
3042    LLVMTypeRef texture_type, sampler_type, image_type, buffer_type;
3043
3044    texture_type = create_jit_texture_type(gallivm, "texture");
3045    sampler_type = create_jit_sampler_type(gallivm, "sampler");
3046    image_type = create_jit_image_type(gallivm, "image");
3047    buffer_type = lp_build_create_jit_buffer_type(gallivm);
3048
3049    var->context_type = create_tcs_jit_context_type(gallivm,
3050                                               0,
3051                                               buffer_type,
3052                                               texture_type, sampler_type,
3053                                               image_type,
3054                                               "draw_tcs_jit_context");
3055    var->input_array_type = create_tcs_jit_input_type(gallivm);
3056    var->output_array_type = create_tcs_jit_output_type(gallivm);
3057    var->context_ptr_type = LLVMPointerType(var->context_type, 0);
3058 }
3059
3060 static LLVMTypeRef
3061 get_tcs_context_ptr_type(struct draw_tcs_llvm_variant *variant)
3062 {
3063    if (!variant->context_ptr_type)
3064       create_tcs_jit_types(variant);
3065    return variant->context_ptr_type;
3066 }
3067
3068 static LLVMValueRef
3069 draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface *tes_iface,
3070                                struct lp_build_context *bld,
3071                                boolean is_vindex_indirect,
3072                                LLVMValueRef vertex_index,
3073                                boolean is_aindex_indirect,
3074                                LLVMValueRef attrib_index,
3075                                boolean is_sindex_indirect,
3076                                LLVMValueRef swizzle_index)
3077 {
3078    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
3079    struct gallivm_state *gallivm = bld->gallivm;
3080    LLVMBuilderRef builder = gallivm->builder;
3081    LLVMValueRef indices[3];
3082    LLVMValueRef res;
3083    struct lp_type type = bld->type;
3084
3085    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3086       int i;
3087
3088       res = bld->zero;
3089       for (i = 0; i < type.length; ++i) {
3090          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3091          LLVMValueRef vert_chan_index = vertex_index;
3092          LLVMValueRef attr_chan_index = attrib_index;
3093          LLVMValueRef swiz_chan_index = swizzle_index;
3094          LLVMValueRef channel_vec;
3095
3096          if (is_vindex_indirect) {
3097             vert_chan_index = LLVMBuildExtractElement(builder,
3098                                                       vertex_index, idx, "");
3099          }
3100          if (is_aindex_indirect) {
3101             attr_chan_index = LLVMBuildExtractElement(builder,
3102                                                       attrib_index, idx, "");
3103          }
3104          if (is_sindex_indirect) {
3105             swiz_chan_index = LLVMBuildExtractElement(builder,
3106                                                       swizzle_index, idx, "");
3107          }
3108
3109          indices[0] = vert_chan_index;
3110          indices[1] = attr_chan_index;
3111          indices[2] = swiz_chan_index;
3112
3113          channel_vec = LLVMBuildGEP(builder, tcs->input, indices, 3, "");
3114          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
3115
3116          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3117       }
3118    } else {
3119       indices[0] = vertex_index;
3120       indices[1] = attrib_index;
3121       indices[2] = swizzle_index;
3122
3123       res = LLVMBuildGEP(builder, tcs->input, indices, 3, "");
3124       res = LLVMBuildLoad(builder, res, "");
3125       res = lp_build_broadcast_scalar(bld, res);
3126    }
3127    return res;
3128 }
3129
3130 static LLVMValueRef
3131 draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface *tes_iface,
3132                                 struct lp_build_context *bld,
3133                                 boolean is_vindex_indirect,
3134                                 LLVMValueRef vertex_index,
3135                                 boolean is_aindex_indirect,
3136                                 LLVMValueRef attrib_index,
3137                                 boolean is_sindex_indirect,
3138                                 LLVMValueRef swizzle_index,
3139                                 uint32_t name)
3140 {
3141    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
3142    struct gallivm_state *gallivm = bld->gallivm;
3143    LLVMBuilderRef builder = gallivm->builder;
3144    LLVMValueRef indices[3];
3145    LLVMValueRef res;
3146    struct lp_type type = bld->type;
3147
3148    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3149       int i;
3150
3151       res = bld->zero;
3152       for (i = 0; i < type.length; ++i) {
3153          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3154          LLVMValueRef vert_chan_index = vertex_index;
3155          LLVMValueRef attr_chan_index = attrib_index;
3156          LLVMValueRef swiz_chan_index = swizzle_index;
3157          LLVMValueRef channel_vec;
3158
3159          if (is_vindex_indirect) {
3160             vert_chan_index = LLVMBuildExtractElement(builder,
3161                                                       vertex_index, idx, "");
3162          }
3163          if (is_aindex_indirect) {
3164             attr_chan_index = LLVMBuildExtractElement(builder,
3165                                                       attrib_index, idx, "");
3166          }
3167          if (is_sindex_indirect) {
3168             swiz_chan_index = LLVMBuildExtractElement(builder,
3169                                                       swizzle_index, idx, "");
3170          }
3171
3172          indices[0] = vert_chan_index;
3173          indices[1] = attr_chan_index;
3174          indices[2] = swiz_chan_index;
3175
3176          channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
3177          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
3178
3179          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3180       }
3181    } else {
3182       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
3183       indices[1] = attrib_index;
3184       indices[2] = swizzle_index;
3185
3186       res = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
3187       res = LLVMBuildLoad(builder, res, "");
3188       res = lp_build_broadcast_scalar(bld, res);
3189    }
3190    return res;
3191 }
3192
3193 static void
3194 draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface *tes_iface,
3195                                 struct lp_build_context *bld,
3196                                 unsigned name,
3197                                 boolean is_vindex_indirect,
3198                                 LLVMValueRef vertex_index,
3199                                 boolean is_aindex_indirect,
3200                                 LLVMValueRef attrib_index,
3201                                 boolean is_sindex_indirect,
3202                                 LLVMValueRef swizzle_index,
3203                                 LLVMValueRef value,
3204                                 LLVMValueRef mask_vec)
3205 {
3206    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
3207    struct gallivm_state *gallivm = bld->gallivm;
3208    LLVMBuilderRef builder = gallivm->builder;
3209    LLVMValueRef indices[3];
3210    LLVMValueRef res;
3211    struct lp_type type = bld->type;
3212
3213    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3214       int i;
3215
3216       for (i = 0; i < type.length; ++i) {
3217          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3218          LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
3219          LLVMValueRef attr_chan_index = attrib_index;
3220          LLVMValueRef swiz_chan_index = swizzle_index;
3221          LLVMValueRef channel_vec;
3222
3223          if (is_vindex_indirect) {
3224             vert_chan_index = LLVMBuildExtractElement(builder,
3225                                                       vertex_index, idx, "");
3226          }
3227          if (is_aindex_indirect) {
3228             attr_chan_index = LLVMBuildExtractElement(builder,
3229                                                       attrib_index, idx, "");
3230          }
3231
3232          if (is_sindex_indirect) {
3233             swiz_chan_index = LLVMBuildExtractElement(builder,
3234                                                       swizzle_index, idx, "");
3235          }
3236
3237          indices[0] = vert_chan_index;
3238          indices[1] = attr_chan_index;
3239          indices[2] = swiz_chan_index;
3240
3241          channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
3242
3243          res = LLVMBuildExtractElement(builder, value, idx, "");
3244
3245          struct lp_build_if_state ifthen;
3246          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
3247          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
3248          lp_build_if(&ifthen, gallivm, cond);
3249          LLVMBuildStore(builder, res, channel_vec);
3250          lp_build_endif(&ifthen);
3251       }
3252    } else {
3253       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
3254       indices[1] = attrib_index;
3255       indices[2] = swizzle_index;
3256
3257       res = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
3258       for (unsigned i = 0; i < type.length; ++i) {
3259          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3260          LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");
3261
3262          struct lp_build_if_state ifthen;
3263          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
3264          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
3265          lp_build_if(&ifthen, gallivm, cond);
3266          LLVMBuildStore(builder, val, res);
3267          lp_build_endif(&ifthen);
3268       }
3269    }
3270 }
3271
3272
3273 static LLVMValueRef
3274 generate_tcs_mask_value(struct draw_tcs_llvm_variant *variant,
3275                         struct lp_type tcs_type, LLVMValueRef limit, LLVMValueRef loop_counter)
3276 {
3277    struct gallivm_state *gallivm = variant->gallivm;
3278    LLVMBuilderRef builder = gallivm->builder;
3279    struct lp_type mask_type = lp_int_type(tcs_type);
3280    LLVMValueRef num_vecs;
3281    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
3282    unsigned i;
3283
3284    num_vecs = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
3285    for (i = 0; i < tcs_type.length; i++) {
3286       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3287       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
3288    }
3289    mask_val = lp_build_compare(gallivm, mask_type,
3290                                PIPE_FUNC_GREATER, num_vecs, mask_val);
3291
3292    return mask_val;
3293 }
3294
3295 static void
3296 draw_tcs_llvm_generate(struct draw_llvm *llvm,
3297                        struct draw_tcs_llvm_variant *variant)
3298 {
3299    struct gallivm_state *gallivm = variant->gallivm;
3300    LLVMContextRef context = gallivm->context;
3301    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
3302    LLVMTypeRef arg_types[7];
3303    LLVMTypeRef func_type, coro_func_type;
3304    LLVMValueRef variant_func, variant_coro;
3305    LLVMValueRef context_ptr;
3306    LLVMValueRef view_index;
3307    LLVMValueRef input_array, output_array, prim_id, patch_vertices_in;
3308    LLVMValueRef mask_val;
3309    LLVMBasicBlockRef block;
3310    LLVMBuilderRef builder;
3311    struct lp_build_context bld, bldvec;
3312    struct lp_build_sampler_soa *sampler = 0;
3313    struct lp_build_image_soa *image = NULL;
3314    struct lp_bld_tgsi_system_values system_values;
3315    char func_name[64], func_name_coro[64];
3316    unsigned i;
3317    struct draw_tcs_llvm_iface tcs_iface;
3318    struct lp_build_mask_context mask;
3319    LLVMValueRef consts_ptr;
3320    LLVMValueRef ssbos_ptr;
3321    struct lp_type tcs_type;
3322    unsigned vector_length = variant->shader->base.vector_length;
3323
3324    memset(&system_values, 0, sizeof(system_values));
3325
3326    snprintf(func_name, sizeof(func_name), "draw_llvm_tcs_variant");
3327
3328    snprintf(func_name_coro, sizeof(func_name_coro), "draw_llvm_tcs_coro_variant");
3329
3330    arg_types[0] = get_tcs_context_ptr_type(variant);    /* context */
3331    arg_types[1] = variant->input_array_type;           /* input */
3332    arg_types[2] = variant->output_array_type;
3333    arg_types[3] = int32_type;
3334    arg_types[4] = int32_type;
3335    arg_types[5] = int32_type;
3336    arg_types[6] = int32_type; /* coroutine only */
3337
3338    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types) - 1, 0);
3339
3340    coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), arg_types, ARRAY_SIZE(arg_types), 0);
3341
3342    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
3343
3344    variant_coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
3345
3346    variant->function = variant_func;
3347    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
3348
3349    LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv);
3350
3351    LLVMAddTargetDependentFunctionAttr(variant_coro, "coroutine.presplit", "0");
3352
3353    for (i = 0; i < ARRAY_SIZE(arg_types); ++i) {
3354       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
3355          lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS);
3356          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
3357       }
3358    }
3359
3360    if (gallivm->cache && gallivm->cache->data_size)
3361       return;
3362    context_ptr               = LLVMGetParam(variant_func, 0);
3363    input_array               = LLVMGetParam(variant_func, 1);
3364    output_array              = LLVMGetParam(variant_func, 2);
3365    prim_id                   = LLVMGetParam(variant_func, 3);
3366    patch_vertices_in         = LLVMGetParam(variant_func, 4);
3367    view_index                = LLVMGetParam(variant_func, 5);
3368
3369    lp_build_name(context_ptr, "context");
3370    lp_build_name(input_array, "input");
3371    lp_build_name(output_array, "output");
3372    lp_build_name(prim_id, "prim_id");
3373    lp_build_name(patch_vertices_in, "patch_vertices_in");
3374    lp_build_name(view_index, "view_index");
3375
3376    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
3377    builder = gallivm->builder;
3378    LLVMPositionBuilderAtEnd(builder, block);
3379
3380    lp_build_context_init(&bld, gallivm, lp_type_int(32));
3381
3382    memset(&tcs_type, 0, sizeof tcs_type);
3383    tcs_type.floating = TRUE; /* floating point values */
3384    tcs_type.sign = TRUE;     /* values are signed */
3385    tcs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
3386    tcs_type.width = 32;      /* 32-bit float */
3387    tcs_type.length = vector_length;
3388
3389    lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tcs_type));
3390
3391    LLVMValueRef count = lp_build_const_int32(gallivm, variant->shader->base.vertices_out);
3392    LLVMValueRef step = lp_build_const_int32(gallivm, vector_length);
3393
3394    struct lp_build_loop_state loop_state[2];
3395    LLVMValueRef num_inner_loop;
3396    unsigned count_align = util_align_npot(variant->shader->base.vertices_out, tcs_type.length);
3397    num_inner_loop = lp_build_const_int32(gallivm, count_align / tcs_type.length);
3398    LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
3399    LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_inner_loop, "coro_hdls");
3400    unsigned end_coroutine = INT_MAX;
3401    lp_build_loop_begin(&loop_state[1], gallivm,
3402                        lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
3403    lp_build_loop_begin(&loop_state[0], gallivm,
3404                        lp_build_const_int32(gallivm, 0)); /* inner loop */
3405    {
3406       LLVMValueRef args[7];
3407       args[0] = context_ptr;
3408       args[1] = input_array;
3409       args[2] = output_array;
3410       args[3] = prim_id;
3411       args[4] = patch_vertices_in;
3412       args[5] = view_index;
3413       args[6] = loop_state[0].counter;
3414       LLVMValueRef coro_entry = LLVMBuildGEP2(builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");
3415       LLVMValueRef coro_hdl = LLVMBuildLoad2(builder, hdl_ptr_type, coro_entry, "coro_hdl");
3416
3417       struct lp_build_if_state ifstate;
3418       LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntEQ, loop_state[1].counter,
3419                                        lp_build_const_int32(gallivm, 0), "");
3420       /* first time here - call the coroutine function entry point */
3421       lp_build_if(&ifstate, gallivm, cmp);
3422       LLVMValueRef coro_ret = LLVMBuildCall2(builder, coro_func_type, variant_coro, args, 7, "");
3423       LLVMBuildStore(builder, coro_ret, coro_entry);
3424       lp_build_else(&ifstate);
3425       /* subsequent calls for this invocation - check if done. */
3426       LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
3427       struct lp_build_if_state ifstate2;
3428       lp_build_if(&ifstate2, gallivm, coro_done);
3429       /* if done destroy and force loop exit */
3430       lp_build_coro_destroy(gallivm, coro_hdl);
3431       lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
3432       lp_build_else(&ifstate2);
3433       /* otherwise resume the coroutine */
3434       lp_build_coro_resume(gallivm, coro_hdl);
3435       lp_build_endif(&ifstate2);
3436       lp_build_endif(&ifstate);
3437       lp_build_loop_force_reload_counter(&loop_state[1]);
3438    }
3439    lp_build_loop_end_cond(&loop_state[0],
3440                           num_inner_loop,
3441                           NULL,  LLVMIntUGE);
3442    lp_build_loop_end_cond(&loop_state[1],
3443                           lp_build_const_int32(gallivm, end_coroutine),
3444                           NULL, LLVMIntEQ);
3445    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3446
3447    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "entry");
3448    LLVMPositionBuilderAtEnd(builder, block);
3449
3450    context_ptr = LLVMGetParam(variant_coro, 0);
3451    input_array = LLVMGetParam(variant_coro, 1);
3452    output_array = LLVMGetParam(variant_coro, 2);
3453    prim_id = LLVMGetParam(variant_coro, 3);
3454    patch_vertices_in = LLVMGetParam(variant_coro, 4);
3455    view_index = LLVMGetParam(variant_coro, 5);
3456
3457    consts_ptr = draw_tcs_jit_context_constants(variant, context_ptr);
3458
3459    ssbos_ptr = draw_tcs_jit_context_ssbos(variant, context_ptr);
3460    sampler = draw_llvm_sampler_soa_create(variant->key.samplers,
3461                                           MAX2(variant->key.nr_samplers,
3462                                                variant->key.nr_sampler_views));
3463    image = draw_llvm_image_soa_create(draw_tcs_llvm_variant_key_images(&variant->key),
3464                                       variant->key.nr_images);
3465
3466    LLVMValueRef counter = LLVMGetParam(variant_coro, 6);
3467    LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length));
3468    for (i = 0; i < vector_length; i++) {
3469       LLVMValueRef loop_iter = lp_build_const_int32(gallivm, i);
3470       LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), loop_iter, "");
3471       invocvec = LLVMBuildInsertElement(builder, invocvec, idx, loop_iter, "");
3472    }
3473
3474    system_values.invocation_id = invocvec;
3475    system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3476    system_values.view_index = view_index;
3477    system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3478    tcs_iface.input = input_array;
3479    tcs_iface.output = output_array;
3480    tcs_iface.base.emit_fetch_input = draw_tcs_llvm_emit_fetch_input;
3481    tcs_iface.base.emit_fetch_output = draw_tcs_llvm_emit_fetch_output;
3482    tcs_iface.base.emit_store_output = draw_tcs_llvm_emit_store_output;
3483
3484
3485    {
3486       LLVMValueRef coro_id = lp_build_coro_id(gallivm);
3487       LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id);
3488
3489       mask_val = generate_tcs_mask_value(variant, tcs_type, count, LLVMBuildMul(builder, counter, step, ""));
3490       lp_build_mask_begin(&mask, gallivm, tcs_type, mask_val);
3491
3492       struct lp_build_coro_suspend_info coro_info;
3493
3494       LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "suspend");
3495       LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "cleanup");
3496
3497       coro_info.suspend = sus_block;
3498       coro_info.cleanup = clean_block;
3499
3500       struct lp_build_tgsi_params params;
3501       memset(&params, 0, sizeof(params));
3502
3503       params.type = tcs_type;
3504       params.mask = &mask;
3505       params.consts_ptr = consts_ptr;
3506       params.system_values = &system_values;
3507       params.context_ptr = context_ptr;
3508       params.sampler = sampler;
3509       params.info = &llvm->draw->tcs.tess_ctrl_shader->info;
3510       params.ssbo_ptr = ssbos_ptr;
3511       params.image = image;
3512       params.coro = &coro_info;
3513       params.tcs_iface = &tcs_iface.base;
3514       params.aniso_filter_table = draw_tcs_jit_context_aniso_filter_table(variant, context_ptr);
3515
3516       lp_build_nir_soa(variant->gallivm,
3517                        llvm->draw->tcs.tess_ctrl_shader->state.ir.nir,
3518                        &params, NULL);
3519
3520       lp_build_mask_end(&mask);
3521
3522       lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
3523       LLVMPositionBuilderAtEnd(builder, clean_block);
3524
3525       lp_build_coro_free_mem(gallivm, coro_id, coro_hdl);
3526
3527       LLVMBuildBr(builder, sus_block);
3528       LLVMPositionBuilderAtEnd(builder, sus_block);
3529
3530       lp_build_coro_end(gallivm, coro_hdl);
3531       LLVMBuildRet(builder, coro_hdl);
3532    }
3533
3534    sampler->destroy(sampler);
3535    image->destroy(image);
3536    gallivm_verify_function(gallivm, variant_func);
3537    gallivm_verify_function(gallivm, variant_coro);
3538 }
3539
3540 struct draw_tcs_llvm_variant *
3541 draw_tcs_llvm_create_variant(struct draw_llvm *llvm,
3542                              unsigned num_outputs,
3543                              const struct draw_tcs_llvm_variant_key *key)
3544 {
3545    struct draw_tcs_llvm_variant *variant;
3546    struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(llvm->draw->tcs.tess_ctrl_shader);
3547    char module_name[64];
3548    unsigned char ir_sha1_cache_key[20];
3549    struct lp_cached_code cached = { 0 };
3550    bool needs_caching = false;
3551
3552    variant = MALLOC(sizeof *variant +
3553                     shader->variant_key_size - sizeof variant->key);
3554    if (!variant)
3555       return NULL;
3556
3557    variant->llvm = llvm;
3558    variant->shader = shader;
3559
3560    snprintf(module_name, sizeof(module_name), "draw_llvm_tcs_variant%u",
3561             variant->shader->variants_cached);
3562
3563    memcpy(&variant->key, key, shader->variant_key_size);
3564
3565    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3566       draw_get_ir_cache_key(shader->base.state.ir.nir,
3567                             key,
3568                             shader->variant_key_size,
3569                             num_outputs,
3570                             ir_sha1_cache_key);
3571
3572       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3573                                          &cached,
3574                                          ir_sha1_cache_key);
3575       if (!cached.data_size)
3576          needs_caching = true;
3577    }
3578
3579    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
3580
3581    create_tcs_jit_types(variant);
3582
3583    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3584       nir_print_shader(llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, stderr);
3585       draw_tcs_llvm_dump_variant_key(&variant->key);
3586    }
3587
3588    lp_build_coro_declare_malloc_hooks(variant->gallivm);
3589    draw_tcs_llvm_generate(llvm, variant);
3590
3591    gallivm_compile_module(variant->gallivm);
3592
3593    lp_build_coro_add_malloc_hooks(variant->gallivm);
3594    variant->jit_func = (draw_tcs_jit_func)
3595       gallivm_jit_function(variant->gallivm, variant->function);
3596
3597    if (needs_caching)
3598       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3599                                            &cached,
3600                                            ir_sha1_cache_key);
3601    gallivm_free_ir(variant->gallivm);
3602
3603    variant->list_item_global.base = variant;
3604    variant->list_item_local.base = variant;
3605    /*variant->no = */shader->variants_created++;
3606    variant->list_item_global.base = variant;
3607
3608    return variant;
3609 }
3610
3611 void
3612 draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant)
3613 {
3614    struct draw_llvm *llvm = variant->llvm;
3615
3616    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3617       debug_printf("Deleting TCS variant: %u tcs variants,\t%u total variants\n",
3618                     variant->shader->variants_cached, llvm->nr_tcs_variants);
3619    }
3620
3621    gallivm_destroy(variant->gallivm);
3622
3623    list_del(&variant->list_item_local.list);
3624    variant->shader->variants_cached--;
3625    list_del(&variant->list_item_global.list);
3626    llvm->nr_tcs_variants--;
3627    FREE(variant);
3628 }
3629
3630 struct draw_tcs_llvm_variant_key *
3631 draw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3632 {
3633    unsigned i;
3634    struct draw_tcs_llvm_variant_key *key;
3635    struct draw_sampler_static_state *draw_sampler;
3636    struct draw_image_static_state *draw_image;
3637
3638    key = (struct draw_tcs_llvm_variant_key *)store;
3639
3640    memset(key, 0, offsetof(struct draw_tcs_llvm_variant_key, samplers[0]));
3641
3642    /* All variants of this shader will have the same value for
3643     * nr_samplers.  Not yet trying to compact away holes in the
3644     * sampler array.
3645     */
3646    key->nr_samplers = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3647    if (llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3648       key->nr_sampler_views =
3649          llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3650    }
3651    else {
3652       key->nr_sampler_views = key->nr_samplers;
3653    }
3654
3655    key->nr_images = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3656
3657    draw_sampler = key->samplers;
3658
3659    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3660
3661    for (i = 0 ; i < key->nr_samplers; i++) {
3662       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3663                                       llvm->draw->samplers[PIPE_SHADER_TESS_CTRL][i]);
3664    }
3665    for (i = 0 ; i < key->nr_sampler_views; i++) {
3666       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3667                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_CTRL][i]);
3668    }
3669
3670    draw_image = draw_tcs_llvm_variant_key_images(key);
3671    memset(draw_image, 0,
3672           key->nr_images * sizeof *draw_image);
3673    for (i = 0; i < key->nr_images; i++) {
3674       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3675                                             llvm->draw->images[PIPE_SHADER_TESS_CTRL][i]);
3676    }
3677    return key;
3678 }
3679
3680 void
3681 draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key)
3682 {
3683    unsigned i;
3684    struct draw_sampler_static_state *sampler = key->samplers;
3685    struct draw_image_static_state *image = draw_tcs_llvm_variant_key_images(key);
3686    for (i = 0 ; i < key->nr_sampler_views; i++) {
3687       debug_printf("sampler[%i].src_format = %s\n", i,
3688                    util_format_name(sampler[i].texture_state.format));
3689    }
3690
3691    for (i = 0 ; i < key->nr_images; i++)
3692       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3693
3694 }
3695
3696 static void
3697 create_tes_jit_types(struct draw_tes_llvm_variant *var)
3698 {
3699    struct gallivm_state *gallivm = var->gallivm;
3700    LLVMTypeRef texture_type, sampler_type, image_type, buffer_type;
3701
3702    texture_type = create_jit_texture_type(gallivm, "texture");
3703    sampler_type = create_jit_sampler_type(gallivm, "sampler");
3704    image_type = create_jit_image_type(gallivm, "image");
3705    buffer_type = lp_build_create_jit_buffer_type(gallivm);
3706
3707    var->context_type = create_tes_jit_context_type(gallivm,
3708                                               0,
3709                                               buffer_type,
3710                                               texture_type, sampler_type,
3711                                               image_type,
3712                                               "draw_tes_jit_context");
3713    var->context_ptr_type = LLVMPointerType(var->context_type, 0);
3714
3715    var->input_array_deref_type = create_tes_jit_input_deref_type(gallivm);
3716    var->input_array_type = LLVMPointerType(var->input_array_deref_type, 0); /* num vertices per prim */
3717 }
3718
3719 static LLVMTypeRef
3720 get_tes_context_ptr_type(struct draw_tes_llvm_variant *variant)
3721 {
3722    if (!variant->context_ptr_type)
3723       create_tes_jit_types(variant);
3724    return variant->context_ptr_type;
3725 }
3726
3727 static LLVMValueRef
3728 generate_tes_mask_value(struct draw_tes_llvm_variant *variant,
3729                         struct lp_type tes_type, LLVMValueRef limit, LLVMValueRef loop_counter)
3730 {
3731    struct gallivm_state *gallivm = variant->gallivm;
3732    LLVMBuilderRef builder = gallivm->builder;
3733    struct lp_type mask_type = lp_int_type(tes_type);
3734    LLVMValueRef num_prims;
3735    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
3736    unsigned i;
3737
3738    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
3739    for (i = 0; i < tes_type.length; i++) {
3740       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3741       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
3742    }
3743    mask_val = lp_build_compare(gallivm, mask_type,
3744                                PIPE_FUNC_GREATER, num_prims, mask_val);
3745
3746    return mask_val;
3747 }
3748
3749 static LLVMValueRef
3750 draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface *tes_iface,
3751                                  struct lp_build_context *bld,
3752                                  boolean is_vindex_indirect,
3753                                  LLVMValueRef vertex_index,
3754                                  boolean is_aindex_indirect,
3755                                  LLVMValueRef attrib_index,
3756                                  boolean is_sindex_indirect,
3757                                  LLVMValueRef swizzle_index)
3758 {
3759    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3760    struct gallivm_state *gallivm = bld->gallivm;
3761    LLVMBuilderRef builder = gallivm->builder;
3762    LLVMValueRef indices[3];
3763    LLVMValueRef res;
3764    struct lp_type type = bld->type;
3765
3766    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3767       int i;
3768
3769       res = bld->zero;
3770
3771       for (i = 0; i < type.length; ++i) {
3772          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3773          LLVMValueRef vert_chan_index = vertex_index;
3774          LLVMValueRef attr_chan_index = attrib_index;
3775          LLVMValueRef swiz_chan_index = swizzle_index;
3776          LLVMValueRef channel_vec;
3777
3778          if (is_vindex_indirect) {
3779             vert_chan_index = LLVMBuildExtractElement(builder,
3780                                                       vertex_index, idx, "");
3781          }
3782          if (is_aindex_indirect) {
3783             attr_chan_index = LLVMBuildExtractElement(builder,
3784                                                       attrib_index, idx, "");
3785          }
3786          if (is_sindex_indirect) {
3787             swiz_chan_index = LLVMBuildExtractElement(builder,
3788                                                       swizzle_index, idx, "");
3789          }
3790
3791          indices[0] = vert_chan_index;
3792          indices[1] = attr_chan_index;
3793          indices[2] = swiz_chan_index;
3794
3795          channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3796          channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3797
3798          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3799       }
3800    } else {
3801       indices[0] = vertex_index;
3802       indices[1] = attrib_index;
3803       indices[2] = swizzle_index;
3804
3805       res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3806       res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3807       res = lp_build_broadcast_scalar(bld, res);
3808    }
3809    return res;
3810 }
3811
3812 static LLVMValueRef
3813 draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
3814                                 struct lp_build_context *bld,
3815                                 boolean is_aindex_indirect,
3816                                 LLVMValueRef attrib_index,
3817                                 LLVMValueRef swizzle_index)
3818 {
3819    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3820    struct gallivm_state *gallivm = bld->gallivm;
3821    LLVMBuilderRef builder = gallivm->builder;
3822    LLVMValueRef indices[3];
3823    LLVMValueRef res;
3824    struct lp_type type = bld->type;
3825
3826    if (is_aindex_indirect) {
3827       int i;
3828
3829       res = bld->zero;
3830
3831       for (i = 0; i < type.length; ++i) {
3832          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3833          LLVMValueRef attr_chan_index = attrib_index;
3834          LLVMValueRef channel_vec;
3835
3836          if (is_aindex_indirect) {
3837             attr_chan_index = LLVMBuildExtractElement(builder,
3838                                                       attrib_index, idx, "");
3839          }
3840
3841          indices[0] = lp_build_const_int32(gallivm, 0);
3842          indices[1] = attr_chan_index;
3843          indices[2] = swizzle_index;
3844
3845          channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3846          channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3847
3848          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3849       }
3850    } else {
3851       indices[0] = lp_build_const_int32(gallivm, 0);
3852       indices[1] = attrib_index;
3853       indices[2] = swizzle_index;
3854
3855       res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3856       res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3857       res = lp_build_broadcast_scalar(bld, res);
3858    }
3859    return res;
3860 }
3861
/**
 * Emit the LLVM IR for the tessellation-evaluation variant function
 * ("draw_llvm_tes_variant") into variant->gallivm.
 *
 * The generated function takes 11 arguments (context, input array, vertex
 * header output, prim id, number of tess coords, two float pointers with
 * the tess coords, pointers to the outer and inner tess level arrays,
 * patch vertex count and view index) and returns a constant 0.  Its body
 * loops over the tess coords in steps of the shader's vector_length, runs
 * the NIR shader on each batch and hands the outputs to convert_to_aos().
 *
 * If the gallivm cache already holds data, only the function declaration
 * is created and no body is emitted.
 */
3862 static void
3863 draw_tes_llvm_generate(struct draw_llvm *llvm,
3864                        struct draw_tes_llvm_variant *variant)
3865 {
3866    struct gallivm_state *gallivm = variant->gallivm;
3867    LLVMContextRef context = gallivm->context;
3868    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
3869    LLVMTypeRef flt_type = LLVMFloatTypeInContext(context);
3870    LLVMTypeRef arg_types[11];
3871    LLVMTypeRef func_type;
3872    LLVMValueRef variant_func;
3873    LLVMValueRef context_ptr;
3874    LLVMValueRef tess_coord[2], io_ptr, input_array, num_tess_coord;
3875    LLVMValueRef view_index;
3876    LLVMValueRef tess_inner, tess_outer, prim_id, patch_vertices_in;
3877    LLVMBasicBlockRef block;
3878    LLVMBuilderRef builder;
3879    LLVMValueRef mask_val;
3880    struct lp_build_context bld, bldvec;
3881    struct lp_build_sampler_soa *sampler = 0;
3882    struct lp_build_image_soa *image = NULL;
3883    struct lp_bld_tgsi_system_values system_values;
3884    char func_name[64];
3885    unsigned i;
3886    struct draw_tes_llvm_iface tes_iface;
3887    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
3888    struct lp_build_mask_context mask;
3889    LLVMValueRef consts_ptr;
3890    LLVMValueRef ssbos_ptr;
3891    LLVMValueRef step;
3892    struct lp_type tes_type;
3893    unsigned vector_length = variant->shader->base.vector_length;
3894
3895    memset(&system_values, 0, sizeof(system_values));
3896    memset(&outputs, 0, sizeof(outputs));
3897
3898    snprintf(func_name, sizeof(func_name), "draw_llvm_tes_variant");
3899
3900    LLVMTypeRef tess_outer_deref_type = LLVMArrayType(flt_type, 4);
3901    LLVMTypeRef tess_inner_deref_type = LLVMArrayType(flt_type, 2);
3902
        /* Argument types, in the same order the parameters are fetched
         * with LLVMGetParam() further down.
         */
3903    arg_types[0] = get_tes_context_ptr_type(variant);    /* context */
3904    arg_types[1] = variant->input_array_type;           /* input */
3905    arg_types[2] = variant->vertex_header_ptr_type;
3906    arg_types[3] = int32_type;
3907    arg_types[4] = int32_type;
3908    arg_types[5] = LLVMPointerType(flt_type, 0);
3909    arg_types[6] = LLVMPointerType(flt_type, 0);
3910    arg_types[7] = LLVMPointerType(tess_outer_deref_type, 0);
3911    arg_types[8] = LLVMPointerType(tess_inner_deref_type, 0);
3912    arg_types[9] = int32_type;
3913    arg_types[10] = int32_type;
3914
3915    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
3916    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
3917
3918    variant->function = variant_func;
3919    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
3920
        /* Tag every pointer-typed argument as noalias; the attribute
         * index passed is the argument index plus one.
         */
3921    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
3922       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
3923          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
3924
        /* The cache already has data: stop after the declaration
         * (presumably the cached code is used instead of a new body).
         */
3925    if (gallivm->cache && gallivm->cache->data_size)
3926       return;
3927    context_ptr               = LLVMGetParam(variant_func, 0);
3928    input_array               = LLVMGetParam(variant_func, 1);
3929    io_ptr                    = LLVMGetParam(variant_func, 2);
3930    prim_id                   = LLVMGetParam(variant_func, 3);
3931    num_tess_coord            = LLVMGetParam(variant_func, 4);
3932    tess_coord[0]             = LLVMGetParam(variant_func, 5);
3933    tess_coord[1]             = LLVMGetParam(variant_func, 6);
3934    tess_outer                = LLVMGetParam(variant_func, 7);
3935    tess_inner                = LLVMGetParam(variant_func, 8);
3936    patch_vertices_in         = LLVMGetParam(variant_func, 9);
3937    view_index                = LLVMGetParam(variant_func, 10);
3938
3939    lp_build_name(context_ptr, "context");
3940    lp_build_name(input_array, "input");
3941    lp_build_name(io_ptr, "io");
3942    lp_build_name(prim_id, "prim_id");
3943    lp_build_name(num_tess_coord, "num_tess_coord");
3944    lp_build_name(tess_coord[0], "tess_coord[0]");
3945    lp_build_name(tess_coord[1], "tess_coord[1]");
3946    lp_build_name(tess_outer, "tess_outer");
3947    lp_build_name(tess_inner, "tess_inner");
3948    lp_build_name(patch_vertices_in, "patch_vertices_in");
3949    lp_build_name(view_index, "view_index");
3950
        /* Input fetch callbacks handed to the shader translation. */
3951    tes_iface.base.fetch_vertex_input = draw_tes_llvm_fetch_vertex_input;
3952    tes_iface.base.fetch_patch_input = draw_tes_llvm_fetch_patch_input;
3953    tes_iface.input = input_array;
3954    tes_iface.variant = variant;
3955
3956    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
3957    builder = gallivm->builder;
3958    LLVMPositionBuilderAtEnd(builder, block);
3959
3960    lp_build_context_init(&bld, gallivm, lp_type_int(32));
3961
3962    memset(&tes_type, 0, sizeof tes_type);
3963    tes_type.floating = TRUE; /* floating point values */
3964    tes_type.sign = TRUE;     /* values are signed */
3965    tes_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
3966    tes_type.width = 32;      /* 32-bit float */
3967    tes_type.length = vector_length;
3968
3969    lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tes_type));
3970    consts_ptr = draw_tes_jit_context_constants(variant, context_ptr);
3971
3972    ssbos_ptr = draw_tes_jit_context_ssbos(variant, context_ptr);
3973
3974    sampler = draw_llvm_sampler_soa_create(variant->key.samplers,
3975                                           MAX2(variant->key.nr_samplers,
3976                                                variant->key.nr_sampler_views));
3977    image = draw_llvm_image_soa_create(draw_tes_llvm_variant_key_images(&variant->key),
3978                                       variant->key.nr_images);
        /* Loop increment: one full vector of tess coords per iteration. */
3979    step = lp_build_const_int32(gallivm, vector_length);
3980
3981    system_values.tess_outer = LLVMBuildLoad2(builder, tess_outer_deref_type, tess_outer, "");
3982    system_values.tess_inner = LLVMBuildLoad2(builder, tess_inner_deref_type, tess_inner, "");
3983
3984    system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3985
3986    system_values.view_index = view_index;
3987
3988    system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3989
        /* When the key asks for a primitive id output, pre-fill all four
         * channels of that output slot with the broadcast prim id.
         */
3990    if (variant->key.primid_needed) {
3991       int slot = variant->key.primid_output;
3992       for (unsigned i = 0; i < 4; i++) {
3993          outputs[slot][i] = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, tes_type), "primid");
3994          LLVMBuildStore(builder, system_values.prim_id, outputs[slot][i]);
3995       }
3996    }
3997    struct lp_build_loop_state lp_loop;
3998    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
3999    {
4000       LLVMValueRef io;
4001
           /* Output location and execution mask for this batch, both
            * derived from the loop counter.
            */
4002       io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &lp_loop.counter, 1, "");
4003       mask_val = generate_tes_mask_value(variant, tes_type, num_tess_coord, lp_loop.counter);
4004       lp_build_mask_begin(&mask, gallivm, tes_type, mask_val);
4005
           /* Assemble tess_coord as an array of three channel vectors.
            * Channels 0 and 1 are read from the tess_coord arguments;
            * channel 2 is 1 - coord0 - coord1 for PIPE_PRIM_TRIANGLES
            * and 0 for every other prim mode.
            */
4006       system_values.tess_coord = LLVMGetUndef(LLVMArrayType(LLVMVectorType(flt_type, vector_length), 3));
4007       for (i = 0; i < 3; i++) {
4008          LLVMValueRef tess_coord_chan = LLVMGetUndef(LLVMVectorType(flt_type, vector_length));
4009          for (unsigned j = 0; j < vector_length; j++) {
4010             LLVMValueRef idx = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, j), "");
4011             LLVMValueRef tc_val;
4012             if (i == 2) {
4013                if (variant->shader->base.prim_mode == PIPE_PRIM_TRIANGLES) {
4014                   tc_val = lp_build_const_float(gallivm, 1.0);
4015                   tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[0], idx), "");
4016                   tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[1], idx), "");
4017                } else
4018                   tc_val = lp_build_const_float(gallivm, 0.0);
4019             } else
4020                tc_val = lp_build_pointer_get(builder, tess_coord[i], idx);
4021
4022             tess_coord_chan = LLVMBuildInsertElement(builder, tess_coord_chan, tc_val, lp_build_const_int32(gallivm, j), "");
4023          }
4024          system_values.tess_coord = LLVMBuildInsertValue(builder, system_values.tess_coord, tess_coord_chan, i, "");
4025       }
4026
4027       struct lp_build_tgsi_params params;
4028       memset(&params, 0, sizeof(params));
4029
4030       params.type = tes_type;
4031       params.mask = &mask;
4032       params.consts_ptr = consts_ptr;
4033       params.system_values = &system_values;
4034       params.context_ptr = context_ptr;
4035       params.sampler = sampler;
4036       params.info = &llvm->draw->tes.tess_eval_shader->info;
4037       params.ssbo_ptr = ssbos_ptr;
4038       params.image = image;
4039       params.tes_iface = &tes_iface.base;
4040       params.aniso_filter_table = draw_tes_jit_context_aniso_filter_table(variant, context_ptr);
4041
4042       lp_build_nir_soa(variant->gallivm,
4043                        llvm->draw->tes.tess_eval_shader->state.ir.nir,
4044                        &params,
4045                        outputs);
4046
4047       lp_build_mask_end(&mask);
4048
4049       if (variant->key.clamp_vertex_color) {
4050          const struct tgsi_shader_info *info = &llvm->draw->tes.tess_eval_shader->info;
4051          do_clamp_vertex_color(variant->gallivm,
4052                                tes_type, info,
4053                                outputs);
4054       }
           /* A constant all-zero clipmask is passed to convert_to_aos(). */
4055       LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
4056                                                      lp_int_type(tes_type), 0);
4057
4058       convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
4059                      draw_total_tes_outputs(llvm->draw), tes_type, FALSE);
4060    }
        /* Close the loop: the counter moves by `step` and is tested
         * against num_tess_coord with LLVMIntUGE.
         */
4061    lp_build_loop_end_cond(&lp_loop, num_tess_coord, step, LLVMIntUGE);
4062    sampler->destroy(sampler);
4063    image->destroy(image);
4064
4065    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
4066    gallivm_verify_function(gallivm, variant_func);
4067 }
4068
4069 struct draw_tes_llvm_variant *
4070 draw_tes_llvm_create_variant(struct draw_llvm *llvm,
4071                              unsigned num_outputs,
4072                              const struct draw_tes_llvm_variant_key *key)
4073 {
4074    struct draw_tes_llvm_variant *variant;
4075    struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(llvm->draw->tes.tess_eval_shader);
4076    char module_name[64];
4077    unsigned char ir_sha1_cache_key[20];
4078    struct lp_cached_code cached = { 0 };
4079    bool needs_caching = false;
4080
4081    variant = MALLOC(sizeof *variant +
4082                     shader->variant_key_size - sizeof variant->key);
4083    if (!variant)
4084       return NULL;
4085
4086    variant->llvm = llvm;
4087    variant->shader = shader;
4088
4089    snprintf(module_name, sizeof(module_name), "draw_llvm_tes_variant%u",
4090             variant->shader->variants_cached);
4091
4092    memcpy(&variant->key, key, shader->variant_key_size);
4093    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
4094       draw_get_ir_cache_key(shader->base.state.ir.nir,
4095                             key,
4096                             shader->variant_key_size,
4097                             num_outputs,
4098                             ir_sha1_cache_key);
4099
4100       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
4101                                          &cached,
4102                                          ir_sha1_cache_key);
4103       if (!cached.data_size)
4104          needs_caching = true;
4105    }
4106    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
4107
4108    create_tes_jit_types(variant);
4109
4110    variant->vertex_header_type = create_jit_vertex_header(variant->gallivm, num_outputs);
4111    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
4112
4113    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
4114       nir_print_shader(llvm->draw->tes.tess_eval_shader->state.ir.nir, stderr);
4115       draw_tes_llvm_dump_variant_key(&variant->key);
4116    }
4117
4118    draw_tes_llvm_generate(llvm, variant);
4119
4120    gallivm_compile_module(variant->gallivm);
4121
4122    variant->jit_func = (draw_tes_jit_func)
4123       gallivm_jit_function(variant->gallivm, variant->function);
4124
4125    if (needs_caching)
4126       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
4127                                            &cached,
4128                                            ir_sha1_cache_key);
4129    gallivm_free_ir(variant->gallivm);
4130
4131    variant->list_item_global.base = variant;
4132    variant->list_item_local.base = variant;
4133    /*variant->no = */shader->variants_created++;
4134    variant->list_item_global.base = variant;
4135
4136    return variant;
4137 }
4138
4139 void
4140 draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant)
4141 {
4142    struct draw_llvm *llvm = variant->llvm;
4143
4144    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
4145       debug_printf("Deleting TES variant: %u tes variants,\t%u total variants\n",
4146                     variant->shader->variants_cached, llvm->nr_tes_variants);
4147    }
4148
4149    gallivm_destroy(variant->gallivm);
4150
4151    list_del(&variant->list_item_local.list);
4152    variant->shader->variants_cached--;
4153    list_del(&variant->list_item_global.list);
4154    llvm->nr_tes_variants--;
4155    FREE(variant);
4156 }
4157
4158 struct draw_tes_llvm_variant_key *
4159 draw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
4160 {
4161    unsigned i;
4162    struct draw_tes_llvm_variant_key *key;
4163    struct draw_sampler_static_state *draw_sampler;
4164    struct draw_image_static_state *draw_image;
4165
4166    key = (struct draw_tes_llvm_variant_key *)store;
4167
4168    memset(key, 0, offsetof(struct draw_tes_llvm_variant_key, samplers[0]));
4169
4170    int primid_output = draw_find_shader_output(llvm->draw, TGSI_SEMANTIC_PRIMID, 0);
4171    if (primid_output >= 0) {
4172       key->primid_output = primid_output;
4173       key->primid_needed = true;
4174    }
4175
4176    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color &&
4177       llvm->draw->gs.geometry_shader == NULL;
4178
4179    /* All variants of this shader will have the same value for
4180     * nr_samplers.  Not yet trying to compact away holes in the
4181     * sampler array.
4182     */
4183    key->nr_samplers = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
4184    if (llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
4185       key->nr_sampler_views =
4186          llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
4187    }
4188    else {
4189       key->nr_sampler_views = key->nr_samplers;
4190    }
4191
4192    key->nr_images = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
4193
4194    draw_sampler = key->samplers;
4195
4196    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
4197
4198    for (i = 0 ; i < key->nr_samplers; i++) {
4199       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
4200                                       llvm->draw->samplers[PIPE_SHADER_TESS_EVAL][i]);
4201    }
4202    for (i = 0 ; i < key->nr_sampler_views; i++) {
4203       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
4204                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_EVAL][i]);
4205    }
4206
4207    draw_image = draw_tes_llvm_variant_key_images(key);
4208    memset(draw_image, 0,
4209           key->nr_images * sizeof *draw_image);
4210    for (i = 0; i < key->nr_images; i++) {
4211       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
4212                                             llvm->draw->images[PIPE_SHADER_TESS_EVAL][i]);
4213    }
4214    return key;
4215 }
4216
4217 void
4218 draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key)
4219 {
4220    unsigned i;
4221    struct draw_sampler_static_state *sampler = key->samplers;
4222    struct draw_image_static_state *image = draw_tes_llvm_variant_key_images(key);
4223
4224    if (key->primid_needed)
4225       debug_printf("prim id output %d\n", key->primid_output);
4226    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
4227    for (i = 0 ; i < key->nr_sampler_views; i++) {
4228       debug_printf("sampler[%i].src_format = %s\n", i,
4229                    util_format_name(sampler[i].texture_state.format));
4230    }
4231
4232    for (i = 0 ; i < key->nr_images; i++)
4233       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
4234
4235 }