cogl/cogl-journal.c

   1 /*
   2  * Cogl
   3  *
   4  * An object oriented GL/GLES Abstraction/Utility Layer
   5  *
   6  * Copyright (C) 2007,2008,2009 Intel Corporation.
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  20  *
  21  *
  22  */
  23
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif
  27
  28 #include "cogl-debug.h"
  29 #include "cogl-internal.h"
  30 #include "cogl-context-private.h"
  31 #include "cogl-journal-private.h"
  32 #include "cogl-texture-private.h"
  33 #include "cogl-pipeline-private.h"
  34 #include "cogl-pipeline-opengl-private.h"
  35 #include "cogl-vertex-buffer-private.h"
  36 #include "cogl-framebuffer-private.h"
  37 #include "cogl-profile.h"
  38 #include "cogl-attribute-private.h"
  39 #include "cogl-point-in-poly-private.h"
  40 #include "cogl-private.h"
  41 #include "cogl1-context.h"
  42
  43 #include <string.h>
  44 #include <gmodule.h>
  45 #include <math.h>
  46
  47 /* XXX NB:
  48  * The data logged in logged_vertices is formatted as follows:
  49  *
  50  * Per entry:
  51  *   4 RGBA GLubytes for the color
  52  *   2 floats for the top left position
  53  *   2 * n_layers floats for the top left texture coordinates
  54  *   2 floats for the bottom right position
  55  *   2 * n_layers floats for the bottom right texture coordinates
  56  */
  57 #define GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS(N_LAYERS) \
  58   (N_LAYERS * 2 + 2)
  59
  60 /* XXX NB:
  61  * Once in the vertex array, the journal's vertex data is arranged as follows:
  62  * 4 vertices per quad:
  63  *    2 or 3 GLfloats per position (3 when doing software transforms)
  64  *    4 RGBA GLubytes,
  65  *    2 GLfloats per tex coord * n_layers
  66  *
  67  * Where n_layers corresponds to the number of pipeline layers enabled
  68  *
  69  * To avoid frequent changes in the stride of our vertex data we always pad
  70  * n_layers to be >= 2
  71  *
  72  * There will be four vertices per quad in the vertex array
  73  *
  74  * When we are transforming quads in software we need to also track the z
  75  * coordinate of transformed vertices.
  76  *
  77  * So for a given number of layers this gets the stride in 32bit words:
  78  */
  79 #define SW_TRANSFORM      (!(COGL_DEBUG_ENABLED \
  80                              (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)))
  81 #define POS_STRIDE        (SW_TRANSFORM ? 3 : 2) /* number of 32bit words */
  82 #define N_POS_COMPONENTS  POS_STRIDE
  83 #define COLOR_STRIDE      1 /* number of 32bit words */
  84 #define TEX_STRIDE        2 /* number of 32bit words */
  85 #define MIN_LAYER_PADING  2
  86 #define GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS(N_LAYERS) \
  87   (POS_STRIDE + COLOR_STRIDE + \
  88    TEX_STRIDE * (N_LAYERS < MIN_LAYER_PADING ? MIN_LAYER_PADING : N_LAYERS))
  89
  90 /* If a batch is longer than this threshold then we'll assume it's not
  91    worth doing software clipping and it's cheaper to program the GPU
  92    to do the clip */
  93 #define COGL_JOURNAL_HARDWARE_CLIP_THRESHOLD 8
  94
  95 typedef struct _CoglJournalFlushState
  96 {
  97   CoglJournal         *journal;
  98
  99   CoglAttributeBuffer *attribute_buffer;
 100   GArray              *attributes;
 101   int                  current_attribute;
 102
 103   gsize                stride;
 104   size_t               array_offset;
 105   GLuint               current_vertex;
 106
 107   CoglIndices         *indices;
 108   gsize                indices_type_size;
 109
 110   CoglMatrixStack     *modelview_stack;
 111   CoglMatrixStack     *projection_stack;
 112
 113   CoglPipeline        *pipeline;
 114 } CoglJournalFlushState;
 115
 116 typedef void (*CoglJournalBatchCallback) (CoglJournalEntry *start,
 117                                           int n_entries,
 118                                           void *data);
 119 typedef gboolean (*CoglJournalBatchTest) (CoglJournalEntry *entry0,
 120                                           CoglJournalEntry *entry1);
 121
 122 static void _cogl_journal_free (CoglJournal *journal);
 123
 124 COGL_OBJECT_INTERNAL_DEFINE (Journal, journal);
 125
 126 static void
 127 _cogl_journal_free (CoglJournal *journal)
 128 {
 129   int i;
 130
 131   if (journal->entries)
 132     g_array_free (journal->entries, TRUE);
 133   if (journal->vertices)
 134     g_array_free (journal->vertices, TRUE);
 135
 136   for (i = 0; i < COGL_JOURNAL_VBO_POOL_SIZE; i++)
 137     if (journal->vbo_pool[i])
 138       cogl_object_unref (journal->vbo_pool[i]);
 139
 140   g_slice_free (CoglJournal, journal);
 141 }
 142
 143 CoglJournal *
 144 _cogl_journal_new (CoglFramebuffer *framebuffer)
 145 {
 146   CoglJournal *journal = g_slice_new0 (CoglJournal);
 147
 148   /* The journal keeps a pointer back to the framebuffer because there
 149      is effectively a 1:1 mapping between journals and framebuffers.
 150      However, to avoid a circular reference the journal doesn't take a
 151      reference unless it is non-empty. The framebuffer has a special
 152      unref implementation to ensure that the journal is flushed when
 153      the journal is the only thing keeping it alive */
 154   journal->framebuffer = framebuffer;
 155
 156   journal->entries = g_array_new (FALSE, FALSE, sizeof (CoglJournalEntry));
 157   journal->vertices = g_array_new (FALSE, FALSE, sizeof (float));
 158
 159   return _cogl_journal_object_new (journal);
 160 }
 161
 162 static void
 163 _cogl_journal_dump_logged_quad (guint8 *data, int n_layers)
 164 {
 165   gsize stride = GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (n_layers);
 166   int i;
 167
 168   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
 169
 170   g_print ("n_layers = %d; rgba=0x%02X%02X%02X%02X\n",
 171            n_layers, data[0], data[1], data[2], data[3]);
 172
 173   data += 4;
 174
 175   for (i = 0; i < 2; i++)
 176     {
 177       float *v = (float *)data + (i * stride);
 178       int j;
 179
 180       g_print ("v%d: x = %f, y = %f", i, v[0], v[1]);
 181
 182       for (j = 0; j < n_layers; j++)
 183         {
 184           float *t = v + 2 + TEX_STRIDE * j;
 185           g_print (", tx%d = %f, ty%d = %f", j, t[0], j, t[1]);
 186         }
 187       g_print ("\n");
 188     }
 189 }
 190
 191 static void
 192 _cogl_journal_dump_quad_vertices (guint8 *data, int n_layers)
 193 {
 194   gsize stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers);
 195   int i;
 196
 197   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
 198
 199   g_print ("n_layers = %d; stride = %d; pos stride = %d; color stride = %d; "
 200            "tex stride = %d; stride in bytes = %d\n",
 201            n_layers, (int)stride, POS_STRIDE, COLOR_STRIDE,
 202            TEX_STRIDE, (int)stride * 4);
 203
 204   for (i = 0; i < 4; i++)
 205     {
 206       float *v = (float *)data + (i * stride);
 207       guint8 *c = data + (POS_STRIDE * 4) + (i * stride * 4);
 208       int j;
 209
 210       if (G_UNLIKELY (COGL_DEBUG_ENABLED
 211                       (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)))
 212         g_print ("v%d: x = %f, y = %f, rgba=0x%02X%02X%02X%02X",
 213                  i, v[0], v[1], c[0], c[1], c[2], c[3]);
 214       else
 215         g_print ("v%d: x = %f, y = %f, z = %f, rgba=0x%02X%02X%02X%02X",
 216                  i, v[0], v[1], v[2], c[0], c[1], c[2], c[3]);
 217       for (j = 0; j < n_layers; j++)
 218         {
 219           float *t = v + POS_STRIDE + COLOR_STRIDE + TEX_STRIDE * j;
 220           g_print (", tx%d = %f, ty%d = %f", j, t[0], j, t[1]);
 221         }
 222       g_print ("\n");
 223     }
 224 }
 225
 226 static void
 227 _cogl_journal_dump_quad_batch (guint8 *data, int n_layers, int n_quads)
 228 {
 229   gsize byte_stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers) * 4;
 230   int i;
 231
 232   g_print ("_cogl_journal_dump_quad_batch: n_layers = %d, n_quads = %d\n",
 233            n_layers, n_quads);
 234   for (i = 0; i < n_quads; i++)
 235     _cogl_journal_dump_quad_vertices (data + byte_stride * 2 * i, n_layers);
 236 }
 237
 238 static void
 239 batch_and_call (CoglJournalEntry *entries,
 240                 int n_entries,
 241                 CoglJournalBatchTest can_batch_callback,
 242                 CoglJournalBatchCallback batch_callback,
 243                 void *data)
 244 {
 245   int i;
 246   int batch_len = 1;
 247   CoglJournalEntry *batch_start = entries;
 248
 249   if (n_entries < 1)
 250     return;
 251
 252   for (i = 1; i < n_entries; i++)
 253     {
 254       CoglJournalEntry *entry0 = &entries[i - 1];
 255       CoglJournalEntry *entry1 = entry0 + 1;
 256
 257       if (can_batch_callback (entry0, entry1))
 258         {
 259           batch_len++;
 260           continue;
 261         }
 262
 263       batch_callback (batch_start, batch_len, data);
 264
 265       batch_start = entry1;
 266       batch_len = 1;
 267     }
 268
 269   /* The last batch... */
 270   batch_callback (batch_start, batch_len, data);
 271 }
 272
 273 static void
 274 _cogl_journal_flush_modelview_and_entries (CoglJournalEntry *batch_start,
 275                                            int               batch_len,
 276                                            void             *data)
 277 {
 278   CoglJournalFlushState *state = data;
 279   CoglFramebuffer *framebuffer = state->journal->framebuffer;
 280   CoglAttribute **attributes;
 281   CoglDrawFlags draw_flags = (COGL_DRAW_SKIP_JOURNAL_FLUSH |
 282                               COGL_DRAW_SKIP_PIPELINE_VALIDATION |
 283                               COGL_DRAW_SKIP_FRAMEBUFFER_FLUSH |
 284                               COGL_DRAW_SKIP_LEGACY_STATE);
 285
 286   COGL_STATIC_TIMER (time_flush_modelview_and_entries,
 287                      "flush: pipeline+entries", /* parent */
 288                      "flush: modelview+entries",
 289                      "The time spent flushing modelview + entries",
 290                      0 /* no application private data */);
 291
 292   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
 293
 294   COGL_TIMER_START (_cogl_uprof_context, time_flush_modelview_and_entries);
 295
 296   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
 297     g_print ("BATCHING:     modelview batch len = %d\n", batch_len);
 298
 299   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)))
 300     {
 301       _cogl_matrix_stack_set (state->modelview_stack,
 302                               &batch_start->model_view);
 303       _cogl_context_set_current_modelview (ctx, state->modelview_stack);
 304     }
 305
 306   attributes = (CoglAttribute **)state->attributes->data;
 307
 308   if (!_cogl_pipeline_get_real_blend_enabled (state->pipeline))
 309     draw_flags |= COGL_DRAW_COLOR_ATTRIBUTE_IS_OPAQUE;
 310
 311 #ifdef HAVE_COGL_GL
 312   if (ctx->driver == COGL_DRIVER_GL)
 313     {
 314       /* XXX: it's rather evil that we sneak in the GL_QUADS enum here... */
 315       _cogl_framebuffer_draw_attributes (framebuffer,
 316                                          state->pipeline,
 317                                          GL_QUADS,
 318                                          state->current_vertex, batch_len * 4,
 319                                          attributes,
 320                                          state->attributes->len,
 321                                          draw_flags);
 322     }
 323   else
 324 #endif /* HAVE_COGL_GL */
 325     {
 326       if (batch_len > 1)
 327         {
 328           CoglVerticesMode mode = COGL_VERTICES_MODE_TRIANGLES;
 329           int first_vertex = state->current_vertex * 6 / 4;
 330           _cogl_framebuffer_draw_indexed_attributes (framebuffer,
 331                                                      state->pipeline,
 332                                                      mode,
 333                                                      first_vertex,
 334                                                      batch_len * 6,
 335                                                      state->indices,
 336                                                      attributes,
 337                                                      state->attributes->len,
 338                                                      draw_flags);
 339         }
 340       else
 341         {
 342           _cogl_framebuffer_draw_attributes (framebuffer,
 343                                              state->pipeline,
 344                                              COGL_VERTICES_MODE_TRIANGLE_FAN,
 345                                              state->current_vertex, 4,
 346                                              attributes,
 347                                              state->attributes->len,
 348                                              draw_flags);
 349         }
 350     }
 351
 352   /* DEBUGGING CODE XXX: This path will cause all rectangles to be
 353    * drawn with a coloured outline. Each batch will be rendered with
 354    * the same color. This may e.g. help with debugging texture slicing
 355    * issues, visually seeing what is batched and debugging blending
 356    * issues, plus it looks quite cool.
 357    */
 358   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_RECTANGLES)))
 359     {
 360       static CoglPipeline *outline = NULL;
 361       guint8 color_intensity;
 362       int i;
 363       CoglAttribute *loop_attributes[1];
 364
 365       _COGL_GET_CONTEXT (ctxt, NO_RETVAL);
 366
 367       if (outline == NULL)
 368         outline = cogl_pipeline_new (ctxt);
 369
 370       /* The least significant three bits represent the three
 371          components so that the order of colours goes red, green,
 372          yellow, blue, magenta, cyan. Black and white are skipped. The
 373          next two bits give four scales of intensity for those colours
 374          in the order 0xff, 0xcc, 0x99, and 0x66. This gives a total
 375          of 24 colours. If there are more than 24 batches on the stage
 376          then it will wrap around */
 377       color_intensity = 0xff - 0x33 * (ctxt->journal_rectangles_color >> 3);
 378       cogl_pipeline_set_color4ub (outline,
 379                                   (ctxt->journal_rectangles_color & 1) ?
 380                                   color_intensity : 0,
 381                                   (ctxt->journal_rectangles_color & 2) ?
 382                                   color_intensity : 0,
 383                                   (ctxt->journal_rectangles_color & 4) ?
 384                                   color_intensity : 0,
 385                                   0xff);
 386
 387       loop_attributes[0] = attributes[0]; /* we just want the position */
 388       for (i = 0; i < batch_len; i++)
 389         _cogl_framebuffer_draw_attributes (framebuffer,
 390                                            outline,
 391                                            COGL_VERTICES_MODE_LINE_LOOP,
 392                                            4 * i + state->current_vertex, 4,
 393                                            loop_attributes,
 394                                            1,
 395                                            draw_flags);
 396
 397       /* Go to the next color */
 398       do
 399         ctxt->journal_rectangles_color = ((ctxt->journal_rectangles_color + 1) &
 400                                           ((1 << 5) - 1));
 401       /* We don't want to use black or white */
 402       while ((ctxt->journal_rectangles_color & 0x07) == 0
 403              || (ctxt->journal_rectangles_color & 0x07) == 0x07);
 404     }
 405
 406   state->current_vertex += (4 * batch_len);
 407
 408   COGL_TIMER_STOP (_cogl_uprof_context, time_flush_modelview_and_entries);
 409 }
 410
 411 static gboolean
 412 compare_entry_modelviews (CoglJournalEntry *entry0,
 413                           CoglJournalEntry *entry1)
 414 {
 415   /* Batch together quads with the same model view matrix */
 416
 417   /* FIXME: this is nasty, there are much nicer ways to track this
 418    * (at the add_quad_vertices level) without resorting to a memcmp!
 419    *
 420    * E.g. If the cogl-current-matrix code maintained an "age" for
 421    * the modelview matrix we could simply check in add_quad_vertices
 422    * if the age has increased, and if so record the change as a
 423    * boolean in the journal.
 424    */
 425
 426   if (memcmp (&entry0->model_view, &entry1->model_view,
 427               sizeof (GLfloat) * 16) == 0)
 428     return TRUE;
 429   else
 430     return FALSE;
 431 }
 432
 433 /* At this point we have a run of quads that we know have compatible
 434  * pipelines, but they may not all have the same modelview matrix */
 435 static void
 436 _cogl_journal_flush_pipeline_and_entries (CoglJournalEntry *batch_start,
 437                                           int               batch_len,
 438                                           void             *data)
 439 {
 440   CoglJournalFlushState *state = data;
 441   COGL_STATIC_TIMER (time_flush_pipeline_entries,
 442                      "flush: texcoords+pipeline+entries", /* parent */
 443                      "flush: pipeline+entries",
 444                      "The time spent flushing pipeline + entries",
 445                      0 /* no application private data */);
 446
 447   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
 448
 449   COGL_TIMER_START (_cogl_uprof_context, time_flush_pipeline_entries);
 450
 451   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
 452     g_print ("BATCHING:    pipeline batch len = %d\n", batch_len);
 453
 454   state->pipeline = batch_start->pipeline;
 455
 456   /* If we haven't transformed the quads in software then we need to also break
 457    * up batches according to changes in the modelview matrix... */
 458   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)))
 459     {
 460       batch_and_call (batch_start,
 461                       batch_len,
 462                       compare_entry_modelviews,
 463                       _cogl_journal_flush_modelview_and_entries,
 464                       data);
 465     }
 466   else
 467     _cogl_journal_flush_modelview_and_entries (batch_start, batch_len, data);
 468
 469   COGL_TIMER_STOP (_cogl_uprof_context, time_flush_pipeline_entries);
 470 }
 471
 472 static gboolean
 473 compare_entry_pipelines (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
 474 {
 475   /* batch rectangles using compatible pipelines */
 476
 477   if (_cogl_pipeline_equal (entry0->pipeline,
 478                             entry1->pipeline,
 479                             (COGL_PIPELINE_STATE_ALL &
 480                              ~COGL_PIPELINE_STATE_COLOR),
 481                             COGL_PIPELINE_LAYER_STATE_ALL,
 482                             0))
 483     return TRUE;
 484   else
 485     return FALSE;
 486 }
 487
 488 /* Since the stride may not reflect the number of texture layers in use
 489  * (due to padding) we deal with texture coordinate offsets separately
 490  * from vertex and color offsets... */
 491 static void
 492 _cogl_journal_flush_texcoord_vbo_offsets_and_entries (
 493                                           CoglJournalEntry *batch_start,
 494                                           int               batch_len,
 495                                           void             *data)
 496 {
 497   CoglJournalFlushState *state = data;
 498   int                    i;
 499   COGL_STATIC_TIMER (time_flush_texcoord_pipeline_entries,
 500                      "flush: vbo+texcoords+pipeline+entries", /* parent */
 501                      "flush: texcoords+pipeline+entries",
 502                      "The time spent flushing texcoord offsets + pipeline "
 503                      "+ entries",
 504                      0 /* no application private data */);
 505
 506   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
 507
 508   COGL_TIMER_START (_cogl_uprof_context, time_flush_texcoord_pipeline_entries);
 509
 510   /* NB: attributes 0 and 1 are position and color */
 511
 512   for (i = 2; i < state->attributes->len; i++)
 513     cogl_object_unref (g_array_index (state->attributes, CoglAttribute *, i));
 514
 515   g_array_set_size (state->attributes, batch_start->n_layers + 2);
 516
 517   for (i = 0; i < batch_start->n_layers; i++)
 518     {
 519       CoglAttribute **attribute_entry =
 520         &g_array_index (state->attributes, CoglAttribute *, i + 2);
 521       const char *names[] = {
 522           "cogl_tex_coord0_in",
 523           "cogl_tex_coord1_in",
 524           "cogl_tex_coord2_in",
 525           "cogl_tex_coord3_in",
 526           "cogl_tex_coord4_in",
 527           "cogl_tex_coord5_in",
 528           "cogl_tex_coord6_in",
 529           "cogl_tex_coord7_in"
 530       };
 531       char *name;
 532
 533       /* XXX NB:
 534        * Our journal's vertex data is arranged as follows:
 535        * 4 vertices per quad:
 536        *    2 or 3 floats per position (3 when doing software transforms)
 537        *    4 RGBA bytes,
 538        *    2 floats per tex coord * n_layers
 539        * (though n_layers may be padded; see definition of
 540        *  GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details)
 541        */
 542       name = i < 8 ? (char *)names[i] :
 543         g_strdup_printf ("cogl_tex_coord%d_in", i);
 544
 545       /* XXX: it may be worth having some form of static initializer for
 546        * attributes... */
 547       *attribute_entry =
 548         cogl_attribute_new (state->attribute_buffer,
 549                             name,
 550                             state->stride,
 551                             state->array_offset +
 552                             (POS_STRIDE + COLOR_STRIDE) * 4 +
 553                             TEX_STRIDE * 4 * i,
 554                             2,
 555                             COGL_ATTRIBUTE_TYPE_FLOAT);
 556
 557       if (i >= 8)
 558         g_free (name);
 559     }
 560
 561   batch_and_call (batch_start,
 562                   batch_len,
 563                   compare_entry_pipelines,
 564                   _cogl_journal_flush_pipeline_and_entries,
 565                   data);
 566   COGL_TIMER_STOP (_cogl_uprof_context, time_flush_texcoord_pipeline_entries);
 567 }
 568
 569 static gboolean
 570 compare_entry_n_layers (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
 571 {
 572   if (entry0->n_layers == entry1->n_layers)
 573     return TRUE;
 574   else
 575     return FALSE;
 576 }
 577
 578 /* At this point we know the stride has changed from the previous batch
 579  * of journal entries */
 580 static void
 581 _cogl_journal_flush_vbo_offsets_and_entries (CoglJournalEntry *batch_start,
 582                                              int               batch_len,
 583                                              void             *data)
 584 {
 585   CoglJournalFlushState   *state = data;
 586   CoglContext             *ctx = state->journal->framebuffer->context;
 587   gsize                    stride;
 588   int                      i;
 589   CoglAttribute          **attribute_entry;
 590   COGL_STATIC_TIMER (time_flush_vbo_texcoord_pipeline_entries,
 591                      "flush: clip+vbo+texcoords+pipeline+entries", /* parent */
 592                      "flush: vbo+texcoords+pipeline+entries",
 593                      "The time spent flushing vbo + texcoord offsets + "
 594                      "pipeline + entries",
 595                      0 /* no application private data */);
 596
 597   COGL_TIMER_START (_cogl_uprof_context,
 598                     time_flush_vbo_texcoord_pipeline_entries);
 599
 600   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
 601     g_print ("BATCHING:   vbo offset batch len = %d\n", batch_len);
 602
 603   /* XXX NB:
 604    * Our journal's vertex data is arranged as follows:
 605    * 4 vertices per quad:
 606    *    2 or 3 GLfloats per position (3 when doing software transforms)
 607    *    4 RGBA GLubytes,
 608    *    2 GLfloats per tex coord * n_layers
 609    * (though n_layers may be padded; see definition of
 610    *  GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details)
 611    */
 612   stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (batch_start->n_layers);
 613   stride *= sizeof (float);
 614   state->stride = stride;
 615
 616   for (i = 0; i < state->attributes->len; i++)
 617     cogl_object_unref (g_array_index (state->attributes, CoglAttribute *, i));
 618
 619   g_array_set_size (state->attributes, 2);
 620
 621   attribute_entry = &g_array_index (state->attributes, CoglAttribute *, 0);
 622   *attribute_entry = cogl_attribute_new (state->attribute_buffer,
 623                                          "cogl_position_in",
 624                                          stride,
 625                                          state->array_offset,
 626                                          N_POS_COMPONENTS,
 627                                          COGL_ATTRIBUTE_TYPE_FLOAT);
 628
 629   attribute_entry = &g_array_index (state->attributes, CoglAttribute *, 1);
 630   *attribute_entry =
 631     cogl_attribute_new (state->attribute_buffer,
 632                         "cogl_color_in",
 633                         stride,
 634                         state->array_offset + (POS_STRIDE * 4),
 635                         4,
 636                         COGL_ATTRIBUTE_TYPE_UNSIGNED_BYTE);
 637
 638   if (ctx->driver != COGL_DRIVER_GL)
 639     state->indices = cogl_get_rectangle_indices (ctx, batch_len);
 640
 641   /* We only create new Attributes when the stride within the
 642    * AttributeBuffer changes. (due to a change in the number of pipeline
 643    * layers) While the stride remains constant we walk forward through
 644    * the above AttributeBuffer using a vertex offset passed to
 645    * cogl_draw_attributes
 646    */
 647   state->current_vertex = 0;
 648
 649   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_JOURNAL)))
 650     {
 651       guint8 *verts;
 652
 653       /* Mapping a buffer for read is probably a really bad thing to
 654          do but this will only happen during debugging so it probably
 655          doesn't matter */
 656       verts = ((guint8 *)cogl_buffer_map (COGL_BUFFER (state->attribute_buffer),
 657                                           COGL_BUFFER_ACCESS_READ, 0) +
 658                state->array_offset);
 659
 660       _cogl_journal_dump_quad_batch (verts,
 661                                      batch_start->n_layers,
 662                                      batch_len);
 663
 664       cogl_buffer_unmap (COGL_BUFFER (state->attribute_buffer));
 665     }
 666
 667   batch_and_call (batch_start,
 668                   batch_len,
 669                   compare_entry_n_layers,
 670                   _cogl_journal_flush_texcoord_vbo_offsets_and_entries,
 671                   data);
 672
 673   /* progress forward through the VBO containing all our vertices */
 674   state->array_offset += (stride * 4 * batch_len);
 675   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_JOURNAL)))
 676     g_print ("new vbo offset = %lu\n", (unsigned long)state->array_offset);
 677
 678   COGL_TIMER_STOP (_cogl_uprof_context,
 679                    time_flush_vbo_texcoord_pipeline_entries);
 680 }
 681
 682 static gboolean
 683 compare_entry_strides (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
 684 {
 685   /* Currently the only thing that affects the stride for our vertex arrays
 686    * is the number of pipeline layers. We need to update our VBO offsets
 687    * whenever the stride changes. */
 688   /* TODO: We should be padding the n_layers == 1 case as if it were
 689    * n_layers == 2 so we can reduce the need to split batches. */
 690   if (entry0->n_layers == entry1->n_layers ||
 691       (entry0->n_layers <= MIN_LAYER_PADING &&
 692        entry1->n_layers <= MIN_LAYER_PADING))
 693     return TRUE;
 694   else
 695     return FALSE;
 696 }
 697
 698 /* At this point we know the batch has a unique clip stack */
 699 static void
 700 _cogl_journal_flush_clip_stacks_and_entries (CoglJournalEntry *batch_start,
 701                                              int               batch_len,
 702                                              void             *data)
 703 {
 704   CoglJournalFlushState *state = data;
 705
 706   COGL_STATIC_TIMER (time_flush_clip_stack_pipeline_entries,
 707                      "Journal Flush", /* parent */
 708                      "flush: clip+vbo+texcoords+pipeline+entries",
 709                      "The time spent flushing clip + vbo + texcoord offsets + "
 710                      "pipeline + entries",
 711                      0 /* no application private data */);
 712
 713   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
 714
 715   COGL_TIMER_START (_cogl_uprof_context,
 716                     time_flush_clip_stack_pipeline_entries);
 717
 718   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
 719     g_print ("BATCHING:  clip stack batch len = %d\n", batch_len);
 720
 721   _cogl_clip_stack_flush (batch_start->clip_stack, state->journal->framebuffer);
 722
 723   /* XXX: Because we are manually flushing clip state here we need to
 724    * make sure that the clip state gets updated the next time we flush
 725    * framebuffer state by marking the current framebuffer's clip state
 726    * as changed. */
 727   ctx->current_draw_buffer_changes |= COGL_FRAMEBUFFER_STATE_CLIP;
 728
 729   _cogl_matrix_stack_push (state->modelview_stack);
 730
 731   /* If we have transformed all our quads at log time then we ensure
 732    * no further model transform is applied by loading the identity
 733    * matrix here. We need to do this after flushing the clip stack
 734    * because the clip stack flushing code can modify the matrix */
 735   if (G_LIKELY (!(COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))))
 736     {
 737       _cogl_matrix_stack_load_identity (state->modelview_stack);
 738       _cogl_context_set_current_modelview (ctx, state->modelview_stack);
 739     }
 740
 741   /* Setting up the clip state can sometimes also flush the projection
 742      matrix so we should flush it again. This will be a no-op if the
 743      clip code didn't modify the projection */
 744   _cogl_context_set_current_projection (ctx, state->projection_stack);
 745
 746   batch_and_call (batch_start,
 747                   batch_len,
 748                   compare_entry_strides,
 749                   _cogl_journal_flush_vbo_offsets_and_entries, /* callback */
 750                   data);
 751
 752   _cogl_matrix_stack_pop (state->modelview_stack);
 753
 754   COGL_TIMER_STOP (_cogl_uprof_context,
 755                    time_flush_clip_stack_pipeline_entries);
 756 }
 757
 758 static gboolean
 759 calculate_translation (const CoglMatrix *a,
 760                        const CoglMatrix *b,
 761                        float *tx_p,
 762                        float *ty_p)
 763 {
 764   float tx, ty;
 765   int x, y;
 766
 767   /* Assuming we had the original matrix in this form:
 768    *
 769    *      [ a₁₁, a₁₂, a₁₃, a₁₄ ]
 770    *      [ a₂₁, a₂₂, a₂₃, a₂₄ ]
 771    *  a = [ a₃₁, a₃₂, a₃₃, a₃₄ ]
 772    *      [ a₄₁, a₄₂, a₄₃, a₄₄ ]
 773    *
 774    * then a translation of that matrix would be a multiplication by a
 775    * matrix of this form:
 776    *
 777    *      [ 1, 0, 0, x ]
 778    *      [ 0, 1, 0, y ]
 779    *  t = [ 0, 0, 1, 0 ]
 780    *      [ 0, 0, 0, 1 ]
 781    *
 782    * That would give us a matrix of this form.
 783    *
 784    *              [ a₁₁, a₁₂, a₁₃, a₁₁ x + a₁₂ y + a₁₄ ]
 785    *              [ a₂₁, a₂₂, a₂₃, a₂₁ x + a₂₂ y + a₂₄ ]
 786    *  b = a ⋅ t = [ a₃₁, a₃₂, a₃₃, a₃₁ x + a₃₂ y + a₃₄ ]
 787    *              [ a₄₁, a₄₂, a₄₃, a₄₁ x + a₄₂ y + a₄₄ ]
 788    *
 789    * We can use the two equations from the top left of the matrix to
 790    * work out the x and y translation given the two matrices:
 791    *
 792    *  b₁₄ = a₁₁x + a₁₂y + a₁₄
 793    *  b₂₄ = a₂₁x + a₂₂y + a₂₄
 794    *
 795    * Rearranging gives us:
 796    *
 797    *        a₁₂ b₂₄ - a₂₄ a₁₂
 798    *        -----------------  +  a₁₄ - b₁₄
 799    *              a₂₂
 800    *  x =  ---------------------------------
 801    *                a₁₂ a₂₁
 802    *                -------  -  a₁₁
 803    *                  a₂₂
 804    *
 805    *      b₂₄ - a₂₁x - a₂₄
 806    *  y = ----------------
 807    *            a₂₂
 808    *
 809    * Once we've worked out what x and y would be if this was a valid
 810    * translation then we can simply verify that the rest of the matrix
 811    * matches up.
 812    */
 813
 814   /* The leftmost 3x4 part of the matrix shouldn't change by a
 815      translation so we can just compare it directly */
 816   for (y = 0; y < 4; y++)
 817     for (x = 0; x < 3; x++)
 818       if ((&a->xx)[x * 4 + y] != (&b->xx)[x * 4 + y])
 819         return FALSE;
 820
 821   tx = (((a->xy * b->yw - a->yw * a->xy) / a->yy + a->xw - b->xw) /
 822         ((a->xy * a->yx) / a->yy - a->xx));
 823   ty = (b->yw - a->yx * tx - a->yw) / a->yy;
 824
 825 #define APPROX_EQUAL(a, b) (fabsf ((a) - (b)) < 1e-6f)
 826
 827   /* Check whether the 4th column of the matrices match up to the
 828      calculation */
 829   if (!APPROX_EQUAL (b->xw, a->xx * tx + a->xy * ty + a->xw) ||
 830       !APPROX_EQUAL (b->yw, a->yx * tx + a->yy * ty + a->yw) ||
 831       !APPROX_EQUAL (b->zw, a->zx * tx + a->zy * ty + a->zw) ||
 832       !APPROX_EQUAL (b->ww, a->wx * tx + a->wy * ty + a->ww))
 833     return FALSE;
 834
 835 #undef APPROX_EQUAL
 836
 837   *tx_p = tx;
 838   *ty_p = ty;
 839
 840   return TRUE;
 841 }
 842
 843 typedef struct
 844 {
 845   float x_1, y_1;
 846   float x_2, y_2;
 847 } ClipBounds;
 848
 849 static gboolean
 850 can_software_clip_entry (CoglJournalEntry *journal_entry,
 851                          CoglJournalEntry *prev_journal_entry,
 852                          CoglClipStack *clip_stack,
 853                          ClipBounds *clip_bounds_out)
 854 {
 855   CoglPipeline *pipeline = journal_entry->pipeline;
 856   CoglClipStack *clip_entry;
 857   int layer_num;
 858
 859   clip_bounds_out->x_1 = -G_MAXFLOAT;
 860   clip_bounds_out->y_1 = -G_MAXFLOAT;
 861   clip_bounds_out->x_2 = G_MAXFLOAT;
 862   clip_bounds_out->y_2 = G_MAXFLOAT;
 863
 864   /* Check the pipeline is usable. We can short-cut here for
 865      entries using the same pipeline as the previous entry */
 866   if (prev_journal_entry == NULL || pipeline != prev_journal_entry->pipeline)
 867     {
 868       /* If the pipeline has a user program then we can't reliably modify
 869          the texture coordinates */
 870       if (cogl_pipeline_get_user_program (pipeline))
 871         return FALSE;
 872
 873       /* If any of the pipeline layers have a texture matrix then we can't
 874          reliably modify the texture coordinates */
 875       for (layer_num = cogl_pipeline_get_n_layers (pipeline) - 1;
 876            layer_num >= 0;
 877            layer_num--)
 878         if (_cogl_pipeline_layer_has_user_matrix (pipeline, layer_num))
 879           return FALSE;
 880     }
 881
 882   /* Now we need to verify that each clip entry's matrix is just a
 883      translation of the journal entry's modelview matrix. We can
 884      also work out the bounds of the clip in modelview space using
 885      this translation */
 886   for (clip_entry = clip_stack; clip_entry; clip_entry = clip_entry->parent)
 887     {
 888       float rect_x1, rect_y1, rect_x2, rect_y2;
 889       CoglClipStackRect *clip_rect;
 890       float tx, ty;
 891
 892       clip_rect = (CoglClipStackRect *) clip_entry;
 893
 894       if (!calculate_translation (&clip_rect->matrix,
 895                                   &journal_entry->model_view,
 896                                   &tx, &ty))
 897         return FALSE;
 898
 899       if (clip_rect->x0 < clip_rect->x1)
 900         {
 901           rect_x1 = clip_rect->x0;
 902           rect_x2 = clip_rect->x1;
 903         }
 904       else
 905         {
 906           rect_x1 = clip_rect->x1;
 907           rect_x2 = clip_rect->x0;
 908         }
 909       if (clip_rect->y0 < clip_rect->y1)
 910         {
 911           rect_y1 = clip_rect->y0;
 912           rect_y2 = clip_rect->y1;
 913         }
 914       else
 915         {
 916           rect_y1 = clip_rect->y1;
 917           rect_y2 = clip_rect->y0;
 918         }
 919
 920       clip_bounds_out->x_1 = MAX (clip_bounds_out->x_1, rect_x1 - tx);
 921       clip_bounds_out->y_1 = MAX (clip_bounds_out->y_1, rect_y1 - ty);
 922       clip_bounds_out->x_2 = MIN (clip_bounds_out->x_2, rect_x2 - tx);
 923       clip_bounds_out->y_2 = MIN (clip_bounds_out->y_2, rect_y2 - ty);
 924     }
 925
 926   if (clip_bounds_out->x_2 <= clip_bounds_out->x_1 ||
 927       clip_bounds_out->y_2 <= clip_bounds_out->y_1)
 928     memset (clip_bounds_out, 0, sizeof (ClipBounds));
 929
 930   return TRUE;
 931 }
 932
 933 static void
 934 software_clip_entry (CoglJournalEntry *journal_entry,
 935                      float *verts,
 936                      ClipBounds *clip_bounds)
 937 {
 938   size_t stride =
 939     GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (journal_entry->n_layers);
 940   float rx1, ry1, rx2, ry2;
 941   float vx1, vy1, vx2, vy2;
 942   int layer_num;
 943
 944   /* Remove the clip on the entry */
 945   _cogl_clip_stack_unref (journal_entry->clip_stack);
 946   journal_entry->clip_stack = NULL;
 947
 948   vx1 = verts[0];
 949   vy1 = verts[1];
 950   vx2 = verts[stride];
 951   vy2 = verts[stride + 1];
 952
 953   if (vx1 < vx2)
 954     {
 955       rx1 = vx1;
 956       rx2 = vx2;
 957     }
 958   else
 959     {
 960       rx1 = vx2;
 961       rx2 = vx1;
 962     }
 963   if (vy1 < vy2)
 964     {
 965       ry1 = vy1;
 966       ry2 = vy2;
 967     }
 968   else
 969     {
 970       ry1 = vy2;
 971       ry2 = vy1;
 972     }
 973
 974   rx1 = CLAMP (rx1, clip_bounds->x_1, clip_bounds->x_2);
 975   ry1 = CLAMP (ry1, clip_bounds->y_1, clip_bounds->y_2);
 976   rx2 = CLAMP (rx2, clip_bounds->x_1, clip_bounds->x_2);
 977   ry2 = CLAMP (ry2, clip_bounds->y_1, clip_bounds->y_2);
 978
 979   /* Check if the rectangle intersects the clip at all */
 980   if (rx1 == rx2 || ry1 == ry2)
 981     /* Will set all of the vertex data to 0 in the hope that this
 982        will create a degenerate rectangle and the GL driver will
 983        be able to clip it quickly */
 984     memset (verts, 0, sizeof (float) * stride * 2);
 985   else
 986     {
 987       if (vx1 > vx2)
 988         {
 989           float t = rx1;
 990           rx1 = rx2;
 991           rx2 = t;
 992         }
 993       if (vy1 > vy2)
 994         {
 995           float t = ry1;
 996           ry1 = ry2;
 997           ry2 = t;
 998         }
 999
1000       verts[0] = rx1;
1001       verts[1] = ry1;
1002       verts[stride] = rx2;
1003       verts[stride + 1] = ry2;
1004
1005       /* Convert the rectangle coordinates to a fraction of the original
1006          rectangle */
1007       rx1 = (rx1 - vx1) / (vx2 - vx1);
1008       ry1 = (ry1 - vy1) / (vy2 - vy1);
1009       rx2 = (rx2 - vx1) / (vx2 - vx1);
1010       ry2 = (ry2 - vy1) / (vy2 - vy1);
1011
1012       for (layer_num = 0; layer_num < journal_entry->n_layers; layer_num++)
1013         {
1014           float *t = verts + 2 + 2 * layer_num;
1015           float tx1 = t[0], ty1 = t[1];
1016           float tx2 = t[stride], ty2 = t[stride + 1];
1017           t[0] = rx1 * (tx2 - tx1) + tx1;
1018           t[1] = ry1 * (ty2 - ty1) + ty1;
1019           t[stride] = rx2 * (tx2 - tx1) + tx1;
1020           t[stride + 1] = ry2 * (ty2 - ty1) + ty1;
1021         }
1022     }
1023 }
1024
1025 static void
1026 maybe_software_clip_entries (CoglJournalEntry      *batch_start,
1027                              int                    batch_len,
1028                              CoglJournalFlushState *state)
1029 {
1030   CoglJournal *journal = state->journal;
1031   CoglClipStack *clip_stack, *clip_entry;
1032   int entry_num;
1033
1034   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
1035
1036   /* This tries to find cases where the entry is logged with a clip
1037      but it would be faster to modify the vertex and texture
1038      coordinates rather than flush the clip so that it can batch
1039      better */
1040
1041   /* If the batch is reasonably long then it's worthwhile programming
1042      the GPU to do the clip */
1043   if (batch_len >= COGL_JOURNAL_HARDWARE_CLIP_THRESHOLD)
1044     return;
1045
1046   clip_stack = batch_start->clip_stack;
1047
1048   if (clip_stack == NULL)
1049     return;
1050
1051   /* Verify that all of the clip stack entries are a simple rectangle
1052      clip */
1053   for (clip_entry = clip_stack; clip_entry; clip_entry = clip_entry->parent)
1054     if (clip_entry->type != COGL_CLIP_STACK_RECT)
1055       return;
1056
1057   /* This scratch buffer is used to store the translation for each
1058      entry in the journal. We store it in a separate buffer because
1059      it's expensive to calculate but at this point we still don't know
1060      whether we can clip all of the entries so we don't want to do the
1061      rest of the dependant calculations until we're sure we can. */
1062   if (ctx->journal_clip_bounds == NULL)
1063     ctx->journal_clip_bounds = g_array_new (FALSE, FALSE, sizeof (ClipBounds));
1064   g_array_set_size (ctx->journal_clip_bounds, batch_len);
1065
1066   for (entry_num = 0; entry_num < batch_len; entry_num++)
1067     {
1068       CoglJournalEntry *journal_entry = batch_start + entry_num;
1069       CoglJournalEntry *prev_journal_entry =
1070         entry_num ? batch_start + (entry_num - 1) : NULL;
1071       ClipBounds *clip_bounds = &g_array_index (ctx->journal_clip_bounds,
1072                                                 ClipBounds, entry_num);
1073
1074       if (!can_software_clip_entry (journal_entry, prev_journal_entry,
1075                                     clip_stack,
1076                                     clip_bounds))
1077         return;
1078     }
1079
1080   /* If we make it here then we know we can software clip the entire batch */
1081
1082   COGL_NOTE (CLIPPING, "Software clipping a batch of length %i", batch_len);
1083
1084   for (entry_num = 0; entry_num < batch_len; entry_num++)
1085     {
1086       CoglJournalEntry *journal_entry = batch_start + entry_num;
1087       float *verts = &g_array_index (journal->vertices, float,
1088                                      journal_entry->array_offset + 1);
1089       ClipBounds *clip_bounds = &g_array_index (ctx->journal_clip_bounds,
1090                                                 ClipBounds, entry_num);
1091
1092       software_clip_entry (journal_entry, verts, clip_bounds);
1093     }
1094
1095   return;
1096 }
1097
1098 static void
1099 _cogl_journal_maybe_software_clip_entries (CoglJournalEntry *batch_start,
1100                                            int               batch_len,
1101                                            void             *data)
1102 {
1103   CoglJournalFlushState *state = data;
1104
1105   COGL_STATIC_TIMER (time_check_software_clip,
1106                      "Journal Flush", /* parent */
1107                      "flush: software clipping",
1108                      "Time spent software clipping",
1109                      0 /* no application private data */);
1110
1111   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
1112
1113   COGL_TIMER_START (_cogl_uprof_context,
1114                     time_check_software_clip);
1115
1116   maybe_software_clip_entries (batch_start, batch_len, state);
1117
1118   COGL_TIMER_STOP (_cogl_uprof_context,
1119                    time_check_software_clip);
1120 }
1121
1122 static gboolean
1123 compare_entry_clip_stacks (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
1124 {
1125   return entry0->clip_stack == entry1->clip_stack;
1126 }
1127
1128 /* Gets a new vertex array from the pool. A reference is taken on the
1129    array so it can be treated as if it was just newly allocated */
1130 static CoglAttributeBuffer *
1131 create_attribute_buffer (CoglJournal *journal,
1132                          gsize n_bytes)
1133 {
1134   CoglAttributeBuffer *vbo;
1135
1136   _COGL_GET_CONTEXT (ctx, NULL);
1137
1138   /* If CoglBuffers are being emulated with malloc then there's not
1139      really any point in using the pool so we'll just allocate the
1140      buffer directly */
1141   if (!(ctx->private_feature_flags & COGL_PRIVATE_FEATURE_VBOS))
1142     return cogl_attribute_buffer_new (ctx, n_bytes, NULL);
1143
1144   vbo = journal->vbo_pool[journal->next_vbo_in_pool];
1145
1146   if (vbo == NULL)
1147     {
1148       vbo = cogl_attribute_buffer_new (ctx, n_bytes, NULL);
1149       journal->vbo_pool[journal->next_vbo_in_pool] = vbo;
1150     }
1151   else if (cogl_buffer_get_size (COGL_BUFFER (vbo)) < n_bytes)
1152     {
1153       /* If the buffer is too small then we'll just recreate it */
1154       cogl_object_unref (vbo);
1155       vbo = cogl_attribute_buffer_new (ctx, n_bytes, NULL);
1156       journal->vbo_pool[journal->next_vbo_in_pool] = vbo;
1157     }
1158
1159   journal->next_vbo_in_pool = ((journal->next_vbo_in_pool + 1) %
1160                                COGL_JOURNAL_VBO_POOL_SIZE);
1161
1162   return cogl_object_ref (vbo);
1163 }
1164
1165 static CoglAttributeBuffer *
1166 upload_vertices (CoglJournal            *journal,
1167                  const CoglJournalEntry *entries,
1168                  int                     n_entries,
1169                  size_t                  needed_vbo_len,
1170                  GArray                 *vertices)
1171 {
1172   CoglAttributeBuffer *attribute_buffer;
1173   CoglBuffer *buffer;
1174   const float *vin;
1175   float *vout;
1176   int entry_num;
1177   int i;
1178
1179   g_assert (needed_vbo_len);
1180
1181   attribute_buffer = create_attribute_buffer (journal, needed_vbo_len * 4);
1182   buffer = COGL_BUFFER (attribute_buffer);
1183   cogl_buffer_set_update_hint (buffer, COGL_BUFFER_UPDATE_HINT_STATIC);
1184
1185   vout = _cogl_buffer_map_for_fill_or_fallback (buffer);
1186   vin = &g_array_index (vertices, float, 0);
1187
1188   /* Expand the number of vertices from 2 to 4 while uploading */
1189   for (entry_num = 0; entry_num < n_entries; entry_num++)
1190     {
1191       const CoglJournalEntry *entry = entries + entry_num;
1192       size_t vb_stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (entry->n_layers);
1193       size_t array_stride =
1194         GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (entry->n_layers);
1195
1196       /* Copy the color to all four of the vertices */
1197       for (i = 0; i < 4; i++)
1198         memcpy (vout + vb_stride * i + POS_STRIDE, vin, 4);
1199       vin++;
1200
1201       if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)))
1202         {
1203           vout[vb_stride * 0] = vin[0];
1204           vout[vb_stride * 0 + 1] = vin[1];
1205           vout[vb_stride * 1] = vin[0];
1206           vout[vb_stride * 1 + 1] = vin[array_stride + 1];
1207           vout[vb_stride * 2] = vin[array_stride];
1208           vout[vb_stride * 2 + 1] = vin[array_stride + 1];
1209           vout[vb_stride * 3] = vin[array_stride];
1210           vout[vb_stride * 3 + 1] = vin[1];
1211         }
1212       else
1213         {
1214           float v[8];
1215
1216           v[0] = vin[0];
1217           v[1] = vin[1];
1218           v[2] = vin[0];
1219           v[3] = vin[array_stride + 1];
1220           v[4] = vin[array_stride];
1221           v[5] = vin[array_stride + 1];
1222           v[6] = vin[array_stride];
1223           v[7] = vin[1];
1224
1225           cogl_matrix_transform_points (&entry->model_view,
1226                                         2, /* n_components */
1227                                         sizeof (float) * 2, /* stride_in */
1228                                         v, /* points_in */
1229                                         /* strideout */
1230                                         vb_stride * sizeof (float),
1231                                         vout, /* points_out */
1232                                         4 /* n_points */);
1233         }
1234
1235       for (i = 0; i < entry->n_layers; i++)
1236         {
1237           const float *tin = vin + 2;
1238           float *tout = vout + POS_STRIDE + COLOR_STRIDE;
1239
1240           tout[vb_stride * 0 + i * 2] = tin[i * 2];
1241           tout[vb_stride * 0 + 1 + i * 2] = tin[i * 2 + 1];
1242           tout[vb_stride * 1 + i * 2] = tin[i * 2];
1243           tout[vb_stride * 1 + 1 + i * 2] = tin[array_stride + i * 2 + 1];
1244           tout[vb_stride * 2 + i * 2] = tin[array_stride + i * 2];
1245           tout[vb_stride * 2 + 1 + i * 2] = tin[array_stride + i * 2 + 1];
1246           tout[vb_stride * 3 + i * 2] = tin[array_stride + i * 2];
1247           tout[vb_stride * 3 + 1 + i * 2] = tin[i * 2 + 1];
1248         }
1249
1250       vin += array_stride * 2;
1251       vout += vb_stride * 4;
1252     }
1253
1254   _cogl_buffer_unmap_for_fill_or_fallback (buffer);
1255
1256   return attribute_buffer;
1257 }
1258
1259 void
1260 _cogl_journal_discard (CoglJournal *journal)
1261 {
1262   int i;
1263
1264   if (journal->entries->len <= 0)
1265     return;
1266
1267   for (i = 0; i < journal->entries->len; i++)
1268     {
1269       CoglJournalEntry *entry =
1270         &g_array_index (journal->entries, CoglJournalEntry, i);
1271       _cogl_pipeline_journal_unref (entry->pipeline);
1272       _cogl_clip_stack_unref (entry->clip_stack);
1273     }
1274
1275   g_array_set_size (journal->entries, 0);
1276   g_array_set_size (journal->vertices, 0);
1277   journal->needed_vbo_len = 0;
1278   journal->fast_read_pixel_count = 0;
1279
1280   /* The journal only holds a reference to the framebuffer while the
1281      journal is not empty */
1282   cogl_object_unref (journal->framebuffer);
1283 }
1284
1285 /* Note: A return value of FALSE doesn't mean 'no' it means
1286  * 'unknown' */
1287 gboolean
1288 _cogl_journal_all_entries_within_bounds (CoglJournal *journal,
1289                                          float clip_x0,
1290                                          float clip_y0,
1291                                          float clip_x1,
1292                                          float clip_y1)
1293 {
1294   CoglJournalEntry *entry = (CoglJournalEntry *)journal->entries->data;
1295   CoglClipStack *clip_entry;
1296   CoglClipStack *reference = NULL;
1297   int bounds_x0;
1298   int bounds_y0;
1299   int bounds_x1;
1300   int bounds_y1;
1301   int i;
1302
1303   if (journal->entries->len == 0)
1304     return TRUE;
1305
1306   /* Find the shortest clip_stack ancestry that leaves us in the
1307    * required bounds */
1308   for (clip_entry = entry->clip_stack;
1309        clip_entry;
1310        clip_entry = clip_entry->parent)
1311     {
1312       _cogl_clip_stack_get_bounds (clip_entry,
1313                                    &bounds_x0, &bounds_y0,
1314                                    &bounds_x1, &bounds_y1);
1315
1316       if (bounds_x0 >= clip_x0 && bounds_y0 >= clip_y0 &&
1317           bounds_x1 <= clip_x1 && bounds_y1 <= clip_y1)
1318         reference = clip_entry;
1319       else
1320         break;
1321     }
1322
1323   if (!reference)
1324     return FALSE;
1325
1326   /* For the remaining journal entries we will only verify they share
1327    * 'reference' as an ancestor in their clip stack since that's
1328    * enough to know that they would be within the required bounds.
1329    */
1330   for (i = 1; i < journal->entries->len; i++)
1331     {
1332       gboolean found_reference = FALSE;
1333       entry = &g_array_index (journal->entries, CoglJournalEntry, i);
1334
1335       for (clip_entry = entry->clip_stack;
1336            clip_entry;
1337            clip_entry = clip_entry->parent)
1338         {
1339           if (clip_entry == reference)
1340             {
1341               found_reference = TRUE;
1342               break;
1343             }
1344         }
1345
1346       if (!found_reference)
1347         return FALSE;
1348     }
1349
1350   return TRUE;
1351 }
1352
1353 /* XXX NB: When _cogl_journal_flush() returns all state relating
1354  * to pipelines, all glEnable flags and current matrix state
1355  * is undefined.
1356  */
1357 void
1358 _cogl_journal_flush (CoglJournal *journal)
1359 {
1360   CoglJournalFlushState state;
1361   int                   i;
1362   CoglMatrixStack      *modelview_stack;
1363   COGL_STATIC_TIMER (flush_timer,
1364                      "Mainloop", /* parent */
1365                      "Journal Flush",
1366                      "The time spent flushing the Cogl journal",
1367                      0 /* no application private data */);
1368
1369   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
1370
1371   if (journal->entries->len == 0)
1372     return;
1373
1374   /* The entries in this journal may depend on images in other
1375    * framebuffers which may require that we flush the journals
1376    * associated with those framebuffers before we can flush
1377    * this journal... */
1378   _cogl_framebuffer_flush_dependency_journals (journal->framebuffer);
1379
1380   /* Note: we start the timer after flushing dependency journals so
1381    * that the timer isn't started recursively. */
1382   COGL_TIMER_START (_cogl_uprof_context, flush_timer);
1383
1384   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
1385     g_print ("BATCHING: journal len = %d\n", journal->entries->len);
1386
1387   /* NB: the journal deals with flushing the modelview stack and clip
1388      state manually */
1389   _cogl_framebuffer_flush_state (journal->framebuffer,
1390                                  journal->framebuffer,
1391                                  COGL_FRAMEBUFFER_STATE_ALL &
1392                                  ~(COGL_FRAMEBUFFER_STATE_MODELVIEW |
1393                                    COGL_FRAMEBUFFER_STATE_CLIP));
1394
1395   state.journal = journal;
1396
1397   state.attributes = ctx->journal_flush_attributes_array;
1398
1399   modelview_stack =
1400     _cogl_framebuffer_get_modelview_stack (journal->framebuffer);
1401   state.modelview_stack = modelview_stack;
1402   state.projection_stack =
1403     _cogl_framebuffer_get_projection_stack (journal->framebuffer);
1404
1405   if (G_UNLIKELY ((COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_CLIP)) == 0))
1406     {
1407       /* We do an initial walk of the journal to analyse the clip stack
1408          batches to see if we can do software clipping. We do this as a
1409          separate walk of the journal because we can modify entries and
1410          this may end up joining together clip stack batches in the next
1411          iteration. */
1412       batch_and_call ((CoglJournalEntry *)journal->entries->data, /* first entry */
1413                       journal->entries->len, /* max number of entries to consider */
1414                       compare_entry_clip_stacks,
1415                       _cogl_journal_maybe_software_clip_entries, /* callback */
1416                       &state); /* data */
1417     }
1418
1419   /* We upload the vertices after the clip stack pass in case it
1420      modifies the entries */
1421   state.attribute_buffer =
1422     upload_vertices (journal,
1423                      &g_array_index (journal->entries, CoglJournalEntry, 0),
1424                      journal->entries->len,
1425                      journal->needed_vbo_len,
1426                      journal->vertices);
1427   state.array_offset = 0;
1428
1429   /* batch_and_call() batches a list of journal entries according to some
1430    * given criteria and calls a callback once for each determined batch.
1431    *
1432    * The process of flushing the journal is staggered to reduce the amount
1433    * of driver/GPU state changes necessary:
1434    * 1) We split the entries according to the clip state.
1435    * 2) We split the entries according to the stride of the vertices:
1436    *      Each time the stride of our vertex data changes we need to call
1437    *      gl{Vertex,Color}Pointer to inform GL of new VBO offsets.
1438    *      Currently the only thing that affects the stride of our vertex data
1439    *      is the number of pipeline layers.
1440    * 3) We split the entries explicitly by the number of pipeline layers:
1441    *      We pad our vertex data when the number of layers is < 2 so that we
1442    *      can minimize changes in stride. Each time the number of layers
1443    *      changes we need to call glTexCoordPointer to inform GL of new VBO
1444    *      offsets.
1445    * 4) We then split according to compatible Cogl pipelines:
1446    *      This is where we flush pipeline state
1447    * 5) Finally we split according to modelview matrix changes:
1448    *      This is when we finally tell GL to draw something.
1449    *      Note: Splitting by modelview changes is skipped when are doing the
1450    *      vertex transformation in software at log time.
1451    */
1452   batch_and_call ((CoglJournalEntry *)journal->entries->data, /* first entry */
1453                   journal->entries->len, /* max number of entries to consider */
1454                   compare_entry_clip_stacks,
1455                   _cogl_journal_flush_clip_stacks_and_entries, /* callback */
1456                   &state); /* data */
1457
1458   for (i = 0; i < state.attributes->len; i++)
1459     cogl_object_unref (g_array_index (state.attributes, CoglAttribute *, i));
1460   g_array_set_size (state.attributes, 0);
1461
1462   cogl_object_unref (state.attribute_buffer);
1463
1464   _cogl_journal_discard (journal);
1465
1466   COGL_TIMER_STOP (_cogl_uprof_context, flush_timer);
1467 }
1468
1469 static gboolean
1470 add_framebuffer_deps_cb (CoglPipelineLayer *layer, void *user_data)
1471 {
1472   CoglFramebuffer *framebuffer = user_data;
1473   CoglTexture *texture = _cogl_pipeline_layer_get_texture_real (layer);
1474   const GList *l;
1475
1476   if (!texture)
1477     return TRUE;
1478
1479   for (l = _cogl_texture_get_associated_framebuffers (texture); l; l = l->next)
1480     _cogl_framebuffer_add_dependency (framebuffer, l->data);
1481
1482   return TRUE;
1483 }
1484
1485 void
1486 _cogl_journal_log_quad (CoglJournal  *journal,
1487                         const float  *position,
1488                         CoglPipeline *pipeline,
1489                         int           n_layers,
1490                         CoglTexture  *layer0_override_texture,
1491                         const float  *tex_coords,
1492                         unsigned int  tex_coords_len)
1493 {
1494   CoglFramebuffer *framebuffer = journal->framebuffer;
1495   gsize stride;
1496   int next_vert;
1497   float *v;
1498   int i;
1499   int next_entry;
1500   guint32 disable_layers;
1501   CoglJournalEntry *entry;
1502   CoglPipeline *final_pipeline;
1503   CoglClipStack *clip_stack;
1504   CoglPipelineFlushOptions flush_options;
1505   COGL_STATIC_TIMER (log_timer,
1506                      "Mainloop", /* parent */
1507                      "Journal Log",
1508                      "The time spent logging in the Cogl journal",
1509                      0 /* no application private data */);
1510
1511   COGL_TIMER_START (_cogl_uprof_context, log_timer);
1512
1513   /* If the framebuffer was previously empty then we'll take a
1514      reference to the current framebuffer. This reference will be
1515      removed when the journal is flushed */
1516   if (journal->vertices->len == 0)
1517     cogl_object_ref (framebuffer);
1518
1519   /* The vertex data is logged into a separate array. The data needs
1520      to be copied into a vertex array before it's given to GL so we
1521      only store two vertices per quad and expand it to four while
1522      uploading. */
1523
1524   /* XXX: See definition of GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS for details
1525    * about how we pack our vertex data */
1526   stride = GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (n_layers);
1527
1528   next_vert = journal->vertices->len;
1529   g_array_set_size (journal->vertices, next_vert + 2 * stride + 1);
1530   v = &g_array_index (journal->vertices, float, next_vert);
1531
1532   /* We calculate the needed size of the vbo as we go because it
1533      depends on the number of layers in each entry and it's not easy
1534      calculate based on the length of the logged vertices array */
1535   journal->needed_vbo_len += GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers) * 4;
1536
1537   /* XXX: All the jumping around to fill in this strided buffer doesn't
1538    * seem ideal. */
1539
1540   /* FIXME: This is a hacky optimization, since it will break if we
1541    * change the definition of CoglColor: */
1542   _cogl_pipeline_get_colorubv (pipeline, (guint8 *) v);
1543   v++;
1544
1545   memcpy (v, position, sizeof (float) * 2);
1546   memcpy (v + stride, position + 2, sizeof (float) * 2);
1547
1548   for (i = 0; i < n_layers; i++)
1549     {
1550       /* XXX: See definition of GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS
1551        * for details about how we pack our vertex data */
1552       GLfloat *t = v + 2 + i * 2;
1553
1554       memcpy (t, tex_coords + i * 4, sizeof (float) * 2);
1555       memcpy (t + stride, tex_coords + i * 4 + 2, sizeof (float) * 2);
1556     }
1557
1558   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_JOURNAL)))
1559     {
1560       g_print ("Logged new quad:\n");
1561       v = &g_array_index (journal->vertices, float, next_vert);
1562       _cogl_journal_dump_logged_quad ((guint8 *)v, n_layers);
1563     }
1564
1565   next_entry = journal->entries->len;
1566   g_array_set_size (journal->entries, next_entry + 1);
1567   entry = &g_array_index (journal->entries, CoglJournalEntry, next_entry);
1568
1569   entry->n_layers = n_layers;
1570   entry->array_offset = next_vert;
1571
1572   final_pipeline = pipeline;
1573
1574   flush_options.flags = 0;
1575   if (G_UNLIKELY (cogl_pipeline_get_n_layers (pipeline) != n_layers))
1576     {
1577       disable_layers = (1 << n_layers) - 1;
1578       disable_layers = ~disable_layers;
1579       flush_options.disable_layers = disable_layers;
1580       flush_options.flags |= COGL_PIPELINE_FLUSH_DISABLE_MASK;
1581     }
1582   if (G_UNLIKELY (layer0_override_texture))
1583     {
1584       flush_options.flags |= COGL_PIPELINE_FLUSH_LAYER0_OVERRIDE;
1585       flush_options.layer0_override_texture = layer0_override_texture;
1586     }
1587
1588   if (G_UNLIKELY (flush_options.flags))
1589     {
1590       final_pipeline = cogl_pipeline_copy (pipeline);
1591       _cogl_pipeline_apply_overrides (final_pipeline, &flush_options);
1592     }
1593
1594   entry->pipeline = _cogl_pipeline_journal_ref (final_pipeline);
1595
1596   clip_stack = _cogl_framebuffer_get_clip_stack (framebuffer);
1597   entry->clip_stack = _cogl_clip_stack_ref (clip_stack);
1598
1599   if (G_UNLIKELY (final_pipeline != pipeline))
1600     cogl_handle_unref (final_pipeline);
1601
1602   cogl_framebuffer_get_modelview_matrix (framebuffer,
1603                                          &entry->model_view);
1604
1605   _cogl_pipeline_foreach_layer_internal (pipeline,
1606                                          add_framebuffer_deps_cb,
1607                                          framebuffer);
1608
1609   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_BATCHING)))
1610     _cogl_journal_flush (journal);
1611
1612   COGL_TIMER_STOP (_cogl_uprof_context, log_timer);
1613 }
1614
1615 static void
1616 entry_to_screen_polygon (CoglFramebuffer *framebuffer,
1617                          const CoglJournalEntry *entry,
1618                          float *vertices,
1619                          float *poly)
1620 {
1621   size_t array_stride =
1622     GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (entry->n_layers);
1623   CoglMatrixStack *projection_stack;
1624   CoglMatrix projection;
1625   int i;
1626   float viewport[4];
1627
1628   poly[0] = vertices[0];
1629   poly[1] = vertices[1];
1630   poly[2] = 0;
1631   poly[3] = 1;
1632
1633   poly[4] = vertices[0];
1634   poly[5] = vertices[array_stride + 1];
1635   poly[6] = 0;
1636   poly[7] = 1;
1637
1638   poly[8] = vertices[array_stride];
1639   poly[9] = vertices[array_stride + 1];
1640   poly[10] = 0;
1641   poly[11] = 1;
1642
1643   poly[12] = vertices[array_stride];
1644   poly[13] = vertices[1];
1645   poly[14] = 0;
1646   poly[15] = 1;
1647
1648   /* TODO: perhaps split the following out into a more generalized
1649    * _cogl_transform_points utility...
1650    */
1651
1652   cogl_matrix_transform_points (&entry->model_view,
1653                                 2, /* n_components */
1654                                 sizeof (float) * 4, /* stride_in */
1655                                 poly, /* points_in */
1656                                 /* strideout */
1657                                 sizeof (float) * 4,
1658                                 poly, /* points_out */
1659                                 4 /* n_points */);
1660
1661   projection_stack =
1662     _cogl_framebuffer_get_projection_stack (framebuffer);
1663   _cogl_matrix_stack_get (projection_stack, &projection);
1664
1665   cogl_matrix_project_points (&projection,
1666                               3, /* n_components */
1667                               sizeof (float) * 4, /* stride_in */
1668                               poly, /* points_in */
1669                               /* strideout */
1670                               sizeof (float) * 4,
1671                               poly, /* points_out */
1672                               4 /* n_points */);
1673
1674   cogl_framebuffer_get_viewport4fv (framebuffer, viewport);
1675
1676 /* Scale from OpenGL normalized device coordinates (ranging from -1 to 1)
1677  * to Cogl window/framebuffer coordinates (ranging from 0 to buffer-size) with
1678  * (0,0) being top left. */
1679 #define VIEWPORT_TRANSFORM_X(x, vp_origin_x, vp_width) \
1680     (  ( ((x) + 1.0) * ((vp_width) / 2.0) ) + (vp_origin_x)  )
1681 /* Note: for Y we first flip all coordinates around the X axis while in
1682  * normalized device coodinates */
1683 #define VIEWPORT_TRANSFORM_Y(y, vp_origin_y, vp_height) \
1684     (  ( ((-(y)) + 1.0) * ((vp_height) / 2.0) ) + (vp_origin_y)  )
1685
1686   /* Scale from normalized device coordinates (in range [-1,1]) to
1687    * window coordinates ranging [0,window-size] ... */
1688   for (i = 0; i < 4; i++)
1689     {
1690       float w = poly[4 * i + 3];
1691
1692       /* Perform perspective division */
1693       poly[4 * i] /= w;
1694       poly[4 * i + 1] /= w;
1695
1696       /* Apply viewport transform */
1697       poly[4 * i] = VIEWPORT_TRANSFORM_X (poly[4 * i],
1698                                           viewport[0], viewport[2]);
1699       poly[4 * i + 1] = VIEWPORT_TRANSFORM_Y (poly[4 * i + 1],
1700                                               viewport[1], viewport[3]);
1701     }
1702
1703 #undef VIEWPORT_TRANSFORM_X
1704 #undef VIEWPORT_TRANSFORM_Y
1705 }
1706
1707 static gboolean
1708 try_checking_point_hits_entry_after_clipping (CoglFramebuffer *framebuffer,
1709                                               CoglJournalEntry *entry,
1710                                               float *vertices,
1711                                               float x,
1712                                               float y,
1713                                               gboolean *hit)
1714 {
1715   gboolean can_software_clip = TRUE;
1716   gboolean needs_software_clip = FALSE;
1717   CoglClipStack *clip_entry;
1718
1719   *hit = TRUE;
1720
1721   /* Verify that all of the clip stack entries are simple rectangle
1722    * clips */
1723   for (clip_entry = entry->clip_stack;
1724        clip_entry;
1725        clip_entry = clip_entry->parent)
1726     {
1727       if (x < clip_entry->bounds_x0 ||
1728           x >= clip_entry->bounds_x1 ||
1729           y < clip_entry->bounds_y0 ||
1730           y >= clip_entry->bounds_y1)
1731         {
1732           *hit = FALSE;
1733           return TRUE;
1734         }
1735
1736       if (clip_entry->type == COGL_CLIP_STACK_WINDOW_RECT)
1737         {
1738           /* XXX: technically we could still run the software clip in
1739            * this case because for our purposes we know this clip
1740            * can be ignored now, but [can_]sofware_clip_entry() doesn't
1741            * know this and will bail out. */
1742           can_software_clip = FALSE;
1743         }
1744       else if (clip_entry->type == COGL_CLIP_STACK_RECT)
1745         {
1746           CoglClipStackRect *rect_entry = (CoglClipStackRect *)entry;
1747
1748           if (rect_entry->can_be_scissor == FALSE)
1749             needs_software_clip = TRUE;
1750           /* If can_be_scissor is TRUE then we know it's screen
1751            * aligned and the hit test we did above has determined
1752            * that we are inside this clip. */
1753         }
1754       else
1755         return FALSE;
1756     }
1757
1758   if (needs_software_clip)
1759     {
1760       ClipBounds clip_bounds;
1761       float poly[16];
1762
1763       if (!can_software_clip)
1764         return FALSE;
1765
1766       if (!can_software_clip_entry (entry, NULL,
1767                                     entry->clip_stack, &clip_bounds))
1768         return FALSE;
1769
1770       software_clip_entry (entry, vertices, &clip_bounds);
1771       entry_to_screen_polygon (framebuffer, entry, vertices, poly);
1772
1773       *hit = _cogl_util_point_in_screen_poly (x, y, poly, sizeof (float) * 4, 4);
1774       return TRUE;
1775     }
1776
1777   return TRUE;
1778 }
1779
1780 gboolean
1781 _cogl_journal_try_read_pixel (CoglJournal *journal,
1782                               int x,
1783                               int y,
1784                               CoglBitmap *bitmap,
1785                               gboolean *found_intersection)
1786 {
1787   CoglPixelFormat format;
1788   int i;
1789
1790   _COGL_GET_CONTEXT (ctx, FALSE);
1791
1792   /* XXX: this number has been plucked out of thin air, but the idea
1793    * is that if so many pixels are being read from the same un-changed
1794    * journal than we expect that it will be more efficient to fail
1795    * here so we end up flushing and rendering the journal so that
1796    * further reads can directly read from the framebuffer. There will
1797    * be a bit more lag to flush the render but if there are going to
1798    * continue being lots of arbitrary single pixel reads they will end
1799    * up faster in the end. */
1800   if (journal->fast_read_pixel_count > 50)
1801     return FALSE;
1802
1803   format = cogl_bitmap_get_format (bitmap);
1804
1805   if (format != COGL_PIXEL_FORMAT_RGBA_8888_PRE &&
1806       format != COGL_PIXEL_FORMAT_RGBA_8888)
1807     return FALSE;
1808
1809   *found_intersection = FALSE;
1810
1811   /* NB: The most recently added journal entry is the last entry, and
1812    * assuming this is a simple scene only comprised of opaque coloured
1813    * rectangles with no special pipelines involved (e.g. enabling
1814    * depth testing) then we can assume painter's algorithm for the
1815    * entries and so our fast read-pixel just needs to walk backwards
1816    * through the journal entries trying to intersect each entry with
1817    * the given point of interest. */
1818   for (i = journal->entries->len - 1; i >= 0; i--)
1819     {
1820       CoglJournalEntry *entry =
1821         &g_array_index (journal->entries, CoglJournalEntry, i);
1822       guint8 *color = (guint8 *)&g_array_index (journal->vertices, float,
1823                                                 entry->array_offset);
1824       float *vertices = (float *)color + 1;
1825       float poly[16];
1826       CoglFramebuffer *framebuffer = journal->framebuffer;
1827       guint8 *pixel;
1828
1829       entry_to_screen_polygon (framebuffer, entry, vertices, poly);
1830
1831       if (!_cogl_util_point_in_screen_poly (x, y, poly, sizeof (float) * 4, 4))
1832         continue;
1833
1834       if (entry->clip_stack)
1835         {
1836           gboolean hit;
1837
1838           if (!try_checking_point_hits_entry_after_clipping (framebuffer,
1839                                                              entry,
1840                                                              vertices,
1841                                                              x, y, &hit))
1842             return FALSE; /* hit couldn't be determined */
1843
1844           if (!hit)
1845             continue;
1846         }
1847
1848       *found_intersection = TRUE;
1849
1850       /* If we find that the rectangle the point of interest
1851        * intersects has any state more complex than a constant opaque
1852        * color then we bail out. */
1853       if (!_cogl_pipeline_equal (ctx->opaque_color_pipeline, entry->pipeline,
1854                                  (COGL_PIPELINE_STATE_ALL &
1855                                   ~COGL_PIPELINE_STATE_COLOR),
1856                                  COGL_PIPELINE_LAYER_STATE_ALL,
1857                                  0))
1858         return FALSE;
1859
1860
1861       /* we currently only care about cases where the premultiplied or
1862        * unpremultipled colors are equivalent... */
1863       if (color[3] != 0xff)
1864         return FALSE;
1865
1866       pixel = _cogl_bitmap_map (bitmap,
1867                                 COGL_BUFFER_ACCESS_WRITE,
1868                                 COGL_BUFFER_MAP_HINT_DISCARD);
1869       if (pixel == NULL)
1870         return FALSE;
1871
1872       pixel[0] = color[0];
1873       pixel[1] = color[1];
1874       pixel[2] = color[2];
1875       pixel[3] = color[3];
1876
1877       _cogl_bitmap_unmap (bitmap);
1878
1879       goto success;
1880     }
1881
1882 success:
1883   journal->fast_read_pixel_count++;
1884   return TRUE;
1885 }