src/cmslut.c

   1 //---------------------------------------------------------------------------------
   2 //
   3 //  Little Color Management System
   4 //  Copyright (c) 1998-2011 Marti Maria Saguer
   5 //
   6 // Permission is hereby granted, free of charge, to any person obtaining
   7 // a copy of this software and associated documentation files (the "Software"),
   8 // to deal in the Software without restriction, including without limitation
   9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 // and/or sell copies of the Software, and to permit persons to whom the Software
  11 // is furnished to do so, subject to the following conditions:
  12 //
  13 // The above copyright notice and this permission notice shall be included in
  14 // all copies or substantial portions of the Software.
  15 //
  16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23 //
  24 //---------------------------------------------------------------------------------
  25 //
  26
  27 #include "lcms2_internal.h"
  28
  29
  30 // Allocates an empty multi profile element
  31 cmsStage* CMSEXPORT _cmsStageAllocPlaceholder(cmsContext ContextID,
  32                                 cmsStageSignature Type,
  33                                 cmsUInt32Number InputChannels,
  34                                 cmsUInt32Number OutputChannels,
  35                                 _cmsStageEvalFn     EvalPtr,
  36                                 _cmsStageDupElemFn  DupElemPtr,
  37                                 _cmsStageFreeElemFn FreePtr,
  38                                 void*             Data)
  39 {
  40     cmsStage* ph = (cmsStage*) _cmsMallocZero(ContextID, sizeof(cmsStage));
  41
  42     if (ph == NULL) return NULL;
  43
  44
  45     ph ->ContextID = ContextID;
  46
  47     ph ->Type       = Type;
  48     ph ->Implements = Type;   // By default, no clue on what is implementing
  49
  50     ph ->InputChannels  = InputChannels;
  51     ph ->OutputChannels = OutputChannels;
  52     ph ->EvalPtr        = EvalPtr;
  53     ph ->DupElemPtr     = DupElemPtr;
  54     ph ->FreePtr        = FreePtr;
  55     ph ->Data           = Data;
  56
  57     return ph;
  58 }
  59
  60
  61 static
  62 void EvaluateIdentity(const cmsFloat32Number In[],
  63                             cmsFloat32Number Out[],
  64                       const cmsStage *mpe)
  65 {
  66     memmove(Out, In, mpe ->InputChannels * sizeof(cmsFloat32Number));
  67 }
  68
  69
  70 cmsStage* CMSEXPORT cmsStageAllocIdentity(cmsContext ContextID, cmsUInt32Number nChannels)
  71 {
  72     return _cmsStageAllocPlaceholder(ContextID,
  73                                    cmsSigIdentityElemType,
  74                                    nChannels, nChannels,
  75                                    EvaluateIdentity,
  76                                    NULL,
  77                                    NULL,
  78                                    NULL);
  79  }
  80
  81 // Conversion functions. From floating point to 16 bits
  82 static
  83 void FromFloatTo16(const cmsFloat32Number In[], cmsUInt16Number Out[], cmsUInt32Number n)
  84 {
  85     cmsUInt32Number i;
  86
  87     for (i=0; i < n; i++) {
  88         Out[i] = _cmsQuickSaturateWord(In[i] * 65535.0);
  89     }
  90 }
  91
  92 // From 16 bits to floating point
  93 static
  94 void From16ToFloat(const cmsUInt16Number In[], cmsFloat32Number Out[], cmsUInt32Number n)
  95 {
  96     cmsUInt32Number i;
  97
  98     for (i=0; i < n; i++) {
  99         Out[i] = (cmsFloat32Number) In[i] / 65535.0F;
 100     }
 101 }
 102
 103
 104 // This function is quite useful to analyze the structure of a LUT and retrieve the MPE elements
 105 // that conform the LUT. It should be called with the LUT, the number of expected elements and
 106 // then a list of expected types followed with a list of cmsFloat64Number pointers to MPE elements. If
 107 // the function founds a match with current pipeline, it fills the pointers and returns TRUE
 108 // if not, returns FALSE without touching anything. Setting pointers to NULL does bypass
 109 // the storage process.
 110 cmsBool  CMSEXPORT cmsPipelineCheckAndRetreiveStages(const cmsPipeline* Lut, cmsUInt32Number n, ...)
 111 {
 112     va_list args;
 113     cmsUInt32Number i;
 114     cmsStage* mpe;
 115     cmsStageSignature Type;
 116     void** ElemPtr;
 117
 118     // Make sure same number of elements
 119     if (cmsPipelineStageCount(Lut) != n) return FALSE;
 120
 121     va_start(args, n);
 122
 123     // Iterate across asked types
 124     mpe = Lut ->Elements;
 125     for (i=0; i < n; i++) {
 126
 127         // Get asked type
 128         Type  = (cmsStageSignature)va_arg(args, cmsStageSignature);
 129         if (mpe ->Type != Type) {
 130
 131             va_end(args);       // Mismatch. We are done.
 132             return FALSE;
 133         }
 134         mpe = mpe ->Next;
 135     }
 136
 137     // Found a combination, fill pointers if not NULL
 138     mpe = Lut ->Elements;
 139     for (i=0; i < n; i++) {
 140
 141         ElemPtr = va_arg(args, void**);
 142         if (ElemPtr != NULL)
 143             *ElemPtr = mpe;
 144
 145         mpe = mpe ->Next;
 146     }
 147
 148     va_end(args);
 149     return TRUE;
 150 }
 151
 152 // Below there are implementations for several types of elements. Each type may be implemented by an
 153 // evaluation function, a duplication function, a function to free resources and a constructor.
 154
 155 // *************************************************************************************************
 156 // Type cmsSigCurveSetElemType (curves)
 157 // *************************************************************************************************
 158
 159 cmsToneCurve** _cmsStageGetPtrToCurveSet(const cmsStage* mpe)
 160 {
 161     _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*) mpe ->Data;
 162
 163     return Data ->TheCurves;
 164 }
 165
 166 static
 167 void EvaluateCurves(const cmsFloat32Number In[],
 168                     cmsFloat32Number Out[],
 169                     const cmsStage *mpe)
 170 {
 171     _cmsStageToneCurvesData* Data;
 172     cmsUInt32Number i;
 173
 174     _cmsAssert(mpe != NULL);
 175
 176     Data = (_cmsStageToneCurvesData*) mpe ->Data;
 177     if (Data == NULL) return;
 178
 179     if (Data ->TheCurves == NULL) return;
 180
 181     for (i=0; i < Data ->nCurves; i++) {
 182         Out[i] = cmsEvalToneCurveFloat(Data ->TheCurves[i], In[i]);
 183     }
 184 }
 185
 186 static
 187 void CurveSetElemTypeFree(cmsStage* mpe)
 188 {
 189     _cmsStageToneCurvesData* Data;
 190     cmsUInt32Number i;
 191
 192     _cmsAssert(mpe != NULL);
 193
 194     Data = (_cmsStageToneCurvesData*) mpe ->Data;
 195     if (Data == NULL) return;
 196
 197     if (Data ->TheCurves != NULL) {
 198         for (i=0; i < Data ->nCurves; i++) {
 199             if (Data ->TheCurves[i] != NULL)
 200                 cmsFreeToneCurve(Data ->TheCurves[i]);
 201         }
 202     }
 203     _cmsFree(mpe ->ContextID, Data ->TheCurves);
 204     _cmsFree(mpe ->ContextID, Data);
 205 }
 206
 207
 208 static
 209 void* CurveSetDup(cmsStage* mpe)
 210 {
 211     _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*) mpe ->Data;
 212     _cmsStageToneCurvesData* NewElem;
 213     cmsUInt32Number i;
 214
 215     NewElem = (_cmsStageToneCurvesData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageToneCurvesData));
 216     if (NewElem == NULL) return NULL;
 217
 218     NewElem ->nCurves   = Data ->nCurves;
 219     NewElem ->TheCurves = (cmsToneCurve**) _cmsCalloc(mpe ->ContextID, NewElem ->nCurves, sizeof(cmsToneCurve*));
 220
 221     if (NewElem ->TheCurves == NULL) goto Error;
 222
 223     for (i=0; i < NewElem ->nCurves; i++) {
 224
 225         // Duplicate each curve. It may fail.
 226         NewElem ->TheCurves[i] = cmsDupToneCurve(Data ->TheCurves[i]);
 227         if (NewElem ->TheCurves[i] == NULL) goto Error;
 228
 229
 230     }
 231     return (void*) NewElem;
 232
 233 Error:
 234
 235     if (NewElem ->TheCurves != NULL) {
 236         for (i=0; i < NewElem ->nCurves; i++) {
 237             if (NewElem ->TheCurves[i])
 238                 cmsFreeToneCurve(Data ->TheCurves[i]);
 239         }
 240     }
 241     _cmsFree(mpe ->ContextID, Data ->TheCurves);
 242     _cmsFree(mpe ->ContextID, NewElem);
 243     return NULL;
 244 }
 245
 246
 247 // Curves == NULL forces identity curves
 248 cmsStage* CMSEXPORT cmsStageAllocToneCurves(cmsContext ContextID, cmsUInt32Number nChannels, cmsToneCurve* const Curves[])
 249 {
 250     cmsUInt32Number i;
 251     _cmsStageToneCurvesData* NewElem;
 252     cmsStage* NewMPE;
 253
 254
 255     NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigCurveSetElemType, nChannels, nChannels,
 256                                      EvaluateCurves, CurveSetDup, CurveSetElemTypeFree, NULL );
 257     if (NewMPE == NULL) return NULL;
 258
 259     NewElem = (_cmsStageToneCurvesData*) _cmsMallocZero(ContextID, sizeof(_cmsStageToneCurvesData));
 260     if (NewElem == NULL) {
 261         cmsStageFree(NewMPE);
 262         return NULL;
 263     }
 264
 265     NewMPE ->Data  = (void*) NewElem;
 266
 267     NewElem ->nCurves   = nChannels;
 268     NewElem ->TheCurves = (cmsToneCurve**) _cmsCalloc(ContextID, nChannels, sizeof(cmsToneCurve*));
 269     if (NewElem ->TheCurves == NULL) {
 270         cmsStageFree(NewMPE);
 271         return NULL;
 272     }
 273
 274     for (i=0; i < nChannels; i++) {
 275
 276         if (Curves == NULL) {
 277             NewElem ->TheCurves[i] = cmsBuildGamma(ContextID, 1.0);
 278         }
 279         else {
 280             NewElem ->TheCurves[i] = cmsDupToneCurve(Curves[i]);
 281         }
 282
 283         if (NewElem ->TheCurves[i] == NULL) {
 284             cmsStageFree(NewMPE);
 285             return NULL;
 286         }
 287     }
 288
 289    return NewMPE;
 290 }
 291
 292
 293 // Create a bunch of identity curves
 294 cmsStage* _cmsStageAllocIdentityCurves(cmsContext ContextID, int nChannels)
 295 {
 296     cmsStage* mpe = cmsStageAllocToneCurves(ContextID, nChannels, NULL);
 297
 298     if (mpe == NULL) return NULL;
 299     mpe ->Implements = cmsSigIdentityElemType;
 300     return mpe;
 301 }
 302
 303
 304 // *************************************************************************************************
 305 // Type cmsSigMatrixElemType (Matrices)
 306 // *************************************************************************************************
 307
 308
 309 // Special care should be taken here because precision loss. A temporary cmsFloat64Number buffer is being used
 310 static
 311 void EvaluateMatrix(const cmsFloat32Number In[],
 312                     cmsFloat32Number Out[],
 313                     const cmsStage *mpe)
 314 {
 315     cmsUInt32Number i, j;
 316     _cmsStageMatrixData* Data = (_cmsStageMatrixData*) mpe ->Data;
 317     cmsFloat64Number Tmp;
 318
 319     // Input is already in 0..1.0 notation
 320     for (i=0; i < mpe ->OutputChannels; i++) {
 321
 322         Tmp = 0;
 323         for (j=0; j < mpe->InputChannels; j++) {
 324             Tmp += In[j] * Data->Double[i*mpe->InputChannels + j];
 325         }
 326
 327         if (Data ->Offset != NULL)
 328             Tmp += Data->Offset[i];
 329
 330         Out[i] = (cmsFloat32Number) Tmp;
 331     }
 332
 333
 334     // Output in 0..1.0 domain
 335 }
 336
 337
 338 // Duplicate a yet-existing matrix element
 339 static
 340 void* MatrixElemDup(cmsStage* mpe)
 341 {
 342     _cmsStageMatrixData* Data = (_cmsStageMatrixData*) mpe ->Data;
 343     _cmsStageMatrixData* NewElem;
 344     cmsUInt32Number sz;
 345
 346     NewElem = (_cmsStageMatrixData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageMatrixData));
 347     if (NewElem == NULL) return NULL;
 348
 349     sz = mpe ->InputChannels * mpe ->OutputChannels;
 350
 351     NewElem ->Double = (cmsFloat64Number*) _cmsDupMem(mpe ->ContextID, Data ->Double, sz * sizeof(cmsFloat64Number)) ;
 352
 353     if (Data ->Offset)
 354         NewElem ->Offset = (cmsFloat64Number*) _cmsDupMem(mpe ->ContextID,
 355                                                 Data ->Offset, mpe -> OutputChannels * sizeof(cmsFloat64Number)) ;
 356
 357     return (void*) NewElem;
 358 }
 359
 360
 361 static
 362 void MatrixElemTypeFree(cmsStage* mpe)
 363 {
 364     _cmsStageMatrixData* Data = (_cmsStageMatrixData*) mpe ->Data;
 365     if (Data ->Double)
 366         _cmsFree(mpe ->ContextID, Data ->Double);
 367
 368     if (Data ->Offset)
 369         _cmsFree(mpe ->ContextID, Data ->Offset);
 370
 371     _cmsFree(mpe ->ContextID, mpe ->Data);
 372 }
 373
 374
 375
 376 cmsStage*  CMSEXPORT cmsStageAllocMatrix(cmsContext ContextID, cmsUInt32Number Rows, cmsUInt32Number Cols,
 377                                      const cmsFloat64Number* Matrix, const cmsFloat64Number* Offset)
 378 {
 379     cmsUInt32Number i, n;
 380     _cmsStageMatrixData* NewElem;
 381     cmsStage* NewMPE;
 382
 383     n = Rows * Cols;
 384
 385     // Check for overflow
 386     if (n == 0) return NULL;
 387     if (n >= UINT_MAX / Cols) return NULL;
 388     if (n >= UINT_MAX / Rows) return NULL;
 389     if (n < Rows || n < Cols) return NULL;
 390
 391     NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigMatrixElemType, Cols, Rows,
 392                                      EvaluateMatrix, MatrixElemDup, MatrixElemTypeFree, NULL );
 393     if (NewMPE == NULL) return NULL;
 394
 395
 396     NewElem = (_cmsStageMatrixData*) _cmsMallocZero(ContextID, sizeof(_cmsStageMatrixData));
 397     if (NewElem == NULL) return NULL;
 398
 399
 400     NewElem ->Double = (cmsFloat64Number*) _cmsCalloc(ContextID, n, sizeof(cmsFloat64Number));
 401
 402     if (NewElem->Double == NULL) {
 403         MatrixElemTypeFree(NewMPE);
 404         return NULL;
 405     }
 406
 407     for (i=0; i < n; i++) {
 408         NewElem ->Double[i] = Matrix[i];
 409     }
 410
 411
 412     if (Offset != NULL) {
 413
 414         NewElem ->Offset = (cmsFloat64Number*) _cmsCalloc(ContextID, Cols, sizeof(cmsFloat64Number));
 415         if (NewElem->Offset == NULL) {
 416            MatrixElemTypeFree(NewMPE);
 417            return NULL;
 418         }
 419
 420         for (i=0; i < Cols; i++) {
 421                 NewElem ->Offset[i] = Offset[i];
 422         }
 423
 424     }
 425
 426     NewMPE ->Data  = (void*) NewElem;
 427     return NewMPE;
 428 }
 429
 430
 431 // *************************************************************************************************
 432 // Type cmsSigCLutElemType
 433 // *************************************************************************************************
 434
 435
 436 // Evaluate in true floating point
 437 static
 438 void EvaluateCLUTfloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
 439 {
 440     _cmsStageCLutData* Data = (_cmsStageCLutData*) mpe ->Data;
 441
 442     Data -> Params ->Interpolation.LerpFloat(In, Out, Data->Params);
 443 }
 444
 445
 446 // Convert to 16 bits, evaluate, and back to floating point
 447 static
 448 void EvaluateCLUTfloatIn16(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
 449 {
 450     _cmsStageCLutData* Data = (_cmsStageCLutData*) mpe ->Data;
 451     cmsUInt16Number In16[MAX_STAGE_CHANNELS], Out16[MAX_STAGE_CHANNELS];
 452
 453     _cmsAssert(mpe ->InputChannels  <= MAX_STAGE_CHANNELS);
 454     _cmsAssert(mpe ->OutputChannels <= MAX_STAGE_CHANNELS);
 455
 456     FromFloatTo16(In, In16, mpe ->InputChannels);
 457     Data -> Params ->Interpolation.Lerp16(In16, Out16, Data->Params);
 458     From16ToFloat(Out16, Out,  mpe ->OutputChannels);
 459 }
 460
 461
 462 // Given an hypercube of b dimensions, with Dims[] number of nodes by dimension, calculate the total amount of nodes
 463 static
 464 cmsUInt32Number CubeSize(const cmsUInt32Number Dims[], cmsUInt32Number b)
 465 {
 466     cmsUInt32Number rv, dim;
 467
 468     _cmsAssert(Dims != NULL);
 469
 470     for (rv = 1; b > 0; b--) {
 471
 472         dim = Dims[b-1];
 473         if (dim == 0) return 0;  // Error
 474
 475         rv *= dim;
 476
 477         // Check for overflow
 478         if (rv > UINT_MAX / dim) return 0;
 479     }
 480
 481     return rv;
 482 }
 483
 484 static
 485 void* CLUTElemDup(cmsStage* mpe)
 486 {
 487     _cmsStageCLutData* Data = (_cmsStageCLutData*) mpe ->Data;
 488     _cmsStageCLutData* NewElem;
 489
 490
 491     NewElem = (_cmsStageCLutData*) _cmsMallocZero(mpe ->ContextID, sizeof(_cmsStageCLutData));
 492     if (NewElem == NULL) return NULL;
 493
 494     NewElem ->nEntries       = Data ->nEntries;
 495     NewElem ->HasFloatValues = Data ->HasFloatValues;
 496
 497     if (Data ->Tab.T) {
 498
 499         if (Data ->HasFloatValues)
 500             NewElem ->Tab.TFloat = (cmsFloat32Number*) _cmsDupMem(mpe ->ContextID, Data ->Tab.TFloat, Data ->nEntries * sizeof (cmsFloat32Number));
 501         else
 502             NewElem ->Tab.T = (cmsUInt16Number*) _cmsDupMem(mpe ->ContextID, Data ->Tab.T, Data ->nEntries * sizeof (cmsUInt16Number));
 503     }
 504
 505     NewElem ->Params   = _cmsComputeInterpParamsEx(mpe ->ContextID,
 506                                                    Data ->Params ->nSamples,
 507                                                    Data ->Params ->nInputs,
 508                                                    Data ->Params ->nOutputs,
 509                                                    NewElem ->Tab.T,
 510                                                    Data ->Params ->dwFlags);
 511
 512     return (void*) NewElem;
 513 }
 514
 515
 516 static
 517 void CLutElemTypeFree(cmsStage* mpe)
 518 {
 519
 520     _cmsStageCLutData* Data = (_cmsStageCLutData*) mpe ->Data;
 521
 522     // Already empty
 523     if (Data == NULL) return;
 524
 525     // This works for both types
 526     if (Data -> Tab.T)
 527         _cmsFree(mpe ->ContextID, Data -> Tab.T);
 528
 529     _cmsFreeInterpParams(Data ->Params);
 530     _cmsFree(mpe ->ContextID, mpe ->Data);
 531 }
 532
 533
 534 // Allocates a 16-bit multidimensional CLUT. This is evaluated at 16-bit precision. Table may have different
 535 // granularity on each dimension.
 536 cmsStage* CMSEXPORT cmsStageAllocCLut16bitGranular(cmsContext ContextID,
 537                                          const cmsUInt32Number clutPoints[],
 538                                          cmsUInt32Number inputChan,
 539                                          cmsUInt32Number outputChan,
 540                                          const cmsUInt16Number* Table)
 541 {
 542     cmsUInt32Number i, n;
 543     _cmsStageCLutData* NewElem;
 544     cmsStage* NewMPE;
 545
 546     NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigCLutElemType, inputChan, outputChan,
 547                                      EvaluateCLUTfloatIn16, CLUTElemDup, CLutElemTypeFree, NULL );
 548
 549     if (NewMPE == NULL) return NULL;
 550
 551     NewElem = (_cmsStageCLutData*) _cmsMallocZero(ContextID, sizeof(_cmsStageCLutData));
 552     if (NewElem == NULL) {
 553         cmsStageFree(NewMPE);
 554         return NULL;
 555     }
 556
 557     NewMPE ->Data  = (void*) NewElem;
 558
 559     NewElem -> nEntries = n = outputChan * CubeSize(clutPoints, inputChan);
 560     NewElem -> HasFloatValues = FALSE;
 561
 562     if (n == 0) {
 563         cmsStageFree(NewMPE);
 564         return NULL;
 565     }
 566
 567
 568     NewElem ->Tab.T  = (cmsUInt16Number*) _cmsCalloc(ContextID, n, sizeof(cmsUInt16Number));
 569     if (NewElem ->Tab.T == NULL) {
 570         cmsStageFree(NewMPE);
 571         return NULL;
 572     }
 573
 574     if (Table != NULL) {
 575         for (i=0; i < n; i++) {
 576             NewElem ->Tab.T[i] = Table[i];
 577         }
 578     }
 579
 580     NewElem ->Params = _cmsComputeInterpParamsEx(ContextID, clutPoints, inputChan, outputChan, NewElem ->Tab.T, CMS_LERP_FLAGS_16BITS);
 581     if (NewElem ->Params == NULL) {
 582         cmsStageFree(NewMPE);
 583         return NULL;
 584     }
 585
 586     return NewMPE;
 587 }
 588
 589 cmsStage* CMSEXPORT cmsStageAllocCLut16bit(cmsContext ContextID,
 590                                     cmsUInt32Number nGridPoints,
 591                                     cmsUInt32Number inputChan,
 592                                     cmsUInt32Number outputChan,
 593                                     const cmsUInt16Number* Table)
 594 {
 595     cmsUInt32Number Dimensions[MAX_INPUT_DIMENSIONS];
 596     int i;
 597
 598    // Our resulting LUT would be same gridpoints on all dimensions
 599     for (i=0; i < MAX_INPUT_DIMENSIONS; i++)
 600         Dimensions[i] = nGridPoints;
 601
 602
 603     return cmsStageAllocCLut16bitGranular(ContextID, Dimensions, inputChan, outputChan, Table);
 604 }
 605
 606
 607 cmsStage* CMSEXPORT cmsStageAllocCLutFloat(cmsContext ContextID,
 608                                        cmsUInt32Number nGridPoints,
 609                                        cmsUInt32Number inputChan,
 610                                        cmsUInt32Number outputChan,
 611                                        const cmsFloat32Number* Table)
 612 {
 613    cmsUInt32Number Dimensions[MAX_INPUT_DIMENSIONS];
 614    int i;
 615
 616     // Our resulting LUT would be same gridpoints on all dimensions
 617     for (i=0; i < MAX_INPUT_DIMENSIONS; i++)
 618         Dimensions[i] = nGridPoints;
 619
 620     return cmsStageAllocCLutFloatGranular(ContextID, Dimensions, inputChan, outputChan, Table);
 621 }
 622
 623
 624
 625 cmsStage* CMSEXPORT cmsStageAllocCLutFloatGranular(cmsContext ContextID, const cmsUInt32Number clutPoints[], cmsUInt32Number inputChan, cmsUInt32Number outputChan, const cmsFloat32Number* Table)
 626 {
 627     cmsUInt32Number i, n;
 628     _cmsStageCLutData* NewElem;
 629     cmsStage* NewMPE;
 630
 631     _cmsAssert(clutPoints != NULL);
 632
 633     NewMPE = _cmsStageAllocPlaceholder(ContextID, cmsSigCLutElemType, inputChan, outputChan,
 634                                              EvaluateCLUTfloat, CLUTElemDup, CLutElemTypeFree, NULL);
 635     if (NewMPE == NULL) return NULL;
 636
 637
 638     NewElem = (_cmsStageCLutData*) _cmsMallocZero(ContextID, sizeof(_cmsStageCLutData));
 639     if (NewElem == NULL) {
 640         cmsStageFree(NewMPE);
 641         return NULL;
 642     }
 643
 644     NewMPE ->Data  = (void*) NewElem;
 645
 646     // There is a potential integer overflow on conputing n and nEntries.
 647     NewElem -> nEntries = n = outputChan * CubeSize(clutPoints, inputChan);
 648     NewElem -> HasFloatValues = TRUE;
 649
 650     if (n == 0) {
 651         cmsStageFree(NewMPE);
 652         return NULL;
 653     }
 654
 655     NewElem ->Tab.TFloat  = (cmsFloat32Number*) _cmsCalloc(ContextID, n, sizeof(cmsFloat32Number));
 656     if (NewElem ->Tab.TFloat == NULL) {
 657         cmsStageFree(NewMPE);
 658         return NULL;
 659     }
 660
 661     if (Table != NULL) {
 662         for (i=0; i < n; i++) {
 663             NewElem ->Tab.TFloat[i] = Table[i];
 664         }
 665     }
 666
 667
 668
 669     NewElem ->Params = _cmsComputeInterpParamsEx(ContextID, clutPoints,  inputChan, outputChan, NewElem ->Tab.TFloat, CMS_LERP_FLAGS_FLOAT);
 670     if (NewElem ->Params == NULL) {
 671         cmsStageFree(NewMPE);
 672         return NULL;
 673     }
 674
 675
 676
 677     return NewMPE;
 678 }
 679
 680
 681 static
 682 int IdentitySampler(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void * Cargo)
 683 {
 684     int nChan = *(int*) Cargo;
 685     int i;
 686
 687     for (i=0; i < nChan; i++)
 688         Out[i] = In[i];
 689
 690     return 1;
 691 }
 692
 693 // Creates an MPE that just copies input to output
 694 cmsStage* _cmsStageAllocIdentityCLut(cmsContext ContextID, int nChan)
 695 {
 696     cmsUInt32Number Dimensions[MAX_INPUT_DIMENSIONS];
 697     cmsStage* mpe ;
 698     int i;
 699
 700     for (i=0; i < MAX_INPUT_DIMENSIONS; i++)
 701         Dimensions[i] = 2;
 702
 703     mpe = cmsStageAllocCLut16bitGranular(ContextID, Dimensions, nChan, nChan, NULL);
 704     if (mpe == NULL) return NULL;
 705
 706     if (!cmsStageSampleCLut16bit(mpe, IdentitySampler, &nChan, 0)) {
 707         cmsStageFree(mpe);
 708         return NULL;
 709     }
 710
 711     mpe ->Implements = cmsSigIdentityElemType;
 712     return mpe;
 713 }
 714
 715
 716
 717 // Quantize a value 0 <= i < MaxSamples to 0..0xffff
 718 cmsUInt16Number _cmsQuantizeVal(cmsFloat64Number i, int MaxSamples)
 719 {
 720     cmsFloat64Number x;
 721
 722     x = ((cmsFloat64Number) i * 65535.) / (cmsFloat64Number) (MaxSamples - 1);
 723     return _cmsQuickSaturateWord(x);
 724 }
 725
 726
 727 // This routine does a sweep on whole input space, and calls its callback
 728 // function on knots. returns TRUE if all ok, FALSE otherwise.
 729 cmsBool CMSEXPORT cmsStageSampleCLut16bit(cmsStage* mpe, cmsSAMPLER16 Sampler, void * Cargo, cmsUInt32Number dwFlags)
 730 {
 731     int i, t, nTotalPoints, index, rest;
 732     int nInputs, nOutputs;
 733     cmsUInt32Number* nSamples;
 734     cmsUInt16Number In[cmsMAXCHANNELS], Out[MAX_STAGE_CHANNELS];
 735     _cmsStageCLutData* clut;
 736
 737     if (mpe == NULL) return FALSE;
 738
 739     clut = (_cmsStageCLutData*) mpe->Data;
 740
 741     if (clut == NULL) return FALSE;
 742
 743     nSamples = clut->Params ->nSamples;
 744     nInputs  = clut->Params ->nInputs;
 745     nOutputs = clut->Params ->nOutputs;
 746
 747     if (nInputs >= cmsMAXCHANNELS) return FALSE;
 748     if (nOutputs >= MAX_STAGE_CHANNELS) return FALSE;
 749
 750     nTotalPoints = CubeSize(nSamples, nInputs);
 751     if (nTotalPoints == 0) return FALSE;
 752
 753     index = 0;
 754     for (i = 0; i < nTotalPoints; i++) {
 755
 756         rest = i;
 757         for (t = nInputs-1; t >=0; --t) {
 758
 759             cmsUInt32Number  Colorant = rest % nSamples[t];
 760
 761             rest /= nSamples[t];
 762
 763             In[t] = _cmsQuantizeVal(Colorant, nSamples[t]);
 764         }
 765
 766         if (clut ->Tab.T != NULL) {
 767             for (t=0; t < nOutputs; t++)
 768                 Out[t] = clut->Tab.T[index + t];
 769         }
 770
 771         if (!Sampler(In, Out, Cargo))
 772             return FALSE;
 773
 774         if (!(dwFlags & SAMPLER_INSPECT)) {
 775
 776             if (clut ->Tab.T != NULL) {
 777                 for (t=0; t < nOutputs; t++)
 778                     clut->Tab.T[index + t] = Out[t];
 779             }
 780         }
 781
 782         index += nOutputs;
 783     }
 784
 785     return TRUE;
 786 }
 787
 788 // Same as anterior, but for floting point
 789 cmsBool CMSEXPORT cmsStageSampleCLutFloat(cmsStage* mpe, cmsSAMPLERFLOAT Sampler, void * Cargo, cmsUInt32Number dwFlags)
 790 {
 791     int i, t, nTotalPoints, index, rest;
 792     int nInputs, nOutputs;
 793     cmsUInt32Number* nSamples;
 794     cmsFloat32Number In[cmsMAXCHANNELS], Out[MAX_STAGE_CHANNELS];
 795     _cmsStageCLutData* clut = (_cmsStageCLutData*) mpe->Data;
 796
 797     nSamples = clut->Params ->nSamples;
 798     nInputs  = clut->Params ->nInputs;
 799     nOutputs = clut->Params ->nOutputs;
 800
 801     if (nInputs >= cmsMAXCHANNELS) return FALSE;
 802     if (nOutputs >= MAX_STAGE_CHANNELS) return FALSE;
 803
 804     nTotalPoints = CubeSize(nSamples, nInputs);
 805     if (nTotalPoints == 0) return FALSE;
 806
 807     index = 0;
 808     for (i = 0; i < nTotalPoints; i++) {
 809
 810         rest = i;
 811         for (t = nInputs-1; t >=0; --t) {
 812
 813             cmsUInt32Number  Colorant = rest % nSamples[t];
 814
 815             rest /= nSamples[t];
 816
 817             In[t] =  (cmsFloat32Number) (_cmsQuantizeVal(Colorant, nSamples[t]) / 65535.0);
 818         }
 819
 820         if (clut ->Tab.TFloat != NULL) {
 821             for (t=0; t < nOutputs; t++)
 822                 Out[t] = clut->Tab.TFloat[index + t];
 823         }
 824
 825         if (!Sampler(In, Out, Cargo))
 826             return FALSE;
 827
 828         if (!(dwFlags & SAMPLER_INSPECT)) {
 829
 830             if (clut ->Tab.TFloat != NULL) {
 831                 for (t=0; t < nOutputs; t++)
 832                     clut->Tab.TFloat[index + t] = Out[t];
 833             }
 834         }
 835
 836         index += nOutputs;
 837     }
 838
 839     return TRUE;
 840 }
 841
 842
 843
 844 // This routine does a sweep on whole input space, and calls its callback
 845 // function on knots. returns TRUE if all ok, FALSE otherwise.
 846 cmsBool CMSEXPORT cmsSliceSpace16(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
 847                                          cmsSAMPLER16 Sampler, void * Cargo)
 848 {
 849     int i, t, nTotalPoints, rest;
 850     cmsUInt16Number In[cmsMAXCHANNELS];
 851
 852     if (nInputs >= cmsMAXCHANNELS) return FALSE;
 853
 854     nTotalPoints = CubeSize(clutPoints, nInputs);
 855     if (nTotalPoints == 0) return FALSE;
 856
 857     for (i = 0; i < nTotalPoints; i++) {
 858
 859         rest = i;
 860         for (t = nInputs-1; t >=0; --t) {
 861
 862             cmsUInt32Number  Colorant = rest % clutPoints[t];
 863
 864             rest /= clutPoints[t];
 865             In[t] = _cmsQuantizeVal(Colorant, clutPoints[t]);
 866
 867         }
 868
 869         if (!Sampler(In, NULL, Cargo))
 870             return FALSE;
 871     }
 872
 873     return TRUE;
 874 }
 875
 876 cmsInt32Number CMSEXPORT cmsSliceSpaceFloat(cmsUInt32Number nInputs, const cmsUInt32Number clutPoints[],
 877                                             cmsSAMPLERFLOAT Sampler, void * Cargo)
 878 {
 879     int i, t, nTotalPoints, rest;
 880     cmsFloat32Number In[cmsMAXCHANNELS];
 881
 882     if (nInputs >= cmsMAXCHANNELS) return FALSE;
 883
 884     nTotalPoints = CubeSize(clutPoints, nInputs);
 885     if (nTotalPoints == 0) return FALSE;
 886
 887     for (i = 0; i < nTotalPoints; i++) {
 888
 889         rest = i;
 890         for (t = nInputs-1; t >=0; --t) {
 891
 892             cmsUInt32Number  Colorant = rest % clutPoints[t];
 893
 894             rest /= clutPoints[t];
 895             In[t] =  (cmsFloat32Number) (_cmsQuantizeVal(Colorant, clutPoints[t]) / 65535.0);
 896
 897         }
 898
 899         if (!Sampler(In, NULL, Cargo))
 900             return FALSE;
 901     }
 902
 903     return TRUE;
 904 }
 905
 906 // ********************************************************************************
 907 // Type cmsSigLab2XYZElemType
 908 // ********************************************************************************
 909
 910
 911 static
 912 void EvaluateLab2XYZ(const cmsFloat32Number In[],
 913                      cmsFloat32Number Out[],
 914                      const cmsStage *mpe)
 915 {
 916     cmsCIELab Lab;
 917     cmsCIEXYZ XYZ;
 918     const cmsFloat64Number XYZadj = MAX_ENCODEABLE_XYZ;
 919
 920     // V4 rules
 921     Lab.L = In[0] * 100.0;
 922     Lab.a = In[1] * 255.0 - 128.0;
 923     Lab.b = In[2] * 255.0 - 128.0;
 924
 925     cmsLab2XYZ(NULL, &XYZ, &Lab);
 926
 927     // From XYZ, range 0..19997 to 0..1.0, note that 1.99997 comes from 0xffff
 928     // encoded as 1.15 fixed point, so 1 + (32767.0 / 32768.0)
 929
 930     Out[0] = (cmsFloat32Number) ((cmsFloat64Number) XYZ.X / XYZadj);
 931     Out[1] = (cmsFloat32Number) ((cmsFloat64Number) XYZ.Y / XYZadj);
 932     Out[2] = (cmsFloat32Number) ((cmsFloat64Number) XYZ.Z / XYZadj);
 933     return;
 934
 935     cmsUNUSED_PARAMETER(mpe);
 936 }
 937
 938
 939 // No dup or free routines needed, as the structure has no pointers in it.
 940 cmsStage* _cmsStageAllocLab2XYZ(cmsContext ContextID)
 941 {
 942     return _cmsStageAllocPlaceholder(ContextID, cmsSigLab2XYZElemType, 3, 3, EvaluateLab2XYZ, NULL, NULL, NULL);
 943 }
 944
 945 // ********************************************************************************
 946
 947 // v2 L=100 is supposed to be placed on 0xFF00. There is no reasonable
 948 // number of gridpoints that would make exact match. However, a prelinearization
 949 // of 258 entries, would map 0xFF00 exactly on entry 257, and this is good to avoid scum dot.
 950 // Almost all what we need but unfortunately, the rest of entries should be scaled by
 951 // (255*257/256) and this is not exact.
 952
 953 cmsStage* _cmsStageAllocLabV2ToV4curves(cmsContext ContextID)
 954 {
 955     cmsStage* mpe;
 956     cmsToneCurve* LabTable[3];
 957     int i, j;
 958
 959     LabTable[0] = cmsBuildTabulatedToneCurve16(ContextID, 258, NULL);
 960     LabTable[1] = cmsBuildTabulatedToneCurve16(ContextID, 258, NULL);
 961     LabTable[2] = cmsBuildTabulatedToneCurve16(ContextID, 258, NULL);
 962
 963     for (j=0; j < 3; j++) {
 964
 965         if (LabTable[j] == NULL) {
 966             cmsFreeToneCurveTriple(LabTable);
 967             return NULL;
 968         }
 969
 970         // We need to map * (0xffff / 0xff00), thats same as (257 / 256)
 971         // So we can use 258-entry tables to do the trick (i / 257) * (255 * 257) * (257 / 256);
 972         for (i=0; i < 257; i++)  {
 973
 974             LabTable[j]->Table16[i] = (cmsUInt16Number) ((i * 0xffff + 0x80) >> 8);
 975         }
 976
 977         LabTable[j] ->Table16[257] = 0xffff;
 978     }
 979
 980     mpe = cmsStageAllocToneCurves(ContextID, 3, LabTable);
 981     cmsFreeToneCurveTriple(LabTable);
 982
 983     mpe ->Implements = cmsSigLabV2toV4;
 984     return mpe;
 985 }
 986
 987 // ********************************************************************************
 988
 989 // Matrix-based conversion, which is more accurate, but slower and cannot properly be saved in devicelink profiles
 990 cmsStage* _cmsStageAllocLabV2ToV4(cmsContext ContextID)
 991 {
 992     static const cmsFloat64Number V2ToV4[] = { 65535.0/65280.0, 0, 0,
 993                                      0, 65535.0/65280.0, 0,
 994                                      0, 0, 65535.0/65280.0
 995                                      };
 996
 997     cmsStage *mpe = cmsStageAllocMatrix(ContextID, 3, 3, V2ToV4, NULL);
 998
 999     if (mpe == NULL) return mpe;
1000     mpe ->Implements = cmsSigLabV2toV4;
1001     return mpe;
1002 }
1003
1004
1005 // Reverse direction
1006 cmsStage* _cmsStageAllocLabV4ToV2(cmsContext ContextID)
1007 {
1008     static const cmsFloat64Number V4ToV2[] = { 65280.0/65535.0, 0, 0,
1009                                      0, 65280.0/65535.0, 0,
1010                                      0, 0, 65280.0/65535.0
1011                                      };
1012
1013      cmsStage *mpe = cmsStageAllocMatrix(ContextID, 3, 3, V4ToV2, NULL);
1014
1015     if (mpe == NULL) return mpe;
1016     mpe ->Implements = cmsSigLabV4toV2;
1017     return mpe;
1018 }
1019
1020
1021 // To Lab to float. Note that the MPE gives numbers in normal Lab range
1022 // and we need 0..1.0 range for the formatters
1023 // L* : 0...100 => 0...1.0  (L* / 100)
1024 // ab* : -128..+127 to 0..1  ((ab* + 128) / 255)
1025
1026 cmsStage* _cmsStageNormalizeFromLabFloat(cmsContext ContextID)
1027 {
1028     static const cmsFloat64Number a1[] = {
1029         1.0/100.0, 0, 0,
1030         0, 1.0/255.0, 0,
1031         0, 0, 1.0/255.0
1032     };
1033
1034     static const cmsFloat64Number o1[] = {
1035         0,
1036         128.0/255.0,
1037         128.0/255.0
1038     };
1039
1040     return cmsStageAllocMatrix(ContextID, 3, 3, a1, o1);
1041 }
1042
1043 cmsStage* _cmsStageNormalizeFromXyzFloat(cmsContext ContextID)
1044 {
1045     static const cmsFloat64Number a1[] = {
1046         1.0/100.0, 0, 0,
1047         0, 1.0/100.0, 0,
1048         0, 0, 1.0/100.0
1049     };
1050
1051
1052     return cmsStageAllocMatrix(ContextID, 3, 3, a1, NULL);
1053 }
1054
1055 cmsStage* _cmsStageNormalizeToLabFloat(cmsContext ContextID)
1056 {
1057     static const cmsFloat64Number a1[] = {
1058         100.0, 0, 0,
1059         0, 255.0, 0,
1060         0, 0, 255.0
1061     };
1062
1063     static const cmsFloat64Number o1[] = {
1064         0,
1065         -128.0,
1066         -128.0
1067     };
1068
1069     return cmsStageAllocMatrix(ContextID, 3, 3, a1, o1);
1070 }
1071
1072 cmsStage* _cmsStageNormalizeToXyzFloat(cmsContext ContextID)
1073 {
1074     static const cmsFloat64Number a1[] = {
1075         100.0, 0, 0,
1076         0, 100.0, 0,
1077         0, 0, 100.0
1078     };
1079
1080     return cmsStageAllocMatrix(ContextID, 3, 3, a1, NULL);
1081 }
1082
1083
1084
1085 // ********************************************************************************
1086 // Type cmsSigXYZ2LabElemType
1087 // ********************************************************************************
1088
1089 static
1090 void EvaluateXYZ2Lab(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsStage *mpe)
1091 {
1092     cmsCIELab Lab;
1093     cmsCIEXYZ XYZ;
1094     const cmsFloat64Number XYZadj = MAX_ENCODEABLE_XYZ;
1095
1096     // From 0..1.0 to XYZ
1097
1098     XYZ.X = In[0] * XYZadj;
1099     XYZ.Y = In[1] * XYZadj;
1100     XYZ.Z = In[2] * XYZadj;
1101
1102     cmsXYZ2Lab(NULL, &Lab, &XYZ);
1103
1104     // From V4 Lab to 0..1.0
1105
1106     Out[0] = (cmsFloat32Number) (Lab.L / 100.0);
1107     Out[1] = (cmsFloat32Number) ((Lab.a + 128.0) / 255.0);
1108     Out[2] = (cmsFloat32Number) ((Lab.b + 128.0) / 255.0);
1109     return;
1110
1111     cmsUNUSED_PARAMETER(mpe);
1112 }
1113
1114 cmsStage* _cmsStageAllocXYZ2Lab(cmsContext ContextID)
1115 {
1116     return _cmsStageAllocPlaceholder(ContextID, cmsSigXYZ2LabElemType, 3, 3, EvaluateXYZ2Lab, NULL, NULL, NULL);
1117
1118 }
1119
1120 // ********************************************************************************
1121
1122 // For v4, S-Shaped curves are placed in a/b axis to increase resolution near gray
1123
1124 cmsStage* _cmsStageAllocLabPrelin(cmsContext ContextID)
1125 {
1126     cmsToneCurve* LabTable[3];
1127     cmsFloat64Number Params[1] =  {2.4} ;
1128
1129     LabTable[0] = cmsBuildGamma(ContextID, 1.0);
1130     LabTable[1] = cmsBuildParametricToneCurve(ContextID, 108, Params);
1131     LabTable[2] = cmsBuildParametricToneCurve(ContextID, 108, Params);
1132
1133     return cmsStageAllocToneCurves(ContextID, 3, LabTable);
1134 }
1135
1136
1137 // Free a single MPE
1138 void CMSEXPORT cmsStageFree(cmsStage* mpe)
1139 {
1140     if (mpe ->FreePtr)
1141         mpe ->FreePtr(mpe);
1142
1143     _cmsFree(mpe ->ContextID, mpe);
1144 }
1145
1146
1147 cmsUInt32Number  CMSEXPORT cmsStageInputChannels(const cmsStage* mpe)
1148 {
1149     return mpe ->InputChannels;
1150 }
1151
1152 cmsUInt32Number  CMSEXPORT cmsStageOutputChannels(const cmsStage* mpe)
1153 {
1154     return mpe ->OutputChannels;
1155 }
1156
1157 cmsStageSignature CMSEXPORT cmsStageType(const cmsStage* mpe)
1158 {
1159     return mpe -> Type;
1160 }
1161
1162 void* CMSEXPORT cmsStageData(const cmsStage* mpe)
1163 {
1164     return mpe -> Data;
1165 }
1166
1167 cmsStage*  CMSEXPORT cmsStageNext(const cmsStage* mpe)
1168 {
1169     return mpe -> Next;
1170 }
1171
1172
1173 // Duplicates an MPE
1174 cmsStage* CMSEXPORT cmsStageDup(cmsStage* mpe)
1175 {
1176     cmsStage* NewMPE;
1177
1178     if (mpe == NULL) return NULL;
1179     NewMPE = _cmsStageAllocPlaceholder(mpe ->ContextID,
1180                                      mpe ->Type,
1181                                      mpe ->InputChannels,
1182                                      mpe ->OutputChannels,
1183                                      mpe ->EvalPtr,
1184                                      mpe ->DupElemPtr,
1185                                      mpe ->FreePtr,
1186                                      NULL);
1187     if (NewMPE == NULL) return NULL;
1188
1189     NewMPE ->Implements     = mpe ->Implements;
1190
1191     if (mpe ->DupElemPtr)
1192         NewMPE ->Data       = mpe ->DupElemPtr(mpe);
1193     else
1194         NewMPE ->Data       = NULL;
1195
1196     return NewMPE;
1197 }
1198
1199
1200 // ***********************************************************************************************************
1201
1202 // This function sets up the channel count
1203
1204 static
1205 void BlessLUT(cmsPipeline* lut)
1206 {
1207     // We can set the input/ouput channels only if we have elements.
1208     if (lut ->Elements != NULL) {
1209
1210         cmsStage *First, *Last;
1211
1212         First  = cmsPipelineGetPtrToFirstStage(lut);
1213         Last   = cmsPipelineGetPtrToLastStage(lut);
1214
1215         if (First != NULL)lut ->InputChannels = First ->InputChannels;
1216         if (Last != NULL) lut ->OutputChannels = Last ->OutputChannels;
1217     }
1218 }
1219
1220
1221 // Default to evaluate the LUT on 16 bit-basis. Precision is retained.
1222 static
1223 void _LUTeval16(register const cmsUInt16Number In[], register cmsUInt16Number Out[],  register const void* D)
1224 {
1225     cmsPipeline* lut = (cmsPipeline*) D;
1226     cmsStage *mpe;
1227     cmsFloat32Number Storage[2][MAX_STAGE_CHANNELS];
1228     int Phase = 0, NextPhase;
1229
1230     From16ToFloat(In, &Storage[Phase][0], lut ->InputChannels);
1231
1232     for (mpe = lut ->Elements;
1233          mpe != NULL;
1234          mpe = mpe ->Next) {
1235
1236              NextPhase = Phase ^ 1;
1237              mpe ->EvalPtr(&Storage[Phase][0], &Storage[NextPhase][0], mpe);
1238              Phase = NextPhase;
1239     }
1240
1241
1242     FromFloatTo16(&Storage[Phase][0], Out, lut ->OutputChannels);
1243 }
1244
1245
1246
1247 // Does evaluate the LUT on cmsFloat32Number-basis.
1248 static
1249 void _LUTevalFloat(register const cmsFloat32Number In[], register cmsFloat32Number Out[], const void* D)
1250 {
1251     cmsPipeline* lut = (cmsPipeline*) D;
1252     cmsStage *mpe;
1253     cmsFloat32Number Storage[2][MAX_STAGE_CHANNELS];
1254     int Phase = 0, NextPhase;
1255
1256     memmove(&Storage[Phase][0], In, lut ->InputChannels  * sizeof(cmsFloat32Number));
1257
1258     for (mpe = lut ->Elements;
1259          mpe != NULL;
1260          mpe = mpe ->Next) {
1261
1262               NextPhase = Phase ^ 1;
1263               mpe ->EvalPtr(&Storage[Phase][0], &Storage[NextPhase][0], mpe);
1264               Phase = NextPhase;
1265     }
1266
1267     memmove(Out, &Storage[Phase][0], lut ->OutputChannels * sizeof(cmsFloat32Number));
1268 }
1269
1270
1271
1272
1273 // LUT Creation & Destruction
1274
1275 cmsPipeline* CMSEXPORT cmsPipelineAlloc(cmsContext ContextID, cmsUInt32Number InputChannels, cmsUInt32Number OutputChannels)
1276 {
1277        cmsPipeline* NewLUT;
1278
1279        if (InputChannels >= cmsMAXCHANNELS ||
1280            OutputChannels >= cmsMAXCHANNELS) return NULL;
1281
1282        NewLUT = (cmsPipeline*) _cmsMallocZero(ContextID, sizeof(cmsPipeline));
1283        if (NewLUT == NULL) return NULL;
1284
1285
1286        NewLUT -> InputChannels  = InputChannels;
1287        NewLUT -> OutputChannels = OutputChannels;
1288
1289        NewLUT ->Eval16Fn    = _LUTeval16;
1290        NewLUT ->EvalFloatFn = _LUTevalFloat;
1291        NewLUT ->DupDataFn   = NULL;
1292        NewLUT ->FreeDataFn  = NULL;
1293        NewLUT ->Data        = NewLUT;
1294        NewLUT ->ContextID   = ContextID;
1295
1296        BlessLUT(NewLUT);
1297
1298        return NewLUT;
1299 }
1300
1301
1302 cmsUInt32Number CMSEXPORT cmsPipelineInputChannels(const cmsPipeline* lut)
1303 {
1304     return lut ->InputChannels;
1305 }
1306
1307 cmsUInt32Number CMSEXPORT cmsPipelineOutputChannels(const cmsPipeline* lut)
1308 {
1309     return lut ->OutputChannels;
1310 }
1311
1312 // Free a profile elements LUT
1313 void CMSEXPORT cmsPipelineFree(cmsPipeline* lut)
1314 {
1315     cmsStage *mpe, *Next;
1316
1317     if (lut == NULL) return;
1318
1319     for (mpe = lut ->Elements;
1320         mpe != NULL;
1321         mpe = Next) {
1322
1323             Next = mpe ->Next;
1324             cmsStageFree(mpe);
1325     }
1326
1327     if (lut ->FreeDataFn) lut ->FreeDataFn(lut ->ContextID, lut ->Data);
1328
1329     _cmsFree(lut ->ContextID, lut);
1330 }
1331
1332
1333 // Default to evaluate the LUT on 16 bit-basis.
1334 void CMSEXPORT cmsPipelineEval16(const cmsUInt16Number In[], cmsUInt16Number Out[],  const cmsPipeline* lut)
1335 {
1336     lut ->Eval16Fn(In, Out, lut->Data);
1337 }
1338
1339
1340 // Does evaluate the LUT on cmsFloat32Number-basis.
1341 void CMSEXPORT cmsPipelineEvalFloat(const cmsFloat32Number In[], cmsFloat32Number Out[], const cmsPipeline* lut)
1342 {
1343     lut ->EvalFloatFn(In, Out, lut);
1344 }
1345
1346
1347
1348 // Duplicates a LUT
1349 cmsPipeline* CMSEXPORT cmsPipelineDup(const cmsPipeline* lut)
1350 {
1351     cmsPipeline* NewLUT;
1352     cmsStage *NewMPE, *Anterior = NULL, *mpe;
1353     cmsBool  First = TRUE;
1354
1355     if (lut == NULL) return NULL;
1356
1357     NewLUT = cmsPipelineAlloc(lut ->ContextID, lut ->InputChannels, lut ->OutputChannels);
1358     for (mpe = lut ->Elements;
1359          mpe != NULL;
1360          mpe = mpe ->Next) {
1361
1362              NewMPE = cmsStageDup(mpe);
1363
1364              if (NewMPE == NULL) {
1365                  cmsPipelineFree(NewLUT);
1366                  return NULL;
1367              }
1368
1369              if (First) {
1370                  NewLUT ->Elements = NewMPE;
1371                  First = FALSE;
1372              }
1373              else {
1374                 Anterior ->Next = NewMPE;
1375              }
1376
1377             Anterior = NewMPE;
1378     }
1379
1380     NewLUT ->DupDataFn  = lut ->DupDataFn;
1381     NewLUT ->FreeDataFn = lut ->FreeDataFn;
1382
1383     if (NewLUT ->DupDataFn != NULL)
1384         NewLUT ->Data = NewLUT ->DupDataFn(lut ->ContextID, lut->Data);
1385
1386
1387     NewLUT ->SaveAs8Bits    = lut ->SaveAs8Bits;
1388
1389     BlessLUT(NewLUT);
1390     return NewLUT;
1391 }
1392
1393
1394 void CMSEXPORT cmsPipelineInsertStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage* mpe)
1395 {
1396     cmsStage* Anterior = NULL, *pt;
1397
1398     _cmsAssert(lut != NULL);
1399     _cmsAssert(mpe != NULL);
1400
1401     switch (loc) {
1402
1403         case cmsAT_BEGIN:
1404             mpe ->Next = lut ->Elements;
1405             lut ->Elements = mpe;
1406             break;
1407
1408         case cmsAT_END:
1409
1410             if (lut ->Elements == NULL)
1411                 lut ->Elements = mpe;
1412             else {
1413
1414                 for (pt = lut ->Elements;
1415                      pt != NULL;
1416                      pt = pt -> Next) Anterior = pt;
1417
1418                 Anterior ->Next = mpe;
1419                 mpe ->Next = NULL;
1420             }
1421             break;
1422         default:;
1423     }
1424
1425     BlessLUT(lut);
1426 }
1427
1428 // Unlink an element and return the pointer to it
1429 void CMSEXPORT cmsPipelineUnlinkStage(cmsPipeline* lut, cmsStageLoc loc, cmsStage** mpe)
1430 {
1431     cmsStage *Anterior, *pt, *Last;
1432     cmsStage *Unlinked = NULL;
1433
1434
1435     // If empty LUT, there is nothing to remove
1436     if (lut ->Elements == NULL) {
1437         if (mpe) *mpe = NULL;
1438         return;
1439     }
1440
1441     // On depending on the strategy...
1442     switch (loc) {
1443
1444         case cmsAT_BEGIN:
1445             {
1446                 cmsStage* elem = lut ->Elements;
1447
1448                 lut ->Elements = elem -> Next;
1449                 elem ->Next = NULL;
1450                 Unlinked = elem;
1451
1452             }
1453             break;
1454
1455         case cmsAT_END:
1456             Anterior = Last = NULL;
1457             for (pt = lut ->Elements;
1458                 pt != NULL;
1459                 pt = pt -> Next) {
1460                     Anterior = Last;
1461                     Last = pt;
1462             }
1463
1464             Unlinked = Last;  // Next already points to NULL
1465
1466             // Truncate the chain
1467             if (Anterior)
1468                 Anterior ->Next = NULL;
1469             else
1470                 lut ->Elements = NULL;
1471             break;
1472         default:;
1473     }
1474
1475     if (mpe)
1476         *mpe = Unlinked;
1477     else
1478         cmsStageFree(Unlinked);
1479
1480     BlessLUT(lut);
1481 }
1482
1483
1484 // Concatenate two LUT into a new single one
1485 cmsBool  CMSEXPORT cmsPipelineCat(cmsPipeline* l1, const cmsPipeline* l2)
1486 {
1487     cmsStage* mpe, *NewMPE;
1488
1489     // If both LUTS does not have elements, we need to inherit
1490     // the number of channels
1491     if (l1 ->Elements == NULL && l2 ->Elements == NULL) {
1492         l1 ->InputChannels  = l2 ->InputChannels;
1493         l1 ->OutputChannels = l2 ->OutputChannels;
1494     }
1495
1496     // Cat second
1497     for (mpe = l2 ->Elements;
1498          mpe != NULL;
1499          mpe = mpe ->Next) {
1500
1501             // We have to dup each element
1502              NewMPE = cmsStageDup(mpe);
1503
1504              if (NewMPE == NULL) {
1505                  return FALSE;
1506              }
1507
1508              cmsPipelineInsertStage(l1, cmsAT_END, NewMPE);
1509     }
1510
1511   BlessLUT(l1);
1512   return TRUE;
1513 }
1514
1515
1516 cmsBool CMSEXPORT cmsPipelineSetSaveAs8bitsFlag(cmsPipeline* lut, cmsBool On)
1517 {
1518     cmsBool Anterior = lut ->SaveAs8Bits;
1519
1520     lut ->SaveAs8Bits = On;
1521     return Anterior;
1522 }
1523
1524
1525 cmsStage* CMSEXPORT cmsPipelineGetPtrToFirstStage(const cmsPipeline* lut)
1526 {
1527     return lut ->Elements;
1528 }
1529
1530 cmsStage* CMSEXPORT cmsPipelineGetPtrToLastStage(const cmsPipeline* lut)
1531 {
1532     cmsStage *mpe, *Anterior = NULL;
1533
1534     for (mpe = lut ->Elements; mpe != NULL; mpe = mpe ->Next)
1535         Anterior = mpe;
1536
1537     return Anterior;
1538 }
1539
1540 cmsUInt32Number CMSEXPORT cmsPipelineStageCount(const cmsPipeline* lut)
1541 {
1542     cmsStage *mpe;
1543     cmsUInt32Number n;
1544
1545     for (n=0, mpe = lut ->Elements; mpe != NULL; mpe = mpe ->Next)
1546             n++;
1547
1548     return n;
1549 }
1550
1551 // This function may be used to set the optional evaluator and a block of private data. If private data is being used, an optional
1552 // duplicator and free functions should also be specified in order to duplicate the LUT construct. Use NULL to inhibit such functionality.
1553 void CMSEXPORT _cmsPipelineSetOptimizationParameters(cmsPipeline* Lut,
1554                                         _cmsOPTeval16Fn Eval16,
1555                                         void* PrivateData,
1556                                         _cmsOPTfreeDataFn FreePrivateDataFn,
1557                                         _cmsOPTdupDataFn DupPrivateDataFn)
1558 {
1559
1560     Lut ->Eval16Fn = Eval16;
1561     Lut ->DupDataFn = DupPrivateDataFn;
1562     Lut ->FreeDataFn = FreePrivateDataFn;
1563     Lut ->Data = PrivateData;
1564 }
1565
1566
1567 // ----------------------------------------------------------- Reverse interpolation
1568 // Here's how it goes. The derivative Df(x) of the function f is the linear
1569 // transformation that best approximates f near the point x. It can be represented
1570 // by a matrix A whose entries are the partial derivatives of the components of f
// with respect to all the coordinates. This is known as the Jacobian
1572 //
1573 // The best linear approximation to f is given by the matrix equation:
1574 //
1575 // y-y0 = A (x-x0)
1576 //
1577 // So, if x0 is a good "guess" for the zero of f, then solving for the zero of this
1578 // linear approximation will give a "better guess" for the zero of f. Thus let y=0,
1579 // and since y0=f(x0) one can solve the above equation for x. This leads to the
1580 // Newton's method formula:
1581 //
1582 // xn+1 = xn - A-1 f(xn)
1583 //
1584 // where xn+1 denotes the (n+1)-st guess, obtained from the n-th guess xn in the
1585 // fashion described above. Iterating this will give better and better approximations
1586 // if you have a "good enough" initial guess.
1587
1588
1589 #define JACOBIAN_EPSILON            0.001f
1590 #define INVERSION_MAX_ITERATIONS    30
1591
1592 // Increment with reflexion on boundary
1593 static
1594 void IncDelta(cmsFloat32Number *Val)
1595 {
1596     if (*Val < (1.0 - JACOBIAN_EPSILON))
1597
1598         *Val += JACOBIAN_EPSILON;
1599
1600     else
1601         *Val -= JACOBIAN_EPSILON;
1602
1603 }
1604
1605
1606
1607 // Euclidean distance between two vectors of n elements each one
1608 static
1609 cmsFloat32Number EuclideanDistance(cmsFloat32Number a[], cmsFloat32Number b[], int n)
1610 {
1611     cmsFloat32Number sum = 0;
1612     int i;
1613
1614     for (i=0; i < n; i++) {
1615         cmsFloat32Number dif = b[i] - a[i];
1616         sum +=  dif * dif;
1617     }
1618
1619     return sqrtf(sum);
1620 }
1621
1622
1623 // Evaluate a LUT in reverse direction. It only searches on 3->3 LUT. Uses Newton method
1624 //
1625 // x1 <- x - [J(x)]^-1 * f(x)
1626 //
1627 // lut: The LUT on where to do the search
1628 // Target: LabK, 3 values of Lab plus destination K which is fixed
1629 // Result: The obtained CMYK
1630 // Hint:   Location where begin the search
1631
1632 cmsBool CMSEXPORT cmsPipelineEvalReverseFloat(cmsFloat32Number Target[],
1633                                               cmsFloat32Number Result[],
1634                                               cmsFloat32Number Hint[],
1635                                               const cmsPipeline* lut)
1636 {
1637     cmsUInt32Number  i, j;
1638     cmsFloat64Number  error, LastError = 1E20;
1639     cmsFloat32Number  fx[4], x[4], xd[4], fxd[4];
1640     cmsVEC3 tmp, tmp2;
1641     cmsMAT3 Jacobian;
1642     cmsFloat64Number LastResult[4];
1643
1644
1645     // Only 3->3 and 4->3 are supported
1646     if (lut ->InputChannels != 3 && lut ->InputChannels != 4) return FALSE;
1647     if (lut ->OutputChannels != 3) return FALSE;
1648
1649     // Mark result of -1
1650     LastResult[0] = LastResult[1] = LastResult[2] = -1.0f;
1651
1652     // Take the hint as starting point if specified
1653     if (Hint == NULL) {
1654
1655         // Begin at any point, we choose 1/3 of CMY axis
1656         x[0] = x[1] = x[2] = 0.3f;
1657     }
1658     else {
1659
1660         // Only copy 3 channels from hint...
1661         for (j=0; j < 3; j++)
1662             x[j] = Hint[j];
1663     }
1664
1665     // If Lut is 4-dimensions, then grab target[3], which is fixed
1666     if (lut ->InputChannels == 4) {
1667         x[3] = Target[3];
1668     }
1669     else x[3] = 0; // To keep lint happy
1670
1671
1672     // Iterate
1673     for (i = 0; i < INVERSION_MAX_ITERATIONS; i++) {
1674
1675         // Get beginning fx
1676         cmsPipelineEvalFloat(x, fx, lut);
1677
1678         // Compute error
1679         error = EuclideanDistance(fx, Target, 3);
1680
1681         // If not convergent, return last safe value
1682         if (error >= LastError)
1683             break;
1684
1685         // Keep latest values
1686         LastError     = error;
1687         for (j=0; j < lut ->InputChannels; j++)
1688                 Result[j] = x[j];
1689
1690         // Found an exact match?
1691         if (error <= 0)
1692             break;
1693
1694         // Obtain slope (the Jacobian)
1695         for (j = 0; j < 3; j++) {
1696
1697             xd[0] = x[0];
1698             xd[1] = x[1];
1699             xd[2] = x[2];
1700             xd[3] = x[3];  // Keep fixed channel
1701
1702             IncDelta(&xd[j]);
1703
1704             cmsPipelineEvalFloat(xd, fxd, lut);
1705
1706             Jacobian.v[0].n[j] = ((fxd[0] - fx[0]) / JACOBIAN_EPSILON);
1707             Jacobian.v[1].n[j] = ((fxd[1] - fx[1]) / JACOBIAN_EPSILON);
1708             Jacobian.v[2].n[j] = ((fxd[2] - fx[2]) / JACOBIAN_EPSILON);
1709         }
1710
1711         // Solve system
1712         tmp2.n[0] = fx[0] - Target[0];
1713         tmp2.n[1] = fx[1] - Target[1];
1714         tmp2.n[2] = fx[2] - Target[2];
1715
1716         if (!_cmsMAT3solve(&tmp, &Jacobian, &tmp2))
1717             return FALSE;
1718
1719         // Move our guess
1720         x[0] -= (cmsFloat32Number) tmp.n[0];
1721         x[1] -= (cmsFloat32Number) tmp.n[1];
1722         x[2] -= (cmsFloat32Number) tmp.n[2];
1723
1724         // Some clipping....
1725         for (j=0; j < 3; j++) {
1726             if (x[j] < 0) x[j] = 0;
1727             else
1728                 if (x[j] > 1.0) x[j] = 1.0;
1729         }
1730     }
1731
1732     return TRUE;
1733 }
1734