From 75b47ec45d8076732b3f30c1b3ebc3d35e6ed7fe Mon Sep 17 00:00:00 2001
From: Pekka Paalanen <ppaalanen@gmail.com>
Date: Mon, 16 Jan 2012 14:27:00 +0200
Subject: [PATCH] compositor: implement inverse matrix transformation

Implement 4x4 matrix inversion based on LU-decomposition with partial
pivoting.

Instead of simply computing the inverse matrix explicitly, introduce the
type struct weston_inverse_matrix for storing the LU-decomposition and
the permutation from pivoting. Using doubles, this struct has greater
precision than struct weston_matrix.

If you need only few (less than 5, presumably) multiplications with the
inverse matrix, is it cheaper to use weston_inverse_matrix, and not
compute the inverse matrix explicitly into a weston_matrix.

Signed-off-by: Pekka Paalanen <ppaalanen@gmail.com>
---
 src/compositor.c |   2 +-
 src/compositor.h |   2 +-
 src/matrix.c     | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/matrix.h     |  11 ++++-
 4 files changed, 133 insertions(+), 5 deletions(-)

diff --git a/src/compositor.c b/src/compositor.c
index 69ad60e..cb19f34 100644
--- a/src/compositor.c
+++ b/src/compositor.c
@@ -545,7 +545,7 @@ static void
 weston_surface_update_transform(struct weston_surface *surface)
 {
 	struct weston_matrix *matrix = &surface->transform.matrix;
-	struct weston_matrix *inverse = &surface->transform.inverse;
+	struct weston_inverse_matrix *inverse = &surface->transform.inverse;
 	struct weston_transform *tform;
 
 	if (!surface->transform.dirty)
diff --git a/src/compositor.h b/src/compositor.h
index 5312665..2064d67 100644
--- a/src/compositor.h
+++ b/src/compositor.h
@@ -230,7 +230,7 @@ struct weston_surface {
 
 		/* derived state, set up by weston_surface_update_transform */
 		struct weston_matrix matrix;
-		struct weston_matrix inverse;
+		struct weston_inverse_matrix inverse;
 		int enabled;
 	} transform;
 
diff --git a/src/matrix.c b/src/matrix.c
index 941e958..06cdc5e 100644
--- a/src/matrix.c
+++ b/src/matrix.c
@@ -1,5 +1,6 @@
 /*
  * Copyright Â© 2011 Intel Corporation
+ * Copyright Â© 2012 Collabora, Ltd.
  *
  * Permission to use, copy, modify, distribute, and sell this software and
  * its documentation for any purpose is hereby granted without fee, provided
@@ -22,6 +23,7 @@
 
 #include <string.h>
 #include <stdlib.h>
+#include <math.h>
 #include <GLES2/gl2.h>
 #include <wayland-server.h>
 
@@ -101,9 +103,126 @@ weston_matrix_transform(struct weston_matrix *matrix, struct weston_vector *v)
 	*v = t;
 }
 
+static inline void
+swap_rows(double *a, double *b)
+{
+	unsigned k;
+	double tmp;
+
+	for (k = 0; k < 13; k += 4) {
+		tmp = a[k];
+		a[k] = b[k];
+		b[k] = tmp;
+	}
+}
+
+static inline unsigned
+find_pivot(double *column, unsigned k)
+{
+	unsigned p = k;
+	for (++k; k < 4; ++k)
+		if (fabs(column[p]) < fabs(column[k]))
+			p = k;
+
+	return p;
+}
+
+/*
+ * reference: Gene H. Golub and Charles F. van Loan. Matrix computations.
+ * 3rd ed. The Johns Hopkins University Press. 1996.
+ * LU decomposition, forward and back substitution: Chapter 3.
+ */
+
 WL_EXPORT int
-weston_matrix_invert(struct weston_matrix *inverse,
+weston_matrix_invert(struct weston_inverse_matrix *inverse,
 		     const struct weston_matrix *matrix)
 {
-	return -1; /* fail */
+	double A[16];
+	unsigned p[4] = { 0, 1, 2, 3 };
+	unsigned i, j, k;
+	unsigned pivot;
+	double pv;
+
+	for (i = 16; i--; )
+		A[i] = matrix->d[i];
+
+	/* LU decomposition with partial pivoting */
+	for (k = 0; k < 4; ++k) {
+		pivot = find_pivot(&A[k * 4], k);
+		if (pivot != k) {
+			unsigned tmp = p[k];
+			p[k] = p[pivot];
+			p[pivot] = tmp;
+			swap_rows(&A[k], &A[pivot]);
+		}
+
+		pv = A[k * 4 + k];
+		if (fabs(pv) < 1e-9)
+			return -1; /* zero pivot, not invertible */
+
+		for (i = k + 1; i < 4; ++i) {
+			A[i + k * 4] /= pv;
+
+			for (j = k + 1; j < 4; ++j)
+				A[i + j * 4] -= A[i + k * 4] * A[k + j * 4];
+		}
+	}
+
+	memcpy(inverse->LU, A, sizeof(A));
+	memcpy(inverse->p, p, sizeof(p));
+	return 0;
+}
+
+WL_EXPORT void
+weston_matrix_inverse_transform(struct weston_inverse_matrix *inverse,
+				struct weston_vector *v)
+{
+	/* Solve A * x = v, when we have P * A = L * U.
+	 * P * A * x = P * v  =>  L * U * x = P * v
+	 * Let U * x = b, then L * b = P * v.
+	 */
+	unsigned *p = inverse->p;
+	double *LU = inverse->LU;
+	double b[4];
+	unsigned k, j;
+
+	/* Forward substitution, column version, solves L * b = P * v */
+	/* The diagonal of L is all ones, and not explicitly stored. */
+	b[0] = v->f[p[0]];
+	b[1] = (double)v->f[p[1]] - b[0] * LU[1 + 0 * 4];
+	b[2] = (double)v->f[p[2]] - b[0] * LU[2 + 0 * 4];
+	b[3] = (double)v->f[p[3]] - b[0] * LU[3 + 0 * 4];
+	b[2] -= b[1] * LU[2 + 1 * 4];
+	b[3] -= b[1] * LU[3 + 1 * 4];
+	b[3] -= b[2] * LU[3 + 2 * 4];
+
+	/* backward substitution, column version, solves U * y = b */
+#if 1
+	/* hand-unrolled, 25% faster for whole function */
+	b[3] /= LU[3 + 3 * 4];
+	b[0] -= b[3] * LU[0 + 3 * 4];
+	b[1] -= b[3] * LU[1 + 3 * 4];
+	b[2] -= b[3] * LU[2 + 3 * 4];
+
+	b[2] /= LU[2 + 2 * 4];
+	b[0] -= b[2] * LU[0 + 2 * 4];
+	b[1] -= b[2] * LU[1 + 2 * 4];
+
+	b[1] /= LU[1 + 1 * 4];
+	b[0] -= b[1] * LU[0 + 1 * 4];
+
+	b[0] /= LU[0 + 0 * 4];
+#else
+	for (j = 3; j > 0; --j) {
+		b[j] /= LU[j + j * 4];
+		for (k = 0; k < j; ++k)
+			b[k] -= b[j] * LU[k + j * 4];
+	}
+
+	b[0] /= LU[0 + 0 * 4];
+#endif
+
+	/* the result */
+	for (j = 0; j < 4; ++j)
+		v->f[j] = b[j];
 }
diff --git a/src/matrix.h b/src/matrix.h
index 2c3285a..21c1e78 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -1,5 +1,6 @@
 /*
  * Copyright Â© 2008-2011 Kristian HÃ¸gsberg
+ * Copyright Â© 2012 Collabora, Ltd.
  *
  * Permission to use, copy, modify, distribute, and sell this software and
  * its documentation for any purpose is hereby granted without fee, provided
@@ -27,6 +28,11 @@ struct weston_matrix {
 	GLfloat d[16];
 };
 
+struct weston_inverse_matrix {
+	double LU[16];	/* column-major */
+	unsigned p[4];	/* permutation */
+};
+
 struct weston_vector {
 	GLfloat f[4];
 };
@@ -44,7 +50,10 @@ void
 weston_matrix_transform(struct weston_matrix *matrix, struct weston_vector *v);
 
 int
-weston_matrix_invert(struct weston_matrix *inverse,
+weston_matrix_invert(struct weston_inverse_matrix *inverse,
 		     const struct weston_matrix *matrix);
+void
+weston_matrix_inverse_transform(struct weston_inverse_matrix *inverse,
+				struct weston_vector *v);
 
 #endif /* WESTON_MATRIX_H */
-- 
2.7.4