* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "common.h"
-#include <vecintrin.h>
+#include "vector-common.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
+
#ifdef COMPLEX
#error "Handling for complex numbers is not supported in this kernel"
#endif
* 3, May 2008.
*/
-#define VLEN_BYTES 16
-#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))
-
-typedef FLOAT vector_float __attribute__ ((vector_size (16)));
-
-/**
- * Load a vector into register, and hint on 8-byte alignment to improve
- * performance. gcc-9 and newer will create these hints by itself. For older
- * compiler versions, use inline assembly to explicitly express the hint.
- * Provide explicit hex encoding to cater for binutils versions that do not know
- * about vector-load with alignment hints yet.
- *
- * Note that, for block sizes where we apply vectorization, vectors in A will
- * always be 8-byte aligned.
- */
-static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
- vector_float const *restrict addr = (vector_float const *restrict)a;
- vector_float y;
-
-#if __GNUC__ < 9 && !defined(__clang__)
- // hex-encode vl %[out],%[addr],3
- asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
- : [ out ] "=v"(y)
- : [ addr ] "R"(*addr));
-#else
- y = *addr;
-#endif
-
- return y;
-}
-
/**
* Calculate for a row-block in C_i of size ROWSxCOLS using vector intrinsics.
*
--- /dev/null
+/*
+ * Copyright (c) IBM Corporation 2020.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of the OpenBLAS project nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <vecintrin.h>
+
+#define VLEN_BYTES 16 /* size in bytes of one vector_float (used as its vector_size below) */
+#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT)) /* number of FLOAT elements held by one vector_float */
+
+typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES))); /* FLOAT is not defined in this header; it must already be visible at the point of inclusion */
+
+/**
+ * Load a vector into register, and hint on 8-byte alignment to improve
+ * performance. gcc-9 and newer will create these hints by itself. For older
+ * compiler versions, use inline assembly to explicitly express the hint.
+ * Provide explicit hex encoding to cater for binutils versions that do not know
+ * about vector-load with alignment hints yet.
+ *
+ * Note that, for block sizes where we apply vectorization, vectors in A will
+ * always be 8-byte aligned.
+ *
+ * @param a  address of the first FLOAT to load; one whole vector_float is
+ *           read starting at this address.
+ * @return   the loaded elements as a vector_float.
+ *
+ * NOTE(review): no include guard is visible at the top of this header, so
+ * including it twice in one translation unit would redefine this function --
+ * confirm that every user includes it exactly once.
+ */
+static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
+ vector_float const *restrict addr = (vector_float const *restrict)a; // view the FLOAT pointer as a pointer to one whole vector
+ vector_float y; // result; assigned by exactly one of the two branches below
+
+#if __GNUC__ < 9 && !defined(__clang__)
+ // hex-encode vl %[out],%[addr],3 (vector load; the trailing 3 is the alignment hint described above)
+ asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
+ : [ out ] "=v"(y) // output operand: y
+ : [ addr ] "R"(*addr)); // input operand: the memory addr points to
+#else
+ y = *addr; // gcc >= 9 or clang: plain vector load, instruction selection left to the compiler
+#endif
+
+ return y;
+}