From: Gheorghe-Teodor Bercea Date: Thu, 8 Mar 2018 18:44:02 +0000 (+0000) Subject: [OpenMP][libomptarget] Fix union. X-Git-Tag: llvmorg-7.0.0-rc1~11085 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d5e5992f9a2ef811536d42571de7d312774ad928;p=platform%2Fupstream%2Fllvm.git [OpenMP][libomptarget] Fix union. Summary: To make the two parts of the union have the same size, the size of vect needs to be increased by 16 bits. Reviewers: grokos, carlo.bertolli, caomhin, ABataev Reviewed By: grokos, ABataev Subscribers: fedor.sergeev, guansong, openmp-commits Differential Revision: https://reviews.llvm.org/D44254 llvm-svn: 327040 --- diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h index 3ee32f9..9ceebfc 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -107,27 +107,27 @@ public: // methods for flags INLINE omp_sched_t GetRuntimeSched(); INLINE void SetRuntimeSched(omp_sched_t sched); - INLINE int IsDynamic() { return data.items.flags & TaskDescr_IsDynamic; } + INLINE int IsDynamic() { return items.flags & TaskDescr_IsDynamic; } INLINE void SetDynamic() { - data.items.flags = data.items.flags | TaskDescr_IsDynamic; + items.flags = items.flags | TaskDescr_IsDynamic; } INLINE void ClearDynamic() { - data.items.flags = data.items.flags & (~TaskDescr_IsDynamic); + items.flags = items.flags & (~TaskDescr_IsDynamic); } - INLINE int InParallelRegion() { return data.items.flags & TaskDescr_InPar; } + INLINE int InParallelRegion() { return items.flags & TaskDescr_InPar; } INLINE int InL2OrHigherParallelRegion() { - return data.items.flags & TaskDescr_InParL2P; + return items.flags & TaskDescr_InParL2P; } INLINE int IsParallelConstruct() { - return data.items.flags & TaskDescr_IsParConstr; + return items.flags & TaskDescr_IsParConstr; } INLINE int IsTaskConstruct() { return !IsParallelConstruct(); } // methods for other fields - INLINE uint16_t &NThreads() { return data.items.nthreads; } - INLINE uint16_t &ThreadLimit() { return data.items.threadlimit; } - INLINE uint16_t &ThreadId() { return data.items.threadId; } - INLINE uint16_t &ThreadsInTeam() { return data.items.threadsInTeam; } - INLINE uint64_t &RuntimeChunkSize() { return data.items.runtimeChunkSize; } + INLINE uint16_t &NThreads() { return items.nthreads; } + INLINE uint16_t &ThreadLimit() { return items.threadlimit; } + INLINE uint16_t &ThreadId() { return items.threadId; } + INLINE uint16_t &ThreadsInTeam() { return items.threadsInTeam; } + INLINE uint64_t &RuntimeChunkSize() { return items.runtimeChunkSize; } INLINE omptarget_nvptx_TaskDescr *GetPrevTaskDescr() { return prev; } INLINE void SetPrevTaskDescr(omptarget_nvptx_TaskDescr *taskDescr) { prev = taskDescr; @@ -160,18 +160,15 @@ private: static const uint8_t TaskDescr_IsParConstr = 0x20; static const uint8_t TaskDescr_InParL2P = 0x40; - union { // both have same size - uint64_t vect[2]; - struct TaskDescr_items { - uint8_t flags; // 6 bit used (see flag above) - uint8_t unused; - uint16_t nthreads; // thread num for subsequent parallel regions - uint16_t threadlimit; // thread limit ICV - uint16_t threadId; // thread id - uint16_t threadsInTeam; // threads in current team - uint64_t runtimeChunkSize; // runtime chunk size - } items; - } data; + struct TaskDescr_items { + uint8_t flags; // 6 bit used (see flag above) + uint8_t unused; + uint16_t nthreads; // thread num for subsequent parallel regions + uint16_t threadlimit; // thread limit ICV + uint16_t threadId; // thread id + uint16_t threadsInTeam; // threads in current team + uint64_t runtimeChunkSize; // runtime chunk size + } items; omptarget_nvptx_TaskDescr *prev; }; diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h index 7c786b7..435a034 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h @@ -18,7 +18,7 @@ INLINE omp_sched_t omptarget_nvptx_TaskDescr::GetRuntimeSched() { // sched starts from 1..4; encode it as 0..3; so add 1 here - uint8_t rc = (data.items.flags & TaskDescr_SchedMask) + 1; + uint8_t rc = (items.flags & TaskDescr_SchedMask) + 1; return (omp_sched_t)rc; } @@ -26,9 +26,9 @@ INLINE void omptarget_nvptx_TaskDescr::SetRuntimeSched(omp_sched_t sched) { // sched starts from 1..4; encode it as 0..3; so sub 1 here uint8_t val = ((uint8_t)sched) - 1; // clear current sched - data.items.flags &= ~TaskDescr_SchedMask; + items.flags &= ~TaskDescr_SchedMask; // set new sched - data.items.flags |= val; + items.flags |= val; } INLINE void omptarget_nvptx_TaskDescr::InitLevelZeroTaskDescr() { @@ -38,12 +38,12 @@ INLINE void omptarget_nvptx_TaskDescr::InitLevelZeroTaskDescr() { // dyn is off (unused now anyway, but may need to sample from host ?) // not in parallel - data.items.flags = 0; - data.items.nthreads = GetNumberOfProcsInTeam(); + items.flags = 0; + items.nthreads = GetNumberOfProcsInTeam(); ; // threads: whatever was alloc by kernel - data.items.threadId = 0; // is master - data.items.threadsInTeam = 1; // sequential - data.items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1 + items.threadId = 0; // is master + items.threadsInTeam = 1; // sequential + items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1 } // This is called when all threads are started together in SPMD mode. @@ -56,20 +56,19 @@ INLINE void omptarget_nvptx_TaskDescr::InitLevelOneTaskDescr( // dyn is off (unused now anyway, but may need to sample from host ?) // in L1 parallel - data.items.flags = + items.flags = TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel - data.items.nthreads = 0; // # threads for subsequent parallel region - data.items.threadId = + items.nthreads = 0; // # threads for subsequent parallel region + items.threadId = GetThreadIdInBlock(); // get ids from cuda (only called for 1st level) - data.items.threadsInTeam = tnum; - data.items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1 + items.threadsInTeam = tnum; + items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1 prev = parentTaskDescr; } INLINE void omptarget_nvptx_TaskDescr::CopyData( omptarget_nvptx_TaskDescr *sourceTaskDescr) { - data.vect[0] = sourceTaskDescr->data.vect[0]; - data.vect[1] = sourceTaskDescr->data.vect[1]; + items = sourceTaskDescr->items; } INLINE void @@ -87,7 +86,7 @@ INLINE void omptarget_nvptx_TaskDescr::CopyParent( INLINE void omptarget_nvptx_TaskDescr::CopyForExplicitTask( omptarget_nvptx_TaskDescr *parentTaskDescr) { CopyParent(parentTaskDescr); - data.items.flags = data.items.flags & ~TaskDescr_IsParConstr; + items.flags = items.flags & ~TaskDescr_IsParConstr; ASSERT0(LT_FUSSY, IsTaskConstruct(), "expected task"); } @@ -95,9 +94,9 @@ INLINE void omptarget_nvptx_TaskDescr::CopyToWorkDescr( omptarget_nvptx_TaskDescr *masterTaskDescr, uint16_t tnum) { CopyParent(masterTaskDescr); // overrwrite specific items; - data.items.flags |= + items.flags |= TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel - data.items.threadsInTeam = tnum; // set number of threads + items.threadsInTeam = tnum; // set number of threads } INLINE void omptarget_nvptx_TaskDescr::CopyFromWorkDescr( @@ -114,16 +113,16 @@ INLINE void omptarget_nvptx_TaskDescr::CopyFromWorkDescr( // never enters this region. When a parallel region is executed serially, // the threadId is set to 0 elsewhere and the kmpc_serialized_* functions // are called, which never activate this region. - data.items.threadId = + items.threadId = GetThreadIdInBlock(); // get ids from cuda (only called for 1st level) } INLINE void omptarget_nvptx_TaskDescr::CopyConvergentParent( omptarget_nvptx_TaskDescr *parentTaskDescr, uint16_t tid, uint16_t tnum) { CopyParent(parentTaskDescr); - data.items.flags |= TaskDescr_InParL2P; // In L2+ parallelism - data.items.threadsInTeam = tnum; // set number of threads - data.items.threadId = tid; + items.flags |= TaskDescr_InParL2P; // In L2+ parallelism + items.threadsInTeam = tnum; // set number of threads + items.threadId = tid; } ////////////////////////////////////////////////////////////////////////////////